author    Dimitry Andric <dim@FreeBSD.org>  2020-01-24 22:00:03 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2020-01-24 22:00:03 +0000
commit    480093f4440d54b30b3025afeac24b48f2ba7a2e (patch)
tree      162e72994062888647caf0d875428db9445491a8 /contrib/llvm-project/llvm
parent    489b1cf2ecf5b9b4a394857987014bfb09067726 (diff)
parent    706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Merge ^/vendor/llvm-project/master up to its last change (upstream commit
e26a78e70), and resolve conflicts.
Notes:
svn path=/projects/clang1000-import/; revision=357095
Diffstat (limited to 'contrib/llvm-project/llvm')
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Analysis.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/BitReader.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/BitWriter.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Comdat.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Core.h | 13
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/DebugInfo.h | 17
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Disassembler.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Error.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/ErrorHandling.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/ExecutionEngine.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/ExternC.h | 39
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/IRReader.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Initialization.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/LinkTimeOptimizer.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Linker.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Object.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/OrcBindings.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Remarks.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Support.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Target.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/TargetMachine.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Transforms/Coroutines.h | 17
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Transforms/IPO.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Transforms/InstCombine.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Transforms/PassManagerBuilder.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Transforms/Scalar.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Transforms/Utils.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Transforms/Vectorize.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/Types.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm-c/lto.h | 25
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/APFloat.h | 19
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/APInt.h | 71
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/ArrayRef.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h | 32
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/DenseMap.h | 16
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/DirectedGraph.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/EnumeratedArray.h | 48
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/FloatingPointMode.h | 62
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/FoldingSet.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/Hashing.h | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/ImmutableSet.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/Optional.h | 16
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/PointerUnion.h | 12
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/SCCIterator.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/STLExtras.h | 55
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h | 26
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/SmallSet.h | 25
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/SmallVector.h | 11
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/Statistic.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/StringRef.h | 11
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/Triple.h | 17
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/Twine.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/iterator.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/AliasSetTracker.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h | 14
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/DDG.h | 97
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/DependenceAnalysis.h | 11
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/DependenceGraphBuilder.h | 61
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/GlobalsModRef.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/GuardUtils.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/InstructionSimplify.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/IntervalPartition.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/LazyValueInfo.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 18
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h | 17
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfoImpl.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/LoopPass.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSA.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/MustExecute.h | 55
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/PhiValues.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/PostDominators.h | 11
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/PtrUseVisitor.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h | 69
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h | 102
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h | 124
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 34
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/Utils/Local.h | 27
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h | 14
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def | 11
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Analysis/VectorUtils.h | 64
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.def | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/BinaryFormat/MinidumpConstants.def | 31
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeWriter.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Bitcode/LLVMBitCodes.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Bitstream/BitstreamReader.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h | 15
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.inc | 107
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/DFAPacketizer.h | 45
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/DIE.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/FaultMaps.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h | 21
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h | 15
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h | 38
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h | 249
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h | 15
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h | 12
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h | 46
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h | 45
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/LiveInterval.h | 27
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervalUnion.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h | 13
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/LiveStacks.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/LowLevelType.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h | 83
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIParser.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h | 14
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h | 45
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominators.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h | 58
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h | 15
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBundle.h | 255
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopInfo.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopUtils.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineMemOperand.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h | 19
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOutliner.h | 12
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineScheduler.h | 19
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSizeOpts.h | 39
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/ModuloSchedule.h | 17
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h | 83
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/ParallelCG.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h | 12
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/PseudoSourceValue.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h | 44
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterUsageInfo.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h | 87
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h | 14
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 120
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h | 24
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/StackProtector.h | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/TailDuplicator.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/TargetCallingConv.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h | 13
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h | 88
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h | 298
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSchedule.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h | 30
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h | 198
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h | 330
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h | 169
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h | 28
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h | 52
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h | 37
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h | 18
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 17
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h | 153
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFLocationExpression.h | 49
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 44
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h | 31
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 56
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h | 49
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/LineTable.h | 21
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h | 61
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/Range.h | 18
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h | 82
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h | 427
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITSymbol.h | 18
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Core.h | 399
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h | 58
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h | 28
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h | 23
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h | 76
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h (renamed from contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h) | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCUtils.h (renamed from contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h) | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h (renamed from contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RawByteChannel.h) | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h | 35
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h | 109
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 250
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 289
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Argument.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Attributes.td | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/AutoUpgrade.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/CallSite.h | 35
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/CallingConv.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Constant.h | 18
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/ConstantRange.h | 30
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Constants.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/ConstrainedOps.def | 86
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/DIBuilder.h | 18
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/DebugInfoFlags.def | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/DebugInfoMetadata.h | 125
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Dominators.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/FPEnv.h | 70
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Function.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/GlobalValue.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h | 343
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/IRPrintingPasses.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/InstVisitor.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/InstrTypes.h | 34
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Instruction.def | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Instruction.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Instructions.h | 71
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h | 126
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.h | 11
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td | 92
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAArch64.td | 854
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsARM.td | 350
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsNVVM.td | 57
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsRISCV.td | 90
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/LLVMContext.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManager.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManagers.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Metadata.h | 29
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Module.h | 39
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/ModuleSummaryIndex.h | 51
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/NoFolder.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/Operator.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/PassManager.h | 15
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h | 67
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/RemarkStreamer.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/RuntimeLibcalls.def | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/IR/ValueHandle.h | 20
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/InitializePasses.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/LTO/Config.h | 15
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/LTO/LTO.h | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/LTO/LTOBackend.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCAsmBackend.h | 49
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfo.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfoELF.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfoXCOFF.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCAssembler.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCCodeEmitter.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCCodePadder.h | 241
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCDwarf.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCELFStreamer.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCFragment.h | 245
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCInst.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCInstPrinter.h | 53
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCInstrDesc.h | 24
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCObjectFileInfo.h | 187
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCObjectStreamer.h | 28
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCParser/AsmCond.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h | 50
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCRegisterInfo.h | 110
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCSection.h | 34
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCSectionXCOFF.h | 14
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCStreamer.h | 25
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCSymbol.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCSymbolWasm.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCSymbolXCOFF.h | 14
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/MC/MCXCOFFStreamer.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Object/ELF.h | 22
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Object/MachO.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Object/ObjectFile.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Object/Wasm.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Object/XCOFFObjectFile.h | 27
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFYAML.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ObjectYAML/ELFYAML.h | 260
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ObjectYAML/YAML.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Pass.h | 53
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Passes/PassBuilder.h | 17
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfReader.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfWriter.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Remarks/Remark.h | 36
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Remarks/RemarkFormat.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Remarks/RemarkLinker.h | 100
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def | 22
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/AMDGPUMetadata.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.def | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/Allocator.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/Automaton.h | 12
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamArray.h | 11
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamReader.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamRef.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/CodeGen.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/CommandLine.h | 126
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/Compiler.h | 26
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/CrashRecoveryContext.h | 16
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/DataExtractor.h | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/Error.h | 16
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/FileCheck.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/FileCollector.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/FileOutputBuffer.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/FormatVariadic.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/GenericDomTree.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/InitLLVM.h | 11
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/JSON.h | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/KnownBits.h | 15
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/LineIterator.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/LockFileManager.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/LowLevelTypeImpl.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h | 98
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/MathExtras.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/Memory.h | 11
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/Options.h | 119
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/Path.h | 43
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/Signals.h | 36
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/SourceMgr.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/SpecialCaseList.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/TargetOpcodes.def | 13
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/TargetRegistry.h | 17
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/Threading.h | 30
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/TimeProfiler.h | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/Timer.h | 21
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/TypeSize.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/VersionTuple.h | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h | 26
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/YAMLTraits.h | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Support/raw_ostream.h | 29
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/TableGen/Record.h | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Target/GenericOpcodes.td | 30
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td | 49
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Target.td | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Target/Target.td | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Target/TargetMachine.h | 9
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Target/TargetOptions.h | 12
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td | 36
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/CFGuard.h | 26
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h | 909
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/IPO/FunctionImport.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/IPO/MergeFunctions.h | 32
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombine.h | 17
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h | 17
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h | 2
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/GVN.h | 27
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/JumpThreading.h | 12
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LICM.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h | 24
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h | 7
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h | 56
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Debugify.h (renamed from contrib/llvm-project/llvm/tools/opt/Debugify.h) | 10
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h | 13
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/GuardUtils.h | 19
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h | 37
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Local.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h | 5
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ModuleUtils.h | 8
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SizeOpts.h | 87
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h | 4
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h | 1
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h | 3
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/XRay/FDRRecords.h | 6
-rw-r--r-- contrib/llvm-project/llvm/include/llvm/module.modulemap | 31
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp | 140
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp | 22
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/DDG.cpp | 112
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp | 64
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp | 179
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp | 20
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp | 75
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp | 52
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp | 906
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp | 221
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp | 65
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/Lint.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/Loads.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp | 103
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp | 198
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp | 63
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp | 29
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp | 285
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp | 49
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp | 64
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 18
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp | 38
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp | 223
-rw-r--r-- contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp | 67
-rw-r--r-- contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp | 51
-rw-r--r-- contrib/llvm-project/llvm/lib/AsmParser/LLParser.h | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/AsmParser/LLToken.h | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/BinaryFormat/Dwarf.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/BinaryFormat/XCOFF.cpp | 34
-rw-r--r-- contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 49
-rw-r--r-- contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 145
-rw-r--r-- contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp | 38
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp | 27
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 144
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 72
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 32
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 38
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 52
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 359
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 32
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 28
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 61
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp | 174
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp | 120
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp | 246
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp | 62
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp | 44
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp | 46
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/FuncletLayout.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 172
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 74
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 70
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 131
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 248
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 22
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp | 115
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp | 36
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp | 659
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp | 226
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp | 23
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp | 30
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp | 45
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp | 29
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 63
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 326
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 45
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp | 35
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 424
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h | 98
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 84
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp | 24
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 236
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp | 52
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp | 23
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp | 31
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp | 63
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp | 67
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp | 148
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp | 68
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp | 208
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp | 122
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp | 93
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp | 27
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp | 217
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp | 54
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 29
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 141
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp | 141
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp | 227
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 906
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 20
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 770
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 1388
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 458
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 65
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 35
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 1020
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 488
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 438
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 698
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 27
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 20
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 209
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 587
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp | 24
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp | 80
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp | 84
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 61
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp (renamed from contrib/llvm-project/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp) | 492
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp | 136
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp | 144
-rw-r--r-- contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp | 209
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 154
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 121
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp | 524
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp | 76
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 96
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFLocationExpression.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 163
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp | 80
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp | 106
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 22
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp | 110
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LineTable.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LookupResult.cpp | 69
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/GSYM/Range.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp | 94
-rw-r--r-- contrib/llvm-project/llvm/lib/Demangle/ItaniumDemangle.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp | 655
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h | 96
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp | 93
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp | 32
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h | 68
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp | 27
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp | 31
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp | 653
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp | 68
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp | 112
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 36
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Legacy.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp | 35
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/OrcError.cpp (renamed from contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcError.cpp) | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/RPCError.cpp (renamed from contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RPCUtils.cpp) | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h | 365
-rw-r--r-- contrib/llvm-project/llvm/lib/ExecutionEngine/TargetSelect.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPConstants.cpp | 87
-rw-r--r-- contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 632
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/AbstractCallSite.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp | 65
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Attributes.cpp | 36
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp | 77
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp | 60
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp | 150
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Constants.cpp | 96
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/ConstantsContext.h | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Core.cpp | 22
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp | 31
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/DataLayout.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp | 56
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Dominators.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/FPEnv.cpp | 78
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Function.cpp | 30
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp | 66
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp | 38
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Instruction.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Instructions.cpp | 67
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp | 131
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp | 280
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Metadata.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Module.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp | 56
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Pass.cpp | 45
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/RemarkStreamer.cpp | 33
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/User.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Value.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/IR/Verifier.cpp | 213
-rw-r--r-- contrib/llvm-project/llvm/lib/LTO/LTO.cpp | 50
-rw-r--r-- contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp | 77
-rw-r--r-- contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/LTO/SummaryBasedOptimizations.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/Linker/IRMover.cpp | 31
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp | 26
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp | 26
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCAsmInfoELF.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCAsmInfoXCOFF.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp | 104
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCCodePadder.cpp | 370
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCContext.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCDisassembler/Disassembler.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp | 9
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCExpr.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCFragment.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCInstPrinter.cpp90
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp130
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp90
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCParser/COFFAsmParser.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCSection.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCSymbolELF.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCTargetOptions.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCValue.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCXCOFFStreamer.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp368
-rw-r--r--contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Object/ELF.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp54
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/DWARFEmitter.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp559
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp254
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/MinidumpYAML.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/YAML.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp74
-rw-r--r--contrib/llvm-project/llvm/lib/Passes/PassRegistry.def6
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Remarks/RemarkFormat.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Remarks/RemarkLinker.cpp126
-rw-r--r--contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Support/AMDGPUMetadata.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Support/APFloat.cpp184
-rw-r--r--contrib/llvm-project/llvm/lib/Support/APInt.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Support/ARMTargetParser.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Support/BinaryStreamReader.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Support/CRC.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Support/CommandLine.cpp209
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Compression.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Support/CrashRecoveryContext.cpp73
-rw-r--r--contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Error.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Support/FileCheck.cpp272
-rw-r--r--contrib/llvm-project/llvm/lib/Support/FileCheckImpl.h175
-rw-r--r--contrib/llvm-project/llvm/lib/Support/FileOutputBuffer.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Host.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Support/KnownBits.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Support/LockFileManager.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Options.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Parallel.cpp82
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Path.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Support/SHA1.cpp54
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Signals.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Statistic.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Support/StringRef.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Support/TargetParser.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Threading.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/Support/TimeProfiler.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Timer.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Triple.cpp269
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Unix/Memory.inc6
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Unix/Path.inc8
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc50
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Unix/Threading.inc63
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Unix/Unix.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Windows/Memory.inc8
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Windows/Process.inc7
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Windows/Program.inc2
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc63
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Windows/Threading.inc50
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Windows/WindowsSupport.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp64
-rw-r--r--contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/Main.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/Record.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/TGLexer.h26
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp258
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/TGParser.h74
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td123
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallLowering.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp420
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp201
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp1219
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrAtomics.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td165
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp1037
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td255
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp381
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp433
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td1243
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td850
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td1012
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredicates.td80
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td1261
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def144
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp45
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp101
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp573
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td252
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp425
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp675
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td141
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp208
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td111
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp235
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp265
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp70
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp170
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td132
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td135
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp697
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp180
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp158
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td43
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCAsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCBranchFinalize.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCOptAddrMode.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/A15SDOptimizer.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARM.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARM.td32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp89
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h128
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.td35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp611
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp727
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td2779
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td169
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td228
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp968
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp333
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp105
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp95
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp301
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp167
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp161
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp121
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h53
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp91
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp86
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFAsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp89
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp163
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BTF.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp157
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp96
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenMux.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp89
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp179
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsics.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPseudo.td20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVExtract.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/RDFLiveness.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp387
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrFPU.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrInfo.td9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips.td12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips32r6InstrInfo.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips64r6InstrInfo.td7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsCallingConv.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsCondMov.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsDSPInstrInfo.td11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp72
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp230
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp211
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFormats.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td184
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstructionSelector.cpp148
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp113
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetStreamer.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/ManagedStringPool.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp90
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td217
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td173
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp356
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp401
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp1362
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h936
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td121
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp551
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td297
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td190
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp (renamed from contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp)441
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp164
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp390
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h45
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp86
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp259
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/DelaySlotFiller.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp4
-rwxr-xr-xcontrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp169
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp264
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp379
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td60
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp96
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp118
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp106
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h53
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VE.h109
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VE.td56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp325
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h81
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp70
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp137
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h62
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td75
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp133
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td288
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp69
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp133
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp99
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h73
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp108
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h50
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td59
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp224
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp1670
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp1938
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h69
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp463
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h294
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp1057
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86.td133
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CmovConversion.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CondBrFolding.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp6
-rwxr-xr-xcontrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp179
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp4018
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h65
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td705
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td141
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td96
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp173
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td560
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp166
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp183
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86OptimizeLEAs.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86PadShortFunction.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RetpolineThunks.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td1548
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp146
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86WinAllocaExpander.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/CFGuard/CFGuard.cpp305
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp55
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp2811
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/BlockExtractor.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp115
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalDCE.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp155
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalSplit.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/InlineSimple.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp74
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/PruneEH.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp168
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp121
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp175
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp249
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp70
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp181
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp104
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp99
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp95
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp201
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp61
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp221
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp688
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantProp.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp95
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp99
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp329
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp927
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp138
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp253
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp105
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerAtomic.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp894
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp134
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp144
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp189
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp (renamed from contrib/llvm-project/llvm/tools/opt/Debugify.cpp)32
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp88
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp186
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp99
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Mem2Reg.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp73
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp119
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp88
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SymbolRewriter.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp1011
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp965
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h44
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp61
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h365
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (renamed from contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp)14
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.h (renamed from contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.h)12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/XRay/InstrumentationMap.cpp22
-rw-r--r--contrib/llvm-project/llvm/tools/bugpoint/CrashDebugger.cpp163
-rw-r--r--contrib/llvm-project/llvm/tools/bugpoint/bugpoint.cpp21
-rw-r--r--contrib/llvm-project/llvm/tools/llc/llc.cpp28
-rw-r--r--contrib/llvm-project/llvm/tools/lli/RemoteJITUtils.h2
-rw-r--r--contrib/llvm-project/llvm/tools/lli/lli.cpp29
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp116
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp5
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp5
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterJson.cpp16
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp32
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-diff/DifferenceEngine.cpp9
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-dis/llvm-dis.cpp100
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-dwarfdump/Statistics.cpp153
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-lto2/llvm-lto2.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.cpp9
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.h3
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp5
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp3
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp4
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp6
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-nm/llvm-nm.cpp13
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp13
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.h6
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Reader.cpp4
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.cpp22
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/CommonOpts.td8
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.cpp89
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.h12
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp23
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp207
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.h84
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/InstallNameToolOpts.td22
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp11
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp200
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp40
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp19
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.cpp64
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.h82
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp20
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp12
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objdump/MachODump.cpp37
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp56
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-pdbutil/InputFile.h2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp8
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp1391
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp2
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h1
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/Win64EHDumper.cpp54
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/XCOFFDumper.cpp13
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp9
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp5
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp24
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp23
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-xray/xray-converter.cpp15
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-xray/xray-extract.cpp6
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-xray/xray-graph-diff.cpp43
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-xray/xray-graph.cpp23
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-xray/xray-stacks.cpp9
-rw-r--r--contrib/llvm-project/llvm/tools/opt/NewPMDriver.cpp16
-rw-r--r--contrib/llvm-project/llvm/tools/opt/PassPrinters.cpp38
-rw-r--r--contrib/llvm-project/llvm/tools/opt/PassPrinters.h4
-rw-r--r--contrib/llvm-project/llvm/tools/opt/opt.cpp72
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/AsmMatcherEmitter.cpp2
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/AsmWriterEmitter.cpp276
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/AsmWriterInst.h2
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp2
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.cpp80
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.h16
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.cpp23
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.h24
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenIntrinsics.h4
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.h8
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp21
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.h4
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherGen.cpp1
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/DFAPacketizerEmitter.cpp478
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp3
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GICombinerEmitter.cpp575
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp138
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.h243
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp38
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.h70
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.cpp48
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h115
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.cpp153
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.h133
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp69
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h141
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp37
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h60
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp777
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.h629
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/GlobalISelEmitter.cpp184
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/InstrDocsEmitter.cpp1
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/InstrInfoEmitter.cpp6
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/IntrinsicEmitter.cpp142
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/OptEmitter.cpp84
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/OptEmitter.h16
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/OptParserEmitter.cpp75
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/OptRSTEmitter.cpp86
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp218
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/RegisterInfoEmitter.cpp2
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp4
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/SequenceToOffsetTable.h4
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/TableGen.cpp20
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/TableGenBackends.h7
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp88
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp6
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp82
1925 files changed, 84587 insertions, 36623 deletions
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Analysis.h b/contrib/llvm-project/llvm/include/llvm-c/Analysis.h
index cb9e8ece3c53..270b145a4d27 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Analysis.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Analysis.h
@@ -19,11 +19,10 @@
#ifndef LLVM_C_ANALYSIS_H
#define LLVM_C_ANALYSIS_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCAnalysis Analysis
@@ -58,8 +57,6 @@ void LLVMViewFunctionCFGOnly(LLVMValueRef Fn);
* @}
*/
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/BitReader.h b/contrib/llvm-project/llvm/include/llvm-c/BitReader.h
index b307ee979f8a..012c0e63d3bb 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/BitReader.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/BitReader.h
@@ -19,11 +19,10 @@
#ifndef LLVM_C_BITREADER_H
#define LLVM_C_BITREADER_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCBitReader Bit Reader
@@ -78,8 +77,6 @@ LLVMBool LLVMGetBitcodeModule2(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM);
* @}
*/
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/BitWriter.h b/contrib/llvm-project/llvm/include/llvm-c/BitWriter.h
index 187051555b9a..ea84b6593d12 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/BitWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/BitWriter.h
@@ -19,11 +19,10 @@
#ifndef LLVM_C_BITWRITER_H
#define LLVM_C_BITWRITER_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCBitWriter Bit Writer
@@ -52,8 +51,6 @@ LLVMMemoryBufferRef LLVMWriteBitcodeToMemoryBuffer(LLVMModuleRef M);
* @}
*/
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Comdat.h b/contrib/llvm-project/llvm/include/llvm-c/Comdat.h
index 81fee3fc9a6b..45b4007695fd 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Comdat.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Comdat.h
@@ -14,11 +14,10 @@
#ifndef LLVM_C_COMDAT_H
#define LLVM_C_COMDAT_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
typedef enum {
LLVMAnyComdatSelectionKind, ///< The linker may choose any COMDAT.
@@ -68,8 +67,6 @@ LLVMComdatSelectionKind LLVMGetComdatSelectionKind(LLVMComdatRef C);
*/
void LLVMSetComdatSelectionKind(LLVMComdatRef C, LLVMComdatSelectionKind Kind);
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Core.h b/contrib/llvm-project/llvm/include/llvm-c/Core.h
index b84970956666..7a39731d3e0c 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Core.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Core.h
@@ -16,11 +16,10 @@
#define LLVM_C_CORE_H
#include "llvm-c/ErrorHandling.h"
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMC LLVM-C: C interface to LLVM
@@ -127,6 +126,7 @@ typedef enum {
LLVMShuffleVector = 52,
LLVMExtractValue = 53,
LLVMInsertValue = 54,
+ LLVMFreeze = 68,
/* Atomic operators */
LLVMFence = 55,
@@ -1600,6 +1600,7 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(ExtractValueInst) \
macro(LoadInst) \
macro(VAArgInst) \
+ macro(FreezeInst) \
macro(AtomicCmpXchgInst) \
macro(AtomicRMWInst) \
macro(FenceInst)
@@ -3907,6 +3908,8 @@ LLVMValueRef LLVMBuildExtractValue(LLVMBuilderRef, LLVMValueRef AggVal,
LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef, LLVMValueRef AggVal,
LLVMValueRef EltVal, unsigned Index,
const char *Name);
+LLVMValueRef LLVMBuildFreeze(LLVMBuilderRef, LLVMValueRef Val,
+ const char *Name);
LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef, LLVMValueRef Val,
const char *Name);
@@ -4086,8 +4089,6 @@ LLVMBool LLVMIsMultithreaded(void);
* @}
*/
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif /* LLVM_C_CORE_H */
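
The Core.h hunks above add C bindings for the IR freeze instruction: an LLVMFreeze opcode, a FreezeInst entry in the instruction-kind macro table, and the LLVMBuildFreeze builder function. A minimal usage sketch, assuming a builder already positioned inside a function (the helper name is illustrative, not from the patch):

#include <llvm-c/Core.h>

/* Sketch only: freeze the result of a compare so later branches see a
 * fixed (but arbitrary) value even if an operand was poison/undef. */
LLVMValueRef emitFrozenCompare(LLVMBuilderRef B, LLVMValueRef X,
                               LLVMValueRef Y) {
  LLVMValueRef Cmp = LLVMBuildICmp(B, LLVMIntEQ, X, Y, "cmp");
  return LLVMBuildFreeze(B, Cmp, "cmp.fr"); /* new binding added above */
}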
diff --git a/contrib/llvm-project/llvm/include/llvm-c/DebugInfo.h b/contrib/llvm-project/llvm/include/llvm-c/DebugInfo.h
index 41e9f96bbb92..e933fe4b3f92 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/DebugInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/DebugInfo.h
@@ -17,10 +17,9 @@
#define LLVM_C_DEBUGINFO_H
#include "llvm-c/Core.h"
+#include "llvm-c/ExternC.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* Debug info flags.
@@ -284,15 +283,15 @@ LLVMDIBuilderCreateFile(LLVMDIBuilderRef Builder, const char *Filename,
* \param ConfigMacrosLen The length of the C string passed to \c ConfigMacros.
* \param IncludePath The path to the module map file.
* \param IncludePathLen The length of the C string passed to \c IncludePath.
- * \param ISysRoot The Clang system root (value of -isysroot).
- * \param ISysRootLen The length of the C string passed to \c ISysRoot.
+ * \param SysRoot The Clang system root (value of -isysroot).
+ * \param SysRootLen The length of the C string passed to \c SysRoot.
*/
LLVMMetadataRef
LLVMDIBuilderCreateModule(LLVMDIBuilderRef Builder, LLVMMetadataRef ParentScope,
const char *Name, size_t NameLen,
const char *ConfigMacros, size_t ConfigMacrosLen,
const char *IncludePath, size_t IncludePathLen,
- const char *ISysRoot, size_t ISysRootLen);
+ const char *SysRoot, size_t SysRootLen);
/**
* Creates a new descriptor for a namespace with the specified parent scope.
@@ -875,7 +874,7 @@ LLVMMetadataRef
LLVMDIBuilderCreateTypedef(LLVMDIBuilderRef Builder, LLVMMetadataRef Type,
const char *Name, size_t NameLen,
LLVMMetadataRef File, unsigned LineNo,
- LLVMMetadataRef Scope);
+ LLVMMetadataRef Scope, uint32_t AlignInBits);
/**
* Create debugging information entry to establish inheritance relationship
@@ -1353,8 +1352,6 @@ void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc);
*/
LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata);
-#ifdef __cplusplus
-} /* end extern "C" */
-#endif
+LLVM_C_EXTERN_C_END
#endif
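
Two of the DebugInfo.h changes above matter to C-API clients: the LLVMDIBuilderCreateModule sysroot parameters were renamed (ISysRoot to SysRoot, a documentation-level change), and LLVMDIBuilderCreateTypedef gained a trailing AlignInBits parameter, so existing calls need one extra argument. A sketch of the new call shape (the helper name and argument values are illustrative assumptions):

#include <llvm-c/DebugInfo.h>

LLVMMetadataRef makeTypedef(LLVMDIBuilderRef DIB, LLVMMetadataRef Ty,
                            LLVMMetadataRef File, LLVMMetadataRef Scope) {
  /* An AlignInBits of 0 leaves the alignment unspecified, which
   * matches what callers got before the parameter existed. */
  return LLVMDIBuilderCreateTypedef(DIB, Ty, "size_t", 6, File,
                                    /*LineNo=*/1, Scope,
                                    /*AlignInBits=*/0);
}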
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Disassembler.h b/contrib/llvm-project/llvm/include/llvm-c/Disassembler.h
index 3adcc3c47a3f..b1cb35da6687 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Disassembler.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Disassembler.h
@@ -16,6 +16,7 @@
#define LLVM_C_DISASSEMBLER_H
#include "llvm-c/DisassemblerTypes.h"
+#include "llvm-c/ExternC.h"
/**
* @defgroup LLVMCDisassembler Disassembler
@@ -24,9 +25,7 @@
* @{
*/
-#ifdef __cplusplus
-extern "C" {
-#endif /* !defined(__cplusplus) */
+LLVM_C_EXTERN_C_BEGIN
/**
* Create a disassembler for the TripleName. Symbolic disassembly is supported
@@ -106,8 +105,6 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DC, uint8_t *Bytes,
* @}
*/
-#ifdef __cplusplus
-}
-#endif /* !defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif /* LLVM_C_DISASSEMBLER_H */
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Error.h b/contrib/llvm-project/llvm/include/llvm-c/Error.h
index 52943063c697..92f81bf38304 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Error.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Error.h
@@ -14,9 +14,9 @@
#ifndef LLVM_C_ERROR_H
#define LLVM_C_ERROR_H
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "llvm-c/ExternC.h"
+
+LLVM_C_EXTERN_C_BEGIN
#define LLVMErrorSuccess 0
@@ -62,8 +62,6 @@ void LLVMDisposeErrorMessage(char *ErrMsg);
*/
LLVMErrorTypeId LLVMGetStringErrorTypeId(void);
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/ErrorHandling.h b/contrib/llvm-project/llvm/include/llvm-c/ErrorHandling.h
index 4927349d8983..c6f7ff3ed693 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/ErrorHandling.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/ErrorHandling.h
@@ -14,9 +14,9 @@
#ifndef LLVM_C_ERROR_HANDLING_H
#define LLVM_C_ERROR_HANDLING_H
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "llvm-c/ExternC.h"
+
+LLVM_C_EXTERN_C_BEGIN
typedef void (*LLVMFatalErrorHandler)(const char *Reason);
@@ -42,8 +42,6 @@ void LLVMResetFatalErrorHandler(void);
*/
void LLVMEnablePrettyStackTrace(void);
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/ExecutionEngine.h b/contrib/llvm-project/llvm/include/llvm-c/ExecutionEngine.h
index ef714cd06384..f31b97ad7623 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/ExecutionEngine.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/ExecutionEngine.h
@@ -19,13 +19,12 @@
#ifndef LLVM_C_EXECUTIONENGINE_H
#define LLVM_C_EXECUTIONENGINE_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Target.h"
#include "llvm-c/TargetMachine.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCExecutionEngine Execution Engine
@@ -193,8 +192,6 @@ LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void);
* @}
*/
-#ifdef __cplusplus
-}
-#endif /* defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/ExternC.h b/contrib/llvm-project/llvm/include/llvm-c/ExternC.h
new file mode 100644
index 000000000000..4240f7c31a3b
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm-c/ExternC.h
@@ -0,0 +1,39 @@
+/*===- llvm-c/ExternC.h - Wrapper for 'extern "C"' ----------------*- C -*-===*\
+|* *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file defines an 'extern "C"' wrapper *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_EXTERN_C_H
+#define LLVM_C_EXTERN_C_H
+
+#ifdef __clang__
+#define LLVM_C_STRICT_PROTOTYPES_BEGIN \
+ _Pragma("clang diagnostic push") \
+ _Pragma("clang diagnostic error \"-Wstrict-prototypes\"")
+#define LLVM_C_STRICT_PROTOTYPES_END _Pragma("clang diagnostic pop")
+#else
+#define LLVM_C_STRICT_PROTOTYPES_BEGIN
+#define LLVM_C_STRICT_PROTOTYPES_END
+#endif
+
+#ifdef __cplusplus
+#define LLVM_C_EXTERN_C_BEGIN \
+ extern "C" { \
+ LLVM_C_STRICT_PROTOTYPES_BEGIN
+#define LLVM_C_EXTERN_C_END \
+ LLVM_C_STRICT_PROTOTYPES_END \
+ }
+#else
+#define LLVM_C_EXTERN_C_BEGIN LLVM_C_STRICT_PROTOTYPES_BEGIN
+#define LLVM_C_EXTERN_C_END LLVM_C_STRICT_PROTOTYPES_END
+#endif
+
+#endif
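
The new ExternC.h header centralizes the extern "C" guard that every llvm-c header previously spelled out by hand; under clang it additionally promotes -Wstrict-prototypes to an error inside the wrapped region, so unprototyped declarations cannot slip into the C API. A sketch of how a header adopts the wrapper (the declaration is a made-up example, not from the patch):

#include "llvm-c/ExternC.h"

LLVM_C_EXTERN_C_BEGIN

/* Gets C linkage when included from C++. When compiled as C under
 * clang, `void f();` (no prototype) would be rejected inside this
 * region; it must be written `void f(void);`. */
void LLVMExampleEntryPoint(void);

LLVM_C_EXTERN_C_END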
diff --git a/contrib/llvm-project/llvm/include/llvm-c/IRReader.h b/contrib/llvm-project/llvm/include/llvm-c/IRReader.h
index 4d0b696e9583..5a3f633c3d91 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/IRReader.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/IRReader.h
@@ -14,11 +14,10 @@
#ifndef LLVM_C_IRREADER_H
#define LLVM_C_IRREADER_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* Read LLVM IR from a memory buffer and convert it into an in-memory Module
@@ -33,8 +32,6 @@ LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
char **OutMessage);
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Initialization.h b/contrib/llvm-project/llvm/include/llvm-c/Initialization.h
index 36c41dbd8d31..0d59de8728c8 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Initialization.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Initialization.h
@@ -16,11 +16,10 @@
#ifndef LLVM_C_INITIALIZATION_H
#define LLVM_C_INITIALIZATION_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCInitialization Initialization Routines
@@ -49,8 +48,6 @@ void LLVMInitializeTarget(LLVMPassRegistryRef R);
* @}
*/
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/LinkTimeOptimizer.h b/contrib/llvm-project/llvm/include/llvm-c/LinkTimeOptimizer.h
index 19b4f5cf7491..9ae65b8fe5e0 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/LinkTimeOptimizer.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/LinkTimeOptimizer.h
@@ -15,9 +15,9 @@
#ifndef LLVM_C_LINKTIMEOPTIMIZER_H
#define LLVM_C_LINKTIMEOPTIMIZER_H
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "llvm-c/ExternC.h"
+
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCLinkTimeOptimizer Link Time Optimization
@@ -61,8 +61,6 @@ extern "C" {
* @}
*/
-#ifdef __cplusplus
-}
-#endif
+ LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Linker.h b/contrib/llvm-project/llvm/include/llvm-c/Linker.h
index 908513041661..1ad9cc958753 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Linker.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Linker.h
@@ -14,11 +14,10 @@
#ifndef LLVM_C_LINKER_H
#define LLVM_C_LINKER_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/* This enum is provided for backwards-compatibility only. It has no effect. */
typedef enum {
@@ -34,8 +33,6 @@ typedef enum {
*/
LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src);
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Object.h b/contrib/llvm-project/llvm/include/llvm-c/Object.h
index 1e9b703a68ff..9a9596aaa08c 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Object.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Object.h
@@ -19,12 +19,11 @@
#ifndef LLVM_C_OBJECT_H
#define LLVM_C_OBJECT_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
#include "llvm/Config/llvm-config.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCObject Object file reading and writing
@@ -226,8 +225,6 @@ LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile,
* @}
*/
-#ifdef __cplusplus
-}
-#endif /* defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/OrcBindings.h b/contrib/llvm-project/llvm/include/llvm-c/OrcBindings.h
index 9e92371b5a3a..11cdade7c26f 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/OrcBindings.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/OrcBindings.h
@@ -23,12 +23,11 @@
#define LLVM_C_ORCBINDINGS_H
#include "llvm-c/Error.h"
+#include "llvm-c/ExternC.h"
#include "llvm-c/Object.h"
#include "llvm-c/TargetMachine.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
typedef struct LLVMOrcOpaqueJITStack *LLVMOrcJITStackRef;
typedef uint64_t LLVMOrcModuleHandle;
@@ -165,8 +164,6 @@ void LLVMOrcRegisterJITEventListener(LLVMOrcJITStackRef JITStack, LLVMJITEventLi
*/
void LLVMOrcUnregisterJITEventListener(LLVMOrcJITStackRef JITStack, LLVMJITEventListenerRef L);
-#ifdef __cplusplus
-}
-#endif /* extern "C" */
+LLVM_C_EXTERN_C_END
#endif /* LLVM_C_ORCBINDINGS_H */
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Remarks.h b/contrib/llvm-project/llvm/include/llvm-c/Remarks.h
index 5444aebddd60..ffe647a6554a 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Remarks.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Remarks.h
@@ -15,14 +15,16 @@
#ifndef LLVM_C_REMARKS_H
#define LLVM_C_REMARKS_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
#ifdef __cplusplus
#include <cstddef>
-extern "C" {
#else
#include <stddef.h>
#endif /* !defined(__cplusplus) */
+LLVM_C_EXTERN_C_BEGIN
+
/**
* @defgroup LLVMCREMARKS Remarks
* @ingroup LLVMC
@@ -337,8 +339,6 @@ extern uint32_t LLVMRemarkVersion(void);
* @} // endgoup LLVMCREMARKS
*/
-#ifdef __cplusplus
-}
-#endif /* !defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif /* LLVM_C_REMARKS_H */
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Support.h b/contrib/llvm-project/llvm/include/llvm-c/Support.h
index 097f784246c5..866df32efa98 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Support.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Support.h
@@ -15,11 +15,10 @@
#define LLVM_C_SUPPORT_H
#include "llvm-c/DataTypes.h"
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* This function permanently loads the dynamic library at the given path.
@@ -58,8 +57,6 @@ void *LLVMSearchForAddressOfSymbol(const char *symbolName);
*/
void LLVMAddSymbol(const char *symbolName, void *symbolValue);
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Target.h b/contrib/llvm-project/llvm/include/llvm-c/Target.h
index 4ef641eaf232..518b46d55bc3 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Target.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Target.h
@@ -19,12 +19,11 @@
#ifndef LLVM_C_TARGET_H
#define LLVM_C_TARGET_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
#include "llvm/Config/llvm-config.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCTarget Target information
@@ -288,8 +287,6 @@ unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD,
* @}
*/
-#ifdef __cplusplus
-}
-#endif /* defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/TargetMachine.h b/contrib/llvm-project/llvm/include/llvm-c/TargetMachine.h
index 28d7c096871e..f82edd948b59 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/TargetMachine.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/TargetMachine.h
@@ -19,12 +19,12 @@
#ifndef LLVM_C_TARGETMACHINE_H
#define LLVM_C_TARGETMACHINE_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Target.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
+
typedef struct LLVMOpaqueTargetMachine *LLVMTargetMachineRef;
typedef struct LLVMTarget *LLVMTargetRef;
@@ -156,8 +156,6 @@ char* LLVMGetHostCPUFeatures(void);
/** Adds the target-specific analysis passes to the pass manager. */
void LLVMAddAnalysisPasses(LLVMTargetMachineRef T, LLVMPassManagerRef PM);
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h b/contrib/llvm-project/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h
index c0b0141c3da1..db061a7ad1fc 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h
@@ -15,11 +15,10 @@
#ifndef LLVM_C_TRANSFORMS_AGGRESSIVEINSTCOMBINE_H
#define LLVM_C_TRANSFORMS_AGGRESSIVEINSTCOMBINE_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCTransformsAggressiveInstCombine Aggressive Instruction Combining transformations
@@ -35,9 +34,7 @@ void LLVMAddAggressiveInstCombinerPass(LLVMPassManagerRef PM);
* @}
*/
-#ifdef __cplusplus
-}
-#endif /* defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Transforms/Coroutines.h b/contrib/llvm-project/llvm/include/llvm-c/Transforms/Coroutines.h
index 227e7cf0a360..15798af7d661 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Transforms/Coroutines.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Transforms/Coroutines.h
@@ -19,11 +19,10 @@
#ifndef LLVM_C_TRANSFORMS_COROUTINES_H
#define LLVM_C_TRANSFORMS_COROUTINES_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCTransformsCoroutines Coroutine transformations
@@ -32,24 +31,22 @@ extern "C" {
* @{
*/
-/** See llvm::createCoroEarlyPass function. */
+/** See llvm::createCoroEarlyLegacyPass function. */
void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM);
-/** See llvm::createCoroSplitPass function. */
+/** See llvm::createCoroSplitLegacyPass function. */
void LLVMAddCoroSplitPass(LLVMPassManagerRef PM);
-/** See llvm::createCoroElidePass function. */
+/** See llvm::createCoroElideLegacyPass function. */
void LLVMAddCoroElidePass(LLVMPassManagerRef PM);
-/** See llvm::createCoroCleanupPass function. */
+/** See llvm::createCoroCleanupLegacyPass function. */
void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM);
/**
* @}
*/
-#ifdef __cplusplus
-}
-#endif /* defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif
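
The Coroutines.h edits above are comment-only, tracking the upstream rename of the legacy coroutine passes (for example, createCoroEarlyPass became createCoroEarlyLegacyPass); the C entry points themselves are unchanged. For reference, a minimal sketch wiring the coroutine lowering pipeline into a legacy pass manager, in the conventional early/split/elide/cleanup order:

#include <llvm-c/Transforms/Coroutines.h>

void addCoroutinePasses(LLVMPassManagerRef PM) {
  LLVMAddCoroEarlyPass(PM);   /* lower intrinsics that must run early */
  LLVMAddCoroSplitPass(PM);   /* split coroutines into ramp/resume/destroy */
  LLVMAddCoroElidePass(PM);   /* elide heap allocation where provable */
  LLVMAddCoroCleanupPass(PM); /* lower any remaining coroutine intrinsics */
}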
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Transforms/IPO.h b/contrib/llvm-project/llvm/include/llvm-c/Transforms/IPO.h
index 51d007581283..cde3d2460920 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Transforms/IPO.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Transforms/IPO.h
@@ -15,11 +15,10 @@
#ifndef LLVM_C_TRANSFORMS_IPO_H
#define LLVM_C_TRANSFORMS_IPO_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCTransformsIPO Interprocedural transformations
@@ -95,8 +94,6 @@ void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM);
* @}
*/
-#ifdef __cplusplus
-}
-#endif /* defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Transforms/InstCombine.h b/contrib/llvm-project/llvm/include/llvm-c/Transforms/InstCombine.h
index 166f278d9a69..ebe17d667061 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Transforms/InstCombine.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Transforms/InstCombine.h
@@ -15,11 +15,10 @@
#ifndef LLVM_C_TRANSFORMS_INSTCOMBINE_H
#define LLVM_C_TRANSFORMS_INSTCOMBINE_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCTransformsInstCombine Instruction Combining transformations
@@ -35,9 +34,7 @@ void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM);
* @}
*/
-#ifdef __cplusplus
-}
-#endif /* defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Transforms/PassManagerBuilder.h b/contrib/llvm-project/llvm/include/llvm-c/Transforms/PassManagerBuilder.h
index d164c00d49c5..6e13e18e063b 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Transforms/PassManagerBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Transforms/PassManagerBuilder.h
@@ -14,13 +14,12 @@
#ifndef LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H
#define LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
typedef struct LLVMOpaquePassManagerBuilder *LLVMPassManagerBuilderRef;
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCTransformsPassManagerBuilder Pass manager builder
@@ -83,8 +82,6 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
* @}
*/
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Transforms/Scalar.h b/contrib/llvm-project/llvm/include/llvm-c/Transforms/Scalar.h
index 6f3a3d8b3750..93d79a205195 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Transforms/Scalar.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Transforms/Scalar.h
@@ -19,11 +19,10 @@
#ifndef LLVM_C_TRANSFORMS_SCALAR_H
#define LLVM_C_TRANSFORMS_SCALAR_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCTransformsScalar Scalar transformations
@@ -166,8 +165,6 @@ void LLVMAddUnifyFunctionExitNodesPass(LLVMPassManagerRef PM);
* @}
*/
-#ifdef __cplusplus
-}
-#endif /* defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Transforms/Utils.h b/contrib/llvm-project/llvm/include/llvm-c/Transforms/Utils.h
index 63594abfa460..30d1ae63de1d 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Transforms/Utils.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Transforms/Utils.h
@@ -19,11 +19,10 @@
#ifndef LLVM_C_TRANSFORMS_UTILS_H
#define LLVM_C_TRANSFORMS_UTILS_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCTransformsUtils Transformation Utilities
@@ -45,9 +44,7 @@ void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM);
* @}
*/
-#ifdef __cplusplus
-}
-#endif /* defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Transforms/Vectorize.h b/contrib/llvm-project/llvm/include/llvm-c/Transforms/Vectorize.h
index e383481fe4f4..0de458381399 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Transforms/Vectorize.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Transforms/Vectorize.h
@@ -20,11 +20,10 @@
#ifndef LLVM_C_TRANSFORMS_VECTORIZE_H
#define LLVM_C_TRANSFORMS_VECTORIZE_H
+#include "llvm-c/ExternC.h"
#include "llvm-c/Types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCTransformsVectorize Vectorization transformations
@@ -43,8 +42,6 @@ void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM);
* @}
*/
-#ifdef __cplusplus
-}
-#endif /* defined(__cplusplus) */
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/Types.h b/contrib/llvm-project/llvm/include/llvm-c/Types.h
index 612c7d3eff32..4e02498a2348 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/Types.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/Types.h
@@ -15,10 +15,9 @@
#define LLVM_C_TYPES_H
#include "llvm-c/DataTypes.h"
+#include "llvm-c/ExternC.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* @defgroup LLVMCSupportTypes Types and Enumerations
@@ -172,8 +171,6 @@ typedef struct LLVMOpaqueBinary *LLVMBinaryRef;
* @}
*/
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm-c/lto.h b/contrib/llvm-project/llvm/include/llvm-c/lto.h
index 41e6067cf44f..97a8f4823320 100644
--- a/contrib/llvm-project/llvm/include/llvm-c/lto.h
+++ b/contrib/llvm-project/llvm/include/llvm-c/lto.h
@@ -16,6 +16,8 @@
#ifndef LLVM_C_LTO_H
#define LLVM_C_LTO_H
+#include "llvm-c/ExternC.h"
+
#ifdef __cplusplus
#include <cstddef>
#else
@@ -44,7 +46,7 @@ typedef bool lto_bool_t;
* @{
*/
-#define LTO_API_VERSION 25
+#define LTO_API_VERSION 26
/**
* \since prior to LTO_API_VERSION=3
@@ -98,9 +100,7 @@ typedef struct LLVMOpaqueLTOCodeGenerator *lto_code_gen_t;
/** opaque reference to a thin code generator */
typedef struct LLVMOpaqueThinLTOCodeGenerator *thinlto_code_gen_t;
-#ifdef __cplusplus
-extern "C" {
-#endif
+LLVM_C_EXTERN_C_BEGIN
/**
* Returns a printable string.
@@ -514,12 +514,25 @@ lto_api_version(void);
/**
* Sets options to help debug codegen bugs.
*
+ * This function takes one or more options separated by spaces.
+ * Warning: passing file paths through this function may confuse the argument
+ * parser if the paths contain spaces.
+ *
* \since prior to LTO_API_VERSION=3
*/
extern void
lto_codegen_debug_options(lto_code_gen_t cg, const char *);
/**
+ * Same as the previous function, but takes every option separately through an
+ * array.
+ *
+ * \since prior to LTO_API_VERSION=26
+ */
+extern void lto_codegen_debug_options_array(lto_code_gen_t cg,
+ const char *const *, int number);
+
+/**
* Initializes LLVM disassemblers.
* FIXME: This doesn't really belong here.
*
@@ -900,8 +913,6 @@ extern void thinlto_codegen_set_cache_size_files(thinlto_code_gen_t cg,
* @} // endgroup LLVMCTLTO_CACHING
*/
-#ifdef __cplusplus
-}
-#endif
+LLVM_C_EXTERN_C_END
#endif /* LLVM_C_LTO_H */
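A short sketch of the array form added under LTO_API_VERSION 26. The option strings here are illustrative, not prescribed by the header; the point is that each array element is passed through verbatim, so values containing spaces no longer confuse the argument parser:

    #include <llvm-c/lto.h>

    int main() {
    #if LTO_API_VERSION >= 26
      lto_code_gen_t CG = lto_codegen_create();
      // Each option is a separate array element; nothing is split on spaces.
      const char *Opts[] = {"-debug-pass=Structure", "-print-after-all"};
      lto_codegen_debug_options_array(CG, Opts, 2);
      lto_codegen_dispose(CG);
    #endif
      return 0;
    }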
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/APFloat.h b/contrib/llvm-project/llvm/include/llvm/ADT/APFloat.h
index 1c4969733791..ed25b2cd89f1 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/APFloat.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/APFloat.h
@@ -38,6 +38,7 @@ class StringRef;
class APFloat;
class raw_ostream;
+template <typename T> class Expected;
template <typename T> class SmallVectorImpl;
/// Enum that represents what fraction of the LSB truncated bits of an fp number
@@ -143,7 +144,7 @@ struct APFloatBase {
static const unsigned integerPartWidth = APInt::APINT_BITS_PER_WORD;
/// A signed type to represent a floating point number's unbiased exponent.
- typedef signed short ExponentType;
+ typedef int32_t ExponentType;
/// \name Floating Point Semantics.
/// @{
@@ -299,7 +300,7 @@ public:
bool, roundingMode);
opStatus convertFromZeroExtendedInteger(const integerPart *, unsigned int,
bool, roundingMode);
- opStatus convertFromString(StringRef, roundingMode);
+ Expected<opStatus> convertFromString(StringRef, roundingMode);
APInt bitcastToAPInt() const;
double convertToDouble() const;
float convertToFloat() const;
@@ -486,7 +487,8 @@ private:
integerPart addSignificand(const IEEEFloat &);
integerPart subtractSignificand(const IEEEFloat &, integerPart);
lostFraction addOrSubtractSignificand(const IEEEFloat &, bool subtract);
- lostFraction multiplySignificand(const IEEEFloat &, const IEEEFloat *);
+ lostFraction multiplySignificand(const IEEEFloat &, IEEEFloat);
+ lostFraction multiplySignificand(const IEEEFloat&);
lostFraction divideSignificand(const IEEEFloat &);
void incrementSignificand();
void initialize(const fltSemantics *);
@@ -525,8 +527,8 @@ private:
bool *) const;
opStatus convertFromUnsignedParts(const integerPart *, unsigned int,
roundingMode);
- opStatus convertFromHexadecimalString(StringRef, roundingMode);
- opStatus convertFromDecimalString(StringRef, roundingMode);
+ Expected<opStatus> convertFromHexadecimalString(StringRef, roundingMode);
+ Expected<opStatus> convertFromDecimalString(StringRef, roundingMode);
char *convertNormalToHexString(char *, unsigned int, bool,
roundingMode) const;
opStatus roundSignificandWithExponent(const integerPart *, unsigned int, int,
@@ -648,7 +650,7 @@ public:
cmpResult compare(const DoubleAPFloat &RHS) const;
bool bitwiseIsEqual(const DoubleAPFloat &RHS) const;
APInt bitcastToAPInt() const;
- opStatus convertFromString(StringRef, roundingMode);
+ Expected<opStatus> convertFromString(StringRef, roundingMode);
opStatus next(bool nextDown);
opStatus convertToInteger(MutableArrayRef<integerPart> Input,
@@ -851,6 +853,9 @@ public:
APFloat(const fltSemantics &Semantics) : U(Semantics) {}
APFloat(const fltSemantics &Semantics, StringRef S);
APFloat(const fltSemantics &Semantics, integerPart I) : U(Semantics, I) {}
+ template <typename T, typename = typename std::enable_if<
+ std::is_floating_point<T>::value>::type>
+ APFloat(const fltSemantics &Semantics, T V) = delete;
// TODO: Remove this constructor. This isn't faster than the first one.
APFloat(const fltSemantics &Semantics, uninitializedTag)
: U(Semantics, uninitialized) {}
@@ -1105,7 +1110,7 @@ public:
APFLOAT_DISPATCH_ON_SEMANTICS(
convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM));
}
- opStatus convertFromString(StringRef, roundingMode);
+ Expected<opStatus> convertFromString(StringRef, roundingMode);
APInt bitcastToAPInt() const {
APFLOAT_DISPATCH_ON_SEMANTICS(bitcastToAPInt());
}
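Because convertFromString now returns Expected<opStatus>, callers must consume the error on the failure path rather than relying on an assertion. A minimal sketch of the adjusted call pattern, assuming an LLVM build where these headers are available:

    #include "llvm/ADT/APFloat.h"
    #include "llvm/Support/Error.h"

    int main() {
      llvm::APFloat F(llvm::APFloat::IEEEdouble());
      llvm::Expected<llvm::APFloat::opStatus> S =
          F.convertFromString("1.25", llvm::APFloat::rmNearestTiesToEven);
      if (!S) {
        // Malformed literals now surface as llvm::Error values.
        llvm::consumeError(S.takeError());
        return 1;
      }
      return 0;
    }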
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h b/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h
index 8dce5a621bb3..0791a6d686a3 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h
@@ -389,6 +389,11 @@ public:
/// \returns true if this APInt is positive.
bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
+ /// Determine if this APInt Value is non-positive (<= 0).
+ ///
+ /// \returns true if this APInt is non-positive.
+ bool isNonPositive() const { return !isStrictlyPositive(); }
+
/// Determine if all bits are set
///
/// This checks to see if the value has all bits of the APInt are set or not.
@@ -595,8 +600,8 @@ public:
/// Constructs an APInt value that has a contiguous range of bits set. The
/// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
/// bits will be zero. For example, with parameters(32, 0, 16) you would get
- /// 0x0000FFFF. If hiBit is less than loBit then the set bits "wrap". For
- /// example, with parameters (32, 28, 4), you would get 0xF000000F.
+ /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
+ /// \p hiBit.
///
/// \param numBits the intended bit width of the result
/// \param loBit the index of the lowest bit set.
@@ -604,11 +609,23 @@ public:
///
/// \returns An APInt value with the requested bits set.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
+ assert(loBit <= hiBit && "loBit greater than hiBit");
APInt Res(numBits, 0);
Res.setBits(loBit, hiBit);
return Res;
}
+ /// Wrap version of getBitsSet.
+ /// If \p hiBit is no less than \p loBit, this is the same as getBitsSet.
+ /// If \p hiBit is less than \p loBit, the set bits "wrap". For example, with
+ /// parameters (32, 28, 4), you would get 0xF000000F.
+ static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
+ unsigned hiBit) {
+ APInt Res(numBits, 0);
+ Res.setBitsWithWrap(loBit, hiBit);
+ return Res;
+ }
+
/// Get a value with upper bits starting at loBit set.
///
/// Constructs an APInt value that has a contiguous range of bits set. The
@@ -1109,6 +1126,10 @@ public:
APInt uadd_sat(const APInt &RHS) const;
APInt ssub_sat(const APInt &RHS) const;
APInt usub_sat(const APInt &RHS) const;
+ APInt smul_sat(const APInt &RHS) const;
+ APInt umul_sat(const APInt &RHS) const;
+ APInt sshl_sat(const APInt &RHS) const;
+ APInt ushl_sat(const APInt &RHS) const;
/// Array-indexing support.
///
@@ -1245,7 +1266,7 @@ public:
/// \returns true if *this <= RHS when considered signed.
bool sle(uint64_t RHS) const { return !sgt(RHS); }
- /// Unsigned greather than comparison
+ /// Unsigned greater than comparison
///
/// Regards both *this and RHS as unsigned quantities and compares them for
/// the validity of the greater-than relationship.
@@ -1264,7 +1285,7 @@ public:
return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
}
- /// Signed greather than comparison
+ /// Signed greater than comparison
///
/// Regards both *this and RHS as signed quantities and compares them for the
/// validity of the greater-than relationship.
@@ -1342,6 +1363,19 @@ public:
/// that is greater than or equal to the current width.
APInt trunc(unsigned width) const;
+ /// Truncate to new width with unsigned saturation.
+ ///
+ /// If the APInt, treated as an unsigned integer, can be losslessly truncated
+ /// to the new bitwidth, then return the truncated APInt. Else, return the
+ /// max value.
+ APInt truncUSat(unsigned width) const;
+
+ /// Truncate to new width with signed saturation.
+ ///
+ /// If this APInt, treated as a signed integer, can be losslessly truncated to
+ /// the new bitwidth, then return the truncated APInt. Else, return either the
+ /// signed min value if the APInt was negative, or the signed max value.
+ APInt truncSSat(unsigned width) const;
+
/// Sign extend to a new width.
///
/// This operation sign extends the APInt to a new width. If the high order
@@ -1414,6 +1448,21 @@ public:
}
/// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
+ /// This function handles the "wrap" case when \p loBit > \p hiBit, and calls
+ /// setBits when \p loBit <= \p hiBit.
+ void setBitsWithWrap(unsigned loBit, unsigned hiBit) {
+ assert(hiBit <= BitWidth && "hiBit out of range");
+ assert(loBit <= BitWidth && "loBit out of range");
+ if (loBit <= hiBit) {
+ setBits(loBit, hiBit);
+ return;
+ }
+ setLowBits(hiBit);
+ setHighBits(BitWidth - loBit);
+ }
+
+ /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
+ /// This function handles the case when \p loBit <= \p hiBit.
void setBits(unsigned loBit, unsigned hiBit) {
assert(hiBit <= BitWidth && "hiBit out of range");
assert(loBit <= BitWidth && "loBit out of range");
@@ -1723,13 +1772,13 @@ public:
return BitsToDouble(getWord(0));
}
- /// Converts APInt bits to a double
+ /// Converts APInt bits to a float
///
/// The conversion does not do a translation from integer to float, it just
/// re-interprets the bits as a float. Note that it is valid to do this on
/// any bit width. Exactly 32 bits will be translated.
float bitsToFloat() const {
- return BitsToFloat(getWord(0));
+ return BitsToFloat(static_cast<uint32_t>(getWord(0)));
}
/// Converts a double to APInt bits.
@@ -2158,7 +2207,7 @@ inline float RoundAPIntToFloat(const APInt &APIVal) {
/// Converts the given APInt to a float value.
///
-/// Treast the APInt as a signed value for conversion purposes.
+/// Treats the APInt as a signed value for conversion purposes.
inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
return float(APIVal.signedRoundToDouble());
}
@@ -2194,7 +2243,7 @@ APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
/// count as an overflow, but here we want to allow values to decrease
/// and increase as long as they are within the same interval.
/// Specifically, adding of two negative numbers should not cause an
-/// overflow (as long as the magnitude does not exceed the bith width).
+/// overflow (as long as the magnitude does not exceed the bit width).
/// On the other hand, given a positive number, adding a negative
/// number to it can give a negative result, which would cause the
/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
@@ -2216,6 +2265,12 @@ APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
/// coefficients.
Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
unsigned RangeWidth);
+
+/// Compare two values, and if they are different, return the position of the
+/// most significant bit that is different in the values.
+Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
+ const APInt &B);
+
} // End of APIntOps namespace
// See friend declaration above. This additional declaration is required in
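The wrap behavior that getBitsSet previously documented now lives in the dedicated helper. A small sketch using the example values from the comments above:

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    int main() {
      // Non-wrapping range [0, 16): 0x0000FFFF.
      llvm::APInt A = llvm::APInt::getBitsSet(32, 0, 16);
      assert(A == llvm::APInt(32, 0x0000FFFF));
      // Wrapping range, loBit > hiBit: bits [28, 32) and [0, 4) -> 0xF000000F.
      llvm::APInt B = llvm::APInt::getBitsSetWithWrap(32, 28, 4);
      assert(B == llvm::APInt(32, 0xF000000F));
      return 0;
    }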
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/ArrayRef.h b/contrib/llvm-project/llvm/include/llvm/ADT/ArrayRef.h
index f6455d3fa412..3d22442918cd 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/ArrayRef.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/ArrayRef.h
@@ -97,9 +97,19 @@ namespace llvm {
/*implicit*/ constexpr ArrayRef(const T (&Arr)[N]) : Data(Arr), Length(N) {}
/// Construct an ArrayRef from a std::initializer_list.
+#if LLVM_GNUC_PREREQ(9, 0, 0)
+// Disable gcc's warning in this constructor as it generates an enormous amount
+// of messages. Anyone using ArrayRef should already be aware of the fact that
+// it does not do lifetime extension.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winit-list-lifetime"
+#endif
/*implicit*/ ArrayRef(const std::initializer_list<T> &Vec)
: Data(Vec.begin() == Vec.end() ? (T*)nullptr : Vec.begin()),
Length(Vec.size()) {}
+#if LLVM_GNUC_PREREQ(9, 0, 0)
+#pragma GCC diagnostic pop
+#endif
/// Construct an ArrayRef<const T*> from ArrayRef<T*>. This uses SFINAE to
/// ensure that only ArrayRefs of pointers can be converted.
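The warning being silenced above is pointing at a real pitfall: ArrayRef does not extend the lifetime of a braced initializer list. A brief sketch of the safe and the dangling pattern:

    #include "llvm/ADT/ArrayRef.h"

    static int sum(llvm::ArrayRef<int> Xs) {
      int S = 0;
      for (int X : Xs)
        S += X;
      return S;
    }

    int main() {
      int A = sum({1, 2, 3});              // OK: the list outlives the call.
      llvm::ArrayRef<int> Bad = {1, 2, 3}; // Dangles: backing array dies at ';'.
      (void)Bad;
      return A == 6 ? 0 : 1;
    }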
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h b/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h
index fabf5d9cd348..5284be8c4a02 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h
@@ -71,7 +71,7 @@ public:
};
class BitVector {
- typedef unsigned long BitWord;
+ typedef uintptr_t BitWord;
enum { BITWORD_SIZE = (unsigned)sizeof(BitWord) * CHAR_BIT };
@@ -187,12 +187,12 @@ public:
/// all - Returns true if all bits are set.
bool all() const {
for (unsigned i = 0; i < Size / BITWORD_SIZE; ++i)
- if (Bits[i] != ~0UL)
+ if (Bits[i] != ~BitWord(0))
return false;
// If bits remain check that they are ones. The unused bits are always zero.
if (unsigned Remainder = Size % BITWORD_SIZE)
- return Bits[Size / BITWORD_SIZE] == (1UL << Remainder) - 1;
+ return Bits[Size / BITWORD_SIZE] == (BitWord(1) << Remainder) - 1;
return true;
}
@@ -285,7 +285,7 @@ public:
unsigned LastBit = (End - 1) % BITWORD_SIZE;
Copy |= maskTrailingZeros<BitWord>(LastBit + 1);
}
- if (Copy != ~0UL) {
+ if (Copy != ~BitWord(0)) {
unsigned Result = i * BITWORD_SIZE + countTrailingOnes(Copy);
return Result < size() ? Result : -1;
}
@@ -317,7 +317,7 @@ public:
Copy |= maskTrailingOnes<BitWord>(FirstBit);
}
- if (Copy != ~0UL) {
+ if (Copy != ~BitWord(0)) {
unsigned Result =
(CurrentWord + 1) * BITWORD_SIZE - countLeadingOnes(Copy) - 1;
return Result < Size ? Result : -1;
@@ -414,21 +414,21 @@ public:
if (I == E) return *this;
if (I / BITWORD_SIZE == E / BITWORD_SIZE) {
- BitWord EMask = 1UL << (E % BITWORD_SIZE);
- BitWord IMask = 1UL << (I % BITWORD_SIZE);
+ BitWord EMask = BitWord(1) << (E % BITWORD_SIZE);
+ BitWord IMask = BitWord(1) << (I % BITWORD_SIZE);
BitWord Mask = EMask - IMask;
Bits[I / BITWORD_SIZE] |= Mask;
return *this;
}
- BitWord PrefixMask = ~0UL << (I % BITWORD_SIZE);
+ BitWord PrefixMask = ~BitWord(0) << (I % BITWORD_SIZE);
Bits[I / BITWORD_SIZE] |= PrefixMask;
I = alignTo(I, BITWORD_SIZE);
for (; I + BITWORD_SIZE <= E; I += BITWORD_SIZE)
- Bits[I / BITWORD_SIZE] = ~0UL;
+ Bits[I / BITWORD_SIZE] = ~BitWord(0);
- BitWord PostfixMask = (1UL << (E % BITWORD_SIZE)) - 1;
+ BitWord PostfixMask = (BitWord(1) << (E % BITWORD_SIZE)) - 1;
if (I < E)
Bits[I / BITWORD_SIZE] |= PostfixMask;
@@ -453,21 +453,21 @@ public:
if (I == E) return *this;
if (I / BITWORD_SIZE == E / BITWORD_SIZE) {
- BitWord EMask = 1UL << (E % BITWORD_SIZE);
- BitWord IMask = 1UL << (I % BITWORD_SIZE);
+ BitWord EMask = BitWord(1) << (E % BITWORD_SIZE);
+ BitWord IMask = BitWord(1) << (I % BITWORD_SIZE);
BitWord Mask = EMask - IMask;
Bits[I / BITWORD_SIZE] &= ~Mask;
return *this;
}
- BitWord PrefixMask = ~0UL << (I % BITWORD_SIZE);
+ BitWord PrefixMask = ~BitWord(0) << (I % BITWORD_SIZE);
Bits[I / BITWORD_SIZE] &= ~PrefixMask;
I = alignTo(I, BITWORD_SIZE);
for (; I + BITWORD_SIZE <= E; I += BITWORD_SIZE)
- Bits[I / BITWORD_SIZE] = 0UL;
+ Bits[I / BITWORD_SIZE] = BitWord(0);
- BitWord PostfixMask = (1UL << (E % BITWORD_SIZE)) - 1;
+ BitWord PostfixMask = (BitWord(1) << (E % BITWORD_SIZE)) - 1;
if (I < E)
Bits[I / BITWORD_SIZE] &= ~PostfixMask;
@@ -868,7 +868,7 @@ private:
// Then set any stray high bits of the last used word.
unsigned ExtraBits = Size % BITWORD_SIZE;
if (ExtraBits) {
- BitWord ExtraBitMask = ~0UL << ExtraBits;
+ BitWord ExtraBitMask = ~BitWord(0) << ExtraBits;
if (t)
Bits[UsedWords-1] |= ExtraBitMask;
else
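The switch from unsigned long to uintptr_t is why every 0UL/1UL literal in this file had to be rewritten: on LLP64 targets (64-bit Windows), unsigned long is only 32 bits, so shifting a 64-bit BitWord with UL-width constants would truncate. A standalone sketch of the pattern, independent of BitVector itself:

    #include <cstdint>

    int main() {
      using BitWord = std::uintptr_t; // pointer-sized: 64-bit on LP64 and LLP64
      unsigned Remainder = 40;
      if (Remainder >= sizeof(BitWord) * 8)
        return 0; // 32-bit target: the shift below would be out of range.
      // The old (1UL << Remainder) - 1 would truncate where unsigned long is
      // 32 bits; widening the constant to the word type first keeps the
      // arithmetic at full word width.
      BitWord Mask = (BitWord(1) << Remainder) - 1;
      return Mask == 0xFFFFFFFFFFull ? 0 : 1;
    }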
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/DenseMap.h b/contrib/llvm-project/llvm/include/llvm/ADT/DenseMap.h
index 948a6e6bfb38..148d319c8603 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/DenseMap.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/DenseMap.h
@@ -1006,13 +1006,10 @@ public:
}
void grow(unsigned AtLeast) {
- if (AtLeast >= InlineBuckets)
+ if (AtLeast > InlineBuckets)
AtLeast = std::max<unsigned>(64, NextPowerOf2(AtLeast-1));
if (Small) {
- if (AtLeast < InlineBuckets)
- return; // Nothing to do.
-
// First move the inline buckets into a temporary storage.
AlignedCharArrayUnion<BucketT[InlineBuckets]> TmpStorage;
BucketT *TmpBegin = reinterpret_cast<BucketT *>(TmpStorage.buffer);
@@ -1035,10 +1032,13 @@ public:
P->getFirst().~KeyT();
}
- // Now make this map use the large rep, and move all the entries back
- // into it.
- Small = false;
- new (getLargeRep()) LargeRep(allocateBuckets(AtLeast));
+ // AtLeast == InlineBuckets can happen if there are many tombstones,
+ // and grow() is used to remove them. Usually we switch to the
+ // large rep here.
+ if (AtLeast > InlineBuckets) {
+ Small = false;
+ new (getLargeRep()) LargeRep(allocateBuckets(AtLeast));
+ }
this->moveFromOldBuckets(TmpBegin, TmpEnd);
return;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/DirectedGraph.h b/contrib/llvm-project/llvm/include/llvm/ADT/DirectedGraph.h
index f6a358d99cd2..cfe98e178a91 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/DirectedGraph.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/DirectedGraph.h
@@ -48,6 +48,9 @@ public:
static_cast<const DGEdge<NodeType, EdgeType> &>(*this).getTargetNode());
}
+ /// Set the target node this edge connects to.
+ void setTargetNode(const NodeType &N) { TargetNode = N; }
+
protected:
// As the default implementation use address comparison for equality.
bool isEqualTo(const EdgeType &E) const { return this == &E; }
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/EnumeratedArray.h b/contrib/llvm-project/llvm/include/llvm/ADT/EnumeratedArray.h
new file mode 100644
index 000000000000..a9528115618c
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/EnumeratedArray.h
@@ -0,0 +1,48 @@
+//===- llvm/ADT/EnumeratedArray.h - Enumerated Array-------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an array type that can be indexed using scoped enum values.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ENUMERATEDARRAY_H
+#define LLVM_ADT_ENUMERATEDARRAY_H
+
+#include <cassert>
+
+namespace llvm {
+
+template <typename ValueType, typename Enumeration,
+ Enumeration LargestEnum = Enumeration::Last, typename IndexType = int,
+ IndexType Size = 1 + static_cast<IndexType>(LargestEnum)>
+class EnumeratedArray {
+public:
+ EnumeratedArray() = default;
+ EnumeratedArray(ValueType V) {
+ for (IndexType IX = 0; IX < Size; ++IX) {
+ Underlying[IX] = V;
+ }
+ }
+ inline const ValueType &operator[](const Enumeration Index) const {
+ auto IX = static_cast<const IndexType>(Index);
+ assert(IX >= 0 && IX < Size && "Index is out of bounds.");
+ return Underlying[IX];
+ }
+ inline ValueType &operator[](const Enumeration Index) {
+ return const_cast<ValueType &>(
+ static_cast<const EnumeratedArray<ValueType, Enumeration, LargestEnum,
+ IndexType, Size> &>(*this)[Index]);
+ }
+
+private:
+ ValueType Underlying[Size];
+};
+
+} // namespace llvm
+
+#endif // LLVM_ADT_ENUMERATEDARRAY_H
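A short usage sketch for the new container; the Color enum is hypothetical. Since Size defaults to 1 + LargestEnum and LargestEnum defaults to Enumeration::Last, the enumeration only needs a Last member:

    #include "llvm/ADT/EnumeratedArray.h"

    enum class Color { Red, Green, Blue, Last = Blue };

    int main() {
      // Size is deduced as 1 + static_cast<int>(Color::Last) == 3.
      llvm::EnumeratedArray<float, Color> Opacity(1.0f);
      Opacity[Color::Green] = 0.5f;
      return Opacity[Color::Red] == 1.0f ? 0 : 1;
    }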
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/FloatingPointMode.h b/contrib/llvm-project/llvm/include/llvm/ADT/FloatingPointMode.h
new file mode 100644
index 000000000000..670b2368da9f
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/FloatingPointMode.h
@@ -0,0 +1,62 @@
+//===- llvm/Support/FloatingPointMode.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for dealing with flags related to floating point mode controls.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FLOATINGPOINTMODE_H
+#define LLVM_FLOATINGPOINTMODE_H
+
+#include "llvm/ADT/StringSwitch.h"
+
+namespace llvm {
+
+/// Represent handled modes for denormal (aka subnormal) modes in the floating
+/// point environment.
+enum class DenormalMode {
+ Invalid = -1,
+
+ /// IEEE-754 denormal numbers preserved.
+ IEEE,
+
+ /// The sign of a flushed-to-zero number is preserved in the sign of 0
+ PreserveSign,
+
+ /// Denormals are flushed to positive zero.
+ PositiveZero
+};
+
+/// Parse the expected names from the denormal-fp-math attribute.
+inline DenormalMode parseDenormalFPAttribute(StringRef Str) {
+ // Assume ieee on unspecified attribute.
+ return StringSwitch<DenormalMode>(Str)
+ .Cases("", "ieee", DenormalMode::IEEE)
+ .Case("preserve-sign", DenormalMode::PreserveSign)
+ .Case("positive-zero", DenormalMode::PositiveZero)
+ .Default(DenormalMode::Invalid);
+}
+
+/// Return the name used for the denormal handling mode, matching the
+/// expected names from the denormal-fp-math attribute.
+inline StringRef denormalModeName(DenormalMode Mode) {
+ switch (Mode) {
+ case DenormalMode::IEEE:
+ return "ieee";
+ case DenormalMode::PreserveSign:
+ return "preserve-sign";
+ case DenormalMode::PositiveZero:
+ return "positive-zero";
+ default:
+ return "";
+ }
+}
+
+}
+
+#endif // LLVM_FLOATINGPOINTMODE_H
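A minimal round-trip sketch for the two helpers; unrecognized strings map to DenormalMode::Invalid, which denormalModeName renders as the empty string:

    #include "llvm/ADT/FloatingPointMode.h"

    int main() {
      llvm::DenormalMode M = llvm::parseDenormalFPAttribute("preserve-sign");
      bool RoundTrips = llvm::denormalModeName(M) == "preserve-sign";
      bool BadIsEmpty =
          llvm::denormalModeName(llvm::parseDenormalFPAttribute("bogus")).empty();
      return (RoundTrips && BadIsEmpty) ? 0 : 1;
    }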
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/FoldingSet.h b/contrib/llvm-project/llvm/include/llvm/ADT/FoldingSet.h
index d5837e51bcfc..4968b1ea7780 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/FoldingSet.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/FoldingSet.h
@@ -85,17 +85,17 @@ namespace llvm {
///
/// MyNode *M = MyFoldingSet.FindNodeOrInsertPos(ID, InsertPoint);
///
-/// If found then M with be non-NULL, else InsertPoint will point to where it
+/// If found then M will be non-NULL, else InsertPoint will point to where it
/// should be inserted using InsertNode.
///
-/// 3) If you get a NULL result from FindNodeOrInsertPos then you can as a new
-/// node with FindNodeOrInsertPos;
+/// 3) If you get a NULL result from FindNodeOrInsertPos then you can insert a
+/// new node with InsertNode;
///
-/// InsertNode(N, InsertPoint);
+/// MyFoldingSet.InsertNode(M, InsertPoint);
///
/// 4) Finally, if you want to remove a node from the folding set call;
///
-/// bool WasRemoved = RemoveNode(N);
+/// bool WasRemoved = MyFoldingSet.RemoveNode(M);
///
/// The result indicates whether the node existed in the folding set.
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Hashing.h b/contrib/llvm-project/llvm/include/llvm/ADT/Hashing.h
index b22606bdb518..adcc5cf54da9 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/Hashing.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/Hashing.h
@@ -45,6 +45,7 @@
#define LLVM_ADT_HASHING_H
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/type_traits.h"
#include <algorithm>
@@ -256,7 +257,7 @@ inline uint64_t hash_short(const char *s, size_t length, uint64_t seed) {
/// Currently, the algorithm for computing hash codes is based on CityHash and
/// keeps 56 bytes of arbitrary state.
struct hash_state {
- uint64_t h0, h1, h2, h3, h4, h5, h6;
+ uint64_t h0 = 0, h1 = 0, h2 = 0, h3 = 0, h4 = 0, h5 = 0, h6 = 0;
/// Create a new hash_state structure and initialize it based on the
/// seed and the first 64-byte chunk.
@@ -491,7 +492,7 @@ namespace detail {
/// useful at minimizing the code in the recursive calls to ease the pain
/// caused by a lack of variadic functions.
struct hash_combine_recursive_helper {
- char buffer[64];
+ char buffer[64] = {};
hash_state state;
const uint64_t seed;
@@ -539,7 +540,7 @@ public:
// store types smaller than the buffer.
if (!store_and_advance(buffer_ptr, buffer_end, data,
partial_store_size))
- abort();
+ llvm_unreachable("buffer smaller than stored type");
}
return buffer_ptr;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/ImmutableSet.h b/contrib/llvm-project/llvm/include/llvm/ADT/ImmutableSet.h
index 587105431533..a6a6abfd9600 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/ImmutableSet.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/ImmutableSet.h
@@ -205,8 +205,7 @@ public:
ImutInfo::KeyOfValue(getValue()))) &&
"Value in left child is not less that current value");
-
- assert(!(getRight() ||
+ assert((!getRight() ||
ImutInfo::isLess(ImutInfo::KeyOfValue(getValue()),
ImutInfo::KeyOfValue(getRight()->getValue()))) &&
"Current value is not less that value of right child");
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Optional.h b/contrib/llvm-project/llvm/include/llvm/ADT/Optional.h
index b45a74002e10..c84f9aa8b342 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/Optional.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/Optional.h
@@ -267,6 +267,14 @@ public:
return hasValue() ? getValue() : std::forward<U>(value);
}
+ /// Apply a function to the value if present; otherwise return None.
+ template <class Function>
+ auto map(const Function &F) const
+ -> Optional<decltype(F(getValue()))> {
+ if (*this) return F(getValue());
+ return None;
+ }
+
#if LLVM_HAS_RVALUE_REFERENCE_THIS
T &&getValue() && { return std::move(Storage.getValue()); }
T &&operator*() && { return std::move(Storage.getValue()); }
@@ -275,6 +283,14 @@ public:
T getValueOr(U &&value) && {
return hasValue() ? std::move(getValue()) : std::forward<U>(value);
}
+
+ /// Apply a function to the value if present; otherwise return None.
+ template <class Function>
+ auto map(const Function &F) &&
+ -> Optional<decltype(F(std::move(*this).getValue()))> {
+ if (*this) return F(std::move(*this).getValue());
+ return None;
+ }
#endif
};
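A small sketch of the new map member on both a populated and an empty Optional:

    #include "llvm/ADT/Optional.h"

    int main() {
      llvm::Optional<int> X = 21;
      // Present value: the function is applied and the result rewrapped.
      llvm::Optional<int> Y = X.map([](int V) { return V * 2; }); // contains 42
      // Absent value: the function is skipped and None propagates.
      llvm::Optional<int> Z =
          llvm::Optional<int>().map([](int V) { return V * 2; }); // None
      return (Y && *Y == 42 && !Z) ? 0 : 1;
    }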
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/PointerUnion.h b/contrib/llvm-project/llvm/include/llvm/ADT/PointerUnion.h
index 98c905775a77..40b7b000da40 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/PointerUnion.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/PointerUnion.h
@@ -93,7 +93,7 @@ namespace pointer_union_detail {
static constexpr int NumLowBitsAvailable = lowBitsAvailable<PTs...>();
};
- /// Implement assigment in terms of construction.
+ /// Implement assignment in terms of construction.
template <typename Derived, typename T> struct AssignableFrom {
Derived &operator=(T t) {
return static_cast<Derived &>(*this) = Derived(t);
@@ -272,16 +272,6 @@ struct PointerLikeTypeTraits<PointerUnion<PTs...>> {
PointerUnion<PTs...>::Val)>::NumLowBitsAvailable;
};
-/// A pointer union of three pointer types. See documentation for PointerUnion
-/// for usage.
-template <typename PT1, typename PT2, typename PT3>
-using PointerUnion3 = PointerUnion<PT1, PT2, PT3>;
-
-/// A pointer union of four pointer types. See documentation for PointerUnion
-/// for usage.
-template <typename PT1, typename PT2, typename PT3, typename PT4>
-using PointerUnion4 = PointerUnion<PT1, PT2, PT3, PT4>;
-
// Teach DenseMap how to use PointerUnions as keys.
template <typename ...PTs> struct DenseMapInfo<PointerUnion<PTs...>> {
using Union = PointerUnion<PTs...>;
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/SCCIterator.h b/contrib/llvm-project/llvm/include/llvm/ADT/SCCIterator.h
index eb1a5d0938cf..1e642b9f75d3 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/SCCIterator.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/SCCIterator.h
@@ -134,7 +134,10 @@ public:
/// has been deleted, and \c New is to be used in its place.
void ReplaceNode(NodeRef Old, NodeRef New) {
assert(nodeVisitNumbers.count(Old) && "Old not in scc_iterator?");
- nodeVisitNumbers[New] = nodeVisitNumbers[Old];
+ // Do the assignment in two steps, in case 'New' is not yet in the map, and
+ // inserting it causes the map to grow.
+ auto tempVal = nodeVisitNumbers[Old];
+ nodeVisitNumbers[New] = tempVal;
nodeVisitNumbers.erase(Old);
}
};
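The hazard fixed here generalizes to any DenseMap use: in Map[New] = Map[Old] the two operator[] calls are unsequenced, and the insertion for New may grow the table and invalidate the reference returned for Old. A minimal sketch of the hazard and the fix, outside scc_iterator:

    #include "llvm/ADT/DenseMap.h"

    int main() {
      llvm::DenseMap<int, int> M;
      M[1] = 42;
      // Risky: M[2] = M[1]; -- inserting key 2 may rehash the table and
      // invalidate the reference that M[1] returned.
      // Safe: read into a temporary first, then insert.
      int Tmp = M[1];
      M[2] = Tmp;
      return M[2] == 42 ? 0 : 1;
    }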
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/STLExtras.h b/contrib/llvm-project/llvm/include/llvm/ADT/STLExtras.h
index 274933bc5204..b61dab2459d1 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/STLExtras.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/STLExtras.h
@@ -17,7 +17,6 @@
#define LLVM_ADT_STLEXTRAS_H
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Config/abi-breaking.h"
@@ -51,10 +50,6 @@ namespace detail {
template <typename RangeT>
using IterOfRange = decltype(std::begin(std::declval<RangeT &>()));
-template <typename RangeT>
-using ValueOfRange = typename std::remove_reference<decltype(
- *std::begin(std::declval<RangeT &>()))>::type;
-
} // end namespace detail
//===----------------------------------------------------------------------===//
@@ -198,6 +193,15 @@ constexpr bool empty(const T &RangeOrContainer) {
return adl_begin(RangeOrContainer) == adl_end(RangeOrContainer);
}
+/// Return a range covering \p RangeOrContainer with the first N elements
+/// excluded.
+template <typename T>
+auto drop_begin(T &&RangeOrContainer, size_t N) ->
+ iterator_range<decltype(adl_begin(RangeOrContainer))> {
+ return make_range(std::next(adl_begin(RangeOrContainer), N),
+ adl_end(RangeOrContainer));
+}
+
// mapped_iterator - This is a simple iterator adapter that causes a function to
// be applied whenever operator* is invoked on the iterator.
@@ -1044,6 +1048,23 @@ inline int (*get_array_pod_sort_comparator(const T &))
return array_pod_sort_comparator<T>;
}
+#ifdef EXPENSIVE_CHECKS
+namespace detail {
+
+inline unsigned presortShuffleEntropy() {
+ static unsigned Result(std::random_device{}());
+ return Result;
+}
+
+template <class IteratorTy>
+inline void presortShuffle(IteratorTy Start, IteratorTy End) {
+ std::mt19937 Generator(presortShuffleEntropy());
+ std::shuffle(Start, End, Generator);
+}
+
+} // end namespace detail
+#endif
+
/// array_pod_sort - This sorts an array with the specified start and end
/// extent. This is just like std::sort, except that it calls qsort instead of
/// using an inlined template. qsort is slightly slower than std::sort, but
@@ -1065,8 +1086,7 @@ inline void array_pod_sort(IteratorTy Start, IteratorTy End) {
auto NElts = End - Start;
if (NElts <= 1) return;
#ifdef EXPENSIVE_CHECKS
- std::mt19937 Generator(std::random_device{}());
- std::shuffle(Start, End, Generator);
+ detail::presortShuffle<IteratorTy>(Start, End);
#endif
qsort(&*Start, NElts, sizeof(*Start), get_array_pod_sort_comparator(*Start));
}
@@ -1082,8 +1102,7 @@ inline void array_pod_sort(
auto NElts = End - Start;
if (NElts <= 1) return;
#ifdef EXPENSIVE_CHECKS
- std::mt19937 Generator(std::random_device{}());
- std::shuffle(Start, End, Generator);
+ detail::presortShuffle<IteratorTy>(Start, End);
#endif
qsort(&*Start, NElts, sizeof(*Start),
reinterpret_cast<int (*)(const void *, const void *)>(Compare));
@@ -1094,8 +1113,7 @@ inline void array_pod_sort(
template <typename IteratorTy>
inline void sort(IteratorTy Start, IteratorTy End) {
#ifdef EXPENSIVE_CHECKS
- std::mt19937 Generator(std::random_device{}());
- std::shuffle(Start, End, Generator);
+ detail::presortShuffle<IteratorTy>(Start, End);
#endif
std::sort(Start, End);
}
@@ -1107,8 +1125,7 @@ template <typename Container> inline void sort(Container &&C) {
template <typename IteratorTy, typename Compare>
inline void sort(IteratorTy Start, IteratorTy End, Compare Comp) {
#ifdef EXPENSIVE_CHECKS
- std::mt19937 Generator(std::random_device{}());
- std::shuffle(Start, End, Generator);
+ detail::presortShuffle<IteratorTy>(Start, End);
#endif
std::sort(Start, End, Comp);
}
@@ -1312,15 +1329,6 @@ bool is_splat(R &&Range) {
std::equal(adl_begin(Range) + 1, adl_end(Range), adl_begin(Range)));
}
-/// Given a range of type R, iterate the entire range and return a
-/// SmallVector with elements of the vector. This is useful, for example,
-/// when you want to iterate a range and then sort the results.
-template <unsigned Size, typename R>
-SmallVector<typename std::remove_const<detail::ValueOfRange<R>>::type, Size>
-to_vector(R &&Range) {
- return {adl_begin(Range), adl_end(Range)};
-}
-
/// Provide a container algorithm similar to C++ Library Fundamentals v2's
/// `erase_if` which is equivalent to:
///
@@ -1407,6 +1415,8 @@ template <typename R> struct result_pair {
result_pair(std::size_t Index, IterOfRange<R> Iter)
: Index(Index), Iter(Iter) {}
+ result_pair<R>(const result_pair<R> &Other)
+ : Index(Other.Index), Iter(Other.Iter) {}
result_pair<R> &operator=(const result_pair<R> &Other) {
Index = Other.Index;
Iter = Other.Iter;
@@ -1455,6 +1465,7 @@ public:
return Result.Iter == RHS.Result.Iter;
}
+ enumerator_iter<R>(const enumerator_iter<R> &Other) : Result(Other.Result) {}
enumerator_iter<R> &operator=(const enumerator_iter<R> &Other) {
Result = Other.Result;
return *this;
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h b/contrib/llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h
index 913518230d2d..1d8280063c80 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -409,6 +409,32 @@ private:
}
};
+/// Equality comparison for SmallPtrSet.
+///
+/// Iterates over elements of LHS confirming that each value from LHS is also in
+/// RHS, and that no additional values are in RHS.
+template <typename PtrType>
+bool operator==(const SmallPtrSetImpl<PtrType> &LHS,
+ const SmallPtrSetImpl<PtrType> &RHS) {
+ if (LHS.size() != RHS.size())
+ return false;
+
+ for (const auto *KV : LHS)
+ if (!RHS.count(KV))
+ return false;
+
+ return true;
+}
+
+/// Inequality comparison for SmallPtrSet.
+///
+/// Equivalent to !(LHS == RHS).
+template <typename PtrType>
+bool operator!=(const SmallPtrSetImpl<PtrType> &LHS,
+ const SmallPtrSetImpl<PtrType> &RHS) {
+ return !(LHS == RHS);
+}
+
/// SmallPtrSet - This class implements a set which is optimized for holding
/// SmallSize or less elements. This internally rounds up SmallSize to the next
/// power of two if it is not already a power of two. See the comments above
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/SmallSet.h b/contrib/llvm-project/llvm/include/llvm/ADT/SmallSet.h
index 6b128c2e2992..a03fa7dd8423 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/SmallSet.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/SmallSet.h
@@ -248,6 +248,31 @@ private:
template <typename PointeeType, unsigned N>
class SmallSet<PointeeType*, N> : public SmallPtrSet<PointeeType*, N> {};
+/// Equality comparison for SmallSet.
+///
+/// Iterates over elements of LHS confirming that each element is also a member
+/// of RHS, and that RHS contains no additional values.
+/// Equivalent to N calls to RHS.count.
+/// For small-set mode the amortized complexity is O(N^2).
+/// For large-set mode the amortized complexity is linear, worst case O(N^2) (if
+/// every hash collides).
+template <typename T, unsigned LN, unsigned RN, typename C>
+bool operator==(const SmallSet<T, LN, C> &LHS, const SmallSet<T, RN, C> &RHS) {
+ if (LHS.size() != RHS.size())
+ return false;
+
+ // All elements in LHS must also be in RHS
+ return all_of(LHS, [&RHS](const T &E) { return RHS.count(E); });
+}
+
+/// Inequality comparison for SmallSet.
+///
+/// Equivalent to !(LHS == RHS). See operator== for performance notes.
+template <typename T, unsigned LN, unsigned RN, typename C>
+bool operator!=(const SmallSet<T, LN, C> &LHS, const SmallSet<T, RN, C> &RHS) {
+ return !(LHS == RHS);
+}
+
} // end namespace llvm
#endif // LLVM_ADT_SMALLSET_H
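A brief sketch of the new comparisons; note that the template allows differing inline sizes (LN vs RN), so a SmallSet<int, 4> compares directly against a SmallSet<int, 8>:

    #include "llvm/ADT/SmallSet.h"

    int main() {
      llvm::SmallSet<int, 4> A;
      llvm::SmallSet<int, 8> B;
      A.insert(1); A.insert(2);
      B.insert(2); B.insert(1);
      // Order-insensitive, size-checked membership comparison.
      return (A == B && !(A != B)) ? 0 : 1;
    }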
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/SmallVector.h b/contrib/llvm-project/llvm/include/llvm/ADT/SmallVector.h
index 17586904d212..8c46aa906905 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/SmallVector.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/SmallVector.h
@@ -907,6 +907,17 @@ inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
return X.capacity_in_bytes();
}
+/// Given a range of type R, iterate the entire range and return a
+/// SmallVector with the elements of the range. This is useful, for example,
+/// when you want to iterate a range and then sort the results.
+template <unsigned Size, typename R>
+SmallVector<typename std::remove_const<typename std::remove_reference<
+ decltype(*std::begin(std::declval<R &>()))>::type>::type,
+ Size>
+to_vector(R &&Range) {
+ return {std::begin(Range), std::end(Range)};
+}
+
} // end namespace llvm
namespace std {
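to_vector, relocated here from STLExtras.h (deleted above), materializes an arbitrary range; a short sketch:

    #include "llvm/ADT/SmallVector.h"
    #include <algorithm>
    #include <vector>

    int main() {
      std::vector<int> Src = {3, 1, 2};
      // Copies the range into a SmallVector with the requested inline size,
      // so it can be sorted without touching the source.
      llvm::SmallVector<int, 4> V = llvm::to_vector<4>(Src);
      std::sort(V.begin(), V.end());
      return (V[0] == 1 && Src[0] == 3) ? 0 : 1;
    }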
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Statistic.h b/contrib/llvm-project/llvm/include/llvm/ADT/Statistic.h
index b7387ddcf1c7..d7aff6c5939a 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/Statistic.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/Statistic.h
@@ -173,7 +173,7 @@ using Statistic = NoopStatistic;
static llvm::TrackingStatistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC}
/// Enable the collection and printing of statistics.
-void EnableStatistics(bool PrintOnExit = true);
+void EnableStatistics(bool DoPrintOnExit = true);
/// Check if statistics are enabled.
bool AreStatisticsEnabled();
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/StringRef.h b/contrib/llvm-project/llvm/include/llvm/ADT/StringRef.h
index 52baab17bede..9bfaaccd953e 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/StringRef.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/StringRef.h
@@ -21,6 +21,12 @@
#include <type_traits>
#include <utility>
+// Declare the __builtin_strlen intrinsic for MSVC so it can be used in
+// constexpr context.
+#if defined(_MSC_VER)
+extern "C" size_t __builtin_strlen(const char *);
+#endif
+
namespace llvm {
class APInt;
@@ -71,7 +77,7 @@ namespace llvm {
static constexpr size_t strLen(const char *Str) {
#if __cplusplus > 201402L
return std::char_traits<char>::length(Str);
-#elif __has_builtin(__builtin_strlen) || defined(__GNUC__)
+#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || defined(_MSC_VER)
return __builtin_strlen(Str);
#else
const char *Begin = Str;
@@ -560,7 +566,8 @@ namespace llvm {
///
/// If \p AllowInexact is false, the function will fail if the string
/// cannot be represented exactly. Otherwise, the function only fails
- /// in case of an overflow or underflow.
+ /// in case of an overflow or underflow, or an invalid floating point
+ /// representation.
bool getAsDouble(double &Result, bool AllowInexact = true) const;
/// @}
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h b/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h
index 1ef3645a291d..76a754d671fb 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h
@@ -95,7 +95,8 @@ public:
wasm64, // WebAssembly with 64-bit pointers
renderscript32, // 32-bit RenderScript
renderscript64, // 64-bit RenderScript
- LastArchType = renderscript64
+ ve, // NEC SX-Aurora Vector Engine
+ LastArchType = ve
};
enum SubArchType {
NoSubArch,
@@ -205,8 +206,6 @@ public:
CODE16,
EABI,
EABIHF,
- ELFv1,
- ELFv2,
Android,
Musl,
MuslEABI,
@@ -479,7 +478,7 @@ public:
return getSubArch() == Triple::ARMSubArch_v7k;
}
- /// isOSDarwin - Is this a "Darwin" OS (OS X, iOS, or watchOS).
+ /// isOSDarwin - Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS).
bool isOSDarwin() const {
return isMacOSX() || isiOS() || isWatchOS();
}
@@ -732,6 +731,16 @@ public:
return getArch() == Triple::riscv32 || getArch() == Triple::riscv64;
}
+ /// Tests whether the target is x86 (32- or 64-bit).
+ bool isX86() const {
+ return getArch() == Triple::x86 || getArch() == Triple::x86_64;
+ }
+
+ /// Tests whether the target is VE.
+ bool isVE() const {
+ return getArch() == Triple::ve;
+ }
+
/// Tests whether the target supports comdat
bool supportsCOMDAT() const {
return !isOSBinFormatMachO();
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Twine.h b/contrib/llvm-project/llvm/include/llvm/ADT/Twine.h
index 4140c22aad3d..2dc7486c924f 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/Twine.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/Twine.h
@@ -153,11 +153,11 @@ namespace llvm {
/// LHS - The prefix in the concatenation, which may be uninitialized for
/// Null or Empty kinds.
- Child LHS;
+ Child LHS = {0};
/// RHS - The suffix in the concatenation, which may be uninitialized for
/// Null or Empty kinds.
- Child RHS;
+ Child RHS = {0};
/// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
NodeKind LHSKind = EmptyKind;
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/iterator.h b/contrib/llvm-project/llvm/include/llvm/ADT/iterator.h
index 467fd4c00ec5..8fd5c11a2dcb 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/iterator.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/iterator.h
@@ -333,6 +333,11 @@ make_pointer_range(RangeT &&Range) {
PointerIteratorT(std::end(std::forward<RangeT>(Range))));
}
+template <typename WrappedIteratorT,
+ typename T1 = typename std::remove_reference<decltype(**std::declval<WrappedIteratorT>())>::type,
+ typename T2 = typename std::add_pointer<T1>::type>
+using raw_pointer_iterator = pointer_iterator<pointee_iterator<WrappedIteratorT, T1>, T2>;
+
// Wrapper iterator over iterator ItType, adding DataRef to the type of ItType,
// to create NodeRef = std::pair<InnerTypeOfItType, DataRef>.
template <typename ItType, typename NodeRef, typename DataRef>
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h b/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h
index aa8830943cab..f038f6bf2128 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h
@@ -59,11 +59,6 @@ template <typename T> iterator_range<T> make_range(std::pair<T, T> p) {
return iterator_range<T>(std::move(p.first), std::move(p.second));
}
-template <typename T>
-iterator_range<decltype(adl_begin(std::declval<T>()))> drop_begin(T &&t,
- int n) {
- return make_range(std::next(adl_begin(t), n), adl_end(t));
-}
}
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
index 282142f51bb3..7c42a261ebb6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -1179,14 +1179,9 @@ struct ExternalAAWrapperPass : ImmutablePass {
static char ID;
- ExternalAAWrapperPass() : ImmutablePass(ID) {
- initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ ExternalAAWrapperPass();
- explicit ExternalAAWrapperPass(CallbackT CB)
- : ImmutablePass(ID), CB(std::move(CB)) {
- initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ explicit ExternalAAWrapperPass(CallbackT CB);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasSetTracker.h b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasSetTracker.h
index 187317e3831b..e94a758b06ba 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -344,8 +344,8 @@ class AliasSetTracker {
struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {};
AliasAnalysis &AA;
- MemorySSA *MSSA;
- Loop *L;
+ MemorySSA *MSSA = nullptr;
+ Loop *L = nullptr;
ilist<AliasSet> AliasSets;
using PointerMapType = DenseMap<ASTCallbackVH, AliasSet::PointerRec *,
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index 97cb730d16c7..41d6c23b8d0d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -34,6 +34,7 @@ namespace llvm {
class Function;
class LoopInfo;
class raw_ostream;
+class PostDominatorTree;
class TargetLibraryInfo;
class Value;
@@ -179,7 +180,7 @@ private:
DenseMap<Edge, BranchProbability> Probs;
/// Track the last function we run over for printing.
- const Function *LastF;
+ const Function *LastF = nullptr;
/// Track the set of blocks directly succeeded by a returning block.
SmallPtrSet<const BasicBlock *, 16> PostDominatedByUnreachable;
@@ -187,8 +188,10 @@ private:
/// Track the set of blocks that always lead to a cold call.
SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall;
- void updatePostDominatedByUnreachable(const BasicBlock *BB);
- void updatePostDominatedByColdCall(const BasicBlock *BB);
+ void computePostDominatedByUnreachable(const Function &F,
+ PostDominatorTree *PDT);
+ void computePostDominatedByColdCall(const Function &F,
+ PostDominatorTree *PDT);
bool calcUnreachableHeuristics(const BasicBlock *BB);
bool calcMetadataWeights(const BasicBlock *BB);
bool calcColdCallHeuristics(const BasicBlock *BB);
@@ -233,10 +236,7 @@ class BranchProbabilityInfoWrapperPass : public FunctionPass {
public:
static char ID;
- BranchProbabilityInfoWrapperPass() : FunctionPass(ID) {
- initializeBranchProbabilityInfoWrapperPassPass(
- *PassRegistry::getPassRegistry());
- }
+ BranchProbabilityInfoWrapperPass();
BranchProbabilityInfo &getBPI() { return BPI; }
const BranchProbabilityInfo &getBPI() const { return BPI; }
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/DDG.h b/contrib/llvm-project/llvm/include/llvm/Analysis/DDG.h
index 0e1eb9d2cda3..22df60efd84e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/DDG.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/DDG.h
@@ -13,12 +13,12 @@
#ifndef LLVM_ANALYSIS_DDG_H
#define LLVM_ANALYSIS_DDG_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DirectedGraph.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DependenceGraphBuilder.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Instructions.h"
-#include <unordered_map>
namespace llvm {
class DDGNode;
@@ -33,7 +33,11 @@ class LPMUpdater;
/// 1. Single instruction node containing just one instruction.
/// 2. Multiple instruction node where two or more instructions from
/// the same basic block are merged into one node.
-/// 3. Root node is a special node that connects to all components such that
+/// 3. Pi-block node which is a group of other DDG nodes that are part of a
+/// strongly-connected component of the graph.
+/// A pi-block node contains more than one single or multiple instruction
+/// nodes. The root node cannot be part of a pi-block.
+/// 4. Root node is a special node that connects to all components such that
/// there is always a path from it to any node in the graph.
class DDGNode : public DDGNodeBase {
public:
@@ -43,6 +47,7 @@ public:
Unknown,
SingleInstruction,
MultiInstruction,
+ PiBlock,
Root,
};
@@ -155,6 +160,55 @@ private:
SmallVector<Instruction *, 2> InstList;
};
+/// Subclass of DDGNode representing a pi-block. A pi-block represents a group
+/// of DDG nodes that are part of a strongly-connected component of the graph.
+/// Replacing all the SCCs with pi-blocks results in an acyclic representation
+/// of the DDG. For example if we have:
+/// {a -> b}, {b -> c, d}, {c -> a}
+/// the cycle a -> b -> c -> a is abstracted into a pi-block "p" as follows:
+/// {p -> d} with "p" containing: {a -> b}, {b -> c}, {c -> a}
+class PiBlockDDGNode : public DDGNode {
+public:
+ using PiNodeList = SmallVector<DDGNode *, 4>;
+
+ PiBlockDDGNode() = delete;
+ PiBlockDDGNode(const PiNodeList &List);
+ PiBlockDDGNode(const PiBlockDDGNode &N);
+ PiBlockDDGNode(PiBlockDDGNode &&N);
+ ~PiBlockDDGNode();
+
+ PiBlockDDGNode &operator=(const PiBlockDDGNode &N) {
+ DDGNode::operator=(N);
+ NodeList = N.NodeList;
+ return *this;
+ }
+
+ PiBlockDDGNode &operator=(PiBlockDDGNode &&N) {
+ DDGNode::operator=(std::move(N));
+ NodeList = std::move(N.NodeList);
+ return *this;
+ }
+
+ /// Get the list of nodes in this pi-block.
+ const PiNodeList &getNodes() const {
+ assert(!NodeList.empty() && "Node list is empty.");
+ return NodeList;
+ }
+ PiNodeList &getNodes() {
+ return const_cast<PiNodeList &>(
+ static_cast<const PiBlockDDGNode *>(this)->getNodes());
+ }
+
+ /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
+ static bool classof(const DDGNode *N) {
+ return N->getKind() == NodeKind::PiBlock;
+ }
+
+private:
+ /// List of nodes in this pi-block.
+ PiNodeList NodeList;
+};
+
/// Data Dependency Graph Edge.
/// An edge in the DDG can represent a def-use relationship or
/// a memory dependence based on the result of DependenceAnalysis.
@@ -163,7 +217,13 @@ private:
class DDGEdge : public DDGEdgeBase {
public:
/// The kind of edge in the DDG
- enum class EdgeKind { Unknown, RegisterDefUse, MemoryDependence, Rooted };
+ enum class EdgeKind {
+ Unknown,
+ RegisterDefUse,
+ MemoryDependence,
+ Rooted,
+ Last = Rooted // Must be equal to the largest enum value.
+ };
explicit DDGEdge(DDGNode &N) = delete;
DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {}
@@ -240,6 +300,7 @@ using DDGInfo = DependenceGraphInfo<DDGNode>;
/// Data Dependency Graph
class DataDependenceGraph : public DDGBase, public DDGInfo {
+ friend AbstractDependenceGraphBuilder<DataDependenceGraph>;
friend class DDGBuilder;
public:
@@ -251,14 +312,25 @@ public:
DataDependenceGraph(DataDependenceGraph &&G)
: DDGBase(std::move(G)), DDGInfo(std::move(G)) {}
DataDependenceGraph(Function &F, DependenceInfo &DI);
- DataDependenceGraph(const Loop &L, DependenceInfo &DI);
+ DataDependenceGraph(Loop &L, LoopInfo &LI, DependenceInfo &DI);
~DataDependenceGraph();
+ /// If node \p N belongs to a pi-block return a pointer to the pi-block,
+ /// otherwise return null.
+ const PiBlockDDGNode *getPiBlock(const NodeType &N) const;
+
protected:
- /// Add node \p N to the graph, if it's not added yet, and keep track of
- /// the root node. Return true if node is successfully added.
+ /// Add node \p N to the graph, if it's not added yet, and keep track of the
+ /// root node as well as pi-blocks and their members. Return true if node is
+ /// successfully added.
bool addNode(NodeType &N);
+private:
+ using PiBlockMapType = DenseMap<const NodeType *, const PiBlockDDGNode *>;
+
+ /// Mapping from graph nodes to their containing pi-blocks. If a node is not
+ /// part of a pi-block, it will not appear in this map.
+ PiBlockMapType PiBlockMap;
};
/// Concrete implementation of a pure data dependence graph builder. This class
@@ -284,6 +356,12 @@ public:
Graph.addNode(*SN);
return *SN;
}
+ DDGNode &createPiBlock(const NodeListType &L) final override {
+ auto *Pi = new PiBlockDDGNode(L);
+ assert(Pi && "Failed to allocate memory for pi-block node.");
+ Graph.addNode(*Pi);
+ return *Pi;
+ }
DDGEdge &createDefUseEdge(DDGNode &Src, DDGNode &Tgt) final override {
auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::RegisterDefUse);
assert(E && "Failed to allocate memory for edge");
@@ -304,6 +382,13 @@ public:
return *E;
}
+ const NodeListType &getNodesInPiBlock(const DDGNode &N) final override {
+ auto *PiNode = dyn_cast<const PiBlockDDGNode>(&N);
+ assert(PiNode && "Expected a pi-block node.");
+ return PiNode->getNodes();
+ }
+
+ bool shouldCreatePiBlocks() const final override;
};
raw_ostream &operator<<(raw_ostream &OS, const DDGNode &N);
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/DependenceAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 997013a5fc8e..0c4002c3c3ba 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -926,6 +926,12 @@ template <typename T> class ArrayRef;
bool tryDelinearize(Instruction *Src, Instruction *Dst,
SmallVectorImpl<Subscript> &Pair);
+
+ private:
+ /// checkSubscript - Helper function for checkSrcSubscript and
+ /// checkDstSubscript to avoid duplicate code
+ bool checkSubscript(const SCEV *Expr, const Loop *LoopNest,
+ SmallBitVector &Loops, bool IsSrc);
}; // class DependenceInfo
/// AnalysisPass to compute dependence information in a function
@@ -954,10 +960,7 @@ template <typename T> class ArrayRef;
class DependenceAnalysisWrapperPass : public FunctionPass {
public:
static char ID; // Class identification, replacement for typeinfo
- DependenceAnalysisWrapperPass() : FunctionPass(ID) {
- initializeDependenceAnalysisWrapperPassPass(
- *PassRegistry::getPassRegistry());
- }
+ DependenceAnalysisWrapperPass();
bool runOnFunction(Function &F) override;
void releaseMemory() override;
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/DependenceGraphBuilder.h b/contrib/llvm-project/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
index 5f4bdb47043b..08a13d967da2 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
@@ -44,7 +44,9 @@ public:
/// The main entry to the graph construction algorithm. It starts by
/// creating nodes in increasing order of granularity and then
- /// adds def-use and memory edges.
+ /// adds def-use and memory edges. As one of the final stages, it
+ /// also creates pi-block nodes to facilitate codegen in transformations
+ /// that use dependence graphs.
///
/// The algorithmic complexity of this implementation is O(V^2 * I^2), where V
/// is the number of vertices (nodes) and I is the number of instructions in
@@ -52,12 +54,21 @@ public:
/// therefore the worst-case time complexity is O(N^2). The average time
/// complexity is O((N^2)/2).
void populate() {
+ computeInstructionOrdinals();
createFineGrainedNodes();
createDefUseEdges();
createMemoryDependencyEdges();
createAndConnectRootNode();
+ createPiBlocks();
+ sortNodesTopologically();
}
+ /// Compute ordinal numbers for each instruction and store them in a map for
+ /// future lookup. These ordinals are used to compute node ordinals, which are
+ /// in turn used to order nodes that are part of a cycle.
+ /// Instruction ordinals are assigned based on lexical program order.
+ void computeInstructionOrdinals();
+
/// Create fine grained nodes. These are typically atomic nodes that
/// consist of a single instruction.
void createFineGrainedNodes();
@@ -74,6 +85,16 @@ public:
/// reachable from the root.
void createAndConnectRootNode();
+ /// Apply graph abstraction to groups of nodes that belong to a strongly
+ /// connected component of the graph to create larger compound nodes
+ /// called pi-blocks. The purpose of this abstraction is to isolate sets of
+ /// program elements that need to stay together during codegen and turn
+ /// the dependence graph into an acyclic graph.
+ void createPiBlocks();
+
+ /// Topologically sort the graph nodes.
+ void sortNodesTopologically();
+
protected:
/// Create the root node of the graph.
virtual NodeType &createRootNode() = 0;
@@ -81,6 +102,10 @@ protected:
/// Create an atomic node in the graph given a single instruction.
virtual NodeType &createFineGrainedNode(Instruction &I) = 0;
+ /// Create a pi-block node in the graph representing a group of nodes in an
+ /// SCC of the graph.
+ virtual NodeType &createPiBlock(const NodeListType &L) = 0;
+
/// Create a def-use edge going from \p Src to \p Tgt.
virtual EdgeType &createDefUseEdge(NodeType &Src, NodeType &Tgt) = 0;
@@ -90,15 +115,41 @@ protected:
/// Create a rooted edge going from \p Src to \p Tgt .
virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0;
+ /// Given a pi-block node, return a vector of all the nodes contained within
+ /// it.
+ virtual const NodeListType &getNodesInPiBlock(const NodeType &N) = 0;
+
/// Deallocate memory of edge \p E.
virtual void destroyEdge(EdgeType &E) { delete &E; }
/// Deallocate memory of node \p N.
virtual void destroyNode(NodeType &N) { delete &N; }
+ /// Return true if creation of pi-blocks is supported and desired,
+ /// and false otherwise.
+ virtual bool shouldCreatePiBlocks() const { return true; }
+
+ /// Given an instruction \p I, return its associated ordinal number.
+ size_t getOrdinal(Instruction &I) {
+ assert(InstOrdinalMap.find(&I) != InstOrdinalMap.end() &&
+ "No ordinal computed for this instruction.");
+ return InstOrdinalMap[&I];
+ }
+
+ /// Given a node \p N, return its associated ordinal number.
+ size_t getOrdinal(NodeType &N) {
+ assert(NodeOrdinalMap.find(&N) != NodeOrdinalMap.end() &&
+ "No ordinal computed for this node.");
+ return NodeOrdinalMap[&N];
+ }
+
/// Map types to map instructions to nodes used when populating the graph.
using InstToNodeMap = DenseMap<Instruction *, NodeType *>;
+ /// Map types to map instructions/nodes to an ordinal number.
+ using InstToOrdinalMap = DenseMap<Instruction *, size_t>;
+ using NodeToOrdinalMap = DenseMap<NodeType *, size_t>;
+
/// Reference to the graph that gets built by a concrete implementation of
/// this builder.
GraphType &Graph;
@@ -112,6 +163,14 @@ protected:
/// A mapping from instructions to the corresponding nodes in the graph.
InstToNodeMap IMap;
+
+ /// A mapping from each instruction to an ordinal number. This map is used to
+ /// populate the \p NodeOrdinalMap.
+ InstToOrdinalMap InstOrdinalMap;
+
+ /// A mapping from nodes to an ordinal number. This map is used to sort nodes
+ /// in a pi-block based on program order.
+ NodeToOrdinalMap NodeOrdinalMap;
};
} // namespace llvm
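A hedged sketch of what computeInstructionOrdinals amounts to, given the comment above that ordinals follow lexical program order (the real definition is out of line; Blocks is a hypothetical name for the basic blocks handed to the builder, in program order):

  size_t NextOrdinal = 1;
  for (BasicBlock *BB : Blocks)
    for (Instruction &I : *BB)
      InstOrdinalMap.insert({&I, NextOrdinal++});

Node ordinals can then be derived from the instructions each node contains, which is what lets pi-block members be sorted back into program order.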
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/GlobalsModRef.h b/contrib/llvm-project/llvm/include/llvm/Analysis/GlobalsModRef.h
index 5d1c5a05206a..fa5b16cf95eb 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/GlobalsModRef.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/GlobalsModRef.h
@@ -39,6 +39,9 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> {
/// The globals that do not have their addresses taken.
SmallPtrSet<const GlobalValue *, 8> NonAddressTakenGlobals;
+ /// Whether there are functions with local linkage that may modify globals.
+ bool UnknownFunctionsWithLocalLinkage = false;
+
/// IndirectGlobals - The memory pointed to by this global is known to be
/// 'owned' by the global.
SmallPtrSet<const GlobalValue *, 8> IndirectGlobals;
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/GuardUtils.h b/contrib/llvm-project/llvm/include/llvm/Analysis/GuardUtils.h
index 41e7b7c06c75..b83211535ec2 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/GuardUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/GuardUtils.h
@@ -15,6 +15,7 @@
namespace llvm {
class BasicBlock;
+class Use;
class User;
class Value;
@@ -22,6 +23,10 @@ class Value;
/// of llvm.experimental.guard intrinsic.
bool isGuard(const User *U);
+/// Returns true iff \p U is a widenable branch (that is, parseWidenableBranch
+/// returns true).
+bool isWidenableBranch(const User *U);
+
/// Returns true iff \p U has the semantics of a guard expressed in the form of
/// a widenable conditional branch to a deopt block.
bool isGuardAsWidenableBranch(const User *U);
@@ -39,6 +44,11 @@ bool parseWidenableBranch(const User *U, Value *&Condition,
Value *&WidenableCondition, BasicBlock *&IfTrueBB,
BasicBlock *&IfFalseBB);
+/// Analogous to the above, but returns the Uses so that they can be
+/// modified. Unlike the previous version, Condition is optional and may be null.
+bool parseWidenableBranch(User *U, Use *&Cond, Use *&WC, BasicBlock *&IfTrueBB,
+ BasicBlock *&IfFalseBB);
+
} // llvm
#endif // LLVM_ANALYSIS_GUARDUTILS_H
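A minimal sketch of the mutable overload (U is an assumed candidate instruction and NewWC a hypothetical replacement widenable condition; header llvm/Analysis/GuardUtils.h):

  Use *Cond = nullptr, *WC = nullptr;
  BasicBlock *IfTrueBB = nullptr, *IfFalseBB = nullptr;
  if (parseWidenableBranch(U, Cond, WC, IfTrueBB, IfFalseBB)) {
    // Per the comment above, Cond may legitimately be null here;
    // WC, however, can be rewritten in place.
    WC->set(NewWC);
  }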
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/InstructionSimplify.h b/contrib/llvm-project/llvm/include/llvm/Analysis/InstructionSimplify.h
index a5ffca13046b..b661caee6848 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -259,6 +259,10 @@ Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// Given a callsite, fold the result or return null.
Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q);
+/// Given an operand for a Freeze, see if we can fold the result.
+/// If not, this returns null.
+Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q);
+
/// See if we can compute a simplified version of this instruction. If not,
/// return null.
Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/IntervalPartition.h b/contrib/llvm-project/llvm/include/llvm/Analysis/IntervalPartition.h
index 5b127c25a2b8..66a99fb15bfb 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/IntervalPartition.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/IntervalPartition.h
@@ -50,9 +50,7 @@ class IntervalPartition : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- IntervalPartition() : FunctionPass(ID) {
- initializeIntervalPartitionPass(*PassRegistry::getPassRegistry());
- }
+ IntervalPartition();
// run - Calculate the interval partition for this function
bool runOnFunction(Function &F) override;
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyValueInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyValueInfo.h
index 570a5044f6f8..74e8f5072037 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyValueInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyValueInfo.h
@@ -144,9 +144,7 @@ class LazyValueInfoWrapperPass : public FunctionPass {
void operator=(const LazyValueInfoWrapperPass&) = delete;
public:
static char ID;
- LazyValueInfoWrapperPass() : FunctionPass(ID) {
- initializeLazyValueInfoWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ LazyValueInfoWrapperPass();
~LazyValueInfoWrapperPass() override {
assert(!Info.PImpl && "releaseMemory not called");
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
index e33b8f4129f3..0770093bcd48 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -16,20 +16,18 @@
#define LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H
#include "llvm/ADT/DenseSet.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Pass.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Pass.h"
namespace llvm {
class Value;
+class Function;
class GPUDivergenceAnalysis;
class LegacyDivergenceAnalysis : public FunctionPass {
public:
static char ID;
- LegacyDivergenceAnalysis() : FunctionPass(ID) {
- initializeLegacyDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
- }
+ LegacyDivergenceAnalysis();
void getAnalysisUsage(AnalysisUsage &AU) const override;
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 9e9aaa32c64f..7f8639ac90d1 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -175,8 +175,8 @@ public:
};
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
- : PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeRegisterWidth(-1U),
- FoundNonConstantDistanceDependence(false),
+ : PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeDepDistBytes(0),
+ MaxSafeRegisterWidth(-1U), FoundNonConstantDistanceDependence(false),
Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {}
/// Register the location (instructions are given increasing numbers)
@@ -724,9 +724,7 @@ class LoopAccessLegacyAnalysis : public FunctionPass {
public:
static char ID;
- LoopAccessLegacyAnalysis() : FunctionPass(ID) {
- initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
- }
+ LoopAccessLegacyAnalysis();
bool runOnFunction(Function &F) override;
@@ -750,11 +748,11 @@ private:
DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;
// The used analysis passes.
- ScalarEvolution *SE;
- const TargetLibraryInfo *TLI;
- AliasAnalysis *AA;
- DominatorTree *DT;
- LoopInfo *LI;
+ ScalarEvolution *SE = nullptr;
+ const TargetLibraryInfo *TLI = nullptr;
+ AliasAnalysis *AA = nullptr;
+ DominatorTree *DT = nullptr;
+ LoopInfo *LI = nullptr;
};
/// This analysis provides dependence information for the memory
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
index abf3863b0601..a01045124c7b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
@@ -208,7 +208,7 @@ public:
bool isLoopExiting(const BlockT *BB) const {
assert(!isInvalid() && "Loop not in a valid state!");
assert(contains(BB) && "Exiting block must be part of the loop");
- for (const auto &Succ : children<const BlockT *>(BB)) {
+ for (const auto *Succ : children<const BlockT *>(BB)) {
if (!contains(Succ))
return true;
}
@@ -756,6 +756,17 @@ public:
/// - guarded by a loop guard branch.
bool isGuarded() const { return (getLoopGuardBranch() != nullptr); }
+ /// Return true if the loop is in rotated form.
+ ///
+ /// This does not check if the loop was rotated by loop rotation; instead it
+ /// only checks if the loop is in rotated form (has a valid latch that exits
+ /// the loop).
+ bool isRotatedForm() const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ BasicBlock *Latch = getLoopLatch();
+ return Latch && isLoopExiting(Latch);
+ }
+
/// Return true if the loop induction variable starts at zero and increments
/// by one each time through the loop.
bool isCanonical(ScalarEvolution &SE) const;
@@ -1211,9 +1222,7 @@ class LoopInfoWrapperPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- LoopInfoWrapperPass() : FunctionPass(ID) {
- initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ LoopInfoWrapperPass();
LoopInfo &getLoopInfo() { return LI; }
const LoopInfo &getLoopInfo() const { return LI; }
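Sketch of the new predicate in use (L is an assumed Loop*); note that it inspects only the current shape of the loop, not whether a rotation pass ran:

  if (L->isRotatedForm()) {
    // The latch is an exiting block, i.e. the loop test sits at the bottom.
    BasicBlock *ExitingLatch = L->getLoopLatch();
    (void)ExitingLatch;
  }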
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfoImpl.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfoImpl.h
index 8b11e848a195..99f192a59215 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -35,7 +35,7 @@ void LoopBase<BlockT, LoopT>::getExitingBlocks(
SmallVectorImpl<BlockT *> &ExitingBlocks) const {
assert(!isInvalid() && "Loop not in a valid state!");
for (const auto BB : blocks())
- for (const auto &Succ : children<BlockT *>(BB))
+ for (auto *Succ : children<BlockT *>(BB))
if (!contains(Succ)) {
// Not in current loop? It must be an exit block.
ExitingBlocks.push_back(BB);
@@ -63,7 +63,7 @@ void LoopBase<BlockT, LoopT>::getExitBlocks(
SmallVectorImpl<BlockT *> &ExitBlocks) const {
assert(!isInvalid() && "Loop not in a valid state!");
for (const auto BB : blocks())
- for (const auto &Succ : children<BlockT *>(BB))
+ for (auto *Succ : children<BlockT *>(BB))
if (!contains(Succ))
// Not in current loop? It must be an exit block.
ExitBlocks.push_back(Succ);
@@ -142,7 +142,7 @@ void LoopBase<BlockT, LoopT>::getExitEdges(
SmallVectorImpl<Edge> &ExitEdges) const {
assert(!isInvalid() && "Loop not in a valid state!");
for (const auto BB : blocks())
- for (const auto &Succ : children<BlockT *>(BB))
+ for (auto *Succ : children<BlockT *>(BB))
if (!contains(Succ))
// Not in current loop? It must be an exit block.
ExitEdges.emplace_back(BB, Succ);
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopPass.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopPass.h
index 9215ab34ec6d..04fed15f15f9 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopPass.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopPass.h
@@ -162,9 +162,7 @@ private:
// pass manager calls lcssa verification for the current loop.
struct LCSSAVerificationPass : public FunctionPass {
static char ID;
- LCSSAVerificationPass() : FunctionPass(ID) {
- initializeLCSSAVerificationPassPass(*PassRegistry::getPassRegistry());
- }
+ LCSSAVerificationPass();
bool runOnFunction(Function &F) override { return false; }
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSA.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSA.h
index e89bf26a7234..9b393c9cdaa3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MemorySSA.h
@@ -95,6 +95,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -785,7 +786,7 @@ public:
/// Used in various insertion functions to specify whether we are talking
/// about the beginning or end of a block.
- enum InsertionPlace { Beginning, End };
+ enum InsertionPlace { Beginning, End, BeforeTerminator };
protected:
// Used by Memory SSA annotator, dumpers, and wrapper pass
@@ -793,11 +794,9 @@ protected:
friend class MemorySSAPrinterLegacyPass;
friend class MemorySSAUpdater;
- void verifyPrevDefInPhis(Function &F) const;
- void verifyDefUses(Function &F) const;
- void verifyDomination(Function &F) const;
- void verifyOrdering(Function &F) const;
+ void verifyOrderingDominationAndDefUses(Function &F) const;
void verifyDominationNumbers(const Function &F) const;
+ void verifyPrevDefInPhis(Function &F) const;
// This is used by the use optimizer and updater.
AccessList *getWritableBlockAccesses(const BasicBlock *BB) const {
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MustExecute.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MustExecute.h
index 87cf9f85c7f1..d88cdf40ed87 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/MustExecute.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MustExecute.h
@@ -33,8 +33,13 @@
namespace llvm {
+namespace {
+template <typename T> using GetterTy = std::function<T *(const Function &F)>;
+}
+
class Instruction;
class DominatorTree;
+class PostDominatorTree;
class Loop;
/// Captures loop safety information.
@@ -374,8 +379,14 @@ struct MustBeExecutedContextExplorer {
/// \param ExploreInterBlock Flag to indicate if instructions in blocks
/// other than the parent of PP should be
/// explored.
- MustBeExecutedContextExplorer(bool ExploreInterBlock)
- : ExploreInterBlock(ExploreInterBlock), EndIterator(*this, nullptr) {}
+ MustBeExecutedContextExplorer(
+ bool ExploreInterBlock,
+ GetterTy<const LoopInfo> LIGetter =
+ [](const Function &) { return nullptr; },
+ GetterTy<const PostDominatorTree> PDTGetter =
+ [](const Function &) { return nullptr; })
+ : ExploreInterBlock(ExploreInterBlock), LIGetter(LIGetter),
+ PDTGetter(PDTGetter), EndIterator(*this, nullptr) {}
/// Clean up the dynamically allocated iterators.
~MustBeExecutedContextExplorer() {
@@ -420,6 +431,30 @@ struct MustBeExecutedContextExplorer {
}
///}
+ /// Helper to look for \p I in the context of \p PP.
+ ///
+ /// The context is expanded until \p I is found or no more expansion is
+ /// possible.
+ ///
+ /// \returns True iff \p I was found.
+ bool findInContextOf(const Instruction *I, const Instruction *PP) {
+ auto EIt = begin(PP), EEnd = end(PP);
+ return findInContextOf(I, EIt, EEnd);
+ }
+
+ /// Helper to look for \p I in the context defined by \p EIt and \p EEnd.
+ ///
+ /// The context is expanded until \p I is found or no more expansion is
+ /// possible.
+ ///
+ /// \returns True iff \p I was found.
+ bool findInContextOf(const Instruction *I, iterator &EIt, iterator &EEnd) {
+ bool Found = EIt.count(I);
+ while (!Found && EIt != EEnd)
+ Found = (++EIt).getCurrentInst() == I;
+ return Found;
+ }
+
/// Return the next instruction that is guaranteed to be executed after \p PP.
///
/// \param It The iterator that is used to traverse the must be
@@ -430,6 +465,9 @@ struct MustBeExecutedContextExplorer {
getMustBeExecutedNextInstruction(MustBeExecutedIterator &It,
const Instruction *PP);
+ /// Find the next join point from \p InitBB in forward direction.
+ const BasicBlock *findForwardJoinPoint(const BasicBlock *InitBB);
+
/// Parameters that limit the performed exploration. See the constructor for
/// their meaning.
///{
@@ -437,6 +475,19 @@ struct MustBeExecutedContextExplorer {
///}
private:
+ /// Getters for common CFG analyses: LoopInfo, DominatorTree, and
+ /// PostDominatorTree.
+ ///{
+ GetterTy<const LoopInfo> LIGetter;
+ GetterTy<const PostDominatorTree> PDTGetter;
+ ///}
+
+ /// Map to cache isGuaranteedToTransferExecutionToSuccessor results.
+ DenseMap<const BasicBlock *, Optional<bool>> BlockTransferMap;
+
+ /// Map to cache containsIrreducibleCFG results.
+ DenseMap<const Function*, Optional<bool>> IrreducibleControlMap;
+
/// Map from instructions to associated must be executed iterators.
DenseMap<const Instruction *, MustBeExecutedIterator *>
InstructionIteratorMap;
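A minimal sketch of the new lookup helper with the defaulted analysis getters (I and PP are assumed Instruction pointers):

  MustBeExecutedContextExplorer Explorer(/* ExploreInterBlock */ true);
  if (Explorer.findInContextOf(I, PP)) {
    // I is guaranteed to execute whenever PP executes.
  }

Passing real LoopInfo/PostDominatorTree getters instead of the null defaults presumably enables the wider inter-block expansion that findForwardJoinPoint implements.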
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/PhiValues.h b/contrib/llvm-project/llvm/include/llvm/Analysis/PhiValues.h
index 124fa2191694..ee6eec85f198 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/PhiValues.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/PhiValues.h
@@ -108,7 +108,7 @@ private:
/// Process a phi so that its entries in the depth and reachable maps are
/// fully populated.
- void processPhi(const PHINode *PN, SmallVector<const PHINode *, 8> &Stack);
+ void processPhi(const PHINode *PN, SmallVectorImpl<const PHINode *> &Stack);
};
/// The analysis pass which yields a PhiValues
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/PostDominators.h b/contrib/llvm-project/llvm/include/llvm/Analysis/PostDominators.h
index 87d2e0318d0a..801eb3d59673 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/PostDominators.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/PostDominators.h
@@ -34,6 +34,13 @@ public:
/// Handle invalidation explicitly.
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &);
+
+ // Ensure base-class overloads are visible.
+ using Base::dominates;
+
+ /// Return true if \p I1 dominates \p I2. This checks if \p I2 comes before
+ /// \p I1 if they belong to the same basic block.
+ bool dominates(const Instruction *I1, const Instruction *I2) const;
};
/// Analysis pass which computes a \c PostDominatorTree.
@@ -68,9 +75,7 @@ struct PostDominatorTreeWrapperPass : public FunctionPass {
PostDominatorTree DT;
- PostDominatorTreeWrapperPass() : FunctionPass(ID) {
- initializePostDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ PostDominatorTreeWrapperPass();
PostDominatorTree &getPostDomTree() { return DT; }
const PostDominatorTree &getPostDomTree() const { return DT; }
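Sketch of the instruction-level query (PDT is an assumed PostDominatorTree, e.g. from getPostDomTree() above):

  if (PDT.dominates(I1, I2)) {
    // I1 post-dominates I2; when both share a basic block, this means
    // I2 comes before I1.
  }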
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/PtrUseVisitor.h b/contrib/llvm-project/llvm/include/llvm/Analysis/PtrUseVisitor.h
index fbf04c841d30..05bca2226742 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/PtrUseVisitor.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/PtrUseVisitor.h
@@ -222,9 +222,9 @@ public:
// offsets on this pointer.
// FIXME: Support a vector of pointers.
assert(I.getType()->isPointerTy());
- IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(I.getType()));
+ IntegerType *IntIdxTy = cast<IntegerType>(DL.getIndexType(I.getType()));
IsOffsetKnown = true;
- Offset = APInt(IntPtrTy->getBitWidth(), 0);
+ Offset = APInt(IntIdxTy->getBitWidth(), 0);
PI.reset();
// Enqueue the uses of this pointer.
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h
index 9c55f7a5090f..5286f6a220ec 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -435,35 +435,6 @@ public:
}
};
-struct ExitLimitQuery {
- ExitLimitQuery(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates)
- : L(L), ExitingBlock(ExitingBlock), AllowPredicates(AllowPredicates) {}
-
- const Loop *L;
- BasicBlock *ExitingBlock;
- bool AllowPredicates;
-};
-
-template <> struct DenseMapInfo<ExitLimitQuery> {
- static inline ExitLimitQuery getEmptyKey() {
- return ExitLimitQuery(nullptr, nullptr, true);
- }
-
- static inline ExitLimitQuery getTombstoneKey() {
- return ExitLimitQuery(nullptr, nullptr, false);
- }
-
- static unsigned getHashValue(ExitLimitQuery Val) {
- return hash_combine(hash_combine(Val.L, Val.ExitingBlock),
- Val.AllowPredicates);
- }
-
- static bool isEqual(ExitLimitQuery LHS, ExitLimitQuery RHS) {
- return LHS.L == RHS.L && LHS.ExitingBlock == RHS.ExitingBlock &&
- LHS.AllowPredicates == RHS.AllowPredicates;
- }
-};
-
/// The main scalar evolution driver. Because client code (intentionally)
/// can't do much with the SCEV objects directly, they must ask this class
/// for services.
@@ -748,13 +719,26 @@ public:
unsigned getSmallConstantTripMultiple(const Loop *L,
BasicBlock *ExitingBlock);
+
+ /// The terms "backedge taken count" and "exit count" are used
+ /// interchangeably to refer to the number of times the backedge of a loop
+ /// has executed before the loop is exited.
+ enum ExitCountKind {
+ /// An expression exactly describing the number of times the backedge has
+ /// executed when a loop is exited.
+ Exact,
+ /// A constant which provides an upper bound on the exact trip count.
+ ConstantMaximum,
+ };
+
/// Return the number of times the backedge executes before the given exit
/// would be taken; if not exactly computable, return SCEVCouldNotCompute.
/// For a single-exit loop, this value is equivalent to the result of
/// getBackedgeTakenCount. The loop is guaranteed to exit (via *some* exit)
/// before the backedge is executed (ExitCount + 1) times. Note that there
/// is no guarantee about *which* exit is taken on the exiting iteration.
- const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock);
+ const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock,
+ ExitCountKind Kind = Exact);
/// If the specified loop has a predictable backedge-taken count, return it,
/// otherwise return a SCEVCouldNotCompute object. The backedge-taken count is
@@ -766,7 +750,7 @@ public:
/// Note that it is not valid to call this method on a loop without a
/// loop-invariant backedge-taken count (see
/// hasLoopInvariantBackedgeTakenCount).
- const SCEV *getBackedgeTakenCount(const Loop *L);
+ const SCEV *getBackedgeTakenCount(const Loop *L, ExitCountKind Kind = Exact);
/// Similar to getBackedgeTakenCount, except it will add a set of
/// SCEV predicates to Predicates that are required to be true in order for
@@ -779,7 +763,9 @@ public:
/// to (i.e. a "conservative over-approximation") of the value returend by
/// getBackedgeTakenCount. If such a value cannot be computed, it returns the
/// SCEVCouldNotCompute object.
- const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L);
+ const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenCount(L, ConstantMaximum);
+ }
/// Return true if the backedge taken count is either the value returned by
/// getConstantMaxBackedgeTakenCount or zero.
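Sketch of the per-exit queries that ExitCountKind enables (SE, L, and ExitingBB assumed in scope):

  const SCEV *Exact = SE.getExitCount(L, ExitingBB);
  const SCEV *Max =
      SE.getExitCount(L, ExitingBB, ScalarEvolution::ConstantMaximum);
  if (!isa<SCEVCouldNotCompute>(Exact)) {
    // The exact backedge-taken count for this exit is computable.
  }
  (void)Max;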
@@ -1227,6 +1213,9 @@ private:
Predicates.insert(P);
}
+ /// Construct either an exact exit limit from a constant, or an unknown
+ /// one from a SCEVCouldNotCompute. No other types of SCEVs are allowed
+ /// as arguments and asserts enforce that internally.
/*implicit*/ ExitLimit(const SCEV *E);
ExitLimit(
@@ -1258,13 +1247,15 @@ private:
struct ExitNotTakenInfo {
PoisoningVH<BasicBlock> ExitingBlock;
const SCEV *ExactNotTaken;
+ const SCEV *MaxNotTaken;
std::unique_ptr<SCEVUnionPredicate> Predicate;
explicit ExitNotTakenInfo(PoisoningVH<BasicBlock> ExitingBlock,
const SCEV *ExactNotTaken,
+ const SCEV *MaxNotTaken,
std::unique_ptr<SCEVUnionPredicate> Predicate)
- : ExitingBlock(ExitingBlock), ExactNotTaken(ExactNotTaken),
- Predicate(std::move(Predicate)) {}
+ : ExitingBlock(ExitingBlock), ExactNotTaken(ExactNotTaken),
+ MaxNotTaken(ExactNotTaken), Predicate(std::move(Predicate)) {}
bool hasAlwaysTruePredicate() const {
return !Predicate || Predicate->isAlwaysTrue();
@@ -1347,6 +1338,9 @@ private:
/// Get the max backedge taken count for the loop.
const SCEV *getMax(ScalarEvolution *SE) const;
+ /// Get the max backedge taken count for the particular loop exit.
+ const SCEV *getMax(BasicBlock *ExitingBlock, ScalarEvolution *SE) const;
+
/// Return true if the number of times this backedge is taken is either the
/// value returned by getMax or zero.
bool isMaxOrZero(ScalarEvolution *SE) const;
@@ -1928,6 +1922,13 @@ public:
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM);
};
+/// Verifier pass for the \c ScalarEvolutionAnalysis results.
+class ScalarEvolutionVerifierPass
+ : public PassInfoMixin<ScalarEvolutionVerifierPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
/// Printer pass for the \c ScalarEvolutionAnalysis results.
class ScalarEvolutionPrinterPass
: public PassInfoMixin<ScalarEvolutionPrinterPass> {
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index d4b223863c54..1bd9db756bb3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -9,6 +9,7 @@
#ifndef LLVM_ANALYSIS_TARGETLIBRARYINFO_H
#define LLVM_ANALYSIS_TARGETLIBRARYINFO_H
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Triple.h"
@@ -197,6 +198,10 @@ public:
/// Returns the size of the wchar_t type in bytes or 0 if the size is unknown.
/// This queries the 'wchar_size' metadata.
unsigned getWCharSize(const Module &M) const;
+
+ /// Returns the largest vectorization factor used in the list of
+ /// vector functions.
+ unsigned getWidestVF(StringRef ScalarF) const;
};
/// Provides information about what library functions are available for
@@ -208,20 +213,50 @@ class TargetLibraryInfo {
friend class TargetLibraryAnalysis;
friend class TargetLibraryInfoWrapperPass;
+ /// The global (module level) TLI info.
const TargetLibraryInfoImpl *Impl;
+ /// Support for -fno-builtin* options as function attributes; overrides
+ /// information in the global TargetLibraryInfoImpl.
+ BitVector OverrideAsUnavailable;
+
public:
- explicit TargetLibraryInfo(const TargetLibraryInfoImpl &Impl) : Impl(&Impl) {}
+ explicit TargetLibraryInfo(const TargetLibraryInfoImpl &Impl,
+ Optional<const Function *> F = None)
+ : Impl(&Impl), OverrideAsUnavailable(NumLibFuncs) {
+ if (!F)
+ return;
+ if ((*F)->hasFnAttribute("no-builtins"))
+ disableAllFunctions();
+ else {
+ // Disable individual libc/libm calls in TargetLibraryInfo.
+ LibFunc LF;
+ AttributeSet FnAttrs = (*F)->getAttributes().getFnAttributes();
+ for (const Attribute &Attr : FnAttrs) {
+ if (!Attr.isStringAttribute())
+ continue;
+ auto AttrStr = Attr.getKindAsString();
+ if (!AttrStr.consume_front("no-builtin-"))
+ continue;
+ if (getLibFunc(AttrStr, LF))
+ setUnavailable(LF);
+ }
+ }
+ }
// Provide value semantics.
- TargetLibraryInfo(const TargetLibraryInfo &TLI) : Impl(TLI.Impl) {}
- TargetLibraryInfo(TargetLibraryInfo &&TLI) : Impl(TLI.Impl) {}
+ TargetLibraryInfo(const TargetLibraryInfo &TLI)
+ : Impl(TLI.Impl), OverrideAsUnavailable(TLI.OverrideAsUnavailable) {}
+ TargetLibraryInfo(TargetLibraryInfo &&TLI)
+ : Impl(TLI.Impl), OverrideAsUnavailable(TLI.OverrideAsUnavailable) {}
TargetLibraryInfo &operator=(const TargetLibraryInfo &TLI) {
Impl = TLI.Impl;
+ OverrideAsUnavailable = TLI.OverrideAsUnavailable;
return *this;
}
TargetLibraryInfo &operator=(TargetLibraryInfo &&TLI) {
Impl = TLI.Impl;
+ OverrideAsUnavailable = TLI.OverrideAsUnavailable;
return *this;
}
@@ -244,9 +279,27 @@ public:
getLibFunc(*(CS.getCalledFunction()), F);
}
+ /// Disables all builtins.
+ ///
+ /// This can be used for options like -fno-builtin.
+ void disableAllFunctions() LLVM_ATTRIBUTE_UNUSED {
+ OverrideAsUnavailable.set();
+ }
+
+ /// Forces a function to be marked as unavailable.
+ void setUnavailable(LibFunc F) LLVM_ATTRIBUTE_UNUSED {
+ OverrideAsUnavailable.set(F);
+ }
+
+ TargetLibraryInfoImpl::AvailabilityState getState(LibFunc F) const {
+ if (OverrideAsUnavailable[F])
+ return TargetLibraryInfoImpl::Unavailable;
+ return Impl->getState(F);
+ }
+
/// Tests whether a library function is available.
bool has(LibFunc F) const {
- return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable;
+ return getState(F) != TargetLibraryInfoImpl::Unavailable;
}
bool isFunctionVectorizable(StringRef F, unsigned VF) const {
return Impl->isFunctionVectorizable(F, VF);
@@ -261,7 +314,7 @@ public:
/// Tests if the function is both available and a candidate for optimized code
/// generation.
bool hasOptimizedCodeGen(LibFunc F) const {
- if (Impl->getState(F) == TargetLibraryInfoImpl::Unavailable)
+ if (getState(F) == TargetLibraryInfoImpl::Unavailable)
return false;
switch (F) {
default: break;
@@ -291,7 +344,7 @@ public:
}
StringRef getName(LibFunc F) const {
- auto State = Impl->getState(F);
+ auto State = getState(F);
if (State == TargetLibraryInfoImpl::Unavailable)
return StringRef();
if (State == TargetLibraryInfoImpl::StandardName)
@@ -337,6 +390,12 @@ public:
FunctionAnalysisManager::Invalidator &) {
return false;
}
+
+ /// Returns the largest vectorization factor used in the list of
+ /// vector functions.
+ unsigned getWidestVF(StringRef ScalarF) const {
+ return Impl->getWidestVF(ScalarF);
+ }
};
/// Analysis pass providing the \c TargetLibraryInfo.
@@ -353,29 +412,24 @@ public:
/// module.
TargetLibraryAnalysis() {}
- /// Construct a library analysis with preset info.
+ /// Construct a library analysis with baseline Module-level info.
///
- /// This will directly copy the preset info into the result without
- /// consulting the module's triple.
- TargetLibraryAnalysis(TargetLibraryInfoImpl PresetInfoImpl)
- : PresetInfoImpl(std::move(PresetInfoImpl)) {}
+ /// This will be supplemented with Function-specific info in the Result.
+ TargetLibraryAnalysis(TargetLibraryInfoImpl BaselineInfoImpl)
+ : BaselineInfoImpl(std::move(BaselineInfoImpl)) {}
- TargetLibraryInfo run(Function &F, FunctionAnalysisManager &);
+ TargetLibraryInfo run(const Function &F, FunctionAnalysisManager &);
private:
friend AnalysisInfoMixin<TargetLibraryAnalysis>;
static AnalysisKey Key;
- Optional<TargetLibraryInfoImpl> PresetInfoImpl;
-
- StringMap<std::unique_ptr<TargetLibraryInfoImpl>> Impls;
-
- TargetLibraryInfoImpl &lookupInfoImpl(const Triple &T);
+ Optional<TargetLibraryInfoImpl> BaselineInfoImpl;
};
class TargetLibraryInfoWrapperPass : public ImmutablePass {
- TargetLibraryInfoImpl TLIImpl;
- TargetLibraryInfo TLI;
+ TargetLibraryAnalysis TLA;
+ Optional<TargetLibraryInfo> TLI;
virtual void anchor();
@@ -385,12 +439,10 @@ public:
explicit TargetLibraryInfoWrapperPass(const Triple &T);
explicit TargetLibraryInfoWrapperPass(const TargetLibraryInfoImpl &TLI);
- TargetLibraryInfo &getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) {
- return TLI;
- }
- const TargetLibraryInfo &
- getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) const {
- return TLI;
+ TargetLibraryInfo &getTLI(const Function &F) {
+ FunctionAnalysisManager DummyFAM;
+ TLI = TLA.run(F, DummyFAM);
+ return *TLI;
}
};
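Sketch of the per-function construction this enables (Impl is an assumed module-level TargetLibraryInfoImpl and F a Function that may carry no-builtin attributes):

  TargetLibraryInfo TLI(Impl, &F);
  if (!TLI.has(LibFunc_memcpy)) {
    // memcpy is unavailable, either module-wide or because F carries
    // "no-builtins" or "no-builtin-memcpy".
  }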
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
index d6fa88411654..5382d76813a7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -36,16 +36,19 @@
namespace llvm {
namespace Intrinsic {
-enum ID : unsigned;
+typedef unsigned ID;
}
class AssumptionCache;
+class BlockFrequencyInfo;
class BranchInst;
class Function;
class GlobalValue;
class IntrinsicInst;
class LoadInst;
+class LoopAccessInfo;
class Loop;
+class ProfileSummaryInfo;
class SCEV;
class ScalarEvolution;
class StoreInst;
@@ -297,7 +300,9 @@ public:
/// \p JTSize Set a jump table size only when \p SI is suitable for a jump
/// table.
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
- unsigned &JTSize) const;
+ unsigned &JTSize,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) const;
/// Estimate the cost of a given IR user when lowered.
///
@@ -514,6 +519,13 @@ public:
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) const;
+ /// Query the target whether it would be preferred to create a predicated vector
+ /// loop, which can avoid the need to emit a scalar epilogue loop.
+ bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+ AssumptionCache &AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI) const;
+
/// @}
/// \name Scalar Target Information
@@ -585,9 +597,9 @@ public:
bool isLegalNTLoad(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked scatter.
- bool isLegalMaskedScatter(Type *DataType) const;
+ bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) const;
/// Return true if the target supports masked gather.
- bool isLegalMaskedGather(Type *DataType) const;
+ bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) const;
/// Return true if the target supports masked compress store.
bool isLegalMaskedCompressStore(Type *DataType) const;
@@ -742,10 +754,10 @@ public:
/// Return the expected cost of materialization for the given integer
/// immediate of the specified type for a given instruction. The cost can be
/// zero if the immediate can be folded into the specified instruction.
- int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) const;
- int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const;
+ int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) const;
+ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) const;
/// Return the expected cost for the given integer when optimising
/// for size. This is different than the other integer immediate cost
@@ -889,12 +901,15 @@ public:
/// \p Args is an optional argument which holds the instruction operands
/// values so the TTI can analyze those values searching for special
/// cases or optimizations based on those values.
+ /// \p CxtI is the optional original context instruction, if one exists, to
+ /// provide even more information.
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
OperandValueKind Opd2Info = OK_AnyValue,
OperandValueProperties Opd1PropInfo = OP_None,
OperandValueProperties Opd2PropInfo = OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr) const;
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
/// The index and subtype parameters are used by the subvector insertion and
@@ -930,8 +945,9 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
/// \return The cost of Load and Store instructions.
- int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr) const;
+ int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
+ unsigned AddressSpace,
+ const Instruction *I = nullptr) const;
/// \return The cost of masked Load and Store instructions.
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@@ -1176,7 +1192,9 @@ public:
const User *U) = 0;
virtual int getMemcpyCost(const Instruction *I) = 0;
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
- unsigned &JTSize) = 0;
+ unsigned &JTSize,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) = 0;
virtual int
getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
virtual bool hasBranchDivergence() = 0;
@@ -1194,6 +1212,12 @@ public:
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) = 0;
+ virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+ ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
@@ -1213,8 +1237,8 @@ public:
virtual bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) = 0;
virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
- virtual bool isLegalMaskedScatter(Type *DataType) = 0;
- virtual bool isLegalMaskedGather(Type *DataType) = 0;
+ virtual bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) = 0;
+ virtual bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
@@ -1254,10 +1278,10 @@ public:
virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
- virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) = 0;
- virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) = 0;
+ virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) = 0;
+ virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) = 0;
virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0;
virtual const char* getRegisterClassName(unsigned ClassID) const = 0;
@@ -1288,12 +1312,11 @@ public:
virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
- virtual unsigned
- getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
- OperandValueKind Opd2Info,
- OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo,
- ArrayRef<const Value *> Args) = 0;
+ virtual unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+ const Instruction *CxtI = nullptr) = 0;
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) = 0;
virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -1305,7 +1328,7 @@ public:
Type *CondTy, const Instruction *I) = 0;
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0;
- virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I) = 0;
virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
@@ -1464,6 +1487,12 @@ public:
HardwareLoopInfo &HWLoopInfo) override {
return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
+ bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+ AssumptionCache &AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI) override {
+ return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
+ }
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
}
@@ -1508,11 +1537,11 @@ public:
bool isLegalNTLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalNTLoad(DataType, Alignment);
}
- bool isLegalMaskedScatter(Type *DataType) override {
- return Impl.isLegalMaskedScatter(DataType);
+ bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) override {
+ return Impl.isLegalMaskedScatter(DataType, Alignment);
}
- bool isLegalMaskedGather(Type *DataType) override {
- return Impl.isLegalMaskedGather(DataType);
+ bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) override {
+ return Impl.isLegalMaskedGather(DataType, Alignment);
}
bool isLegalMaskedCompressStore(Type *DataType) override {
return Impl.isLegalMaskedCompressStore(DataType);
@@ -1609,13 +1638,13 @@ public:
int getIntImmCost(const APInt &Imm, Type *Ty) override {
return Impl.getIntImmCost(Imm, Ty);
}
- int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) override {
- return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
+ int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) override {
+ return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty);
}
- int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) override {
- return Impl.getIntImmCost(IID, Idx, Imm, Ty);
+ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) override {
+ return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty);
}
unsigned getNumberOfRegisters(unsigned ClassID) const override {
return Impl.getNumberOfRegisters(ClassID);
@@ -1677,17 +1706,20 @@ public:
return Impl.getMaxInterleaveFactor(VF);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
- unsigned &JTSize) override {
- return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
- }
- unsigned
- getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
- OperandValueKind Opd2Info,
- OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo,
- ArrayRef<const Value *> Args) override {
+ unsigned &JTSize,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) override {
+ return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
+ }
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info,
+ OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI = nullptr) override {
return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo, Args);
+ Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) override {
@@ -1711,7 +1743,7 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
- int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I) override {
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
}
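Sketch of the adjusted memory cost query; MaybeAlign lets a caller state unknown alignment explicitly (TTI and SrcTy assumed in scope):

  int Cost = TTI.getMemoryOpCost(Instruction::Load, SrcTy, MaybeAlign(),
                                 /* AddressSpace */ 0);
  // MaybeAlign() means "alignment unknown"; MaybeAlign(4) would assert a
  // known 4-byte alignment.
  (void)Cost;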
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index a431fa0d458b..ac0609e29270 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -114,7 +114,11 @@ public:
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
- unsigned &JTSize) {
+ unsigned &JTSize,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ (void)PSI;
+ (void)BFI;
JTSize = 0;
return SI.getNumCases();
}
@@ -209,6 +213,13 @@ public:
return false;
}
+ bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+ AssumptionCache &AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI) const {
+ return false;
+ }
+
void getUnrollingPreferences(Loop *, ScalarEvolution &,
TTI::UnrollingPreferences &) {}
@@ -261,9 +272,13 @@ public:
return Alignment >= DataSize && isPowerOf2_32(DataSize);
}
- bool isLegalMaskedScatter(Type *DataType) { return false; }
+ bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
+ return false;
+ }
- bool isLegalMaskedGather(Type *DataType) { return false; }
+ bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) {
+ return false;
+ }
bool isLegalMaskedCompressStore(Type *DataType) { return false; }
@@ -344,13 +359,13 @@ public:
unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+ unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
return TTI::TCC_Free;
}
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+ unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
return TTI::TCC_Free;
}
@@ -419,7 +434,8 @@ public:
TTI::OperandValueKind Opd2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo,
- ArrayRef<const Value *> Args) {
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI = nullptr) {
return 1;
}
@@ -447,7 +463,7 @@ public:
return 1;
}
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I) {
return 1;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/Local.h b/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/Local.h
index a63bcec9bc41..ca505960cbeb 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/Local.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/Utils/Local.h
@@ -14,6 +14,7 @@
#ifndef LLVM_ANALYSIS_UTILS_LOCAL_H
#define LLVM_ANALYSIS_UTILS_LOCAL_H
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -28,15 +29,15 @@ template <typename IRBuilderTy>
Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
bool NoAssumptions = false) {
GEPOperator *GEPOp = cast<GEPOperator>(GEP);
- Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
- Value *Result = Constant::getNullValue(IntPtrTy);
+ Type *IntIdxTy = DL.getIndexType(GEP->getType());
+ Value *Result = Constant::getNullValue(IntIdxTy);
// If the GEP is inbounds, we know that none of the addressing operations will
// overflow in a signed sense.
bool isInBounds = GEPOp->isInBounds() && !NoAssumptions;
// Build a mask for high order bits.
- unsigned IntPtrWidth = IntPtrTy->getScalarType()->getIntegerBitWidth();
+ unsigned IntPtrWidth = IntIdxTy->getScalarType()->getIntegerBitWidth();
uint64_t PtrSizeMask =
std::numeric_limits<uint64_t>::max() >> (64 - IntPtrWidth);
@@ -55,17 +56,17 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
Size = DL.getStructLayout(STy)->getElementOffset(OpValue);
if (Size)
- Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size),
+ Result = Builder->CreateAdd(Result, ConstantInt::get(IntIdxTy, Size),
GEP->getName()+".offs");
continue;
}
// Splat the constant if needed.
- if (IntPtrTy->isVectorTy() && !OpC->getType()->isVectorTy())
- OpC = ConstantVector::getSplat(IntPtrTy->getVectorNumElements(), OpC);
+ if (IntIdxTy->isVectorTy() && !OpC->getType()->isVectorTy())
+ OpC = ConstantVector::getSplat(IntIdxTy->getVectorNumElements(), OpC);
- Constant *Scale = ConstantInt::get(IntPtrTy, Size);
- Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+ Constant *Scale = ConstantInt::get(IntIdxTy, Size);
+ Constant *OC = ConstantExpr::getIntegerCast(OpC, IntIdxTy, true /*SExt*/);
Scale =
ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/);
// Emit an add instruction.
@@ -74,15 +75,15 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
}
// Splat the index if needed.
- if (IntPtrTy->isVectorTy() && !Op->getType()->isVectorTy())
- Op = Builder->CreateVectorSplat(IntPtrTy->getVectorNumElements(), Op);
+ if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy())
+ Op = Builder->CreateVectorSplat(IntIdxTy->getVectorNumElements(), Op);
// Convert to correct type.
- if (Op->getType() != IntPtrTy)
- Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
+ if (Op->getType() != IntIdxTy)
+ Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName()+".c");
if (Size != 1) {
// We'll let instcombine(mul) convert this to a shl if possible.
- Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
+ Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size),
GEP->getName() + ".idx", false /*NUW*/,
isInBounds /*NSW*/);
}
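Usage sketch (Builder, DL, and GEP assumed to be an IRBuilder, DataLayout, and GEP user in scope):

  Value *Offset = EmitGEPOffset(&Builder, DL, GEP);
  // After this change the offset is computed in DL.getIndexType(...) of the
  // pointer, not DL.getIntPtrType(...), which matters on targets whose index
  // width differs from their pointer width.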
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h
index 33b064fcf9d2..89cf9abdc8ba 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ValueTracking.h
@@ -203,6 +203,12 @@ class Value;
/// x < -0 --> false
bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
+ /// Return true if the floating-point scalar value is not an infinity or if
+ /// the floating-point vector value has no infinities. Return false if a value
+ /// could ever be infinity.
+ bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI,
+ unsigned Depth = 0);
+
/// Return true if the floating-point scalar value is not a NaN or if the
/// floating-point vector value has no NaN elements. Return false if a value
/// could ever be NaN.
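Sketch combining the new query with the existing NaN check to establish a fully finite value (V and TLI assumed in scope):

  if (isKnownNeverInfinity(V, TLI) && isKnownNeverNaN(V, TLI)) {
    // V is neither NaN nor +/-infinity in any scalar element.
  }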
@@ -226,9 +232,9 @@ class Value;
/// return undef.
Value *isBytewiseValue(Value *V, const DataLayout &DL);
- /// Given an aggregrate and an sequence of indices, see if the scalar value
+ /// Given an aggregate and a sequence of indices, see if the scalar value
/// indexed is already around as a register, for example if it were inserted
- /// directly into the aggregrate.
+ /// directly into the aggregate.
///
/// If InsertBefore is not null, this function will duplicate (modified)
/// insertvalues when a part of a nested struct is extracted.
@@ -561,6 +567,10 @@ class Value;
/// the parent of I.
bool programUndefinedIfFullPoison(const Instruction *PoisonI);
+ /// Return true if this function can prove that V is never an undef or
+ /// poison value.
+ bool isGuaranteedNotToBeUndefOrPoison(const Value *V);
+
/// Specific patterns of select instructions we can match.
enum SelectPatternFlavor {
SPF_UNKNOWN = 0,
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def b/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
index 4c9206266d9a..86bec0be7546 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
@@ -8,7 +8,14 @@
// This .def file will create mappings from scalar math functions to vector
// functions along with their vectorization factor. The current support includes
-// such mappings for Accelerate framework, MASS vector library, and SVML library.
+// such mappings for Accelerate framework, MASS vector library, and SVML library.
+// This .def file also allows creating an array of vector functions supported in
+// the specified framework or library.
+
+#if defined(TLI_DEFINE_MASSV_VECFUNCS_NAMES)
+#define TLI_DEFINE_MASSV_VECFUNCS
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) VEC,
+#endif
#if !(defined(TLI_DEFINE_VECFUNC))
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
@@ -247,4 +254,4 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
#undef TLI_DEFINE_ACCELERATE_VECFUNCS
#undef TLI_DEFINE_MASSV_VECFUNCS
#undef TLI_DEFINE_SVML_VECFUNCS
-
+#undef TLI_DEFINE_MASSV_VECFUNCS_NAMES
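Sketch of the new .def mode: defining TLI_DEFINE_MASSV_VECFUNCS_NAMES before inclusion makes each MASSV entry expand to just its vector-function name, so an array of names can be stamped out (MASSVFuncNames is a hypothetical consumer):

  static const char *const MASSVFuncNames[] = {
  #define TLI_DEFINE_MASSV_VECFUNCS_NAMES
  #include "llvm/Analysis/VecFuncs.def"
  };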
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/VectorUtils.h b/contrib/llvm-project/llvm/include/llvm/Analysis/VectorUtils.h
index 4a61c2bc35c7..0ca69bebc127 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/VectorUtils.h
@@ -14,6 +14,7 @@
#define LLVM_ANALYSIS_VECTORUTILS_H
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CheckedArithmetic.h"
@@ -47,7 +48,9 @@ enum class VFISAKind {
AVX, // x86 AVX
AVX2, // x86 AVX2
AVX512, // x86 AVX512
- Unknown // Unknown ISA
+ LLVM, // LLVM internal ISA for functions that are not
+ // attached to an existing ABI via name mangling.
+ Unknown // Unknown ISA
};
/// Encapsulates information needed to describe a parameter.
@@ -79,13 +82,36 @@ struct VFParameter {
struct VFShape {
unsigned VF; // Vectorization factor.
bool IsScalable; // True if the function is a scalable function.
- VFISAKind ISA; // Instruction Set Architecture.
SmallVector<VFParameter, 8> Parameters; // List of parameter information.
// Comparison operator.
bool operator==(const VFShape &Other) const {
- return std::tie(VF, IsScalable, ISA, Parameters) ==
- std::tie(Other.VF, Other.IsScalable, Other.ISA, Other.Parameters);
+ return std::tie(VF, IsScalable, Parameters) ==
+ std::tie(Other.VF, Other.IsScalable, Other.Parameters);
}
+
+ /// Update the parameter in position P.ParamPos to P.
+ void updateParam(VFParameter P) {
+ assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
+ Parameters[P.ParamPos] = P;
+ assert(hasValidParameterList() && "Invalid parameter list");
+ }
+
+ // Retrieve the basic vectorization shape of the function, where all
+ // parameters are mapped to VFParamKind::Vector with \p EC lanes.
+ // \p HasGlobalPred specifies whether the function takes a Global
+ // Predicate argument.
+ static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
+ SmallVector<VFParameter, 8> Parameters;
+ for (unsigned I = 0; I < CI.arg_size(); ++I)
+ Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
+ if (HasGlobalPred)
+ Parameters.push_back(
+ VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));
+
+ return {EC.Min, EC.Scalable, Parameters};
+ }
+ /// Sanity check on the Parameters in the VFShape.
+ bool hasValidParameterList() const;
};
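A hedged usage sketch of the new helpers, assuming a CallInst *CI with at least one argument (the VFParamKind value and the variable names are illustrative):

  // All-vector shape with 4 fixed lanes and no global predicate.
  VFShape Shape = VFShape::get(*CI, ElementCount(4, /*Scalable=*/false),
                               /*HasGlobalPred=*/false);
  // Remap parameter 0, e.g. to a uniform (invariant scalar) parameter.
  Shape.updateParam(VFParameter({0, VFParamKind::OMP_Uniform}));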
/// Holds the VFShape for a specific scalar to vector function mapping.
@@ -93,15 +119,19 @@ struct VFInfo {
VFShape Shape; // Classification of the vector function.
StringRef ScalarName; // Scalar Function Name.
StringRef VectorName; // Vector Function Name associated to this VFInfo.
+ VFISAKind ISA; // Instruction Set Architecture.
// Comparison operator.
bool operator==(const VFInfo &Other) const {
- return std::tie(Shape, ScalarName, VectorName) ==
- std::tie(Shape, Other.ScalarName, Other.VectorName);
+ return std::tie(Shape, ScalarName, VectorName, ISA) ==
+ std::tie(Shape, Other.ScalarName, Other.VectorName, Other.ISA);
}
};
namespace VFABI {
+/// LLVM Internal VFABI ISA token for vector functions.
+static constexpr char const *_LLVM_ = "_LLVM_";
+
/// Function to construct a VFInfo out of a mangled name in the
/// following format:
///
@@ -121,14 +151,20 @@ namespace VFABI {
/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
///
-///
-///
/// \param MangledName -> input string in the format
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName);
/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);
+
+// Name of the attribute where the variant mappings are stored.
+static constexpr char const *MappingsAttrName = "vector-function-abi-variant";
+
+/// Populates a set of strings representing the Vector Function ABI variants
+/// associated with the CallInst CI.
+void getVectorVariantNames(const CallInst &CI,
+ SmallVectorImpl<std::string> &VariantMappings);
} // end namespace VFABI
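A hedged sketch of the two entry points; the mangled name below follows the AArch64 AdvSIMD convention ('n' ISA, 'N' unmasked, VF 4, two vector parameters), and CI is an assumed call site:

  if (Optional<VFInfo> Info = VFABI::tryDemangleForVFABI("_ZGVnN4vv_sinf"))
    assert(Info->Shape.VF == 4 && Info->ScalarName == "sinf");

  // Collect the variants recorded in the "vector-function-abi-variant"
  // attribute of a call site.
  SmallVector<std::string, 8> VariantMappings;
  VFABI::getVectorVariantNames(*CI, VariantMappings);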
template <typename T> class ArrayRef;
@@ -137,13 +173,12 @@ class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class Loop;
class ScalarEvolution;
-class TargetLibraryInfo;
class TargetTransformInfo;
class Type;
class Value;
namespace Intrinsic {
-enum ID : unsigned;
+typedef unsigned ID;
}
/// Identify if the intrinsic is trivially vectorizable.
@@ -542,13 +577,10 @@ public:
/// formation for predicated accesses, we may be able to relax this limitation
/// in the future once we handle more complicated blocks.
void reset() {
- SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
- // Avoid releasing a pointer twice.
- for (auto &I : InterleaveGroupMap)
- DelSet.insert(I.second);
- for (auto *Ptr : DelSet)
- delete Ptr;
InterleaveGroupMap.clear();
+ for (auto *Ptr : InterleaveGroups)
+ delete Ptr;
+ InterleaveGroups.clear();
RequiresScalarEpilogue = false;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h
index 0fe38a437725..626e0a431e93 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h
@@ -547,7 +547,7 @@ struct PE32Header {
uint32_t AddressOfEntryPoint; // RVA
uint32_t BaseOfCode; // RVA
uint32_t BaseOfData; // RVA
- uint32_t ImageBase;
+ uint64_t ImageBase;
uint32_t SectionAlignment;
uint32_t FileAlignment;
uint16_t MajorOperatingSystemVersion;
@@ -563,10 +563,10 @@ struct PE32Header {
uint16_t Subsystem;
// FIXME: This should be DllCharacteristics to match the COFF spec.
uint16_t DLLCharacteristics;
- uint32_t SizeOfStackReserve;
- uint32_t SizeOfStackCommit;
- uint32_t SizeOfHeapReserve;
- uint32_t SizeOfHeapCommit;
+ uint64_t SizeOfStackReserve;
+ uint64_t SizeOfStackCommit;
+ uint64_t SizeOfHeapReserve;
+ uint64_t SizeOfHeapCommit;
uint32_t LoaderFlags;
// FIXME: This should be NumberOfRvaAndSizes to match the COFF spec.
uint32_t NumberOfRvaAndSize;
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.def b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.def
index 34a7410f7474..3faf3be65032 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -405,7 +405,7 @@ HANDLE_DW_AT(0x3b31, BORLAND_closure, 0, BORLAND)
// LLVM project extensions.
HANDLE_DW_AT(0x3e00, LLVM_include_path, 0, LLVM)
HANDLE_DW_AT(0x3e01, LLVM_config_macros, 0, LLVM)
-HANDLE_DW_AT(0x3e02, LLVM_isysroot, 0, LLVM)
+HANDLE_DW_AT(0x3e02, LLVM_sysroot, 0, LLVM)
HANDLE_DW_AT(0x3e03, LLVM_tag_offset, 0, LLVM)
// Apple extensions.
HANDLE_DW_AT(0x3fe1, APPLE_optimized, 0, APPLE)
@@ -421,6 +421,7 @@ HANDLE_DW_AT(0x3fea, APPLE_property_setter, 0, APPLE)
HANDLE_DW_AT(0x3feb, APPLE_property_attribute, 0, APPLE)
HANDLE_DW_AT(0x3fec, APPLE_objc_complete_type, 0, APPLE)
HANDLE_DW_AT(0x3fed, APPLE_property, 0, APPLE)
+HANDLE_DW_AT(0x3fee, APPLE_objc_direct, 0, APPLE)
// Attribute form encodings.
HANDLE_DW_FORM(0x01, addr, 2, DWARF)
@@ -647,6 +648,8 @@ HANDLE_DW_OP(0xa9, reinterpret, 5, DWARF)
// Vendor extensions:
// Extensions for GNU-style thread-local storage.
HANDLE_DW_OP(0xe0, GNU_push_tls_address, 0, GNU)
+// Extensions for WebAssembly.
+HANDLE_DW_OP(0xed, WASM_location, 0, WASM)
// The GNU entry value extension.
HANDLE_DW_OP(0xf3, GNU_entry_value, 0, GNU)
// Extensions for Fission proposal.
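These additions follow the file's X-macro pattern; a minimal consumer sketch (the function name is illustrative — Dwarf.def stubs out whichever HANDLE_* macros the includer leaves undefined):

#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR)                                \
  case ID:                                                                     \
    return "DW_OP_" #NAME;
static const char *operationName(unsigned Opcode) {
  switch (Opcode) {
#include "llvm/BinaryFormat/Dwarf.def"
  default:
    return nullptr;
  }
}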
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.h
index 1c6aee48661c..2ad201831d2b 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -63,7 +63,8 @@ enum LLVMConstants : uint32_t {
DWARF_VENDOR_GNU = 3,
DWARF_VENDOR_GOOGLE = 4,
DWARF_VENDOR_LLVM = 5,
- DWARF_VENDOR_MIPS = 6
+ DWARF_VENDOR_MIPS = 6,
+ DWARF_VENDOR_WASM = 7
};
/// Constants that define the DWARF format as 32 or 64 bit.
@@ -458,6 +459,7 @@ StringRef AttributeEncodingString(unsigned Encoding);
StringRef DecimalSignString(unsigned Sign);
StringRef EndianityString(unsigned Endian);
StringRef AccessibilityString(unsigned Access);
+StringRef DefaultedMemberString(unsigned DefaultedEncodings);
StringRef VisibilityString(unsigned Visibility);
StringRef VirtualityString(unsigned Virtuality);
StringRef LanguageString(unsigned Language);
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h
index 46edfb6260be..caab91da9c83 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1199,8 +1199,9 @@ enum {
PT_SUNW_EH_FRAME = 0x6474e550,
PT_SUNW_UNWIND = 0x6464e550,
- PT_GNU_STACK = 0x6474e551, // Indicates stack executability.
- PT_GNU_RELRO = 0x6474e552, // Read-only after relocation.
+ PT_GNU_STACK = 0x6474e551, // Indicates stack executability.
+ PT_GNU_RELRO = 0x6474e552, // Read-only after relocation.
+ PT_GNU_PROPERTY = 0x6474e553,   // .note.gnu.property note sections.
PT_OPENBSD_RANDOMIZE = 0x65a3dbe6, // Fill with random data.
PT_OPENBSD_WXNEEDED = 0x65a3dbe7, // Program does W^X violations.
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/MinidumpConstants.def b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/MinidumpConstants.def
index aeef399af7a4..543305feea77 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/MinidumpConstants.def
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/MinidumpConstants.def
@@ -85,21 +85,22 @@ HANDLE_MDMP_STREAM_TYPE(0xFACECCCC, FacebookAppStateLog)
HANDLE_MDMP_STREAM_TYPE(0xFACEDEAD, FacebookAbortReason)
HANDLE_MDMP_STREAM_TYPE(0xFACEE000, FacebookThreadName)
-HANDLE_MDMP_ARCH(0x0000, X86) // PROCESSOR_ARCHITECTURE_INTEL
-HANDLE_MDMP_ARCH(0x0001, MIPS) // PROCESSOR_ARCHITECTURE_MIPS
-HANDLE_MDMP_ARCH(0x0002, Alpha) // PROCESSOR_ARCHITECTURE_ALPHA
-HANDLE_MDMP_ARCH(0x0003, PPC) // PROCESSOR_ARCHITECTURE_PPC
-HANDLE_MDMP_ARCH(0x0004, SHX) // PROCESSOR_ARCHITECTURE_SHX (Super-H)
-HANDLE_MDMP_ARCH(0x0005, ARM) // PROCESSOR_ARCHITECTURE_ARM
-HANDLE_MDMP_ARCH(0x0006, IA64) // PROCESSOR_ARCHITECTURE_IA64
-HANDLE_MDMP_ARCH(0x0007, Alpha64) // PROCESSOR_ARCHITECTURE_ALPHA64
-HANDLE_MDMP_ARCH(0x0008, MSIL) // PROCESSOR_ARCHITECTURE_MSIL
-HANDLE_MDMP_ARCH(0x0009, AMD64) // PROCESSOR_ARCHITECTURE_AMD64
-HANDLE_MDMP_ARCH(0x000a, X86Win64) // PROCESSOR_ARCHITECTURE_IA32_ON_WIN64
-HANDLE_MDMP_ARCH(0x8001, SPARC) // Breakpad-defined value for SPARC
-HANDLE_MDMP_ARCH(0x8002, PPC64) // Breakpad-defined value for PPC64
-HANDLE_MDMP_ARCH(0x8003, ARM64) // Breakpad-defined value for ARM64
-HANDLE_MDMP_ARCH(0x8004, MIPS64) // Breakpad-defined value for MIPS64
+HANDLE_MDMP_ARCH(0x0000, X86) // PROCESSOR_ARCHITECTURE_INTEL
+HANDLE_MDMP_ARCH(0x0001, MIPS) // PROCESSOR_ARCHITECTURE_MIPS
+HANDLE_MDMP_ARCH(0x0002, Alpha) // PROCESSOR_ARCHITECTURE_ALPHA
+HANDLE_MDMP_ARCH(0x0003, PPC) // PROCESSOR_ARCHITECTURE_PPC
+HANDLE_MDMP_ARCH(0x0004, SHX) // PROCESSOR_ARCHITECTURE_SHX (Super-H)
+HANDLE_MDMP_ARCH(0x0005, ARM) // PROCESSOR_ARCHITECTURE_ARM
+HANDLE_MDMP_ARCH(0x0006, IA64) // PROCESSOR_ARCHITECTURE_IA64
+HANDLE_MDMP_ARCH(0x0007, Alpha64) // PROCESSOR_ARCHITECTURE_ALPHA64
+HANDLE_MDMP_ARCH(0x0008, MSIL) // PROCESSOR_ARCHITECTURE_MSIL
+HANDLE_MDMP_ARCH(0x0009, AMD64) // PROCESSOR_ARCHITECTURE_AMD64
+HANDLE_MDMP_ARCH(0x000a, X86Win64) // PROCESSOR_ARCHITECTURE_IA32_ON_WIN64
+HANDLE_MDMP_ARCH(0x000c, ARM64) // PROCESSOR_ARCHITECTURE_ARM64
+HANDLE_MDMP_ARCH(0x8001, BP_SPARC) // Breakpad-defined value for SPARC
+HANDLE_MDMP_ARCH(0x8002, BP_PPC64) // Breakpad-defined value for PPC64
+HANDLE_MDMP_ARCH(0x8003, BP_ARM64) // Breakpad-defined value for ARM64
+HANDLE_MDMP_ARCH(0x8004, BP_MIPS64) // Breakpad-defined value for MIPS64
HANDLE_MDMP_PLATFORM(0x0000, Win32S) // Win32 on Windows 3.1
HANDLE_MDMP_PLATFORM(0x0001, Win32Windows) // Windows 95-98-Me
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h
index f550d880f68a..59f99cc8cd37 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -131,6 +131,7 @@ struct WasmFunction {
uint32_t CodeSectionOffset;
uint32_t Size;
uint32_t CodeOffset; // start of Locals and Body
+ StringRef ExportName; // from the "export" section
StringRef SymbolName; // from the "linking" section
StringRef DebugName; // from the "name" section
uint32_t Comdat; // from the "comdat info" section
@@ -179,6 +180,7 @@ struct WasmSymbolInfo {
uint32_t Flags;
StringRef ImportModule; // For undefined symbols the module of the import
StringRef ImportName; // For undefined symbols the name of the import
+ StringRef ExportName; // For symbols to be exported from the final module
union {
// For function or global symbols, the index in function or global index
// space.
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h
index 20a0f446272f..b6c3aaa51fc4 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h
@@ -13,6 +13,7 @@
#ifndef LLVM_BINARYFORMAT_XCOFF_H
#define LLVM_BINARYFORMAT_XCOFF_H
+#include "llvm/ADT/StringRef.h"
#include <cstdint>
namespace llvm {
@@ -251,6 +252,8 @@ enum CFileCpuId : uint8_t {
TCPU_970 = 19 ///< PPC970 - PowerPC 64-bit architecture.
};
+StringRef getMappingClassString(XCOFF::StorageMappingClass SMC);
+
} // end namespace XCOFF
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeWriter.h b/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeWriter.h
index 39061e09cda5..4beb89d30e00 100644
--- a/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeWriter.h
@@ -17,6 +17,7 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/MemoryBuffer.h"
#include <map>
#include <memory>
#include <string>
@@ -151,6 +152,11 @@ class raw_ostream;
const std::map<std::string, GVSummaryMapTy>
*ModuleToSummariesForIndex = nullptr);
+ /// Save a copy of the LLVM IR as data in the __LLVM,__bitcode section.
+ void EmbedBitcodeInModule(Module &M, MemoryBufferRef Buf, bool EmbedBitcode,
+ bool EmbedMarker,
+ const std::vector<uint8_t> *CmdArgs);
+
} // end namespace llvm
#endif // LLVM_BITCODE_BITCODEWRITER_H
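A hedged sketch of driving the new hook: serialize the module to a buffer, then hand it back for embedding (the buffer identifier and flag choices are illustrative):

  SmallString<128> Buffer;
  raw_svector_ostream OS(Buffer);
  WriteBitcodeToFile(M, OS);
  EmbedBitcodeInModule(M, MemoryBufferRef(OS.str(), "bitcode"),
                       /*EmbedBitcode=*/true, /*EmbedMarker=*/true,
                       /*CmdArgs=*/nullptr);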
diff --git a/contrib/llvm-project/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/contrib/llvm-project/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 1a397068caf0..2cfd66b96502 100644
--- a/contrib/llvm-project/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/contrib/llvm-project/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -559,6 +559,7 @@ enum FunctionCodes {
FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval]
FUNC_CODE_INST_CALLBR = 57, // CALLBR: [attr, cc, norm, transfs,
// fnty, fnid, args...]
+ FUNC_CODE_INST_FREEZE = 58, // FREEZE: [opty, opval]
};
enum UseListCodes {
diff --git a/contrib/llvm-project/llvm/include/llvm/Bitstream/BitstreamReader.h b/contrib/llvm-project/llvm/include/llvm/Bitstream/BitstreamReader.h
index b49a969a2d8b..c476f60420fa 100644
--- a/contrib/llvm-project/llvm/include/llvm/Bitstream/BitstreamReader.h
+++ b/contrib/llvm-project/llvm/include/llvm/Bitstream/BitstreamReader.h
@@ -39,7 +39,7 @@ public:
/// This contains information emitted to BLOCKINFO_BLOCK blocks. These
/// describe abbreviations that all blocks of the specified ID inherit.
struct BlockInfo {
- unsigned BlockID;
+ unsigned BlockID = 0;
std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
std::string Name;
std::vector<std::pair<unsigned, std::string>> RecordNames;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h
index a4580da5aec9..b374fd3d80af 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -48,6 +48,7 @@ class GlobalObject;
class GlobalValue;
class GlobalVariable;
class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
class MachineConstantPoolValue;
class MachineDominatorTree;
class MachineFunction;
@@ -58,7 +59,6 @@ class MachineModuleInfo;
class MachineOptimizationRemarkEmitter;
class MCAsmInfo;
class MCCFIInstruction;
-struct MCCodePaddingContext;
class MCContext;
class MCExpr;
class MCInst;
@@ -69,7 +69,9 @@ class MCSymbol;
class MCTargetOptions;
class MDNode;
class Module;
+class ProfileSummaryInfo;
class raw_ostream;
+class RemarkStreamer;
class StackMaps;
class TargetLoweringObjectFile;
class TargetMachine;
@@ -107,6 +109,10 @@ public:
/// Optimization remark emitter.
MachineOptimizationRemarkEmitter *ORE;
+ MachineBlockFrequencyInfo *MBFI;
+
+ ProfileSummaryInfo *PSI;
+
/// The symbol for the current function. This is recalculated at the beginning
/// of each call to runOnMachineFunction().
MCSymbol *CurrentFnSym = nullptr;
@@ -280,6 +286,9 @@ public:
/// Emit a table with all XRay instrumentation points.
void emitXRayTable();
+ DenseMap<const MCSection *, unsigned> PatchableFunctionEntryID;
+ void emitPatchableFunctionEntries();
+
//===------------------------------------------------------------------===//
// MachineFunctionPass Implementation.
//===------------------------------------------------------------------===//
@@ -319,7 +328,7 @@ public:
void emitStackSizeSection(const MachineFunction &MF);
- void emitRemarksSection(Module &M);
+ void emitRemarksSection(RemarkStreamer &RS);
enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };
CFIMoveType needsCFIMoves() const;
@@ -689,8 +698,6 @@ private:
GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S);
/// Emit GlobalAlias or GlobalIFunc.
void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS);
- void setupCodePaddingContext(const MachineBasicBlock &MBB,
- MCCodePaddingContext &Context) const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 2e57b4c9d332..30533d90bbf7 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -326,7 +326,9 @@ public:
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
- unsigned &JumpTableSize) {
+ unsigned &JumpTableSize,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
/// Try to find the estimated number of clusters. Note that the number of
/// clusters identified in this function could be different from the actual
/// numbers found in lowering. This function ignores switches that are
@@ -374,7 +376,7 @@ public:
(MaxCaseVal - MinCaseVal)
.getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
// Check whether a range of clusters is dense enough for a jump table
- if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
+ if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
JumpTableSize = Range;
return 1;
}
@@ -508,6 +510,13 @@ public:
return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
+ bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+ AssumptionCache &AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI) {
+ return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
+ }
+
int getInstructionLatency(const Instruction *I) {
if (isa<LoadInst>(I))
return getST()->getSchedModel().DefaultLoadLatency;
@@ -624,7 +633,8 @@ public:
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr) {
// Check if any of the operands are vector operands.
const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -772,9 +782,10 @@ public:
// cost of the split itself. Count that as 1, to be consistent with
// TLI->getTypeLegalizationCost().
if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
- TargetLowering::TypeSplitVector) ||
- (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
- TargetLowering::TypeSplitVector)) {
+ TargetLowering::TypeSplitVector ||
+ TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
+ TargetLowering::TypeSplitVector) &&
+ Src->getVectorNumElements() > 1 && Dst->getVectorNumElements() > 1) {
Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
Dst->getVectorNumElements() / 2);
Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
@@ -869,8 +880,9 @@ public:
return LT.first;
}
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr) {
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
+ unsigned AddressSpace,
+ const Instruction *I = nullptr) {
assert(!Src->isVoidTy() && "Invalid type");
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
@@ -921,8 +933,8 @@ public:
Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
Opcode, VecTy, Alignment, AddressSpace);
else
- Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
- AddressSpace);
+ Cost = static_cast<T *>(this)->getMemoryOpCost(
+ Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
// Legalize the vector type, and get the legalized and unlegalized type
// sizes.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.inc b/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.inc
index cb69e9f61405..8739b644873d 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.inc
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/CommandFlags.inc
@@ -102,15 +102,15 @@ static cl::opt<llvm::ExceptionHandling> ExceptionModel(
clEnumValN(ExceptionHandling::Wasm, "wasm",
"WebAssembly exception handling")));
-static cl::opt<TargetMachine::CodeGenFileType> FileType(
- "filetype", cl::init(TargetMachine::CGFT_AssemblyFile),
+static cl::opt<CodeGenFileType> FileType(
+ "filetype", cl::init(CGFT_AssemblyFile),
cl::desc(
"Choose a file type (not all types are supported by all targets):"),
- cl::values(clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm",
+ cl::values(clEnumValN(CGFT_AssemblyFile, "asm",
"Emit an assembly ('.s') file"),
- clEnumValN(TargetMachine::CGFT_ObjectFile, "obj",
+ clEnumValN(CGFT_ObjectFile, "obj",
"Emit a native object ('.o') file"),
- clEnumValN(TargetMachine::CGFT_Null, "null",
+ clEnumValN(CGFT_Null, "null",
"Emit nothing, for performance testing")));
static cl::opt<llvm::FramePointer::FP> FramePointerUsage(
@@ -151,7 +151,7 @@ static cl::opt<bool>
"attribute not to use exceptions"),
cl::init(false));
-static cl::opt<llvm::FPDenormal::DenormalMode> DenormalMode(
+static cl::opt<llvm::FPDenormal::DenormalMode> DenormalFPMath(
"denormal-fp-math",
cl::desc("Select which denormal numbers the code is permitted to require"),
cl::init(FPDenormal::IEEE),
@@ -238,6 +238,10 @@ static cl::opt<bool>
cl::desc("Emit functions into separate sections"),
cl::init(false));
+static cl::opt<unsigned> TLSSize("tls-size",
+ cl::desc("Bit size of immediate TLS offsets"),
+ cl::init(0));
+
static cl::opt<bool> EmulatedTLS("emulated-tls",
cl::desc("Use emulated TLS model"),
cl::init(false));
@@ -276,6 +280,11 @@ static cl::opt<bool>
cl::desc("Emit debug info about parameter's entry values"),
cl::init(false));
+static cl::opt<bool>
+ ForceDwarfFrameSection("force-dwarf-frame-section",
+ cl::desc("Always emit a debug frame section."),
+ cl::init(false));
+
// Common utility function tightly tied to the options listed here. Initializes
// a TargetOptions object with CodeGen flags and returns it.
static TargetOptions InitTargetOptionsFromCodeGenFlags() {
@@ -286,7 +295,7 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() {
Options.NoNaNsFPMath = EnableNoNaNsFPMath;
Options.NoSignedZerosFPMath = EnableNoSignedZerosFPMath;
Options.NoTrappingFPMath = EnableNoTrappingFPMath;
- Options.FPDenormalMode = DenormalMode;
+ Options.FPDenormalMode = DenormalFPMath;
Options.HonorSignDependentRoundingFPMathOption =
EnableHonorSignDependentRoundingFPMath;
if (FloatABIForCalls != FloatABI::Default)
@@ -300,12 +309,14 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() {
Options.DataSections = DataSections;
Options.FunctionSections = FunctionSections;
Options.UniqueSectionNames = UniqueSectionNames;
+ Options.TLSSize = TLSSize;
Options.EmulatedTLS = EmulatedTLS;
Options.ExplicitEmulatedTLS = EmulatedTLS.getNumOccurrences() > 0;
Options.ExceptionModel = ExceptionModel;
Options.EmitStackSizeSection = EnableStackSizeSection;
Options.EmitAddrsig = EnableAddrsig;
Options.EnableDebugEntryValues = EnableDebugEntryValues;
+ Options.ForceDwarfFrameSection = ForceDwarfFrameSection;
Options.MCOptions = InitMCTargetOptionsFromFlags();
@@ -366,46 +377,52 @@ LLVM_ATTRIBUTE_UNUSED static std::vector<std::string> getFeatureList() {
return Features.getFeatures();
}
+/// Set function attributes of function \p F based on CPU, Features, and command
+/// line flags.
+LLVM_ATTRIBUTE_UNUSED static void
+setFunctionAttributes(StringRef CPU, StringRef Features, Function &F) {
+ auto &Ctx = F.getContext();
+ AttributeList Attrs = F.getAttributes();
+ AttrBuilder NewAttrs;
+
+ if (!CPU.empty() && !F.hasFnAttribute("target-cpu"))
+ NewAttrs.addAttribute("target-cpu", CPU);
+ if (!Features.empty())
+ NewAttrs.addAttribute("target-features", Features);
+ if (FramePointerUsage.getNumOccurrences() > 0) {
+ if (FramePointerUsage == llvm::FramePointer::All)
+ NewAttrs.addAttribute("frame-pointer", "all");
+ else if (FramePointerUsage == llvm::FramePointer::NonLeaf)
+ NewAttrs.addAttribute("frame-pointer", "non-leaf");
+ else if (FramePointerUsage == llvm::FramePointer::None)
+ NewAttrs.addAttribute("frame-pointer", "none");
+ }
+ if (DisableTailCalls.getNumOccurrences() > 0)
+ NewAttrs.addAttribute("disable-tail-calls",
+ toStringRef(DisableTailCalls));
+ if (StackRealign)
+ NewAttrs.addAttribute("stackrealign");
+
+ if (TrapFuncName.getNumOccurrences() > 0)
+ for (auto &B : F)
+ for (auto &I : B)
+ if (auto *Call = dyn_cast<CallInst>(&I))
+ if (const auto *F = Call->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::debugtrap ||
+ F->getIntrinsicID() == Intrinsic::trap)
+ Call->addAttribute(
+ llvm::AttributeList::FunctionIndex,
+ Attribute::get(Ctx, "trap-func-name", TrapFuncName));
+
+ // Let NewAttrs override Attrs.
+ F.setAttributes(
+ Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
+}
+
/// Set function attributes of functions in Module M based on CPU,
/// Features, and command line flags.
LLVM_ATTRIBUTE_UNUSED static void
setFunctionAttributes(StringRef CPU, StringRef Features, Module &M) {
- for (auto &F : M) {
- auto &Ctx = F.getContext();
- AttributeList Attrs = F.getAttributes();
- AttrBuilder NewAttrs;
-
- if (!CPU.empty())
- NewAttrs.addAttribute("target-cpu", CPU);
- if (!Features.empty())
- NewAttrs.addAttribute("target-features", Features);
- if (FramePointerUsage.getNumOccurrences() > 0) {
- if (FramePointerUsage == llvm::FramePointer::All)
- NewAttrs.addAttribute("frame-pointer", "all");
- else if (FramePointerUsage == llvm::FramePointer::NonLeaf)
- NewAttrs.addAttribute("frame-pointer", "non-leaf");
- else if (FramePointerUsage == llvm::FramePointer::None)
- NewAttrs.addAttribute("frame-pointer", "none");
- }
- if (DisableTailCalls.getNumOccurrences() > 0)
- NewAttrs.addAttribute("disable-tail-calls",
- toStringRef(DisableTailCalls));
- if (StackRealign)
- NewAttrs.addAttribute("stackrealign");
-
- if (TrapFuncName.getNumOccurrences() > 0)
- for (auto &B : F)
- for (auto &I : B)
- if (auto *Call = dyn_cast<CallInst>(&I))
- if (const auto *F = Call->getCalledFunction())
- if (F->getIntrinsicID() == Intrinsic::debugtrap ||
- F->getIntrinsicID() == Intrinsic::trap)
- Call->addAttribute(
- llvm::AttributeList::FunctionIndex,
- Attribute::get(Ctx, "trap-func-name", TrapFuncName));
-
- // Let NewAttrs override Attrs.
- F.setAttributes(
- Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
- }
+ for (Function &F : M)
+ setFunctionAttributes(CPU, Features, F);
}
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/DFAPacketizer.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/DFAPacketizer.h
index 705465b15c4c..9cdaedc9e861 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/DFAPacketizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/DFAPacketizer.h
@@ -46,43 +46,18 @@ class MCInstrDesc;
class SUnit;
class TargetInstrInfo;
-// --------------------------------------------------------------------
-// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
-
-// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput.
-// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer.
-//
-// e.g. terms x resource bit combinations that fit in uint32_t:
-// 4 terms x 8 bits = 32 bits
-// 3 terms x 10 bits = 30 bits
-// 2 terms x 16 bits = 32 bits
-//
-// e.g. terms x resource bit combinations that fit in uint64_t:
-// 8 terms x 8 bits = 64 bits
-// 7 terms x 9 bits = 63 bits
-// 6 terms x 10 bits = 60 bits
-// 5 terms x 12 bits = 60 bits
-// 4 terms x 16 bits = 64 bits <--- current
-// 3 terms x 21 bits = 63 bits
-// 2 terms x 32 bits = 64 bits
-//
-#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms.
-#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term.
-
-using DFAInput = uint64_t;
-using DFAStateInput = int64_t;
-
-#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable.
-// --------------------------------------------------------------------
-
class DFAPacketizer {
private:
const InstrItineraryData *InstrItins;
- Automaton<DFAInput> A;
+ Automaton<uint64_t> A;
+ /// For every itinerary, an "action" to apply to the automaton. This removes
+ /// the redundancy in actions between itinerary classes.
+ ArrayRef<unsigned> ItinActions;
public:
- DFAPacketizer(const InstrItineraryData *InstrItins, Automaton<uint64_t> a) :
- InstrItins(InstrItins), A(std::move(a)) {
+ DFAPacketizer(const InstrItineraryData *InstrItins, Automaton<uint64_t> a,
+ ArrayRef<unsigned> ItinActions)
+ : InstrItins(InstrItins), A(std::move(a)), ItinActions(ItinActions) {
// Start off with resource tracking disabled.
A.enableTranscription(false);
}
@@ -99,12 +74,6 @@ public:
A.enableTranscription(Track);
}
- // Return the DFAInput for an instruction class.
- DFAInput getInsnInput(unsigned InsnClass);
-
- // Return the DFAInput for an instruction class input vector.
- static DFAInput getInsnInput(const std::vector<unsigned> &InsnClass);
-
// Check if the resources occupied by a MCInstrDesc are available in
// the current state.
bool canReserveResources(const MCInstrDesc *MID);
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/DIE.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/DIE.h
index e8e7504a6cda..40f6b041e9b3 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/DIE.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/DIE.h
@@ -78,7 +78,7 @@ public:
/// object.
class DIEAbbrev : public FoldingSetNode {
/// Unique number for node.
- unsigned Number;
+ unsigned Number = 0;
/// Dwarf tag code.
dwarf::Tag Tag;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h
index 03d681feb7aa..d9c680392e50 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/FastISel.h
@@ -540,6 +540,11 @@ protected:
bool selectXRayCustomEvent(const CallInst *II);
bool selectXRayTypedEvent(const CallInst *II);
+ bool shouldOptForSize(const MachineFunction *MF) const {
+ // TODO: Implement PGSO.
+ return MF->getFunction().hasOptSize();
+ }
+
private:
/// Handle PHI nodes in successor blocks.
///
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/FaultMaps.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/FaultMaps.h
index a1e2349c413e..da56c4dd13ab 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/FaultMaps.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/FaultMaps.h
@@ -36,7 +36,8 @@ public:
static const char *faultTypeToString(FaultKind);
- void recordFaultingOp(FaultKind FaultTy, const MCSymbol *HandlerLabel);
+ void recordFaultingOp(FaultKind FaultTy, const MCSymbol *FaultingLabel,
+ const MCSymbol *HandlerLabel);
void serializeToFaultMapSection();
void reset() {
FunctionInfos.clear();
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index f812a2f6c585..2d41f90fe053 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -195,7 +195,7 @@ public:
/// isExportedInst - Return true if the specified value is an instruction
/// exported from its block.
- bool isExportedInst(const Value *V) {
+ bool isExportedInst(const Value *V) const {
return ValueMap.count(V);
}
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
index 5a44e67992ad..e56177939f46 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
@@ -220,9 +220,7 @@ class GISelCSEAnalysisWrapperPass : public MachineFunctionPass {
public:
static char ID;
- GISelCSEAnalysisWrapperPass() : MachineFunctionPass(ID) {
- initializeGISelCSEAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ GISelCSEAnalysisWrapperPass();
void getAnalysisUsage(AnalysisUsage &AU) const override;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 4901a3748e4a..bc9774e09acf 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -107,7 +107,7 @@ public:
/// make these decisions: function formal arguments, call
/// instruction args, call instruction returns and function
/// returns. However, once a decision has been made on where an
- /// arugment should go, exactly what happens can vary slightly. This
+ /// argument should go, exactly what happens can vary slightly. This
/// class abstracts the differences.
struct ValueHandler {
ValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 4c04dc52547d..e5ee21941e23 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -36,6 +36,18 @@ struct PreferredTuple {
MachineInstr *MI;
};
+struct IndexedLoadStoreMatchInfo {
+ Register Addr;
+ Register Base;
+ Register Offset;
+ bool IsPre;
+};
+
+struct PtrAddChain {
+ int64_t Imm;
+ Register Base;
+};
+
class CombinerHelper {
protected:
MachineIRBuilder &Builder;
@@ -84,6 +96,8 @@ public:
/// Combine \p MI into a pre-indexed or post-indexed load/store operation if
/// legal and the surrounding code makes it useful.
bool tryCombineIndexedLoadStore(MachineInstr &MI);
+ bool matchCombineIndexedLoadStore(MachineInstr &MI,
+                                   IndexedLoadStoreMatchInfo &MatchInfo);
+ void applyCombineIndexedLoadStore(MachineInstr &MI,
+                                   IndexedLoadStoreMatchInfo &MatchInfo);
bool matchElideBrByInvertingCond(MachineInstr &MI);
void applyElideBrByInvertingCond(MachineInstr &MI);
@@ -134,7 +148,7 @@ public:
///
/// For example (pre-indexed):
///
- /// $addr = G_GEP $base, $offset
+ /// $addr = G_PTR_ADD $base, $offset
/// [...]
/// $val = G_LOAD $addr
/// [...]
@@ -150,7 +164,7 @@ public:
///
/// G_STORE $val, $base
/// [...]
- /// $addr = G_GEP $base, $offset
+ /// $addr = G_PTR_ADD $base, $offset
/// [...]
/// $whatever = COPY $addr
///
@@ -161,6 +175,9 @@ public:
/// $whatever = COPY $addr
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
+ bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
+ bool applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
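The new match/apply pairs separate pattern recognition from rewriting, which lets generated combiners schedule the apply step; a hedged sketch of direct use, assuming a CombinerHelper Helper and a candidate G_PTR_ADD instruction MI:

  PtrAddChain MatchInfo;
  if (Helper.matchPtrAddImmedChain(MI, MatchInfo))
    // Folds (ptr_add (ptr_add X, C1), C2) into (ptr_add X, C1 + C2).
    Helper.applyPtrAddImmedChain(MI, MatchInfo);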
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
index dfe5a7f3177d..d44612f54ae5 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
@@ -40,6 +40,10 @@ public:
const APInt &DemandedElts,
unsigned Depth = 0);
+ unsigned computeNumSignBits(Register R, const APInt &DemandedElts,
+ unsigned Depth = 0);
+ unsigned computeNumSignBits(Register R, unsigned Depth = 0);
+
// KnownBitsAPI
KnownBits getKnownBits(Register R);
// Calls getKnownBits for first operand def of MI.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index bdb92aa4689d..6a2ea05f1b08 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -483,6 +483,9 @@ private:
bool translateUserOp2(const User &U, MachineIRBuilder &MIRBuilder) {
return false;
}
+ bool translateFreeze(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
/// @}
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index fd3dc743000b..59d2540dd14e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -313,6 +313,14 @@ enum {
/// - RendererFnID - Custom renderer function to call
GIR_CustomRenderer,
+ /// Render operands to the specified instruction using a custom function,
+ /// reading from a specific operand.
+ /// - InsnID - Instruction ID to modify
+ /// - OldInsnID - Instruction ID to get the matched operand from
+ /// - OpIdx - Operand index in OldInsnID the render function should read from.
+ /// - RendererFnID - Custom renderer function to call
+ GIR_CustomOperandRenderer,
+
/// Render a G_CONSTANT operator as a sign-extended immediate.
/// - NewInsnID - Instruction ID to modify
/// - OldInsnID - Instruction ID to copy from
@@ -390,6 +398,10 @@ public:
GISelKnownBits *KnownBits = nullptr;
MachineFunction *MF = nullptr;
+ virtual void setupGeneratedPerFunctionState(MachineFunction &MF) {
+ llvm_unreachable("TableGen should have emitted implementation");
+ }
+
/// Setup per-MF selector state.
virtual void setupMF(MachineFunction &mf,
GISelKnownBits &KB,
@@ -397,6 +409,7 @@ public:
CoverageInfo = &covinfo;
KnownBits = &KB;
MF = &mf;
+ setupGeneratedPerFunctionState(mf);
}
protected:
@@ -487,7 +500,7 @@ protected:
bool isOperandImmEqual(const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const;
- /// Return true if the specified operand is a G_GEP with a G_CONSTANT on the
+ /// Return true if the specified operand is a G_PTR_ADD with a G_CONSTANT on the
/// right-hand side. GlobalISel's separation of pointer and integer types
/// means that we don't need to worry about G_OR with equivalent semantics.
bool isBaseWithConstantOffset(const MachineOperand &Root,
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index 08f2f54bcf90..f866f42344f6 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -573,7 +574,8 @@ bool InstructionSelector::executeMatchTable(
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
if (!MO.isReg() ||
- &RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum)) !=
+ &RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum),
+ MRI.getType(MO.getReg())) !=
RBI.getRegBank(MO.getReg(), MRI, TRI)) {
if (handleReject() == RejectAndGiveUp)
return false;
@@ -640,10 +642,15 @@ bool InstructionSelector::executeMatchTable(
<< "), Value=" << Value << ")\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
- if (!MO.isCImm() || !MO.getCImm()->equalsInt(Value)) {
- if (handleReject() == RejectAndGiveUp)
- return false;
- }
+ if (MO.isImm() && MO.getImm() == Value)
+ break;
+
+ if (MO.isCImm() && MO.getCImm()->equalsInt(Value))
+ break;
+
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+
break;
}
@@ -945,8 +952,27 @@ bool InstructionSelector::executeMatchTable(
dbgs() << CurrentIdx << ": GIR_CustomRenderer(OutMIs["
<< InsnID << "], MIs[" << OldInsnID << "], "
<< RendererFnID << ")\n");
+ (ISel.*ISelInfo.CustomRenderers[RendererFnID])(
+ OutMIs[InsnID], *State.MIs[OldInsnID],
+ -1); // Not a source operand of the old instruction.
+ break;
+ }
+ case GIR_CustomOperandRenderer: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t OldInsnID = MatchTable[CurrentIdx++];
+ int64_t OpIdx = MatchTable[CurrentIdx++];
+ int64_t RendererFnID = MatchTable[CurrentIdx++];
+ assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
+
+ DEBUG_WITH_TYPE(
+ TgtInstructionSelector::getName(),
+ dbgs() << CurrentIdx << ": GIR_CustomOperandRenderer(OutMIs["
+ << InsnID << "], MIs[" << OldInsnID << "]->getOperand("
+ << OpIdx << "), "
+ << RendererFnID << ")\n");
(ISel.*ISelInfo.CustomRenderers[RendererFnID])(OutMIs[InsnID],
- *State.MIs[OldInsnID]);
+ *State.MIs[OldInsnID],
+ OpIdx);
break;
}
case GIR_ConstrainOperandRC: {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 7f960e727846..dd32a3b9e38e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -46,7 +46,8 @@ public:
: Builder(B), MRI(MRI), LI(LI) {}
bool tryCombineAnyExt(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
Builder.setInstr(MI);
@@ -58,6 +59,7 @@ public:
if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
+ UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
@@ -70,6 +72,7 @@ public:
m_GSExt(m_Reg(ExtSrc)),
m_GZExt(m_Reg(ExtSrc)))))) {
Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
+ UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *ExtMI, DeadInsts);
return true;
}
@@ -83,15 +86,17 @@ public:
auto &CstVal = SrcMI->getOperand(1);
Builder.buildConstant(
DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
+ UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *SrcMI, DeadInsts);
return true;
}
}
- return tryFoldImplicitDef(MI, DeadInsts);
+ return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
}
bool tryCombineZExt(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
assert(MI.getOpcode() == TargetOpcode::G_ZEXT);
Builder.setInstr(MI);
@@ -108,7 +113,8 @@ public:
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
LLT SrcTy = MRI.getType(SrcReg);
APInt Mask = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits());
- auto MIBMask = Builder.buildConstant(DstTy, Mask.getZExtValue());
+ auto MIBMask = Builder.buildConstant(
+ DstTy, Mask.zext(DstTy.getScalarSizeInBits()));
Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc),
MIBMask);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
@@ -124,15 +130,17 @@ public:
auto &CstVal = SrcMI->getOperand(1);
Builder.buildConstant(
DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
+ UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *SrcMI, DeadInsts);
return true;
}
}
- return tryFoldImplicitDef(MI, DeadInsts);
+ return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
}
bool tryCombineSExt(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT);
Builder.setInstr(MI);
@@ -154,12 +162,40 @@ public:
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
- return tryFoldImplicitDef(MI, DeadInsts);
+ return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
+ }
+
+ bool tryCombineTrunc(MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
+
+ Builder.setInstr(MI);
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+
+ // Try to fold trunc(g_constant) when the smaller constant type is legal.
+ // Can't use MIPattern because we don't have a specific constant in mind.
+ auto *SrcMI = MRI.getVRegDef(SrcReg);
+ if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
+ const LLT &DstTy = MRI.getType(DstReg);
+ if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
+ auto &CstVal = SrcMI->getOperand(1);
+ Builder.buildConstant(
+ DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
+ UpdatedDefs.push_back(DstReg);
+ markInstAndDefDead(MI, *SrcMI, DeadInsts);
+ return true;
+ }
+ }
+
+ return false;
}
/// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
bool tryFoldImplicitDef(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
unsigned Opcode = MI.getOpcode();
assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
Opcode == TargetOpcode::G_SEXT);
@@ -176,6 +212,7 @@ public:
return false;
LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
+ UpdatedDefs.push_back(DstReg);
} else {
// G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
// bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
@@ -183,6 +220,7 @@ public:
return false;
LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
Builder.buildConstant(DstReg, 0);
+ UpdatedDefs.push_back(DstReg);
}
markInstAndDefDead(MI, *DefMI, DeadInsts);
@@ -191,37 +229,60 @@ public:
return false;
}
- static unsigned canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
- LLT OpTy, LLT DestTy) {
- if (OpTy.isVector() && DestTy.isVector())
- return MergeOp == TargetOpcode::G_CONCAT_VECTORS;
-
- if (OpTy.isVector() && !DestTy.isVector()) {
- if (MergeOp == TargetOpcode::G_BUILD_VECTOR)
+ static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
+ LLT OpTy, LLT DestTy) {
+ // Check if we found a definition that is like G_MERGE_VALUES.
+ switch (MergeOp) {
+ default:
+ return false;
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_MERGE_VALUES:
+ // The convert operation that we would need to insert converts the
+ // input of this kind of instruction (scalar) to the destination
+ // type (DestTy).
+ // The conversion needs to stay in the same domain (scalar to scalar
+ // and vector to vector), so allowing the merge to fold would require
+ // inserting some bitcasts.
+ // E.g.,
+ // <2 x s16> = build_vector s16, s16
+ // <2 x s32> = zext <2 x s16>
+ // <2 x s16>, <2 x s16> = unmerge <2 x s32>
+ //
+ // As is, the folding would produce:
+ // <2 x s16> = zext s16 <-- scalar to vector
+ // <2 x s16> = zext s16 <-- scalar to vector
+ // Which is invalid.
+ // Instead we would want to generate:
+ // s32 = zext s16
+ // <2 x s16> = bitcast s32
+ // s32 = zext s16
+ // <2 x s16> = bitcast s32
+ //
+ // That is not done yet.
+ if (ConvertOp == 0)
return true;
+ return !DestTy.isVector();
+ case TargetOpcode::G_CONCAT_VECTORS: {
+ if (ConvertOp == 0)
+ return true;
+ if (!DestTy.isVector())
+ return false;
- if (MergeOp == TargetOpcode::G_CONCAT_VECTORS) {
- if (ConvertOp == 0)
- return true;
-
- const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
-
- // Don't handle scalarization with a cast that isn't in the same
- // direction as the vector cast. This could be handled, but it would
- // require more intermediate unmerges.
- if (ConvertOp == TargetOpcode::G_TRUNC)
- return DestTy.getSizeInBits() <= OpEltSize;
- return DestTy.getSizeInBits() >= OpEltSize;
- }
+ const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
- return false;
+ // Don't handle scalarization with a cast that isn't in the same
+ // direction as the vector cast. This could be handled, but it would
+ // require more intermediate unmerges.
+ if (ConvertOp == TargetOpcode::G_TRUNC)
+ return DestTy.getSizeInBits() <= OpEltSize;
+ return DestTy.getSizeInBits() >= OpEltSize;
+ }
}
-
- return MergeOp == TargetOpcode::G_MERGE_VALUES;
}
bool tryCombineMerges(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
unsigned NumDefs = MI.getNumOperands() - 1;
@@ -271,8 +332,8 @@ public:
SmallVector<Register, 2> TmpRegs;
// This is a vector that is being scalarized and casted. Extract to
// the element type, and do the conversion on the scalars.
- LLT MergeEltTy
- = MRI.getType(MergeI->getOperand(0).getReg()).getElementType();
+ LLT MergeEltTy =
+ MRI.getType(MergeI->getOperand(0).getReg()).getElementType();
for (unsigned j = 0; j < NumMergeRegs; ++j)
TmpRegs.push_back(MRI.createGenericVirtualRegister(MergeEltTy));
@@ -283,6 +344,7 @@ public:
} else {
Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
}
+ UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
}
} else if (NumMergeRegs > NumDefs) {
@@ -304,31 +366,40 @@ public:
++j, ++Idx)
Regs.push_back(MergeI->getOperand(Idx).getReg());
- Builder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs);
+ Register DefReg = MI.getOperand(DefIdx).getReg();
+ Builder.buildMerge(DefReg, Regs);
+ UpdatedDefs.push_back(DefReg);
}
} else {
LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
+
+ if (!ConvertOp && DestTy != MergeSrcTy)
+ ConvertOp = TargetOpcode::G_BITCAST;
+
if (ConvertOp) {
Builder.setInstr(MI);
for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
- Builder.buildInstr(ConvertOp, {MI.getOperand(Idx).getReg()},
- {MergeSrc});
+ Register DefReg = MI.getOperand(Idx).getReg();
+ Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
+ UpdatedDefs.push_back(DefReg);
}
markInstAndDefDead(MI, *MergeI, DeadInsts);
return true;
}
- // FIXME: is a COPY appropriate if the types mismatch? We know both
- // registers are allocatable by now.
- if (DestTy != MergeSrcTy)
- return false;
- for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
- MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
- MergeI->getOperand(Idx + 1).getReg());
+ assert(DestTy == MergeSrcTy &&
+ "Bitcast and the other kinds of conversions should "
+ "have happened earlier");
+
+ for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
+ Register NewDef = MergeI->getOperand(Idx + 1).getReg();
+ MRI.replaceRegWith(MI.getOperand(Idx).getReg(), NewDef);
+ UpdatedDefs.push_back(NewDef);
+ }
}
markInstAndDefDead(MI, *MergeI, DeadInsts);
@@ -347,7 +418,8 @@ public:
}
bool tryCombineExtract(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs) {
assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
// Try to use the source registers from a G_MERGE_VALUES
@@ -362,13 +434,14 @@ public:
// for N >= %2.getSizeInBits() / 2
// %3 = G_EXTRACT %1, (N - %0.getSizeInBits())
- unsigned Src = lookThroughCopyInstrs(MI.getOperand(1).getReg());
- MachineInstr *MergeI = MRI.getVRegDef(Src);
+ Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+ MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
if (!MergeI || !isMergeLikeOpcode(MergeI->getOpcode()))
return false;
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- LLT SrcTy = MRI.getType(Src);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
// TODO: Do we need to check if the resulting extract is supported?
unsigned ExtractDstSize = DstTy.getSizeInBits();
@@ -386,10 +459,9 @@ public:
// TODO: We could modify MI in place in most cases.
Builder.setInstr(MI);
- Builder.buildExtract(
- MI.getOperand(0).getReg(),
- MergeI->getOperand(MergeSrcIdx + 1).getReg(),
- Offset - MergeSrcIdx * MergeSrcSize);
+ Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
+ Offset - MergeSrcIdx * MergeSrcSize);
+ UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *MergeI, DeadInsts);
return true;
}
@@ -406,30 +478,79 @@ public:
// etc, process the dead instructions now if any.
if (!DeadInsts.empty())
deleteMarkedDeadInsts(DeadInsts, WrapperObserver);
+
+ // Record here every vreg that was redefined in such a way that at least
+ // one of its users (immediate or COPY-separated) could become artifact
+ // combinable with the new definition (or with the instruction reachable
+ // from it through a chain of copies, if any).
+ SmallVector<Register, 4> UpdatedDefs;
+ bool Changed = false;
switch (MI.getOpcode()) {
default:
return false;
case TargetOpcode::G_ANYEXT:
- return tryCombineAnyExt(MI, DeadInsts);
+ Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs);
+ break;
case TargetOpcode::G_ZEXT:
- return tryCombineZExt(MI, DeadInsts);
+ Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs);
+ break;
case TargetOpcode::G_SEXT:
- return tryCombineSExt(MI, DeadInsts);
+ Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
+ break;
case TargetOpcode::G_UNMERGE_VALUES:
- return tryCombineMerges(MI, DeadInsts);
+ Changed = tryCombineMerges(MI, DeadInsts, UpdatedDefs);
+ break;
case TargetOpcode::G_EXTRACT:
- return tryCombineExtract(MI, DeadInsts);
- case TargetOpcode::G_TRUNC: {
- bool Changed = false;
- for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg()))
- Changed |= tryCombineInstruction(Use, DeadInsts, WrapperObserver);
- return Changed;
+ Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
+ break;
+ case TargetOpcode::G_TRUNC:
+ Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs);
+ if (!Changed) {
+ // Try to combine truncates away even if they are legal. As all artifact
+ // combines at the moment look only "up" the def-use chains, we achieve
+ // that by throwing truncates' users (looking through copies) into the
+ // ArtifactList again.
+ UpdatedDefs.push_back(MI.getOperand(0).getReg());
+ }
+ break;
}
+ // If the main loop through the ArtifactList found at least one combinable
+ // pair of artifacts, not only combine it away (as done above), but also
+ // follow the def-use chain from there to combine everything that can be
+ // combined within this def-use chain of artifacts.
+ while (!UpdatedDefs.empty()) {
+ Register NewDef = UpdatedDefs.pop_back_val();
+ assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
+ for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
+ switch (Use.getOpcode()) {
+ // Keep this list in sync with the list of all artifact combines.
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_EXTRACT:
+ case TargetOpcode::G_TRUNC:
+ // Adding Use to ArtifactList.
+ WrapperObserver.changedInstr(Use);
+ break;
+ case TargetOpcode::COPY: {
+ Register Copy = Use.getOperand(0).getReg();
+ if (Copy.isVirtual())
+ UpdatedDefs.push_back(Copy);
+ break;
+ }
+ default:
+ // If we do not have an artifact combine for the opcode, there is no
+ // point in adding it to the ArtifactList as nothing interesting will
+ // be done to it anyway.
+ break;
+ }
+ }
}
+ return Changed;
}
private:
-
static unsigned getArtifactSrcReg(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case TargetOpcode::COPY:
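A simplified sketch of the def-use worklist introduced above (the isArtifact predicate is hypothetical shorthand for the opcode list in the switch):

  SmallVector<Register, 4> Worklist = {NewDef};
  while (!Worklist.empty()) {
    Register R = Worklist.pop_back_val();
    for (MachineInstr &Use : MRI.use_instructions(R)) {
      if (isArtifact(Use.getOpcode()))
        WrapperObserver.changedInstr(Use); // re-enqueue on the ArtifactList
      else if (Use.getOpcode() == TargetOpcode::COPY)
        Worklist.push_back(Use.getOperand(0).getReg());
    }
  }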
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
index 13cf3f7e694d..07173b9719bd 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
@@ -31,8 +31,12 @@ class Legalizer : public MachineFunctionPass {
public:
static char ID;
-private:
+ struct MFResult {
+ bool Changed;
+ const MachineInstr *FailedOn;
+ };
+private:
/// Initialize the field members using \p MF.
void init(MachineFunction &MF);
@@ -55,14 +59,19 @@ public:
}
MachineFunctionProperties getClearedProperties() const override {
- return MachineFunctionProperties()
- .set(MachineFunctionProperties::Property::NoPHIs);
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
}
bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
const TargetInstrInfo &TII);
bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static MFResult
+ legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
+ ArrayRef<GISelChangeObserver *> AuxObservers,
+ MachineIRBuilder &MIRBuilder);
};
} // End namespace llvm.
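For orientation, a hedged sketch of driving the new static entry point directly; the setup of MF, LI and MIRBuilder, plus the raw_ostream include for errs(), are assumed:

  Legalizer::MFResult Result =
      Legalizer::legalizeMachineFunction(MF, LI, /*AuxObservers=*/{}, MIRBuilder);
  if (Result.FailedOn)
    errs() << "unable to legalize instruction: " << *Result.FailedOn;
  bool MadeChange = Result.Changed;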
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index fbfe71255a38..ac7e5cbbdaa9 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -231,12 +231,16 @@ public:
LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
LegalizeResult lowerFMad(MachineInstr &MI);
+ LegalizeResult lowerIntrinsicRound(MachineInstr &MI);
LegalizeResult lowerUnmergeValues(MachineInstr &MI);
LegalizeResult lowerShuffleVector(MachineInstr &MI);
LegalizeResult lowerDynStackAlloc(MachineInstr &MI);
LegalizeResult lowerExtract(MachineInstr &MI);
LegalizeResult lowerInsert(MachineInstr &MI);
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI);
+ LegalizeResult lowerBswap(MachineInstr &MI);
+ LegalizeResult lowerBitreverse(MachineInstr &MI);
+ LegalizeResult lowerReadRegister(MachineInstr &MI);
private:
MachineRegisterInfo &MRI;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 1cf62d1fde59..29f0d6575bac 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -22,8 +22,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <tuple>
@@ -660,6 +661,7 @@ public:
/// The instruction is unsupported.
LegalizeRuleSet &unsupported() {
+ markAllIdxsAsCovered();
return actionIf(LegalizeAction::Unsupported, always);
}
LegalizeRuleSet &unsupportedIf(LegalityPredicate Predicate) {
@@ -1156,6 +1158,12 @@ public:
virtual bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
+ /// Return the opcode (SEXT/ZEXT/ANYEXT) that should be used when widening
+ /// a constant of type SmallTy; targets can override this. E.g., the DAG
+ /// uses (SmallTy.isByteSized() ? G_SEXT : G_ZEXT), which is the default.
+ virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const;
+
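To illustrate the new hook, a hypothetical override (MyTargetLegalizerInfo is an invented name; this is a sketch, not the in-tree default):

unsigned MyTargetLegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const {
  // Sketch: this target always zero-extends widened constants.
  return TargetOpcode::G_ZEXT;
}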
private:
/// Determine what action should be taken to legalize the given generic
/// instruction opcode, type-index and type. Requires computeTables to have
@@ -1178,7 +1186,7 @@ private:
/// {65, NarrowScalar} // bit sizes [65, +inf[
/// });
/// It may be that only 64-bit pointers are supported on your target:
- /// setPointerAction(G_GEP, 0, LLT:pointer(1),
+ /// setPointerAction(G_PTR_ADD, 0, LLT::pointer(1),
/// {{1, Unsupported}, // bit sizes [ 1, 63[
/// {64, Legal}, // bit sizes [64, 65[
/// {65, Unsupported}, // bit sizes [65, +inf[
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
index 06de5800b8b7..ad1904725dcd 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
@@ -42,6 +42,10 @@ public:
static char ID;
private:
+ /// A callback that decides whether the pass should run on the given
+ /// MachineFunction.
+ std::function<bool(const MachineFunction &)> DoNotRunPass;
+
/// MRI contains all the register class/bank information that this
/// pass uses and updates.
MachineRegisterInfo *MRI;
@@ -72,6 +76,7 @@ private:
public:
Localizer();
+ Localizer(std::function<bool(const MachineFunction &)>);
StringRef getPassName() const override { return "Localizer"; }
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 416f9c19f794..072a1411de8a 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -404,10 +404,11 @@ public:
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV);
- /// Build and insert \p Res = G_GEP \p Op0, \p Op1
+ /// Build and insert \p Res = G_PTR_ADD \p Op0, \p Op1
///
- /// G_GEP adds \p Op1 bytes to the pointer specified by \p Op0,
- /// storing the resulting pointer in \p Res.
+ /// G_PTR_ADD adds \p Op1 addressable units to the pointer specified by \p Op0,
+ /// storing the resulting pointer in \p Res. Addressable units are typically
+ /// bytes, but this can vary between targets.
///
/// \pre setBasicBlock or setMI must have been called.
/// \pre \p Res and \p Op0 must be generic virtual registers with pointer
@@ -415,28 +416,28 @@ public:
/// \pre \p Op1 must be a generic virtual register with scalar type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildGEP(const DstOp &Res, const SrcOp &Op0,
- const SrcOp &Op1);
+ MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0,
+ const SrcOp &Op1);
- /// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value)
+ /// Materialize and insert \p Res = G_PTR_ADD \p Op0, (G_CONSTANT \p Value)
///
- /// G_GEP adds \p Value bytes to the pointer specified by \p Op0,
+ /// G_PTR_ADD adds \p Value bytes to the pointer specified by \p Op0,
/// storing the resulting pointer in \p Res. If \p Value is zero then no
- /// G_GEP or G_CONSTANT will be created and \pre Op0 will be assigned to
+ /// G_PTR_ADD or G_CONSTANT will be created and \pre Op0 will be assigned to
/// \p Res.
///
/// \pre setBasicBlock or setMI must have been called.
/// \pre \p Op0 must be a generic virtual register with pointer type.
/// \pre \p ValueTy must be a scalar type.
/// \pre \p Res must be 0. This is to detect confusion between
- /// materializeGEP() and buildGEP().
+ /// materializePtrAdd() and buildPtrAdd().
/// \post \p Res will either be a new generic virtual register of the same
/// type as \p Op0 or \p Op0 itself.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- Optional<MachineInstrBuilder> materializeGEP(Register &Res, Register Op0,
- const LLT &ValueTy,
- uint64_t Value);
+ Optional<MachineInstrBuilder> materializePtrAdd(Register &Res, Register Op0,
+ const LLT &ValueTy,
+ uint64_t Value);
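A usage sketch of the renamed builders, assuming MIRBuilder is positioned and Ptr is a p0 virtual register:

  LLT S64 = LLT::scalar(64);
  auto Offset = MIRBuilder.buildConstant(S64, 16);   // 16 addressable units
  auto NewPtr = MIRBuilder.buildPtrAdd(LLT::pointer(0, 64), Ptr, Offset);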
/// Build and insert \p Res = G_PTR_MASK \p Op0, \p NumBits
///
@@ -517,6 +518,13 @@ public:
/// \return The newly created instruction.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op);
+ /// Build and insert \p Res = G_FPEXT \p Op
+ MachineInstrBuilder buildFPExt(const DstOp &Res, const SrcOp &Op,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FPEXT, {Res}, {Op}, Flags);
+ }
+
+
/// Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src});
@@ -867,7 +875,8 @@ public:
/// \pre \p Res must be smaller than \p Op
///
/// \return The newly created instruction.
- MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op);
+ MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op,
+ Optional<unsigned> Flags = None);
/// Build and insert \p Res = G_TRUNC \p Op
///
@@ -1374,8 +1383,9 @@ public:
/// Build and insert \p Res = G_FMA \p Op0, \p Op1, \p Op2
MachineInstrBuilder buildFMA(const DstOp &Dst, const SrcOp &Src0,
- const SrcOp &Src1, const SrcOp &Src2) {
- return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2});
+ const SrcOp &Src1, const SrcOp &Src2,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2}, Flags);
}
/// Build and insert \p Res = G_FMAD \p Op0, \p Op1, \p Op2
@@ -1403,6 +1413,12 @@ public:
return buildInstr(TargetOpcode::G_FCANONICALIZE, {Dst}, {Src0}, Flags);
}
+ /// Build and insert \p Dst = G_INTRINSIC_TRUNC \p Src0
+ MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {Dst}, {Src0}, Flags);
+ }
+
/// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1) {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
index e84b1c3ea8b1..8725d96efd82 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
#include <cassert>
#include <initializer_list>
#include <memory>
@@ -543,7 +544,7 @@ public:
const RegisterBank *
getRegBankFromConstraints(const MachineInstr &MI, unsigned OpIdx,
const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI) const;
+ const MachineRegisterInfo &MRI) const;
/// Helper method to apply something that is like the default mapping.
/// Basically, that means that \p OpdMapper.getMI() is left untouched
@@ -599,7 +600,7 @@ public:
///
/// \todo This should be TableGen'ed.
virtual const RegisterBank &
- getRegBankFromRegClass(const TargetRegisterClass &RC) const {
+ getRegBankFromRegClass(const TargetRegisterClass &RC, LLT Ty) const {
llvm_unreachable("The target must override this method");
}
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 8af2853473c2..429d6db20e0c 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -168,10 +168,5 @@ inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
return isKnownNeverNaN(Val, MRI, true);
}
-/// Get a rough equivalent of an MVT for a given LLT.
-MVT getMVTForLLT(LLT Ty);
-/// Get a rough equivalent of an LLT for a given MVT.
-LLT getLLTForMVT(MVT Ty);
-
} // End namespace llvm.
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 658ad31fa2a6..06140fae8790 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -13,6 +13,8 @@
#ifndef LLVM_CODEGEN_ISDOPCODES_H
#define LLVM_CODEGEN_ISDOPCODES_H
+#include "llvm/CodeGen/ValueTypes.h"
+
namespace llvm {
/// ISD namespace - This namespace contains an enum which represents all of the
@@ -283,6 +285,12 @@ namespace ISD {
/// bits of the first 2 operands.
SMULFIXSAT, UMULFIXSAT,
+ /// RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on
+ /// 2 integers with the same width and scale. SCALE represents the scale
+ /// of both operands as fixed point numbers. This SCALE parameter must be a
+ /// constant integer.
+ SDIVFIX, UDIVFIX,
+
/// Simple binary floating point operators.
FADD, FSUB, FMUL, FDIV, FREM,
@@ -302,6 +310,7 @@ namespace ISD {
STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,
STRICT_LROUND, STRICT_LLROUND, STRICT_LRINT, STRICT_LLRINT,
+ STRICT_FMAXIMUM, STRICT_FMINIMUM,
/// STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or
/// unsigned integer. These have the same semantics as fptosi and fptoui
@@ -310,6 +319,13 @@ namespace ISD {
STRICT_FP_TO_SINT,
STRICT_FP_TO_UINT,
+ /// STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to
+ /// a floating point value. These have the same semantics as sitofp and
+ /// uitofp in IR.
+ /// They are used to limit optimizations while the DAG is being optimized.
+ STRICT_SINT_TO_FP,
+ STRICT_UINT_TO_FP,
+
/// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
/// point type down to the precision of the destination VT. TRUNC is a
/// flag, which is always an integer that is zero or one. If TRUNC is 0,
@@ -330,6 +346,12 @@ namespace ISD {
/// It is used to limit optimizations while the DAG is being optimized.
STRICT_FP_EXTEND,
+ /// STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used
+ /// for floating-point operands only. STRICT_FSETCC performs a quiet
+ /// comparison operation, while STRICT_FSETCCS performs a signaling
+ /// comparison operation.
+ STRICT_FSETCC, STRICT_FSETCCS,
+
/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,
@@ -921,11 +943,16 @@ namespace ISD {
BUILTIN_OP_END
};
+ /// FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations
+ /// which cannot raise FP exceptions should be less than this value.
+ /// Those that can must not be less than this value.
+ static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END+400;
+
/// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations
/// which do not reference a specific memory location should be less than
/// this value. Those that do must not be less than this value, and can
/// be used with SelectionDAG::getMemIntrinsicNode.
- static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+400;
+ static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+500;
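To illustrate the layering, a hypothetical target enum (all names invented) would place its opcodes as follows:

namespace MyTargetISD {
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,                // ordinary nodes
  RET_FLAG,                                          // cannot raise FP exceptions
  STRICT_FCMPE = ISD::FIRST_TARGET_STRICTFP_OPCODE,  // may raise FP exceptions
  LDP = ISD::FIRST_TARGET_MEMORY_OPCODE              // references memory
};
} // namespace MyTargetISD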
//===--------------------------------------------------------------------===//
/// MemIndexedMode enum - This enum defines the load / store indexed
@@ -1076,7 +1103,17 @@ namespace ISD {
/// Return the operation corresponding to !(X op Y), where 'op' is a valid
/// SetCC operation.
- CondCode getSetCCInverse(CondCode Operation, bool isInteger);
+ CondCode getSetCCInverse(CondCode Operation, EVT Type);
+
+ namespace GlobalISel {
+ /// Return the operation corresponding to !(X op Y), where 'op' is a valid
+ /// SetCC operation. The U bit of the condition code has different meanings
+ /// between floating-point and integer comparisons and LLTs don't provide
+ /// this distinction. As such we need to be told whether the comparison is
+ /// floating point or integer-like. Pointers should use integer-like
+ /// comparisons.
+ CondCode getSetCCInverse(CondCode Operation, bool isIntegerLike);
+ } // end namespace GlobalISel
/// Return the operation corresponding to (Y op X) when given the operation
/// for (X op Y).
@@ -1085,12 +1122,12 @@ namespace ISD {
/// Return the result of a logical OR between different comparisons of
/// identical values: ((X op1 Y) | (X op2 Y)). This function returns
/// SETCC_INVALID if it is not possible to represent the resultant comparison.
- CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger);
+ CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type);
/// Return the result of a logical AND between different comparisons of
/// identical values: ((X op1 Y) & (X op2 Y)). This function returns
/// SETCC_INVALID if it is not possible to represent the resultant comparison.
- CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger);
+ CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type);
} // end llvm::ISD namespace
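A worked example of the EVT-based overload for an integer comparison (MVT converts implicitly to EVT):

  // !(X < Y) is (X >= Y) for integer-typed operands.
  ISD::CondCode Inv = ISD::getSetCCInverse(ISD::SETLT, MVT::i32);
  assert(Inv == ISD::SETGE && "inverse of signed less-than");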
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveInterval.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveInterval.h
index 290a2381d9c9..fe5adb59dac2 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveInterval.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveInterval.h
@@ -836,10 +836,35 @@ namespace llvm {
/// don't define the related lane masks after they get shrunk. E.g.,
/// when L000F gets split into L0007 and L0008 maybe only a subset
/// of the VNIs that defined L000F defines L0007.
+ ///
+ /// The clean-up of the VNIs needs to look at the actual instructions
+ /// to decide what is or is not live at a definition point. If the
+ /// update of the subranges occurs while the IR does not reflect these
+ /// changes, \p ComposeSubRegIdx can be used to specify how the
+ /// definitions are going to be rewritten.
+ /// E.g., let's say we want to merge:
+ /// V1.sub1:<2 x s32> = COPY V2.sub3:<4 x s32>
+ /// We do that by choosing a class where sub1:<2 x s32> and sub3:<4 x s32>
+ /// overlap, i.e., by choosing a class where we can find "offset + 1 == 3".
+ /// Put differently we align V2's sub3 with V1's sub1:
+ /// V2: sub0 sub1 sub2 sub3
+ /// V1: <offset> sub0 sub1
+ ///
+ /// This offset will look like a composed subregidx in the class:
+ /// V1.(composed sub2 with sub1):<4 x s32> = COPY V2.sub3:<4 x s32>
+ /// => V1.(composed sub2 with sub1):<4 x s32> = COPY V2.sub3:<4 x s32>
+ ///
+ /// Now if we didn't rewrite the uses and def of V1, all the checks for V1
+ /// need to account for this offset.
+ /// This happens during coalescing where we update the live-ranges while
+ /// still having the old IR around because updating the IR on-the-fly
+ /// would actually clobber some information about what the live-ranges
+ /// being updated look like.
void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
std::function<void(LiveInterval::SubRange &)> Apply,
const SlotIndexes &Indexes,
- const TargetRegisterInfo &TRI);
+ const TargetRegisterInfo &TRI,
+ unsigned ComposeSubRegIdx = 0);
bool operator<(const LiveInterval& other) const {
const SlotIndex &thisIndex = beginIndex();
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
index 05506d2c3bc6..c555763a4ec2 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -75,6 +75,7 @@ public:
bool empty() const { return Segments.empty(); }
SlotIndex startIndex() const { return Segments.start(); }
+ SlotIndex endIndex() const { return Segments.stop(); }
// Provide public access to the underlying map to allow overlap iteration.
using Map = LiveSegments;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h
index 888d72b87bd1..2bfc99624937 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h
@@ -469,7 +469,7 @@ class VirtRegMap;
void computeLiveInRegUnits();
void computeRegUnitRange(LiveRange&, unsigned Unit);
- void computeVirtRegInterval(LiveInterval&);
+ bool computeVirtRegInterval(LiveInterval&);
using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>;
void extendSegmentsToUses(LiveRange &Segments,
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h
index 50da0b3d5c48..085893462a08 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h
@@ -137,6 +137,9 @@ public:
/// Live out registers are the union of the live-in registers of the successor
/// blocks and pristine registers. Live out registers of the end block are the
/// callee saved registers.
+ /// If a register is not added by this method, it is guaranteed to not be
+ /// live out from MBB, although a sub-register may be. This is true
+ /// both before and after regalloc.
void addLiveOuts(const MachineBasicBlock &MBB);
/// Adds all live-out registers of basic block \p MBB but skips pristine
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h
index 314afad92970..1ed091e3bb5e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRegUnits.h
@@ -160,6 +160,19 @@ private:
void addPristines(const MachineFunction &MF);
};
+/// Returns an iterator range over all physical register and mask operands for
+/// \p MI and bundled instructions. This also skips any debug operands.
+inline iterator_range<filter_iterator<
+ ConstMIBundleOperands, std::function<bool(const MachineOperand &)>>>
+phys_regs_and_masks(const MachineInstr &MI) {
+ std::function<bool(const MachineOperand &)> Pred =
+ [](const MachineOperand &MOP) {
+ return MOP.isRegMask() || (MOP.isReg() && !MOP.isDebug() &&
+ Register::isPhysicalRegister(MOP.getReg()));
+ };
+ return make_filter_range(const_mi_bundle_ops(MI), Pred);
+}
+
} // end namespace llvm
#endif // LLVM_CODEGEN_LIVEREGUNITS_H
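A usage sketch for the new helper, assuming MI may sit inside a bundle:

  SmallVector<Register, 4> PhysDefs;
  for (const MachineOperand &MO : phys_regs_and_masks(MI)) {
    if (MO.isReg() && MO.isDef())
      PhysDefs.push_back(MO.getReg());
    // MO.isRegMask() operands describe clobbers and could be handled here too.
  }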
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveStacks.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveStacks.h
index 7c4c64d515df..1cbdb8bd86bd 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveStacks.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveStacks.h
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include <cassert>
#include <map>
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h
index 71de306e2942..7b45f7d76af5 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/InitializePasses.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LowLevelType.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LowLevelType.h
index 687233e4e168..6295d86f749c 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LowLevelType.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LowLevelType.h
@@ -17,6 +17,7 @@
#define LLVM_CODEGEN_LOWLEVELTYPE_H
#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MachineValueType.h"
namespace llvm {
@@ -26,6 +27,14 @@ class Type;
/// Construct a low-level type based on an LLVM type.
LLT getLLTForType(Type &Ty, const DataLayout &DL);
+/// Get a rough equivalent of an MVT for a given LLT. MVT can't distinguish
+/// pointers, so these will convert to a plain integer.
+MVT getMVTForLLT(LLT Ty);
+
+/// Get a rough equivalent of an LLT for a given MVT. LLT does not yet support
+/// scalable vector types, and will assert if used.
+LLT getLLTForMVT(MVT Ty);
+
}
#endif // LLVM_CODEGEN_LOWLEVELTYPE_H
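A round-trip sketch of the relocated helpers:

  LLT S32 = LLT::scalar(32);
  MVT VT = getMVTForLLT(S32);        // MVT::i32
  LLT RoundTrip = getLLTForMVT(VT);  // back to the 32-bit scalar LLT
  assert(RoundTrip == S32);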
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h
new file mode 100644
index 000000000000..e57c32c5ae61
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRFormatter.h
@@ -0,0 +1,83 @@
+//===-- llvm/CodeGen/MIRFormatter.h -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MIRFormatter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MIRFORMATTER_H
+#define LLVM_CODEGEN_MIRFORMATTER_H
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+struct PerFunctionMIParsingState;
+struct SlotMapping;
+
+/// MIRFormatter - Interface to format MIR operands based on the target.
+class MIRFormatter {
+public:
+ typedef function_ref<bool(StringRef::iterator Loc, const Twine &)>
+ ErrorCallbackType;
+
+ MIRFormatter() {}
+ virtual ~MIRFormatter() = default;
+
+ /// Implement target-specific printing for a machine operand's immediate
+ /// value, so that we can have a more meaningful mnemonic than a 64-bit
+ /// integer. Passing None to OpIdx means the index is unknown.
+ virtual void printImm(raw_ostream &OS, const MachineInstr &MI,
+ Optional<unsigned> OpIdx, int64_t Imm) const {
+ OS << Imm;
+ }
+
+ /// Implement target-specific parsing of immediate mnemonics. The mnemonic
+ /// is a dot-separated string.
+ virtual bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
+ StringRef Src, int64_t &Imm,
+ ErrorCallbackType ErrorCallback) const {
+ llvm_unreachable("target did not implement parsing MIR immediate mnemonic");
+ }
+
+ /// Implement target-specific printing of a target custom pseudo source
+ /// value. The default implementation is not necessarily the correct MIR
+ /// serialization format.
+ virtual void
+ printCustomPseudoSourceValue(raw_ostream &OS, ModuleSlotTracker &MST,
+ const PseudoSourceValue &PSV) const {
+ PSV.printCustom(OS);
+ }
+
+ /// Implement target-specific parsing of a target custom pseudo source value.
+ virtual bool parseCustomPseudoSourceValue(
+ StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS,
+ const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const {
+ llvm_unreachable(
+ "target did not implement parsing MIR custom pseudo source value");
+ }
+
+ /// Helper function to print an IR value in MIR serialization format, which
+ /// is useful for target-specific printers, e.g. for printing an IR value
+ /// in a custom pseudo source value.
+ static void printIRValue(raw_ostream &OS, const Value &V,
+ ModuleSlotTracker &MST);
+
+ /// Helper function to parse an IR value from MIR serialization format,
+ /// which is useful for target-specific parsers, e.g. for parsing an IR
+ /// value for a custom pseudo source value.
+ static bool parseIRValue(StringRef Src, MachineFunction &MF,
+ PerFunctionMIParsingState &PFS, const Value *&V,
+ ErrorCallbackType ErrorCallback);
+};
+
+} // end namespace llvm
+
+#endif
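A hypothetical subclass showing the intended shape of a printImm override (MyTargetMIRFormatter and MyTarget::CONDBR are invented names):

class MyTargetMIRFormatter : public MIRFormatter {
  void printImm(raw_ostream &OS, const MachineInstr &MI,
                Optional<unsigned> OpIdx, int64_t Imm) const override {
    // Sketch: print a mnemonic for a condition-code immediate.
    if (MI.getOpcode() == MyTarget::CONDBR && OpIdx && *OpIdx == 1)
      OS << (Imm ? "cc.ne" : "cc.eq");
    else
      MIRFormatter::printImm(OS, MI, OpIdx, Imm);
  }
};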
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
index 4e32a04551c1..8ca665b23b28 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
@@ -171,12 +171,16 @@ struct PerFunctionMIParsingState {
DenseMap<unsigned, unsigned> ConstantPoolSlots;
DenseMap<unsigned, unsigned> JumpTableSlots;
+ /// Maps from slot numbers to the function's unnamed values.
+ DenseMap<unsigned, const Value *> Slots2Values;
+
PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
const SlotMapping &IRSlots,
PerTargetMIParsingState &Target);
VRegInfo &getVRegInfo(unsigned Num);
VRegInfo &getVRegInfoNamed(StringRef RegName);
+ const Value *getIRValue(unsigned Slot);
};
/// Parse the machine basic block definitions, and skip the machine
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
index 6a04e48e533c..385baea0446f 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
@@ -23,10 +23,11 @@
namespace llvm {
-class StringRef;
+class Function;
class MIRParserImpl;
class MachineModuleInfo;
class SMDiagnostic;
+class StringRef;
/// This class initializes machine functions by applying the state loaded from
/// a MIR file.
@@ -60,9 +61,11 @@ public:
/// \param Filename - The name of the file to parse.
/// \param Error - Error result info.
/// \param Context - Context which will be used for the parsed LLVM IR module.
-std::unique_ptr<MIRParser> createMIRParserFromFile(StringRef Filename,
- SMDiagnostic &Error,
- LLVMContext &Context);
+/// \param ProcessIRFunction - function to run on every IR function or stub
+/// loaded from the MIR file.
+std::unique_ptr<MIRParser> createMIRParserFromFile(
+ StringRef Filename, SMDiagnostic &Error, LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction = nullptr);
/// This function is another interface to the MIR serialization format parser.
///
@@ -73,7 +76,8 @@ std::unique_ptr<MIRParser> createMIRParserFromFile(StringRef Filename,
/// \param Contents - The MemoryBuffer containing the machine level IR.
/// \param Context - Context which will be used for the parsed LLVM IR module.
std::unique_ptr<MIRParser>
-createMIRParser(std::unique_ptr<MemoryBuffer> Contents, LLVMContext &Context);
+createMIRParser(std::unique_ptr<MemoryBuffer> Contents, LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction = nullptr);
} // end namespace llvm
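A usage sketch of the new ProcessIRFunction hook; the attribute name is purely illustrative:

  SMDiagnostic Err;
  LLVMContext Ctx;
  std::unique_ptr<MIRParser> Parser = createMIRParserFromFile(
      "test.mir", Err, Ctx,
      [](Function &F) { F.addFnAttr("my-tool-visited"); });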
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index a438ecfcc25e..2a826d0b64c0 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -38,6 +38,9 @@ public:
static char ID;
MachineBlockFrequencyInfo();
+ explicit MachineBlockFrequencyInfo(MachineFunction &F,
+ MachineBranchProbabilityInfo &MBPI,
+ MachineLoopInfo &MLI);
~MachineBlockFrequencyInfo() override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 2b9b2030eb97..cde3bc08692b 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -35,10 +35,7 @@ class MachineBranchProbabilityInfo : public ImmutablePass {
public:
static char ID;
- MachineBranchProbabilityInfo() : ImmutablePass(ID) {
- PassRegistry &Registry = *PassRegistry::getPassRegistry();
- initializeMachineBranchProbabilityInfoPass(Registry);
- }
+ MachineBranchProbabilityInfo();
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index 503227222207..149fe043d1f5 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -38,6 +38,51 @@ enum class MachineCombinerPattern {
MULSUBX_OP2,
MULADDXI_OP1,
MULSUBXI_OP1,
+ // NEON integer vectors
+ MULADDv8i8_OP1,
+ MULADDv8i8_OP2,
+ MULADDv16i8_OP1,
+ MULADDv16i8_OP2,
+ MULADDv4i16_OP1,
+ MULADDv4i16_OP2,
+ MULADDv8i16_OP1,
+ MULADDv8i16_OP2,
+ MULADDv2i32_OP1,
+ MULADDv2i32_OP2,
+ MULADDv4i32_OP1,
+ MULADDv4i32_OP2,
+
+ MULSUBv8i8_OP1,
+ MULSUBv8i8_OP2,
+ MULSUBv16i8_OP1,
+ MULSUBv16i8_OP2,
+ MULSUBv4i16_OP1,
+ MULSUBv4i16_OP2,
+ MULSUBv8i16_OP1,
+ MULSUBv8i16_OP2,
+ MULSUBv2i32_OP1,
+ MULSUBv2i32_OP2,
+ MULSUBv4i32_OP1,
+ MULSUBv4i32_OP2,
+
+ MULADDv4i16_indexed_OP1,
+ MULADDv4i16_indexed_OP2,
+ MULADDv8i16_indexed_OP1,
+ MULADDv8i16_indexed_OP2,
+ MULADDv2i32_indexed_OP1,
+ MULADDv2i32_indexed_OP2,
+ MULADDv4i32_indexed_OP1,
+ MULADDv4i32_indexed_OP2,
+
+ MULSUBv4i16_indexed_OP1,
+ MULSUBv4i16_indexed_OP2,
+ MULSUBv8i16_indexed_OP1,
+ MULSUBv8i16_indexed_OP2,
+ MULSUBv2i32_indexed_OP1,
+ MULSUBv2i32_indexed_OP2,
+ MULSUBv4i32_indexed_OP1,
+ MULSUBv4i32_indexed_OP2,
+
// Floating Point
FMULADDH_OP1,
FMULADDH_OP2,
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominators.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominators.h
index e4d7a02f8c48..9d31232c9b95 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominators.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineDominators.h
@@ -81,6 +81,9 @@ public:
static char ID; // Pass ID, replacement for typeid
MachineDominatorTree();
+ explicit MachineDominatorTree(MachineFunction &MF) : MachineFunctionPass(ID) {
+ calculate(MF);
+ }
DomTreeT &getBase() {
if (!DT) DT.reset(new DomTreeT());
@@ -111,6 +114,8 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
+ void calculate(MachineFunction &F);
+
bool dominates(const MachineDomTreeNode *A,
const MachineDomTreeNode *B) const {
applySplitCriticalEdges();
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 01fc50d14a7f..05b34d92651c 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -553,7 +553,7 @@ public:
void setStackSize(uint64_t Size) { StackSize = Size; }
/// Estimate and return the size of the stack frame.
- unsigned estimateStackSize(const MachineFunction &MF) const;
+ uint64_t estimateStackSize(const MachineFunction &MF) const;
/// Return the correction for frame offsets.
int getOffsetAdjustment() const { return OffsetAdjustment; }
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
index fadf79569cdc..7f4a3a8c2f97 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
@@ -36,7 +37,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Recycler.h"
-#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <memory>
@@ -304,6 +304,10 @@ class MachineFunction {
/// by debug and exception handling consumers.
std::vector<MCCFIInstruction> FrameInstructions;
+ /// List of basic blocks immediately following calls to _setjmp. Used to
+ /// construct a table of valid longjmp targets for Windows Control Flow Guard.
+ std::vector<MCSymbol *> LongjmpTargets;
+
/// \name Exception Handling
/// \{
@@ -322,10 +326,6 @@ class MachineFunction {
/// CodeView label annotations.
std::vector<std::pair<MCSymbol *, MDNode *>> CodeViewAnnotations;
- /// CodeView heapallocsites.
- std::vector<std::tuple<MCSymbol *, MCSymbol *, const DIType *>>
- CodeViewHeapAllocSites;
-
bool CallsEHReturn = false;
bool CallsUnwindInit = false;
bool HasEHScopes = false;
@@ -403,14 +403,7 @@ private:
/// A helper function that returns call site info for a given call
/// instruction if debug entry value support is enabled.
- CallSiteInfoMap::iterator getCallSiteInfo(const MachineInstr *MI) {
- assert(MI->isCall() &&
- "Call site info refers only to call instructions!");
-
- if (!Target.Options.EnableDebugEntryValues)
- return CallSitesInfo.end();
- return CallSitesInfo.find(MI);
- }
+ CallSiteInfoMap::iterator getCallSiteInfo(const MachineInstr *MI);
// Callbacks for insertion and removal.
void handleInsertion(MachineInstr &MI);
@@ -560,6 +553,9 @@ public:
}
void setHasWinCFI(bool v) { HasWinCFI = v; }
+ /// True if this function needs frame moves for debug or exceptions.
+ bool needsFrameMoves() const;
+
/// Get the function properties
const MachineFunctionProperties &getProperties() const { return Properties; }
MachineFunctionProperties &getProperties() { return Properties; }
@@ -579,6 +575,10 @@ public:
return const_cast<MachineFunction*>(this)->getInfo<Ty>();
}
+ /// Returns the denormal handling type for the default rounding mode of the
+ /// function.
+ DenormalMode getDenormalMode(const fltSemantics &FPType) const;
+
/// getBlockNumbered - MachineBasicBlocks are automatically numbered when they
/// are inserted into the machine function. The block number for a machine
/// basic block can be found by using the MBB::getNumber method, this method
@@ -796,18 +796,15 @@ public:
/// Allocate and initialize a register mask with @p NumRegister bits.
uint32_t *allocateRegMask();
+ ArrayRef<int> allocateShuffleMask(ArrayRef<int> Mask);
+
/// Allocate and construct an extra info structure for a `MachineInstr`.
///
/// This is allocated on the function's allocator and so lives the life of
/// the function.
- MachineInstr::ExtraInfo *
- createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
- MCSymbol *PreInstrSymbol = nullptr,
- MCSymbol *PostInstrSymbol = nullptr);
-
- MachineInstr::ExtraInfo *createMIExtraInfoWithMarker(
- ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol,
- MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker);
+ MachineInstr::ExtraInfo *createMIExtraInfo(
+ ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol = nullptr,
+ MCSymbol *PostInstrSymbol = nullptr, MDNode *HeapAllocMarker = nullptr);
/// Allocate a string and populate it with the given external symbol name.
const char *createExternalSymbolName(StringRef Name);
@@ -834,6 +831,17 @@ public:
LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst);
+ /// Returns a reference to a list of symbols immediately following calls to
+ /// _setjmp in the function. Used to construct the longjmp target table used
+ /// by Windows Control Flow Guard.
+ const std::vector<MCSymbol *> &getLongjmpTargets() const {
+ return LongjmpTargets;
+ }
+
+ /// Add the specified symbol to the list of valid longjmp targets for Windows
+ /// Control Flow Guard.
+ void addLongjmpTarget(MCSymbol *Target) { LongjmpTargets.push_back(Target); }
+
/// \name Exception Handling
/// \{
@@ -951,14 +959,6 @@ public:
return CodeViewAnnotations;
}
- /// Record heapallocsites
- void addCodeViewHeapAllocSite(MachineInstr *I, const MDNode *MD);
-
- ArrayRef<std::tuple<MCSymbol *, MCSymbol *, const DIType *>>
- getCodeViewHeapAllocSites() const {
- return CodeViewHeapAllocSites;
- }
-
/// Return a reference to the C++ typeinfo for the current function.
const std::vector<const GlobalValue *> &getTypeInfos() const {
return TypeInfos;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h
index 76d75e8064f8..6d4ab3b2a2a5 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -104,8 +104,8 @@ public:
// no signed wrap.
IsExact = 1 << 13, // Instruction performs a division that is
// known to be exact.
- FPExcept = 1 << 14, // Instruction may raise floating-point
- // exceptions.
+ NoFPExcept = 1 << 14, // Instruction does not raise
+ // floating-point exceptions.
};
private:
@@ -718,7 +718,7 @@ public:
/// block. The TargetInstrInfo::AnalyzeBranch method can be used to get more
/// information about this branch.
bool isConditionalBranch(QueryType Type = AnyInBundle) const {
- return isBranch(Type) & !isBarrier(Type) & !isIndirectBranch(Type);
+ return isBranch(Type) && !isBarrier(Type) && !isIndirectBranch(Type);
}
/// Return true if this is a branch which always
@@ -726,7 +726,7 @@ public:
/// TargetInstrInfo::AnalyzeBranch method can be used to get more information
/// about this branch.
bool isUnconditionalBranch(QueryType Type = AnyInBundle) const {
- return isBranch(Type) & isBarrier(Type) & !isIndirectBranch(Type);
+ return isBranch(Type) && isBarrier(Type) && !isIndirectBranch(Type);
}
/// Return true if this instruction has a predicate operand that
@@ -885,10 +885,10 @@ public:
/// instruction that can in principle raise an exception, as indicated
/// by the MCID::MayRaiseFPException property, *and* at the same time,
/// the instruction is used in a context where we expect floating-point
- /// exceptions might be enabled, as indicated by the FPExcept MI flag.
+ /// exceptions are not disabled, i.e. the NoFPExcept MI flag is not set.
bool mayRaiseFPException() const {
return hasProperty(MCID::MayRaiseFPException) &&
- getFlag(MachineInstr::MIFlag::FPExcept);
+ !getFlag(MachineInstr::MIFlag::NoFPExcept);
}
//===--------------------------------------------------------------------===//
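A sketch of the inverted flag in use: with the polarity flipped, an instruction known not to trap is now marked explicitly.

  MI.setFlag(MachineInstr::MIFlag::NoFPExcept);
  assert(!MI.mayRaiseFPException() && "flag suppresses FP exception handling");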
@@ -1651,7 +1651,8 @@ public:
/// Add all implicit def and use operands to this instruction.
void addImplicitDefUseOperands(MachineFunction &MF);
- /// Scan instructions following MI and collect any matching DBG_VALUEs.
+ /// Scan instructions immediately following MI and collect any matching
+ /// DBG_VALUEs.
void collectDebugValues(SmallVectorImpl<MachineInstr *> &DbgValues);
/// Find all DBG_VALUEs that point to the register def in this instruction
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index 880d4829ac7e..cabb9f1c97c9 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -250,7 +250,7 @@ public:
return *this;
}
- const MachineInstrBuilder &addShuffleMask(const Constant *Val) const {
+ const MachineInstrBuilder &addShuffleMask(ArrayRef<int> Val) const {
MI->addOperand(*MF, MachineOperand::CreateShuffleMask(Val));
return *this;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBundle.h
index 1810d23072d0..517f03e60933 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstrBundle.h
@@ -75,12 +75,12 @@ inline MachineBasicBlock::const_instr_iterator getBundleEnd(
}
//===----------------------------------------------------------------------===//
-// MachineOperand iterator
+// MachineBundleOperand iterator
//
-/// MachineOperandIteratorBase - Iterator that can visit all operands on a
-/// MachineInstr, or all operands on a bundle of MachineInstrs. This class is
-/// not intended to be used directly, use one of the sub-classes instead.
+/// MIBundleOperandIteratorBase - Iterator that visits all operands in a bundle
+/// of MachineInstrs. This class is not intended to be used directly; use one
+/// of the sub-classes instead.
///
/// Intended use:
///
@@ -90,7 +90,10 @@ inline MachineBasicBlock::const_instr_iterator getBundleEnd(
/// ...
/// }
///
-class MachineOperandIteratorBase {
+template <typename ValueT>
+class MIBundleOperandIteratorBase
+ : public iterator_facade_base<MIBundleOperandIteratorBase<ValueT>,
+ std::forward_iterator_tag, ValueT> {
MachineBasicBlock::instr_iterator InstrI, InstrE;
MachineInstr::mop_iterator OpI, OpE;
@@ -99,35 +102,34 @@ class MachineOperandIteratorBase {
void advance() {
while (OpI == OpE) {
// Don't advance off the basic block, or into a new bundle.
- if (++InstrI == InstrE || !InstrI->isInsideBundle())
+ if (++InstrI == InstrE || !InstrI->isInsideBundle()) {
+ InstrI = InstrE;
break;
+ }
OpI = InstrI->operands_begin();
OpE = InstrI->operands_end();
}
}
protected:
- /// MachineOperandIteratorBase - Create an iterator that visits all operands
+ /// MIBundleOperandIteratorBase - Create an iterator that visits all operands
/// on MI, or all operands on every instruction in the bundle containing MI.
///
/// @param MI The instruction to examine.
- /// @param WholeBundle When true, visit all operands on the entire bundle.
///
- explicit MachineOperandIteratorBase(MachineInstr &MI, bool WholeBundle) {
- if (WholeBundle) {
- InstrI = getBundleStart(MI.getIterator());
- InstrE = MI.getParent()->instr_end();
- } else {
- InstrI = InstrE = MI.getIterator();
- ++InstrE;
- }
+ explicit MIBundleOperandIteratorBase(MachineInstr &MI) {
+ InstrI = getBundleStart(MI.getIterator());
+ InstrE = MI.getParent()->instr_end();
OpI = InstrI->operands_begin();
OpE = InstrI->operands_end();
- if (WholeBundle)
- advance();
+ advance();
}
- MachineOperand &deref() const { return *OpI; }
+ /// Constructor for an iterator past the last iteration: both instruction
+ /// iterators point to the end of the BB and OpI == OpE.
+ explicit MIBundleOperandIteratorBase(MachineBasicBlock::instr_iterator InstrE,
+ MachineInstr::mop_iterator OpE)
+ : InstrI(InstrE), InstrE(InstrE), OpI(OpE), OpE(OpE) {}
public:
/// isValid - Returns true until all the operands have been visited.
@@ -140,123 +142,148 @@ public:
advance();
}
+ ValueT &operator*() const { return *OpI; }
+ ValueT *operator->() const { return &*OpI; }
+
+ bool operator==(const MIBundleOperandIteratorBase &Arg) const {
+ // Iterators are equal if InstrI matches and either the OpIs match, or
+ // OpI == OpE for both. The second condition lets us construct an 'end'
+ // iterator without finding the last instruction in a bundle up-front.
+ return InstrI == Arg.InstrI &&
+ (OpI == Arg.OpI || (OpI == OpE && Arg.OpI == Arg.OpE));
+ }
/// getOperandNo - Returns the number of the current operand relative to its
/// instruction.
///
unsigned getOperandNo() const {
return OpI - InstrI->operands_begin();
}
-
- /// VirtRegInfo - Information about a virtual register used by a set of operands.
- ///
- struct VirtRegInfo {
- /// Reads - One of the operands read the virtual register. This does not
- /// include undef or internal use operands, see MO::readsReg().
- bool Reads;
-
- /// Writes - One of the operands writes the virtual register.
- bool Writes;
-
- /// Tied - Uses and defs must use the same register. This can be because of
- /// a two-address constraint, or there may be a partial redefinition of a
- /// sub-register.
- bool Tied;
- };
-
- /// Information about how a physical register Reg is used by a set of
- /// operands.
- struct PhysRegInfo {
- /// There is a regmask operand indicating Reg is clobbered.
- /// \see MachineOperand::CreateRegMask().
- bool Clobbered;
-
- /// Reg or one of its aliases is defined. The definition may only cover
- /// parts of the register.
- bool Defined;
- /// Reg or a super-register is defined. The definition covers the full
- /// register.
- bool FullyDefined;
-
- /// Reg or one of its aliases is read. The register may only be read
- /// partially.
- bool Read;
- /// Reg or a super-register is read. The full register is read.
- bool FullyRead;
-
- /// Either:
- /// - Reg is FullyDefined and all defs of reg or an overlapping
- /// register are dead, or
- /// - Reg is completely dead because "defined" by a clobber.
- bool DeadDef;
-
- /// Reg is Defined and all defs of reg or an overlapping register are
- /// dead.
- bool PartialDeadDef;
-
- /// There is a use operand of reg or a super-register with kill flag set.
- bool Killed;
- };
-
- /// analyzeVirtReg - Analyze how the current instruction or bundle uses a
- /// virtual register. This function should not be called after operator++(),
- /// it expects a fresh iterator.
- ///
- /// @param Reg The virtual register to analyze.
- /// @param Ops When set, this vector will receive an (MI, OpNum) entry for
- /// each operand referring to Reg.
- /// @returns A filled-in RegInfo struct.
- VirtRegInfo analyzeVirtReg(unsigned Reg,
- SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops = nullptr);
-
- /// analyzePhysReg - Analyze how the current instruction or bundle uses a
- /// physical register. This function should not be called after operator++(),
- /// it expects a fresh iterator.
- ///
- /// @param Reg The physical register to analyze.
- /// @returns A filled-in PhysRegInfo struct.
- PhysRegInfo analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI);
};
-/// MIOperands - Iterate over operands of a single instruction.
+/// MIBundleOperands - Iterate over all operands in a bundle of machine
+/// instructions.
///
-class MIOperands : public MachineOperandIteratorBase {
+class MIBundleOperands : public MIBundleOperandIteratorBase<MachineOperand> {
+ /// Constructor for an iterator past the last iteration.
+ MIBundleOperands(MachineBasicBlock::instr_iterator InstrE,
+ MachineInstr::mop_iterator OpE)
+ : MIBundleOperandIteratorBase(InstrE, OpE) {}
+
public:
- MIOperands(MachineInstr &MI) : MachineOperandIteratorBase(MI, false) {}
- MachineOperand &operator* () const { return deref(); }
- MachineOperand *operator->() const { return &deref(); }
+ MIBundleOperands(MachineInstr &MI) : MIBundleOperandIteratorBase(MI) {}
+
+ /// Returns an iterator past the last iteration.
+ static MIBundleOperands end(const MachineBasicBlock &MBB) {
+ return {const_cast<MachineBasicBlock &>(MBB).instr_end(),
+ const_cast<MachineBasicBlock &>(MBB).instr_begin()->operands_end()};
+ }
};
-/// ConstMIOperands - Iterate over operands of a single const instruction.
+/// ConstMIBundleOperands - Iterate over all operands in a const bundle of
+/// machine instructions.
///
-class ConstMIOperands : public MachineOperandIteratorBase {
+class ConstMIBundleOperands
+ : public MIBundleOperandIteratorBase<const MachineOperand> {
+
+ /// Constructor for an iterator past the last iteration.
+ ConstMIBundleOperands(MachineBasicBlock::instr_iterator InstrE,
+ MachineInstr::mop_iterator OpE)
+ : MIBundleOperandIteratorBase(InstrE, OpE) {}
+
public:
- ConstMIOperands(const MachineInstr &MI)
- : MachineOperandIteratorBase(const_cast<MachineInstr &>(MI), false) {}
- const MachineOperand &operator* () const { return deref(); }
- const MachineOperand *operator->() const { return &deref(); }
+ ConstMIBundleOperands(const MachineInstr &MI)
+ : MIBundleOperandIteratorBase(const_cast<MachineInstr &>(MI)) {}
+
+ /// Returns an iterator past the last iteration.
+ static ConstMIBundleOperands end(const MachineBasicBlock &MBB) {
+ return {const_cast<MachineBasicBlock &>(MBB).instr_end(),
+ const_cast<MachineBasicBlock &>(MBB).instr_begin()->operands_end()};
+ }
};
-/// MIBundleOperands - Iterate over all operands in a bundle of machine
-/// instructions.
+inline iterator_range<ConstMIBundleOperands>
+const_mi_bundle_ops(const MachineInstr &MI) {
+ return make_range(ConstMIBundleOperands(MI),
+ ConstMIBundleOperands::end(*MI.getParent()));
+}
+
+inline iterator_range<MIBundleOperands> mi_bundle_ops(MachineInstr &MI) {
+ return make_range(MIBundleOperands(MI),
+ MIBundleOperands::end(*MI.getParent()));
+}
+
+/// VirtRegInfo - Information about a virtual register used by a set of
+/// operands.
///
-class MIBundleOperands : public MachineOperandIteratorBase {
-public:
- MIBundleOperands(MachineInstr &MI) : MachineOperandIteratorBase(MI, true) {}
- MachineOperand &operator* () const { return deref(); }
- MachineOperand *operator->() const { return &deref(); }
+struct VirtRegInfo {
+ /// Reads - One of the operands reads the virtual register. This does not
+ /// include undef or internal use operands, see MO::readsReg().
+ bool Reads;
+
+ /// Writes - One of the operands writes the virtual register.
+ bool Writes;
+
+ /// Tied - Uses and defs must use the same register. This can be because of
+ /// a two-address constraint, or there may be a partial redefinition of a
+ /// sub-register.
+ bool Tied;
};
-/// ConstMIBundleOperands - Iterate over all operands in a const bundle of
-/// machine instructions.
+/// AnalyzeVirtRegInBundle - Analyze how the instruction or bundle containing
+/// \p MI uses a virtual register.
///
-class ConstMIBundleOperands : public MachineOperandIteratorBase {
-public:
- ConstMIBundleOperands(const MachineInstr &MI)
- : MachineOperandIteratorBase(const_cast<MachineInstr &>(MI), true) {}
- const MachineOperand &operator* () const { return deref(); }
- const MachineOperand *operator->() const { return &deref(); }
+/// @param Reg The virtual register to analyze.
+/// @param Ops When set, this vector will receive an (MI, OpNum) entry for
+/// each operand referring to Reg.
+/// @returns A filled-in VirtRegInfo struct.
+VirtRegInfo AnalyzeVirtRegInBundle(
+ MachineInstr &MI, unsigned Reg,
+ SmallVectorImpl<std::pair<MachineInstr *, unsigned>> *Ops = nullptr);
+
+/// Information about how a physical register Reg is used by a set of
+/// operands.
+struct PhysRegInfo {
+ /// There is a regmask operand indicating Reg is clobbered.
+ /// \see MachineOperand::CreateRegMask().
+ bool Clobbered;
+
+ /// Reg or one of its aliases is defined. The definition may only cover
+ /// parts of the register.
+ bool Defined;
+ /// Reg or a super-register is defined. The definition covers the full
+ /// register.
+ bool FullyDefined;
+
+ /// Reg or one of its aliases is read. The register may only be read
+ /// partially.
+ bool Read;
+ /// Reg or a super-register is read. The full register is read.
+ bool FullyRead;
+
+ /// Either:
+ /// - Reg is FullyDefined and all defs of reg or an overlapping
+ /// register are dead, or
+ /// - Reg is completely dead because "defined" by a clobber.
+ bool DeadDef;
+
+ /// Reg is Defined and all defs of reg or an overlapping register are
+ /// dead.
+ bool PartialDeadDef;
+
+ /// There is a use operand of reg or a super-register with kill flag set.
+ bool Killed;
};
+/// AnalyzePhysRegInBundle - Analyze how the instruction or bundle containing
+/// \p MI uses a physical register.
+///
+/// @param Reg The physical register to analyze.
+/// @returns A filled-in PhysRegInfo struct.
+PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, unsigned Reg,
+ const TargetRegisterInfo *TRI);
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopInfo.h
index da6df59c739c..8a93f91ae54d 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -37,6 +37,7 @@
namespace llvm {
+class MachineDominatorTree;
// Implementation in LoopInfoImpl.h
class MachineLoop;
extern template class LoopBase<MachineBasicBlock, MachineLoop>;
@@ -88,8 +89,10 @@ class MachineLoopInfo : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- MachineLoopInfo() : MachineFunctionPass(ID) {
- initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ MachineLoopInfo();
+ explicit MachineLoopInfo(MachineDominatorTree &MDT)
+ : MachineFunctionPass(ID) {
+ calculate(MDT);
}
MachineLoopInfo(const MachineLoopInfo &) = delete;
MachineLoopInfo &operator=(const MachineLoopInfo &) = delete;
@@ -133,6 +136,7 @@ public:
/// Calculate the natural loop information.
bool runOnMachineFunction(MachineFunction &F) override;
+ void calculate(MachineDominatorTree &MDT);
void releaseMemory() override { LI.releaseMemory(); }
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopUtils.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopUtils.h
index 41379b75d00a..2cb0134ca848 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineLoopUtils.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
namespace llvm {
+class MachineLoop;
class MachineBasicBlock;
class MachineRegisterInfo;
class TargetInstrInfo;
@@ -36,6 +37,10 @@ MachineBasicBlock *PeelSingleBlockLoop(LoopPeelDirection Direction,
MachineRegisterInfo &MRI,
const TargetInstrInfo *TII);
+/// Return true if PhysReg is live outside the loop, i.e. live into any of
+/// the loop's exit blocks, and false otherwise.
+
} // namespace llvm
#endif // LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineMemOperand.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineMemOperand.h
index 33a48a235e18..7ee700c62a25 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineMemOperand.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineMemOperand.h
@@ -229,7 +229,7 @@ public:
/// Return the minimum known alignment in bytes of the base address, without
/// the offset.
- uint64_t getBaseAlignment() const { return (1u << BaseAlignLog2) >> 1; }
+ uint64_t getBaseAlignment() const { return (1ull << BaseAlignLog2) >> 1; }
/// Return the AA tags for the memory reference.
AAMDNodes getAAInfo() const { return AAInfo; }
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h
index df914dc2d85e..9ba2b01cb4bd 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOperand.h
@@ -163,7 +163,8 @@ private:
MachineInstr *ParentMI;
/// Contents union - This contains the payload for the various operand types.
- union {
+ union ContentsUnion {
+ ContentsUnion() {}
MachineBasicBlock *MBB; // For MO_MachineBasicBlock.
const ConstantFP *CFP; // For MO_FPImmediate.
const ConstantInt *CI; // For MO_CImmediate. Integers > 64bit.
@@ -174,7 +175,7 @@ private:
unsigned CFIIndex; // For MO_CFI.
Intrinsic::ID IntrinsicID; // For MO_IntrinsicID.
unsigned Pred; // For MO_Predicate
- const Constant *ShuffleMask; // For MO_ShuffleMask
+ ArrayRef<int> ShuffleMask; // For MO_ShuffleMask
struct { // For MO_Register.
// Register number is in SmallContents.RegNo.
@@ -278,6 +279,9 @@ public:
/// More complex way of printing a MachineOperand.
/// \param TypeToPrint specifies the generic type to be printed on uses and
/// defs. It can be determined using MachineInstr::getTypeToPrint.
+ /// \param OpIdx - specifies the index of the operand in the machine
+ /// instruction. This is used by the target-dependent MIR formatter. It can be
+ /// None if the index is unknown, e.g. when called from dump().
/// \param PrintDef - whether we want to print `def` on an operand which
/// isDef. Sometimes, if the operand is printed before '=', we don't print
/// `def`.
@@ -294,8 +298,9 @@ public:
/// information from its parent.
/// \param IntrinsicInfo - same as \p TRI.
void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint,
- bool PrintDef, bool IsStandalone, bool ShouldPrintRegisterTies,
- unsigned TiedOperandIdx, const TargetRegisterInfo *TRI,
+ Optional<unsigned> OpIdx, bool PrintDef, bool IsStandalone,
+ bool ShouldPrintRegisterTies, unsigned TiedOperandIdx,
+ const TargetRegisterInfo *TRI,
const TargetIntrinsicInfo *IntrinsicInfo) const;
/// Same as print(os, TRI, IntrinsicInfo), but allows to specify the low-level
@@ -583,7 +588,7 @@ public:
return Contents.Pred;
}
- const Constant *getShuffleMask() const {
+ ArrayRef<int> getShuffleMask() const {
assert(isShuffleMask() && "Wrong MachineOperand accessor");
return Contents.ShuffleMask;
}
@@ -911,9 +916,9 @@ public:
return Op;
}
- static MachineOperand CreateShuffleMask(const Constant *C) {
+ static MachineOperand CreateShuffleMask(ArrayRef<int> Mask) {
MachineOperand Op(MachineOperand::MO_ShuffleMask);
- Op.Contents.ShuffleMask = C;
+ Op.Contents.ShuffleMask = Mask;
return Op;
}
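With MO_ShuffleMask now holding an ArrayRef<int> rather than an IR Constant, a mask can be built directly from integers. A hedged sketch; note that the ArrayRef does not own its storage, so the array must outlive the operand (in-tree callers allocate the mask from the MachineFunction):

  static const int Mask[] = {0, 2, 1, 3};
  MachineOperand MO = MachineOperand::CreateShuffleMask(Mask);
  ArrayRef<int> M = MO.getShuffleMask(); // {0, 2, 1, 3}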
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
index a461a299917c..b2f8ad55fbd8 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
@@ -182,6 +182,10 @@ public:
}
}
+ MachineBlockFrequencyInfo *getBFI() {
+ return MBFI;
+ }
+
private:
MachineFunction &MF;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOutliner.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOutliner.h
index 3868fa415579..4a1b04ab3e88 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -37,10 +37,10 @@ enum InstrType { Legal, LegalTerminator, Illegal, Invisible };
struct Candidate {
private:
/// The start index of this \p Candidate in the instruction list.
- unsigned StartIdx;
+ unsigned StartIdx = 0;
/// The number of instructions in this \p Candidate.
- unsigned Len;
+ unsigned Len = 0;
// The first instruction in this \p Candidate.
MachineBasicBlock::iterator FirstInst;
@@ -49,20 +49,20 @@ private:
MachineBasicBlock::iterator LastInst;
// The basic block that contains this Candidate.
- MachineBasicBlock *MBB;
+ MachineBasicBlock *MBB = nullptr;
/// Cost of calling an outlined function from this point as defined by the
/// target.
- unsigned CallOverhead;
+ unsigned CallOverhead = 0;
public:
/// The index of this \p Candidate's \p OutlinedFunction in the list of
/// \p OutlinedFunctions.
- unsigned FunctionIdx;
+ unsigned FunctionIdx = 0;
/// Identifier denoting the instructions to emit to call an outlined function
/// from this point. Defined by the target.
- unsigned CallConstructionID;
+ unsigned CallConstructionID = 0;
/// Contains physical register liveness information for the MBB containing
/// this \p Candidate.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h
index e9cf7e115bff..24e85a953d47 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -46,6 +46,7 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineScheduler.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineScheduler.h
index 333367943ac0..6cebaa47fe6a 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -757,7 +757,16 @@ public:
unsigned getOtherResourceCount(unsigned &OtherCritIdx);
- void releaseNode(SUnit *SU, unsigned ReadyCycle);
+ /// Release SU to make it ready. If there is no hazard, remove it from the
+ /// pending queue (if it is already there) and push it into the available
+ /// queue. Otherwise, push the SU into the pending queue.
+ ///
+ /// @param SU The unit to be released.
+ /// @param ReadyCycle The cycle at which the unit becomes ready.
+ /// @param InPQueue Whether SU is already in the pending queue.
+ /// @param Idx The SU's position in the pending queue (if it is there).
+ void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
+ unsigned Idx = 0);
void bumpCycle(unsigned NextCycle);
@@ -955,7 +964,7 @@ public:
if (SU->isScheduled)
return;
- Top.releaseNode(SU, SU->TopReadyCycle);
+ Top.releaseNode(SU, SU->TopReadyCycle, false);
TopCand.SU = nullptr;
}
@@ -963,7 +972,7 @@ public:
if (SU->isScheduled)
return;
- Bot.releaseNode(SU, SU->BotReadyCycle);
+ Bot.releaseNode(SU, SU->BotReadyCycle, false);
BotCand.SU = nullptr;
}
@@ -1009,7 +1018,7 @@ protected:
/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
class PostGenericScheduler : public GenericSchedulerBase {
protected:
- ScheduleDAGMI *DAG;
+ ScheduleDAGMI *DAG = nullptr;
SchedBoundary Top;
SmallVector<SUnit*, 8> BotRoots;
@@ -1043,7 +1052,7 @@ public:
void releaseTopNode(SUnit *SU) override {
if (SU->isScheduled)
return;
- Top.releaseNode(SU, SU->TopReadyCycle);
+ Top.releaseNode(SU, SU->TopReadyCycle, false);
}
// Only called for roots.
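A sketch of the extended releaseNode contract from a hypothetical scheduling strategy; Zone and Idx are assumed to come from the caller's own bookkeeping:

  void releaseFromPending(SchedBoundary &Zone, SUnit *SU, unsigned Idx) {
    // InPQueue = true: if SU no longer has a hazard, it is moved from the
    // pending queue (at position Idx) to the available queue; otherwise it
    // stays pending.
    Zone.releaseNode(SU, SU->TopReadyCycle, /*InPQueue=*/true, Idx);
  }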
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSizeOpts.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSizeOpts.h
new file mode 100644
index 000000000000..3b02d0860ea1
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineSizeOpts.h
@@ -0,0 +1,39 @@
+//===- MachineSizeOpts.h - machine size optimization ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared machine IR code size optimization related
+// code.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_MACHINE_SIZEOPTS_H
+#define LLVM_CODEGEN_MACHINE_SIZEOPTS_H
+
+#include "llvm/Transforms/Utils/SizeOpts.h"
+
+namespace llvm {
+
+class ProfileSummaryInfo;
+class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineFunction;
+
+/// Returns true if machine function \p MF is suggested to be size-optimized
+/// based on the profile.
+bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *BFI,
+ PGSOQueryType QueryType = PGSOQueryType::Other);
+/// Returns true if machine basic block \p MBB is suggested to be size-optimized
+/// based on the profile.
+bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI,
+ PGSOQueryType QueryType = PGSOQueryType::Other);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINE_SIZEOPTS_H
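An illustrative use of the new query, with PSI and MBFI assumed to be fetched through the pass's analysis usage:

  bool preferSmallCode(const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI,
                       const MachineBlockFrequencyInfo *MBFI) {
    // Falls back to the default query type declared above.
    return shouldOptimizeForSize(MBB, PSI, MBFI, PGSOQueryType::Other);
  }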
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ModuloSchedule.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ModuloSchedule.h
index 81a9b63b64ca..55c52f3447b0 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -290,6 +290,9 @@ class PeelingModuloScheduleExpander {
/// but not produced (in the epilog) or produced but not available (in the
/// prolog).
DenseMap<MachineBasicBlock *, BitVector> AvailableStages;
+ /// When peeling the epilogue, keep track of the distance between the phi
+ /// nodes and the kernel.
+ DenseMap<MachineInstr *, unsigned> PhiNodeLoopIteration;
/// CanonicalMIs and BlockMIs form a bidirectional map between any of the
/// loop kernel clones.
@@ -299,6 +302,8 @@ class PeelingModuloScheduleExpander {
/// State passed from peelKernel to peelPrologAndEpilogs().
std::deque<MachineBasicBlock *> PeeledFront, PeeledBack;
+ /// Illegal phis that need to be deleted once we re-link stages.
+ SmallVector<MachineInstr *, 4> IllegalPhisToDelete;
public:
PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
@@ -321,6 +326,13 @@ private:
/// Peels one iteration of the rewritten kernel (BB) in the specified
/// direction.
MachineBasicBlock *peelKernel(LoopPeelDirection LPD);
+ // Delete instructions whose stage is less than MinStage in the given basic
+ // block.
+ void filterInstructions(MachineBasicBlock *MB, int MinStage);
+ // Move instructions of the given stage from SourceBB to DestBB. Remap the
+ // phi instructions to keep the IR valid.
+ void moveStageBetweenBlocks(MachineBasicBlock *DestBB,
+ MachineBasicBlock *SourceBB, unsigned Stage);
/// Peel the kernel forwards and backwards to produce prologs and epilogs,
/// and stitch them together.
void peelPrologAndEpilogs();
@@ -342,6 +354,11 @@ private:
MI = CanonicalMIs[MI];
return Schedule.getStage(MI);
}
+ /// Helper function to find the right canonical register for a phi instruction
+ /// coming from a peeled-out prologue.
+ Register getPhiCanonicalReg(MachineInstr* CanonicalPhi, MachineInstr* Phi);
+ /// Target loop info before kernel peeling.
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info;
};
/// Expander that simply annotates each scheduled instruction with a post-instr
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h
new file mode 100644
index 000000000000..56db30ff7d6d
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h
@@ -0,0 +1,83 @@
+//===- NonRelocatableStringpool.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_NONRELOCATABLESTRINGPOOL_H
+#define LLVM_CODEGEN_NONRELOCATABLESTRINGPOOL_H
+
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
+#include "llvm/Support/Allocator.h"
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+
+/// A string table that doesn't need relocations.
+///
+/// Use this class when a string table doesn't need relocations.
+/// This class provides this ability by just associating offsets with strings.
+class NonRelocatableStringpool {
+public:
+ /// Entries are stored into the StringMap and simply linked together through
+ /// the second element of this pair in order to keep track of insertion
+ /// order.
+ using MapTy = StringMap<DwarfStringPoolEntry, BumpPtrAllocator>;
+
+ NonRelocatableStringpool(
+ std::function<StringRef(StringRef Input)> Translator = nullptr,
+ bool PutEmptyString = false)
+ : Translator(Translator) {
+ if (PutEmptyString)
+ EmptyString = getEntry("");
+ }
+
+ DwarfStringPoolEntryRef getEntry(StringRef S);
+
+ /// Get the offset of string \p S in the string table. This can insert a new
+ /// element or return the offset of a pre-existing one.
+ uint32_t getStringOffset(StringRef S) { return getEntry(S).getOffset(); }
+
+ /// Get permanent storage for \p S (but do not necessarily emit \p S in the
+ /// output section). A later call to getStringOffset() with the same string
+ /// will chain it in, though.
+ ///
+ /// \returns The StringRef that points to permanent storage to use
+ /// in place of \p S.
+ StringRef internString(StringRef S);
+
+ uint64_t getSize() { return CurrentEndOffset; }
+
+ /// Return the list of strings to be emitted. This does not contain strings
+ /// that were only added via internString.
+ std::vector<DwarfStringPoolEntryRef> getEntriesForEmission() const;
+
+private:
+ MapTy Strings;
+ uint32_t CurrentEndOffset = 0;
+ unsigned NumEntries = 0;
+ DwarfStringPoolEntryRef EmptyString;
+ std::function<StringRef(StringRef Input)> Translator;
+};
+
+/// Helper for making strong types.
+template <typename T, typename S> class StrongType : public T {
+public:
+ template <typename... Args>
+ explicit StrongType(Args... A) : T(std::forward<Args>(A)...) {}
+};
+
+/// It's very easy to introduce bugs by passing the wrong string pool.
+/// By using strong types the interface enforces that the right
+/// kind of pool is used.
+struct UniqueTag {};
+struct OffsetsTag {};
+using UniquingStringPool = StrongType<NonRelocatableStringpool, UniqueTag>;
+using OffsetsStringPool = StrongType<NonRelocatableStringpool, OffsetsTag>;
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_NONRELOCATABLESTRINGPOOL_H
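A brief usage sketch of the pool's three entry points; the strings are placeholders:

  NonRelocatableStringpool Pool; // no translator, no implicit empty string
  uint32_t Off = Pool.getStringOffset("foo"); // inserts "foo", returns offset
  StringRef Kept = Pool.internString("bar");  // stored, but not yet emitted
  (void)Off; (void)Kept;
  for (DwarfStringPoolEntryRef E : Pool.getEntriesForEmission()) {
    // "bar" is absent here until getStringOffset("bar") chains it in.
  }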
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ParallelCG.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ParallelCG.h
index a44715d4fc4f..b4c761c2269e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ParallelCG.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ParallelCG.h
@@ -39,7 +39,7 @@ std::unique_ptr<Module>
splitCodeGen(std::unique_ptr<Module> M, ArrayRef<raw_pwrite_stream *> OSs,
ArrayRef<llvm::raw_pwrite_stream *> BCOSs,
const std::function<std::unique_ptr<TargetMachine>()> &TMFactory,
- TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile,
+ CodeGenFileType FileType = CGFT_ObjectFile,
bool PreserveLocals = false);
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h
index 1e765ce51e4a..4e3451d80572 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h
@@ -275,6 +275,11 @@ namespace llvm {
/// MachineCSE - This pass performs global CSE on machine instructions.
extern char &MachineCSEID;
+ /// MIRCanonicalizer - This pass canonicalizes MIR by renaming vregs
+ /// according to the semantics of the instruction as well as hoists
+ /// code.
+ extern char &MIRCanonicalizerID;
+
/// ImplicitNullChecks - This pass folds null pointer checks into nearby
/// memory operations.
extern char &ImplicitNullChecksID;
@@ -451,9 +456,16 @@ namespace llvm {
/// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp
FunctionPass *createCFIInstrInserter();
+ /// Creates CFGuard longjmp target identification pass.
+ /// \see CFGuardLongjmp.cpp
+ FunctionPass *createCFGuardLongjmpPass();
+
/// Create Hardware Loop pass. \see HardwareLoops.cpp
FunctionPass *createHardwareLoopsPass();
+ /// Create IR Type Promotion pass. \see TypePromotion.cpp
+ FunctionPass *createTypePromotionPass();
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/PseudoSourceValue.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/PseudoSourceValue.h
index 4b3cc9145a13..593a865ea545 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/PseudoSourceValue.h
@@ -22,6 +22,7 @@ namespace llvm {
class MachineFrameInfo;
class MachineMemOperand;
+class MIRFormatter;
class raw_ostream;
class TargetInstrInfo;
@@ -52,6 +53,7 @@ private:
const PseudoSourceValue* PSV);
friend class MachineMemOperand; // For printCustom().
+ friend class MIRFormatter; // For printCustom().
/// Implement printing for PseudoSourceValue. This is called from
/// Value::print or Value's operator<<.
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
index a599fb62f5e2..5a747245a62e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LoopTraversal.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
namespace llvm {
@@ -86,17 +87,58 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
+ MachineFunctionProperties::Property::NoVRegs).set(
+ MachineFunctionProperties::Property::TracksLiveness);
}
/// Provides the instruction id of the closest reaching def instruction of
/// PhysReg that reaches MI, relative to the beginning of MI's basic block.
int getReachingDef(MachineInstr *MI, int PhysReg);
+ /// Provides the closest reaching def instruction of PhysReg that reaches MI,
+ /// relative to the beginning of MI's basic block.
+ MachineInstr *getReachingMIDef(MachineInstr *MI, int PhysReg);
+
+ /// Provides the MI, from the given block, corresponding to the Id, or
+ /// nullptr if the Id does not refer to an instruction in the block.
+ MachineInstr *getInstFromId(MachineBasicBlock *MBB, int InstId);
+
+ /// Return whether A and B use the same def of PhysReg.
+ bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, int PhysReg);
+
+ /// Return whether the reaching def for MI also is live out of its parent
+ /// block.
+ bool isReachingDefLiveOut(MachineInstr *MI, int PhysReg);
+
+ /// Return the local MI that produces the live out value for PhysReg, or
+ /// nullptr for a non-live out or non-local def.
+ MachineInstr *getLocalLiveOutMIDef(MachineBasicBlock *MBB,
+ int PhysReg);
+
+ /// Return whether the given register is used after MI, whether it's a local
+ /// use or a live-out.
+ bool isRegUsedAfter(MachineInstr *MI, int PhysReg);
+
+ /// Provides the first instruction before MI that uses PhysReg.
+ MachineInstr *getInstWithUseBefore(MachineInstr *MI, int PhysReg);
+
+ /// Provides all instructions before MI that use PhysReg.
+ void getAllInstWithUseBefore(MachineInstr *MI, int PhysReg,
+ SmallVectorImpl<MachineInstr*> &Uses);
+
/// Provides the clearance - the number of instructions since the closest
/// reaching def instruction of PhysReg that reaches MI.
int getClearance(MachineInstr *MI, MCPhysReg PhysReg);
+ /// Provides the uses, in the same block as MI, of the register that MI
+ /// defines. This does not consider live-outs.
+ /// This does not consider live-outs.
+ void getReachingLocalUses(MachineInstr *MI, int PhysReg,
+ SmallVectorImpl<MachineInstr*> &Uses);
+
+ /// Provide the number of uses, in the same block as MI, of the register that
+ /// MI defines.
+ unsigned getNumUses(MachineInstr *MI, int PhysReg);
+
private:
/// Set up LiveRegs by merging predecessor live-out values.
void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
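A sketch combining several of the new queries; sameLocalDef is hypothetical and assumes the analysis has already run on the function:

  bool sameLocalDef(ReachingDefAnalysis &RDA, MachineInstr *A, MachineInstr *B,
                    int PhysReg) {
    // Both instructions must observe the same def ...
    if (!RDA.hasSameReachingDef(A, B, PhysReg))
      return false;
    // ... and that def must exist locally and not be live out of the block.
    MachineInstr *Def = RDA.getReachingMIDef(A, PhysReg);
    return Def && !RDA.isReachingDefLiveOut(A, PhysReg);
  }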
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
index 33554550b9dc..53982ce5d4a2 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include <cstdint>
#include <vector>
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h
index 6b8e2dd803ba..3bfde5b4ce1d 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -58,6 +58,7 @@ namespace llvm {
class AAResults;
class BlockAddress;
+class BlockFrequencyInfo;
class Constant;
class ConstantFP;
class ConstantInt;
@@ -71,6 +72,7 @@ class MachineBasicBlock;
class MachineConstantPoolValue;
class MCSymbol;
class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
class SDDbgValue;
class SDDbgLabel;
class SelectionDAG;
@@ -235,6 +237,9 @@ class SelectionDAG {
/// whenever manipulating the DAG.
OptimizationRemarkEmitter *ORE;
+ ProfileSummaryInfo *PSI = nullptr;
+ BlockFrequencyInfo *BFI = nullptr;
+
/// The starting token.
SDNode EntryNode;
@@ -401,7 +406,8 @@ public:
/// Prepare this SelectionDAG to process code in the given MachineFunction.
void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE,
Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- LegacyDivergenceAnalysis * Divergence);
+ LegacyDivergenceAnalysis * Divergence,
+ ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin);
void setFunctionLoweringInfo(FunctionLoweringInfo * FuncInfo) {
FLI = FuncInfo;
@@ -421,8 +427,10 @@ public:
const TargetLibraryInfo &getLibInfo() const { return *LibInfo; }
const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; }
const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; }
- LLVMContext *getContext() const {return Context; }
+ LLVMContext *getContext() const { return Context; }
OptimizationRemarkEmitter &getORE() const { return *ORE; }
+ ProfileSummaryInfo *getPSI() const { return PSI; }
+ BlockFrequencyInfo *getBFI() const { return BFI; }
/// Pop up a GraphViz/gv window with the DAG rendered using 'dot'.
void viewGraph(const std::string &Title);
@@ -779,6 +787,20 @@ public:
return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
+ // Return a splat ISD::SPLAT_VECTOR node, consisting of Op splatted to all
+ // elements.
+ SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op) {
+ if (Op.getOpcode() == ISD::UNDEF) {
+ assert((VT.getVectorElementType() == Op.getValueType() ||
+ (VT.isInteger() &&
+ VT.getVectorElementType().bitsLE(Op.getValueType()))) &&
+ "A splatted value must have a width equal or (for integers) "
+ "greater than the vector element type!");
+ return getNode(ISD::UNDEF, SDLoc(), VT);
+ }
+ return getNode(ISD::SPLAT_VECTOR, DL, VT, Op);
+ }
+
/// Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to
/// the shuffle node in input but with swapped operands.
///
@@ -789,6 +811,11 @@ public:
/// float type VT, by either extending or rounding (by truncation).
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
+ /// Convert Op, which must be a STRICT operation of float type, to the
+ /// float type VT, by either extending or rounding (by truncation).
+ std::pair<SDValue, SDValue>
+ getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT);
+
/// Convert Op, which must be of integer type, to the
/// integer type VT, by either any-extending or truncating it.
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
@@ -826,22 +853,28 @@ public:
/// Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT);
+ /// Returns the sum of the base pointer and offset.
+ /// Unlike getObjectPtrOffset, this does not set NoUnsignedWrap by default.
+ SDValue getMemBasePlusOffset(SDValue Base, int64_t Offset, const SDLoc &DL,
+ const SDNodeFlags Flags = SDNodeFlags());
+ SDValue getMemBasePlusOffset(SDValue Base, SDValue Offset, const SDLoc &DL,
+ const SDNodeFlags Flags = SDNodeFlags());
+
/// Create an add instruction with appropriate flags when used for
/// addressing some offset of an object. i.e. if a load is split into multiple
/// components, create an add nuw from the base pointer to the offset.
- SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, int64_t Offset) {
- EVT VT = Op.getValueType();
- return getObjectPtrOffset(SL, Op, getConstant(Offset, SL, VT));
+ SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, int64_t Offset) {
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+ return getMemBasePlusOffset(Ptr, Offset, SL, Flags);
}
- SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, SDValue Offset) {
- EVT VT = Op.getValueType();
-
+ SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, SDValue Offset) {
// The object itself can't wrap around the address space, so it shouldn't be
// possible for the adds of the offsets to the split parts to overflow.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- return getNode(ISD::ADD, SL, VT, Op, Offset, Flags);
+ return getMemBasePlusOffset(Ptr, Offset, SL, Flags);
}
/// Return a new CALLSEQ_START node, that starts new call frame, in which
@@ -961,13 +994,17 @@ public:
/// Helper function to make it easier to build SetCC's if you just have an
/// ISD::CondCode instead of an SDValue.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS,
- ISD::CondCode Cond) {
+ ISD::CondCode Cond, SDValue Chain = SDValue(),
+ bool IsSignaling = false) {
assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() &&
"Cannot compare scalars to vectors");
assert(LHS.getValueType().isVector() == VT.isVector() &&
"Cannot compare scalars to vectors");
assert(Cond != ISD::SETCC_INVALID &&
"Cannot create a setCC of an invalid node.");
+ if (Chain)
+ return getNode(IsSignaling ? ISD::STRICT_FSETCCS : ISD::STRICT_FSETCC, DL,
+ {VT, MVT::Other}, {Chain, LHS, RHS, getCondCode(Cond)});
return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond));
}
@@ -1111,17 +1148,19 @@ public:
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM);
- /// Returns sum of the base pointer and offset.
- SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, const SDLoc &DL);
-
- SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
- SDValue Mask, SDValue Src0, EVT MemVT,
- MachineMemOperand *MMO, ISD::LoadExtType,
- bool IsExpanding = false);
+ SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base,
+ SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT,
+ MachineMemOperand *MMO, ISD::MemIndexedMode AM,
+ ISD::LoadExtType, bool IsExpanding = false);
+ SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM);
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val,
- SDValue Ptr, SDValue Mask, EVT MemVT,
- MachineMemOperand *MMO, bool IsTruncating = false,
- bool IsCompressing = false);
+ SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO, ISD::MemIndexedMode AM,
+ bool IsTruncating = false, bool IsCompressing = false);
+ SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM);
SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
ISD::MemIndexType IndexType);
@@ -1697,6 +1736,14 @@ public:
return It->second.HeapAllocSite;
}
+ /// Return the current function's default denormal handling kind for the given
+ /// floating point type.
+ DenormalMode getDenormalMode(EVT VT) const {
+ return MF->getDenormalMode(EVTToAPFloatSemantics(VT));
+ }
+
+ bool shouldOptForSize() const;
+
private:
void InsertNode(SDNode *N);
bool RemoveNodeFromCSEMaps(SDNode *N);
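The split between getMemBasePlusOffset and getObjectPtrOffset above comes down to who sets the no-unsigned-wrap flag. A sketch of the intended division of labor (the helper is illustrative):

  SDValue addressSecondHalf(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr,
                            int64_t HalfSize) {
    // Offsets within a single object cannot wrap, so the 'nuw' form is safe
    // here; use getMemBasePlusOffset directly when that is not guaranteed.
    return DAG.getObjectPtrOffset(DL, Ptr, HalfSize);
  }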
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index de71a21d4671..9874d782c782 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -39,6 +39,8 @@ class SwiftErrorValueTracking;
class GCFunctionInfo;
class ScheduleDAGSDNodes;
class LoadInst;
+class ProfileSummaryInfo;
+class BlockFrequencyInfo;
/// SelectionDAGISel - This is the common base class used for SelectionDAG-based
/// pattern-matching instruction selectors.
@@ -46,12 +48,12 @@ class SelectionDAGISel : public MachineFunctionPass {
public:
TargetMachine &TM;
const TargetLibraryInfo *LibInfo;
- FunctionLoweringInfo *FuncInfo;
+ std::unique_ptr<FunctionLoweringInfo> FuncInfo;
SwiftErrorValueTracking *SwiftError;
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
SelectionDAG *CurDAG;
- SelectionDAGBuilder *SDB;
+ std::unique_ptr<SelectionDAGBuilder> SDB;
AAResults *AA;
GCFunctionInfo *GFI;
CodeGenOpt::Level OptLevel;
@@ -249,6 +251,11 @@ protected:
virtual StringRef getIncludePathForIndex(unsigned index) {
llvm_unreachable("Tblgen should generate the implementation of this!");
}
+
+ bool shouldOptForSize(const MachineFunction *MF) const {
+ return CurDAG->shouldOptForSize();
+ }
+
public:
// Calls to these predicates are generated by tblgen.
bool CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
@@ -303,6 +310,9 @@ public:
return false;
}
+ /// Return whether the node may raise an FP exception.
+ bool mayRaiseFPException(SDNode *Node) const;
+
bool isOrEquivalentToAdd(const SDNode *N) const;
private:
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index ceb8b72635a2..d81a4a8fd43f 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -42,6 +42,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/TypeSize.h"
#include <algorithm>
#include <cassert>
#include <climits>
@@ -170,11 +171,15 @@ public:
}
/// Returns the size of the value in bits.
- unsigned getValueSizeInBits() const {
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getValueSizeInBits() const {
return getValueType().getSizeInBits();
}
- unsigned getScalarValueSizeInBits() const {
+ TypeSize getScalarValueSizeInBits() const {
return getValueType().getScalarType().getSizeInBits();
}
@@ -382,7 +387,7 @@ public:
Exact(false), NoNaNs(false), NoInfs(false),
NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
AllowContract(false), ApproximateFuncs(false),
- AllowReassociation(false), NoFPExcept(true) {}
+ AllowReassociation(false), NoFPExcept(false) {}
/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
@@ -445,9 +450,9 @@ public:
setDefined();
AllowReassociation = b;
}
- void setFPExcept(bool b) {
+ void setNoFPExcept(bool b) {
setDefined();
- NoFPExcept = !b;
+ NoFPExcept = b;
}
// These are accessors for each flag.
@@ -462,7 +467,7 @@ public:
bool hasAllowContract() const { return AllowContract; }
bool hasApproximateFuncs() const { return ApproximateFuncs; }
bool hasAllowReassociation() const { return AllowReassociation; }
- bool hasFPExcept() const { return !NoFPExcept; }
+ bool hasNoFPExcept() const { return NoFPExcept; }
bool isFast() const {
return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoFPExcept &&
@@ -548,6 +553,7 @@ BEGIN_TWO_BYTE_PACK()
class LSBaseSDNodeBitfields {
friend class LSBaseSDNode;
+ friend class MaskedLoadStoreSDNode;
friend class MaskedGatherScatterSDNode;
uint16_t : NumMemSDNodeBits;
@@ -555,6 +561,7 @@ BEGIN_TWO_BYTE_PACK()
// This storage is shared between disparate class hierarchies to hold an
// enumeration specific to the class hierarchy in use.
// LSBaseSDNode => enum ISD::MemIndexedMode
+ // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
// MaskedGatherScatterSDNode => enum ISD::MemIndexType
uint16_t AddressingMode : 3;
};
@@ -659,6 +666,15 @@ public:
/// \<target\>ISD namespace).
bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
+ /// Test if this node has a target-specific opcode that may raise
+ /// FP exceptions (in the \<target\>ISD namespace and greater than
+ /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
+ /// opcodes are currently automatically considered to possibly raise
+ /// FP exceptions as well.
+ bool isTargetStrictFPOpcode() const {
+ return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
+ }
+
/// Test if this node has a target-specific
/// memory-referencing opcode (in the \<target\>ISD namespace and
/// greater than FIRST_TARGET_MEMORY_OPCODE).
@@ -685,38 +701,9 @@ public:
switch (NodeType) {
default:
return false;
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_LRINT:
- case ISD::STRICT_LLRINT:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_LROUND:
- case ISD::STRICT_LLROUND:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- case ISD::STRICT_FP_ROUND:
- case ISD::STRICT_FP_EXTEND:
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
return true;
}
}
@@ -1022,7 +1009,11 @@ public:
}
/// Returns MVT::getSizeInBits(getValueType(ResNo)).
- unsigned getValueSizeInBits(unsigned ResNo) const {
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getValueSizeInBits(unsigned ResNo) const {
return getValueType(ResNo).getSizeInBits();
}
@@ -2293,19 +2284,38 @@ public:
friend class SelectionDAG;
MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
- const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ const DebugLoc &dl, SDVTList VTs,
+ ISD::MemIndexedMode AM, EVT MemVT,
MachineMemOperand *MMO)
- : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {}
+ : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ LSBaseSDNodeBits.AddressingMode = AM;
+ assert(getAddressingMode() == AM && "Value truncated");
+ }
- // MaskedLoadSDNode (Chain, ptr, mask, passthru)
- // MaskedStoreSDNode (Chain, data, ptr, mask)
+ // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
+ // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
// Mask is a vector of i1 elements
const SDValue &getBasePtr() const {
return getOperand(getOpcode() == ISD::MLOAD ? 1 : 2);
}
- const SDValue &getMask() const {
+ const SDValue &getOffset() const {
return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
}
+ const SDValue &getMask() const {
+ return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
+ }
+
+ /// Return the addressing mode for this load or store:
+ /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
+ ISD::MemIndexedMode getAddressingMode() const {
+ return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
+ }
+
+ /// Return true if this is a pre/post inc/dec load/store.
+ bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
+
+ /// Return true if this is NOT a pre/post inc/dec load/store.
+ bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD ||
@@ -2319,9 +2329,9 @@ public:
friend class SelectionDAG;
MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- ISD::LoadExtType ETy, bool IsExpanding, EVT MemVT,
- MachineMemOperand *MMO)
- : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, MemVT, MMO) {
+ ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
+ bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
+ : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
LoadSDNodeBits.ExtTy = ETy;
LoadSDNodeBits.IsExpanding = IsExpanding;
}
@@ -2331,8 +2341,9 @@ public:
}
const SDValue &getBasePtr() const { return getOperand(1); }
- const SDValue &getMask() const { return getOperand(2); }
- const SDValue &getPassThru() const { return getOperand(3); }
+ const SDValue &getOffset() const { return getOperand(2); }
+ const SDValue &getMask() const { return getOperand(3); }
+ const SDValue &getPassThru() const { return getOperand(4); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD;
@@ -2347,9 +2358,9 @@ public:
friend class SelectionDAG;
MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- bool isTrunc, bool isCompressing, EVT MemVT,
- MachineMemOperand *MMO)
- : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) {
+ ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
+ EVT MemVT, MachineMemOperand *MMO)
+ : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
StoreSDNodeBits.IsTruncating = isTrunc;
StoreSDNodeBits.IsCompressing = isCompressing;
}
@@ -2365,9 +2376,10 @@ public:
/// memory at base_addr.
bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
- const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getValue() const { return getOperand(1); }
const SDValue &getBasePtr() const { return getOperand(2); }
- const SDValue &getMask() const { return getOperand(3); }
+ const SDValue &getOffset() const { return getOperand(3); }
+ const SDValue &getMask() const { return getOperand(4); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSTORE;
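A sketch of how target lowering code might branch on the new addressing-mode bits of a masked load (the visitor function is hypothetical):

  void visitMaskedLoad(MaskedLoadSDNode *MLD) {
    if (MLD->isUnindexed())
      return; // offset operand is not meaningful here
    ISD::MemIndexedMode AM = MLD->getAddressingMode(); // e.g. ISD::PRE_INC
    SDValue Base = MLD->getBasePtr();   // operand 1
    SDValue Offset = MLD->getOffset();  // operand 2, new in this layout
    (void)AM; (void)Base; (void)Offset;
  }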
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h
index 2b32a4d30dff..fb833806ca8e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -347,14 +347,9 @@ class raw_ostream;
public:
static char ID;
- SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) {
- initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- }
+ SlotIndexes();
- ~SlotIndexes() override {
- // The indexList's nodes are all allocated in the BumpPtrAllocator.
- indexList.clearAndLeakNodesUnsafely();
- }
+ ~SlotIndexes() override;
void getAnalysisUsage(AnalysisUsage &au) const override;
void releaseMemory() override;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h
index d7d88de6f682..63547e5b7c3e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/StackMaps.h
@@ -266,13 +266,16 @@ public:
/// Generate a stackmap record for a stackmap instruction.
///
/// MI must be a raw STACKMAP, not a PATCHPOINT.
- void recordStackMap(const MachineInstr &MI);
+ void recordStackMap(const MCSymbol &L,
+ const MachineInstr &MI);
/// Generate a stackmap record for a patchpoint instruction.
- void recordPatchPoint(const MachineInstr &MI);
+ void recordPatchPoint(const MCSymbol &L,
+ const MachineInstr &MI);
/// Generate a stackmap record for a statepoint instruction.
- void recordStatepoint(const MachineInstr &MI);
+ void recordStatepoint(const MCSymbol &L,
+ const MachineInstr &MI);
/// If there is any stack map data, create a stack map section and serialize
/// the map info into it. This clears the stack map data structures
@@ -306,12 +309,15 @@ private:
/// registers that need to be recorded in the stackmap.
LiveOutVec parseRegisterLiveOutMask(const uint32_t *Mask) const;
- /// This should be called by the MC lowering code _immediately_ before
- /// lowering the MI to an MCInst. It records where the operands for the
- /// instruction are stored, and outputs a label to record the offset of
- /// the call from the start of the text section. In special cases (e.g. AnyReg
- /// calling convention) the return register is also recorded if requested.
- void recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
+ /// Record the locations of the operands of the provided instruction in a
+ /// record keyed by the provided label. For instructions w/AnyReg calling
+ /// convention the return register is also recorded if requested. For
+ /// STACKMAP and PATCHPOINT, the label is expected to immediately *precede*
+ /// lowering of the MI to MCInsts. For STATEPOINT, it is expected to
+ /// immediately *follow*. It's not clear this difference was intentional,
+ /// but it exists today.
+ void recordStackMapOpers(const MCSymbol &L,
+ const MachineInstr &MI, uint64_t ID,
MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
bool recordResult = false);
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/StackProtector.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/StackProtector.h
index 1443a40e85e1..d2ab79cb235e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/StackProtector.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/StackProtector.h
@@ -95,8 +95,7 @@ private:
bool InStruct = false) const;
/// Check whether a stack allocation has its address taken.
- bool HasAddressTaken(const Instruction *AI,
- SmallPtrSetImpl<const PHINode *> &VisitedPHIs);
+ bool HasAddressTaken(const Instruction *AI);
/// RequiresStackProtector - Check whether or not this function needs a
/// stack protector based upon the stack protector level.
@@ -105,9 +104,7 @@ private:
public:
static char ID; // Pass identification, replacement for typeid.
- StackProtector() : FunctionPass(ID), SSPBufferSize(8) {
- initializeStackProtectorPass(*PassRegistry::getPassRegistry());
- }
+ StackProtector();
void getAnalysisUsage(AnalysisUsage &AU) const override;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
index b8adcf759b19..4d6afa617d3a 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -19,6 +19,7 @@ namespace llvm {
class FunctionLoweringInfo;
class MachineBasicBlock;
+class BlockFrequencyInfo;
namespace SwitchCG {
@@ -264,7 +265,8 @@ public:
std::vector<BitTestBlock> BitTestCases;
void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB);
+ MachineBasicBlock *DefaultMBB,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI);
bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First,
unsigned Last, const SwitchInst *SI,
@@ -295,4 +297,3 @@ private:
} // namespace llvm
#endif // LLVM_CODEGEN_SWITCHLOWERINGUTILS_H
-
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TailDuplicator.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TailDuplicator.h
index 358798d5ed60..e0623a3193e5 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TailDuplicator.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TailDuplicator.h
@@ -25,11 +25,13 @@
namespace llvm {
class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
class MachineBranchProbabilityInfo;
class MachineFunction;
class MachineInstr;
class MachineModuleInfo;
class MachineRegisterInfo;
+class ProfileSummaryInfo;
class TargetRegisterInfo;
/// Utility class to perform tail duplication.
@@ -40,6 +42,8 @@ class TailDuplicator {
const MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
MachineFunction *MF;
+ const MachineBlockFrequencyInfo *MBFI;
+ ProfileSummaryInfo *PSI;
bool PreRegAlloc;
bool LayoutMode;
unsigned TailDupSize;
@@ -65,6 +69,8 @@ public:
/// default implies using the command line value TailDupSize.
void initMF(MachineFunction &MF, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPI,
+ const MachineBlockFrequencyInfo *MBFI,
+ ProfileSummaryInfo *PSI,
bool LayoutMode, unsigned TailDupSize = 0);
bool tailDuplicateBlocks();
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetCallingConv.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetCallingConv.h
index db3d1175afee..f515050efadb 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -38,6 +38,7 @@ namespace ISD {
unsigned IsSplitEnd : 1; ///< Last part of a split
unsigned IsSwiftSelf : 1; ///< Swift self parameter
unsigned IsSwiftError : 1; ///< Swift error parameter
+ unsigned IsCFGuardTarget : 1; ///< Control Flow Guard target
unsigned IsHva : 1; ///< HVA field for
unsigned IsHvaStart : 1; ///< HVA structure start
unsigned IsSecArgPass : 1; ///< Second argument
@@ -56,8 +57,8 @@ namespace ISD {
ArgFlagsTy()
: IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0),
- IsSwiftSelf(0), IsSwiftError(0), IsHva(0), IsHvaStart(0),
- IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
+ IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0),
+ IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
PointerAddrSpace(0) {
@@ -88,6 +89,9 @@ namespace ISD {
bool isSwiftError() const { return IsSwiftError; }
void setSwiftError() { IsSwiftError = 1; }
+ bool isCFGuardTarget() const { return IsCFGuardTarget; }
+ void setCFGuardTarget() { IsCFGuardTarget = 1; }
+
bool isHva() const { return IsHva; }
void setHva() { IsHva = 1; }
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 72edb27964c4..c7d4c4d7e5d4 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -282,6 +282,11 @@ public:
return getFrameIndexReference(MF, FI, FrameReg);
}
+ /// Returns the callee-saved registers as computed by determineCalleeSaves
+ /// in the BitVector \p SavedRegs.
+ virtual void getCalleeSaves(const MachineFunction &MF,
+ BitVector &SavedRegs) const;
+
/// This method determines which of the registers reported by
/// TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
/// The default implementation populates the \p SavedRegs bitset with
@@ -289,6 +294,9 @@ public:
/// this function to save additional registers.
/// This method also sets up the register scavenger ensuring there is a free
/// register or a frameindex available.
+ /// This method should not be called by any passes outside of PEI, because
+ /// it may change state passed in by \p MF and \p RS. The preferred
+ /// interface outside PEI is getCalleeSaves.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const;
@@ -355,6 +363,11 @@ public:
return true;
}
+ /// Returns the StackID that scalable vectors should be associated with.
+ virtual TargetStackID::Value getStackIDForScalableVectors() const {
+ return TargetStackID::Default;
+ }
+
virtual bool isSupportedStackID(TargetStackID::Value ID) const {
switch (ID) {
default:
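A sketch of the preferred post-PEI query described above; the helper is illustrative, and the pre-sizing of the BitVector is an assumption of this sketch:

  BitVector calleeSavedRegs(const TargetFrameLowering &TFL,
                            const MachineFunction &MF, unsigned NumRegs) {
    BitVector Saved(NumRegs);
    TFL.getCalleeSaves(MF, Saved); // read-only; does not mutate pass state
    return Saved;
  }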
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 5011cf34c0ee..ec3c0a0194f6 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -51,6 +52,7 @@ class MCInst;
struct MCSchedModel;
class Module;
class ScheduleDAG;
+class ScheduleDAGMI;
class ScheduleHazardRecognizer;
class SDNode;
class SelectionDAG;
@@ -64,6 +66,22 @@ template <class T> class SmallVectorImpl;
using ParamLoadedValue = std::pair<MachineOperand, DIExpression*>;
+struct DestSourcePair {
+ const MachineOperand *Destination;
+ const MachineOperand *Source;
+
+ DestSourcePair(const MachineOperand &Dest, const MachineOperand &Src)
+ : Destination(&Dest), Source(&Src) {}
+};
+
+/// Used to describe a register and immediate addition.
+struct RegImmPair {
+ Register Reg;
+ int64_t Imm;
+
+ RegImmPair(Register Reg, int64_t Imm) : Reg(Reg), Imm(Imm) {}
+};
+
//---------------------------------------------------------------------------
///
/// TargetInstrInfo - Interface to description of machine instruction set
@@ -912,36 +930,42 @@ public:
/// large registers. See for example the ARM target.
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const DebugLoc &DL,
- unsigned DestReg, unsigned SrcReg,
+ MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const {
llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!");
}
protected:
- /// Target-dependent implemenation for IsCopyInstr.
+ /// Target-dependent implementation for isCopyInstr.
/// If the specific machine instruction is an instruction that moves/copies
- /// value from one register to another register return true along with
- /// @Source machine operand and @Destination machine operand.
- virtual bool isCopyInstrImpl(const MachineInstr &MI,
- const MachineOperand *&Source,
- const MachineOperand *&Destination) const {
- return false;
+ /// value from one register to another register, return the destination and
+ /// source registers as machine operands.
+ virtual Optional<DestSourcePair>
+ isCopyInstrImpl(const MachineInstr &MI) const {
+ return None;
}
public:
/// If the specific machine instruction is an instruction that moves/copies
- /// value from one register to another register return true along with
- /// @Source machine operand and @Destination machine operand.
- /// For COPY-instruction the method naturally returns true, for all other
- /// instructions the method calls target-dependent implementation.
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Source,
- const MachineOperand *&Destination) const {
+ /// value from one register to another register, return the destination and
+ /// source registers as machine operands.
+ /// For a COPY instruction the method naturally returns the destination and
+ /// source registers as machine operands; for all other instructions it calls
+ /// the target-dependent implementation.
+ Optional<DestSourcePair> isCopyInstr(const MachineInstr &MI) const {
if (MI.isCopy()) {
- Destination = &MI.getOperand(0);
- Source = &MI.getOperand(1);
- return true;
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
}
- return isCopyInstrImpl(MI, Source, Destination);
+ return isCopyInstrImpl(MI);
+ }
+
+ /// If the specific machine instruction is an instruction that adds an
+ /// immediate value and a physical register, and stores the result in
+ /// the given physical register \c Reg, return a pair of the source
+ /// register and the offset which has been added.
+ virtual Optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
+ Register Reg) const {
+ return None;
}
/// Store the specified register of the given register class to the specified
@@ -1213,6 +1237,10 @@ public:
/// Get the base operand and byte offset of an instruction that reads/writes
/// memory.
+ /// It returns false if MI does not read/write memory, or if no base operand
+ /// and offset could be identified. It is not guaranteed to recognize base
+ /// operands and offsets in all cases.
virtual bool getMemOperandWithOffset(const MachineInstr &MI,
const MachineOperand *&BaseOp,
int64_t &Offset,
@@ -1343,7 +1371,7 @@ public:
/// scheduling the machine instructions before register allocation.
virtual ScheduleHazardRecognizer *
CreateTargetMIHazardRecognizer(const InstrItineraryData *,
- const ScheduleDAG *DAG) const;
+ const ScheduleDAGMI *DAG) const;
/// Allocate and return a hazard recognizer to use for this target when
/// scheduling the machine instructions after register allocation.
@@ -1621,9 +1649,9 @@ public:
virtual bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
const MachineInstr &MIb) const {
- assert((MIa.mayLoad() || MIa.mayStore()) &&
+ assert(MIa.mayLoadOrStore() &&
"MIa must load from or modify a memory location");
- assert((MIb.mayLoad() || MIb.mayStore()) &&
+ assert(MIb.mayLoadOrStore() &&
"MIb must load from or modify a memory location");
return false;
}
@@ -1713,7 +1741,7 @@ public:
virtual MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsPt,
const DebugLoc &DL, Register Src,
- Register SrcSubReg,
+ unsigned SrcSubReg,
Register Dst) const {
return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst)
.addReg(Src, 0, SrcSubReg);
@@ -1775,11 +1803,21 @@ public:
}
/// Produce the expression describing the \p MI loading a value into
- /// the parameter's forwarding register.
- virtual Optional<ParamLoadedValue>
- describeLoadedValue(const MachineInstr &MI) const;
+ /// the physical register \p Reg. This hook should only be used with
+ /// \p MIs belonging to VReg-less functions.
+ virtual Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const;
+
+ /// Return MIR formatter to format/parse MIR operands. Target can override
+ /// this virtual function and return target specific MIR formatter.
+ virtual const MIRFormatter *getMIRFormatter() const {
+ if (!Formatter.get())
+ Formatter = std::make_unique<MIRFormatter>();
+ return Formatter.get();
+ }
private:
+ mutable std::unique_ptr<MIRFormatter> Formatter;
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
unsigned CatchRetOpcode;
unsigned ReturnOpcode;
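A sketch of consuming the Optional-based copy query that replaces the old out-parameter form (propagateCopy is a hypothetical caller):

  bool propagateCopy(const TargetInstrInfo &TII, const MachineInstr &MI) {
    if (Optional<DestSourcePair> DS = TII.isCopyInstr(MI)) {
      const MachineOperand *Dst = DS->Destination;
      const MachineOperand *Src = DS->Source;
      // ... forward Src's register to users of Dst's register ...
      return Dst->isReg() && Src->isReg();
    }
    return false;
  }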
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h
index a58fca7e73f5..24daf70dc008 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -28,6 +28,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -53,6 +54,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <climits>
@@ -188,13 +190,14 @@ public:
bool IsReturned : 1;
bool IsSwiftSelf : 1;
bool IsSwiftError : 1;
+ bool IsCFGuardTarget : 1;
uint16_t Alignment = 0;
Type *ByValType = nullptr;
ArgListEntry()
: IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
- IsSwiftSelf(false), IsSwiftError(false) {}
+ IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {}
void setAttributes(const CallBase *Call, unsigned ArgIdx);
@@ -222,12 +225,16 @@ public:
llvm_unreachable("Invalid content kind");
}
- /// NOTE: The TargetMachine owns TLOF.
explicit TargetLoweringBase(const TargetMachine &TM);
TargetLoweringBase(const TargetLoweringBase &) = delete;
TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
virtual ~TargetLoweringBase() = default;
+ /// Return true if the target supports strict float operations.
+ bool isStrictFPEnabled() const {
+ return IsStrictFPEnabled;
+ }
+
protected:
/// Initialize all of the actions to default values.
void initActions();
@@ -469,6 +476,10 @@ public:
return false;
}
+ /// Return true if the instruction generated for an equality comparison is
+ /// folded with the instruction generated for a signed comparison.
+ virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
+
/// Return true if it is safe to transform an integer-domain bitwise operation
/// into the equivalent floating-point operation. This should be set to true
/// if the target has IEEE-754-compliant fabs/fneg operations for the input
@@ -924,6 +935,8 @@ public:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX:
Supported = isSupportedFixedPointOperation(Op, VT, Scale);
break;
}
@@ -937,38 +950,11 @@ public:
unsigned EqOpc;
switch (Op) {
default: llvm_unreachable("Unexpected FP pseudo-opcode");
- case ISD::STRICT_FADD: EqOpc = ISD::FADD; break;
- case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break;
- case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break;
- case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break;
- case ISD::STRICT_FREM: EqOpc = ISD::FREM; break;
- case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
- case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
- case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
- case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
- case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
- case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
- case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
- case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
- case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
- case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
- case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
- case ISD::STRICT_LRINT: EqOpc = ISD::LRINT; break;
- case ISD::STRICT_LLRINT: EqOpc = ISD::LLRINT; break;
- case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
- case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
- case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
- case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
- case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
- case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
- case ISD::STRICT_LROUND: EqOpc = ISD::LROUND; break;
- case ISD::STRICT_LLROUND: EqOpc = ISD::LLROUND; break;
- case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
- case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
- case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break;
- case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break;
- case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
- case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break;
+#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break;
+#include "llvm/IR/ConstrainedOps.def"
}
return getOperationAction(EqOpc, VT);
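The replacement above trades thirty-odd hand-written cases for the X-macro pattern: ConstrainedOps.def invokes whatever INSTRUCTION/CMP_INSTRUCTION macros are defined at its point of inclusion, so every constrained op is listed exactly once. A self-contained sketch of the technique with toy opcodes (the entries would normally live in a .def file and be pulled in with #include):

    enum Opcode { FADD, FSUB, SETCC, STRICT_FADD, STRICT_FSUB };

    Opcode equivalentOpcode(Opcode Op) {
      switch (Op) {
    #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
      case STRICT_##DAGN:                                                      \
        return DAGN;
        // Expanded inline here to stay self-contained.
        INSTRUCTION(FADD, 2, 1, fadd, FADD)
        INSTRUCTION(FSUB, 2, 1, fsub, FSUB)
    #undef INSTRUCTION
      default:
        return Op;
      }
    }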
@@ -1029,24 +1015,8 @@ public:
/// Return true if lowering to a jump table is suitable for a set of case
/// clusters which may contain \p NumCases cases and a range of \p Range values.
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
- uint64_t Range) const {
- // FIXME: This function check the maximum table size and density, but the
- // minimum size is not checked. It would be nice if the minimum size is
- // also combined within this function. Currently, the minimum size check is
- // performed in findJumpTable() in SelectionDAGBuiler and
- // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
- const bool OptForSize = SI->getParent()->getParent()->hasOptSize();
- const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
- const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
-
- // Check whether the number of cases is small enough and
- // the range is dense enough for a jump table.
- if ((OptForSize || Range <= MaxJumpTableSize) &&
- (NumCases * 100 >= Range * MinDensity)) {
- return true;
- }
- return false;
- }
+ uint64_t Range, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) const;
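For reference, the body being moved out of line encoded a pure density test; a stand-alone restatement with a worked example (the PSI/BFI-aware version now lives with the other out-of-line TargetLoweringBase definitions):

    #include <cstdint>

    // A jump table is worthwhile when the case count fits under the size cap
    // and the cases are dense enough over the value range.
    bool suitableForJumpTable(uint64_t NumCases, uint64_t Range, bool OptForSize,
                              unsigned MinDensity, unsigned MaxJumpTableSize) {
      return (OptForSize || Range <= MaxJumpTableSize) &&
             NumCases * 100 >= Range * MinDensity;
    }
    // E.g. 40 cases over a range of 100 at a 40% minimum density qualify:
    // 40 * 100 = 4000 >= 100 * 40 = 4000.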
/// Return true if lowering to a bit test is suitable for a set of case
/// clusters which contains \p NumDests unique destinations, \p Low and
@@ -1143,12 +1113,8 @@ public:
/// Return how the indexed load should be treated: either it is legal, needs
/// to be promoted to a larger size, needs to be expanded to some other code
/// sequence, or the target has a custom expander for it.
- LegalizeAction
- getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
- assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
- "Table isn't big enough!");
- unsigned Ty = (unsigned)VT.SimpleTy;
- return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4);
+ LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
+ return getIndexedModeAction(IdxMode, VT, IMAB_Load);
}
/// Return true if the specified indexed load is legal on this target.
@@ -1161,12 +1127,8 @@ public:
/// Return how the indexed store should be treated: either it is legal, needs
/// to be promoted to a larger size, needs to be expanded to some other code
/// sequence, or the target has a custom expander for it.
- LegalizeAction
- getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
- assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
- "Table isn't big enough!");
- unsigned Ty = (unsigned)VT.SimpleTy;
- return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f);
+ LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
+ return getIndexedModeAction(IdxMode, VT, IMAB_Store);
}
/// Return true if the specified indexed load is legal on this target.
@@ -1176,6 +1138,34 @@ public:
getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
}
+ /// Return how the indexed masked load should be treated: either it is legal, needs
+ /// to be promoted to a larger size, needs to be expanded to some other code
+ /// sequence, or the target has a custom expander for it.
+ LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
+ return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad);
+ }
+
+ /// Return true if the specified indexed masked load is legal on this target.
+ bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
+ return VT.isSimple() &&
+ (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
+ getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
+ }
+
+ /// Return how the indexed masked store should be treated: either it is legal, needs
+ /// to be promoted to a larger size, needs to be expanded to some other code
+ /// sequence, or the target has a custom expander for it.
+ LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
+ return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore);
+ }
+
+ /// Return true if the specified indexed masked store is legal on this target.
+ bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
+ return VT.isSimple() &&
+ (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
+ getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
+ }
+
/// Return how the condition code should be treated: either it is legal, needs
/// to be expanded to some other code sequence, or the target has a custom
/// expander for it.
@@ -1265,7 +1255,7 @@ public:
Elm = PointerTy.getTypeForEVT(Ty->getContext());
}
return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
- VTy->getNumElements());
+ VTy->getElementCount());
}
return getValueType(DL, Ty, AllowUnknown);
@@ -1550,16 +1540,6 @@ public:
/// have to be legal as the hook is used before type legalization.
virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
- /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp.
- bool usesUnderscoreSetJmp() const {
- return UseUnderscoreSetJmp;
- }
-
- /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp.
- bool usesUnderscoreLongJmp() const {
- return UseUnderscoreLongJmp;
- }
-
/// Return lower limit for number of blocks in a jump table.
virtual unsigned getMinimumJumpTableEntries() const;
@@ -1960,18 +1940,6 @@ protected:
SchedPreferenceInfo = Pref;
}
- /// Indicate whether this target prefers to use _setjmp to implement
- /// llvm.setjmp or the version without _. Defaults to false.
- void setUseUnderscoreSetJmp(bool Val) {
- UseUnderscoreSetJmp = Val;
- }
-
- /// Indicate whether this target prefers to use _longjmp to implement
- /// llvm.longjmp or the version without _. Defaults to false.
- void setUseUnderscoreLongJmp(bool Val) {
- UseUnderscoreLongJmp = Val;
- }
-
/// Indicate the minimum number of blocks to generate jump tables.
void setMinimumJumpTableEntries(unsigned Val);
@@ -2063,13 +2031,8 @@ protected:
///
/// NOTE: All indexed mode loads are initialized to Expand in
/// TargetLowering.cpp
- void setIndexedLoadAction(unsigned IdxMode, MVT VT,
- LegalizeAction Action) {
- assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
- (unsigned)Action < 0xf && "Table isn't big enough!");
- // Load action are kept in the upper half.
- IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0;
- IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4;
+ void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
+ setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
}
/// Indicate that the specified indexed store does or does not work with the
@@ -2077,13 +2040,28 @@ protected:
///
/// NOTE: All indexed mode stores are initialized to Expand in
/// TargetLowering.cpp
- void setIndexedStoreAction(unsigned IdxMode, MVT VT,
- LegalizeAction Action) {
- assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
- (unsigned)Action < 0xf && "Table isn't big enough!");
- // Store action are kept in the lower half.
- IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f;
- IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action);
+ void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
+ setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
+ }
+
+ /// Indicate that the specified indexed masked load does or does not work with
+ /// the specified type and indicate what to do about it.
+ ///
+ /// NOTE: All indexed mode masked loads are initialized to Expand in
+ /// TargetLowering.cpp
+ void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT,
+ LegalizeAction Action) {
+ setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action);
+ }
+
+ /// Indicate that the specified indexed masked store does or does not work
+ /// with the specified type and indicate what to do about it.
+ ///
+ /// NOTE: All indexed mode masked stores are initialized to Expand in
+ /// TargetLowering.cpp
+ void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT,
+ LegalizeAction Action) {
+ setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action);
}
/// Indicate that the specified condition code is or isn't supported on the
@@ -2504,7 +2482,8 @@ public:
/// Return true if an fpext operation input to an \p Opcode operation is free
/// (for instance, because half-precision floating-point numbers are
/// implicitly extended to float-precision) for an FMA instruction.
- virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const {
+ virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
+ EVT DestVT, EVT SrcVT) const {
assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"invalid fpext types");
return isFPExtFree(DestVT, SrcVT);
@@ -2536,10 +2515,24 @@ public:
/// not legal, but should return true if those types will eventually legalize
/// to types that support FMAs. After legalization, it will only be called on
/// types that support FMAs (via Legal or Custom actions)
- virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
+ virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT) const {
return false;
}
+ /// IR version of isFMAFasterThanFMulAndFAdd.
+ virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
+ return false;
+ }
+
+ /// Returns true if the FADD or FSUB node passed could legally be combined with
+ /// an fmul to form an ISD::FMAD.
+ virtual bool isFMADLegalForFAddFSub(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ assert(N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB);
+ return isOperationLegal(ISD::FMAD, N->getValueType(0));
+ }
+
/// Return true if it's profitable to narrow operations of type VT1 to
/// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
/// i32 to i16.
@@ -2608,10 +2601,10 @@ public:
// same blocks of its users.
virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
- // Return the shift amount threshold for profitable transforms into shifts.
- // Transforms creating shifts above the returned value will be avoided.
- virtual unsigned getShiftAmountThreshold(EVT VT) const {
- return VT.getScalarSizeInBits();
+ /// Return true if creating a shift of the type by the given
+ /// amount is not profitable.
+ virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const {
+ return false;
}
//===--------------------------------------------------------------------===//
@@ -2683,16 +2676,6 @@ private:
/// predication.
bool JumpIsExpensive;
- /// This target prefers to use _setjmp to implement llvm.setjmp.
- ///
- /// Defaults to false.
- bool UseUnderscoreSetJmp;
-
- /// This target prefers to use _longjmp to implement llvm.longjmp.
- ///
- /// Defaults to false.
- bool UseUnderscoreLongJmp;
-
/// Information about the contents of the high-bits in boolean values held in
/// a type wider than i1. See getBooleanContents.
BooleanContent BooleanContents;
@@ -2741,7 +2724,7 @@ private:
/// This indicates the default register class to use for each ValueType the
/// target supports natively.
const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
- unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE];
+ uint16_t NumRegistersForVT[MVT::LAST_VALUETYPE];
MVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
/// This indicates the "representative" register class to use for each
@@ -2781,13 +2764,13 @@ private:
/// truncating store of a specific value type and truncating type is legal.
LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
- /// For each indexed mode and each value type, keep a pair of LegalizeAction
+ /// For each indexed mode and each value type, keep a quad of LegalizeAction
/// that indicates how instruction selection should deal with the load /
- /// store.
+ /// store / maskedload / maskedstore.
///
/// The first dimension is the value_type for the reference. The second
/// dimension represents the various modes for load store.
- uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];
+ uint16_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];
/// For each condition code (ISD::CondCode) keep a LegalizeAction that
/// indicates how instruction selection should deal with the condition code.
@@ -2830,6 +2813,32 @@ private:
/// Set default libcall names and calling conventions.
void InitLibcalls(const Triple &TT);
+ /// The bits of IndexedModeActions used to store the legalisation actions.
+ /// We store the data as | ML | MS | L | S |, each taking 4 bits.
+ enum IndexedModeActionsBits {
+ IMAB_Store = 0,
+ IMAB_Load = 4,
+ IMAB_MaskedStore = 8,
+ IMAB_MaskedLoad = 12
+ };
+
+ void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift,
+ LegalizeAction Action) {
+ assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
+ (unsigned)Action < 0xf && "Table isn't big enough!");
+ unsigned Ty = (unsigned)VT.SimpleTy;
+ IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift);
+ IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift;
+ }
+
+ LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT,
+ unsigned Shift) const {
+ assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
+ "Table isn't big enough!");
+ unsigned Ty = (unsigned)VT.SimpleTy;
+ return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf);
+ }
+
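The setIndexedModeAction/getIndexedModeAction helpers above pack four legalize actions into a single 16-bit cell, one nibble per operation kind. A stand-alone sketch of the packing arithmetic, assuming only that an action fits in four bits:

    #include <cassert>
    #include <cstdint>

    // One cell per (value type, indexed mode); layout | ML | MS | L | S |.
    enum NibbleShift { Store = 0, Load = 4, MaskedStore = 8, MaskedLoad = 12 };

    void setAction(uint16_t &Cell, unsigned Shift, uint8_t Action) {
      assert(Action < 0xf && "action must fit in a nibble");
      Cell &= ~(0xf << Shift);           // clear the old nibble
      Cell |= uint16_t(Action) << Shift; // write the new one
    }

    uint8_t getAction(uint16_t Cell, unsigned Shift) {
      return (Cell >> Shift) & 0xf;
    }
    // setAction(Cell, MaskedLoad, 3) leaves the other three nibbles intact;
    // getAction(Cell, MaskedLoad) then reads back 3.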
protected:
/// Return true if the extension represented by \p I is free.
/// \pre \p I is a sign, zero, or fp extension and
@@ -2932,6 +2941,8 @@ protected:
/// details.
MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+
+ bool IsStrictFPEnabled;
};
/// This class defines information used to lower LLVM code to legal SelectionDAG
@@ -2947,7 +2958,6 @@ public:
TargetLowering(const TargetLowering &) = delete;
TargetLowering &operator=(const TargetLowering &) = delete;
- /// NOTE: The TargetMachine owns TLOF.
explicit TargetLowering(const TargetMachine &TM);
bool isPositionIndependent() const;
@@ -3024,12 +3034,19 @@ public:
const SDLoc &DL, const SDValue OldLHS,
const SDValue OldRHS) const;
+ void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
+ SDValue &NewRHS, ISD::CondCode &CCCode,
+ const SDLoc &DL, const SDValue OldLHS,
+ const SDValue OldRHS, SDValue &Chain,
+ bool IsSignaling = false) const;
+
/// Returns a pair of (return value, chain).
/// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
EVT RetVT, ArrayRef<SDValue> Ops,
MakeLibCallOptions CallOptions,
- const SDLoc &dl) const;
+ const SDLoc &dl,
+ SDValue Chain = SDValue()) const;
/// Check whether parameters to a call that are passed in callee saved
/// registers are the same as from the calling function. This needs to be
@@ -3262,9 +3279,7 @@ public:
bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
- bool isAfterLegalizeDAG() const {
- return Level == AfterLegalizeDAG;
- }
+ bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; }
CombineLevel getDAGCombineLevel() { return Level; }
bool isCalledByLegalizer() const { return CalledByLegalizer; }
@@ -3695,7 +3710,7 @@ public:
/// Return the register ID of the name passed in. Used by named register
/// global variables extension. There is no target-independent behaviour
/// so the default action is to bail.
- virtual Register getRegisterByName(const char* RegName, EVT VT,
+ virtual Register getRegisterByName(const char* RegName, LLT Ty,
const MachineFunction &MF) const {
report_fatal_error("Named registers not implemented for this target");
}
@@ -3758,7 +3773,7 @@ public:
/// Should SelectionDAG lower an atomic store of the given kind as a normal
/// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
/// eventually migrate all targets to using StoreSDNodes, but porting is
- /// being done target at a time.
+ /// being done one target at a time.
virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
assert(SI.isAtomic() && "violated precondition");
return false;
@@ -3940,9 +3955,7 @@ public:
StringRef Constraint, MVT VT) const;
virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const {
- if (ConstraintCode == "i")
- return InlineAsm::Constraint_i;
- else if (ConstraintCode == "m")
+ if (ConstraintCode == "m")
return InlineAsm::Constraint_m;
return InlineAsm::Constraint_Unknown;
}
@@ -4082,14 +4095,18 @@ public:
/// Expand float to UINT conversion
/// \param N Node to expand
/// \param Result output after conversion
+ /// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
- bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const;
+ bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
+ SelectionDAG &DAG) const;
/// Expand UINT(i64) to double(f64) conversion
/// \param N Node to expand
/// \param Result output after conversion
+ /// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
- bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
+ SelectionDAG &DAG) const;
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
@@ -4125,12 +4142,13 @@ public:
/// Turn load of vector type into a load of the individual elements.
/// \param LD load to expand
- /// \returns MERGE_VALUEs of the scalar loads with their chains.
- SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const;
+ /// \returns BUILD_VECTOR and TokenFactor nodes.
+ std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD,
+ SelectionDAG &DAG) const;
// Turn a store of a vector type into stores of the individual elements.
/// \param ST Store with a vector value type
- /// \returns MERGE_VALUs of the individual store chains.
+ /// \returns TokenFactor of the individual store chains.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;
/// Expands an unaligned load to 2 half-size loads for an integer, and
@@ -4168,6 +4186,14 @@ public:
/// method accepts integers as its arguments.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
+ /// Method for building the DAG expansion of ISD::[US]DIVFIX. This
+ /// method accepts integers as its arguments.
+ /// Note: This method may fail if the division could not be performed
+ /// within the type. Clients must retry with a wider type if this happens.
+ SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
+ SDValue LHS, SDValue RHS,
+ unsigned Scale, SelectionDAG &DAG) const;
+
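The caveat on expandFixedPointDiv is easiest to see in plain arithmetic: the dividend must be pre-shifted left by the scale before dividing, which can overflow a too-narrow type. An illustrative snippet of the fixed-point math only (not the DAG expansion itself):

    #include <cstdint>

    // Qm.n division: widen, shift by the scale, then divide.
    int64_t fixedDiv(int32_t LHS, int32_t RHS, unsigned Scale) {
      return (int64_t(LHS) << Scale) / RHS; // widening makes the shift safe
    }
    // E.g. in Q16, 1.5 / 0.5 is (98304 << 16) / 32768 == 196608, i.e. 3.0.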
/// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
/// always succeeds and populates the Result and Overflow arguments.
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 59f5ddbd9dac..4f58df93b93e 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -233,6 +233,15 @@ public:
MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
+ MCSection *getSectionForJumpTable(const Function &F,
+ const TargetMachine &TM) const override;
+
+ /// Given a constant with the SectionKind, return a section that it should be
+ /// placed in.
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+ const Constant *C,
+ unsigned &Align) const override;
+
static XCOFF::StorageClass getStorageClassForGlobal(const GlobalObject *GO);
};
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSchedule.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSchedule.h
index cce85c8d7b0d..aa6b82e14aa6 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSchedule.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSchedule.h
@@ -37,8 +37,12 @@ class TargetSchedModel {
const TargetInstrInfo *TII = nullptr;
SmallVector<unsigned, 16> ResourceFactors;
- unsigned MicroOpFactor; // Multiply to normalize microops to resource units.
- unsigned ResourceLCM; // Resource units per cycle. Latency normalization factor.
+
+ // Multiply to normalize microops to resource units.
+ unsigned MicroOpFactor = 0;
+
+ // Resource units per cycle. Latency normalization factor.
+ unsigned ResourceLCM = 0;
unsigned computeInstrLatency(const MCSchedClassDesc &SCDesc) const;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index 56018eca8c27..6768cea89406 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -206,6 +206,10 @@ public:
/// which is the preferred way to influence this.
virtual bool enablePostRAScheduler() const;
+ /// True if the subtarget should run a machine scheduler after register
+ /// allocation.
+ virtual bool enablePostRAMachineScheduler() const;
+
/// True if the subtarget should run the atomic expansion pass.
virtual bool enableAtomicExpand() const;
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h
index cd4c4ca64081..bcf417762920 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ValueTypes.h
@@ -18,6 +18,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TypeSize.h"
#include <cassert>
#include <cstdint>
#include <string>
@@ -209,11 +210,13 @@ namespace llvm {
/// Return true if the bit size is a multiple of 8.
bool isByteSized() const {
- return (getSizeInBits() & 7) == 0;
+ return getSizeInBits().isByteSized();
}
/// Return true if the size is a power-of-two number of bytes.
bool isRound() const {
+ if (isScalableVector())
+ return false;
unsigned BitSize = getSizeInBits();
return BitSize >= 8 && !(BitSize & (BitSize - 1));
}
@@ -288,25 +291,38 @@ namespace llvm {
}
/// Return the size of the specified value type in bits.
- unsigned getSizeInBits() const {
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getSizeInBits() const {
if (isSimple())
return V.getSizeInBits();
return getExtendedSizeInBits();
}
- unsigned getScalarSizeInBits() const {
+ TypeSize getScalarSizeInBits() const {
return getScalarType().getSizeInBits();
}
/// Return the number of bytes overwritten by a store of the specified value
/// type.
- unsigned getStoreSize() const {
- return (getSizeInBits() + 7) / 8;
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getStoreSize() const {
+ TypeSize BaseSize = getSizeInBits();
+ return {(BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable()};
}
/// Return the number of bits overwritten by a store of the specified value
/// type.
- unsigned getStoreSizeInBits() const {
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getStoreSizeInBits() const {
return getStoreSize() * 8;
}
@@ -428,7 +444,7 @@ namespace llvm {
bool isExtended2048BitVector() const LLVM_READONLY;
EVT getExtendedVectorElementType() const;
unsigned getExtendedVectorNumElements() const LLVM_READONLY;
- unsigned getExtendedSizeInBits() const LLVM_READONLY;
+ TypeSize getExtendedSizeInBits() const LLVM_READONLY;
};
} // end namespace llvm
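The EVT changes above thread TypeSize through the size queries: a size is now a known minimum plus a flag saying the runtime size is an unknown positive multiple of it (for scalable vectors). A simplified model of the getStoreSize() arithmetic, with illustrative names:

    #include <cstdint>

    // Simplified stand-in for llvm::TypeSize.
    struct SimpleTypeSize {
      uint64_t KnownMinSize;
      bool Scalable;
    };

    // Mirrors getStoreSize(): round the minimum up to bytes, keep scalability.
    SimpleTypeSize storeSize(SimpleTypeSize Bits) {
      return {(Bits.KnownMinSize + 7) / 8, Bits.Scalable};
    }
    // A scalable vector with a 128-bit known minimum stores as {16, true},
    // i.e. 16 * vscale bytes at runtime.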
diff --git a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h
new file mode 100644
index 000000000000..80df01ca0539
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h
@@ -0,0 +1,198 @@
+//===- DWARFLinker.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DWARFLINKER_DWARFLINKER_H
+#define LLVM_DWARFLINKER_DWARFLINKER_H
+
+#include "llvm/CodeGen/AccelTable.h"
+#include "llvm/CodeGen/NonRelocatableStringpool.h"
+#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include <map>
+
+namespace llvm {
+
+enum class DwarfLinkerClient { Dsymutil, LLD, General };
+
+/// Partial address range. Besides an offset, only the
+/// HighPC is stored. The structure is stored in a map where the LowPC is the
+/// key.
+struct ObjFileAddressRange {
+ /// Function HighPC.
+ uint64_t HighPC;
+ /// Offset to apply to the linked address.
+ /// Should be 0 for a not-linked object file.
+ int64_t Offset;
+
+ ObjFileAddressRange(uint64_t EndPC, int64_t Offset)
+ : HighPC(EndPC), Offset(Offset) {}
+
+ ObjFileAddressRange() : HighPC(0), Offset(0) {}
+};
+
+/// Map LowPC to ObjFileAddressRange.
+using RangesTy = std::map<uint64_t, ObjFileAddressRange>;
+
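Keying the map on LowPC makes "which range contains this address" a single ordered lookup. A sketch of the usual pattern, assuming only the map shape declared above:

    #include <cstdint>
    #include <map>

    struct Range { uint64_t HighPC; int64_t Offset; }; // ObjFileAddressRange's shape
    using Ranges = std::map<uint64_t, Range>;          // keyed by LowPC

    // Take the first entry whose LowPC is greater than Addr, step back one,
    // then check the HighPC bound.
    const Range *lookup(const Ranges &R, uint64_t Addr) {
      auto It = R.upper_bound(Addr);
      if (It == R.begin())
        return nullptr;
      --It;
      return Addr < It->second.HighPC ? &It->second : nullptr;
    }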
+/// AddressesMap represents information about valid addresses used
+/// by debug information. Valid addresses are those which point to
+/// live code sections, i.e. relocations for these addresses point
+/// into sections which would be/are placed into the resulting binary.
+class AddressesMap {
+public:
+ virtual ~AddressesMap();
+
+ /// Returns true if the represented addresses are from a linked file.
+ /// Returns false if the represented addresses are from a not-linked
+ /// object file.
+ virtual bool areRelocationsResolved() const = 0;
+
+ /// Checks that there are valid relocations against a .debug_info
+ /// section. Resets the current relocation pointer if necessary.
+ virtual bool hasValidRelocs(bool ResetRelocsPtr = true) = 0;
+
+ /// Checks that there is a relocation against the .debug_info
+ /// section between \p StartOffset and \p EndOffset.
+ ///
+ /// This function must be called with offsets in strictly ascending
+ /// order because it never looks back at relocations it already 'went past'.
+ /// \returns true and sets Info.InDebugMap if it is the case.
+ virtual bool hasValidRelocationAt(uint64_t StartOffset, uint64_t EndOffset,
+ CompileUnit::DIEInfo &Info) = 0;
+
+ /// Apply the valid relocations to the buffer \p Data, taking into
+ /// account that Data is at \p BaseOffset in the debug_info section.
+ ///
+ /// This function must be called with monotonic \p BaseOffset values.
+ ///
+ /// \returns true if any relocation has been applied.
+ virtual bool applyValidRelocs(MutableArrayRef<char> Data, uint64_t BaseOffset,
+ bool IsLittleEndian) = 0;
+
+ /// Returns all valid function address ranges (i.e., those ranges
+ /// which point to sections with code).
+ virtual RangesTy &getValidAddressRanges() = 0;
+
+ /// Erases all data.
+ virtual void clear() = 0;
+};
+
+/// DwarfEmitter presents an interface for generating all debug info tables.
+class DwarfEmitter {
+public:
+ virtual ~DwarfEmitter();
+
+ /// Emit DIE containing warnings.
+ virtual void emitPaperTrailWarningsDie(const Triple &Triple, DIE &Die) = 0;
+
+ /// Emit the section named SecName with contents equal to the
+ /// corresponding section in Obj.
+ virtual void emitSectionContents(const object::ObjectFile &Obj,
+ StringRef SecName) = 0;
+
+ /// Emit the abbreviation table \p Abbrevs to the debug_abbrev section.
+ virtual void
+ emitAbbrevs(const std::vector<std::unique_ptr<DIEAbbrev>> &Abbrevs,
+ unsigned DwarfVersion) = 0;
+
+ /// Emit the string table described by \p Pool.
+ virtual void emitStrings(const NonRelocatableStringpool &Pool) = 0;
+
+ /// Emit DWARF debug names.
+ virtual void
+ emitDebugNames(AccelTable<DWARF5AccelTableStaticData> &Table) = 0;
+
+ /// Emit Apple namespaces accelerator table.
+ virtual void
+ emitAppleNamespaces(AccelTable<AppleAccelTableStaticOffsetData> &Table) = 0;
+
+ /// Emit Apple names accelerator table.
+ virtual void
+ emitAppleNames(AccelTable<AppleAccelTableStaticOffsetData> &Table) = 0;
+
+ /// Emit Apple Objective-C accelerator table.
+ virtual void
+ emitAppleObjc(AccelTable<AppleAccelTableStaticOffsetData> &Table) = 0;
+
+ /// Emit Apple type accelerator table.
+ virtual void
+ emitAppleTypes(AccelTable<AppleAccelTableStaticTypeData> &Table) = 0;
+
+ /// Emit debug_ranges for \p FuncRange by translating the
+ /// original \p Entries.
+ virtual void emitRangesEntries(
+ int64_t UnitPcOffset, uint64_t OrigLowPc,
+ const FunctionIntervals::const_iterator &FuncRange,
+ const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries,
+ unsigned AddressSize) = 0;
+
+ /// Emit debug_aranges entries for \p Unit and if \p DoRangesSection is true,
+ /// also emit the debug_ranges entries for the DW_TAG_compile_unit's
+ /// DW_AT_ranges attribute.
+ virtual void emitUnitRangesEntries(CompileUnit &Unit,
+ bool DoRangesSection) = 0;
+
+ /// Copy the debug_line over to the updated binary while unobfuscating the
+ /// file names and directories.
+ virtual void translateLineTable(DataExtractor LineData, uint64_t Offset) = 0;
+
+ /// Emit the line table described in \p Rows into the debug_line section.
+ virtual void emitLineTableForUnit(MCDwarfLineTableParams Params,
+ StringRef PrologueBytes,
+ unsigned MinInstLength,
+ std::vector<DWARFDebugLine::Row> &Rows,
+ unsigned AddressSize) = 0;
+
+ /// Emit the .debug_pubnames contribution for \p Unit.
+ virtual void emitPubNamesForUnit(const CompileUnit &Unit) = 0;
+
+ /// Emit the .debug_pubtypes contribution for \p Unit.
+ virtual void emitPubTypesForUnit(const CompileUnit &Unit) = 0;
+
+ /// Emit a CIE.
+ virtual void emitCIE(StringRef CIEBytes) = 0;
+
+ /// Emit an FDE with data \p Bytes.
+ virtual void emitFDE(uint32_t CIEOffset, uint32_t AddressSize, uint32_t Address,
+ StringRef Bytes) = 0;
+
+ /// Emit the debug_loc contribution for \p Unit by copying the entries from
+ /// \p Dwarf and offsetting them. Update the location attributes to point to
+ /// the new entries.
+ virtual void emitLocationsForUnit(
+ const CompileUnit &Unit, DWARFContext &Dwarf,
+ std::function<void(StringRef, SmallVectorImpl<uint8_t> &)>
+ ProcessExpr) = 0;
+
+ /// Emit the compilation unit header for \p Unit in the
+ /// debug_info section.
+ ///
+ /// As a side effect, this also switches the current Dwarf version
+ /// of the MC layer to the one of Unit.getOrigUnit().
+ virtual void emitCompileUnitHeader(CompileUnit &Unit) = 0;
+
+ /// Recursively emit the DIE tree rooted at \p Die.
+ virtual void emitDIE(DIE &Die) = 0;
+
+ /// Returns size of generated .debug_line section.
+ virtual uint64_t getLineSectionSize() const = 0;
+
+ /// Returns size of generated .debug_frame section.
+ virtual uint64_t getFrameSectionSize() const = 0;
+
+ /// Returns size of generated .debug_ranges section.
+ virtual uint64_t getRangesSectionSize() const = 0;
+
+ /// Returns size of generated .debug_info section.
+ virtual uint64_t getDebugInfoSectionSize() const = 0;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_DWARFLINKER_DWARFLINKER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
new file mode 100644
index 000000000000..7873a16fea52
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
@@ -0,0 +1,330 @@
+//===- DWARFLinkerCompileUnit.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H
+#define LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H
+
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/Support/DataExtractor.h"
+
+namespace llvm {
+
+class DeclContext;
+
+template <typename KeyT, typename ValT>
+using HalfOpenIntervalMap =
+ IntervalMap<KeyT, ValT, IntervalMapImpl::NodeSizer<KeyT, ValT>::LeafSize,
+ IntervalMapHalfOpenInfo<KeyT>>;
+
+using FunctionIntervals = HalfOpenIntervalMap<uint64_t, int64_t>;
+
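FunctionIntervals maps half-open [LowPC, HighPC) ranges to the offset that turns an object-file address into a linked address. A short usage sketch, assuming the IntervalMap API from llvm/ADT/IntervalMap.h (allocator-backed construction, insert, and containment via find):

    #include "llvm/ADT/IntervalMap.h"
    #include <cstdint>

    using FuncIntervals = llvm::IntervalMap<
        uint64_t, int64_t,
        llvm::IntervalMapImpl::NodeSizer<uint64_t, int64_t>::LeafSize,
        llvm::IntervalMapHalfOpenInfo<uint64_t>>;

    // Construction: FuncIntervals::Allocator Alloc; FuncIntervals Ranges(Alloc);
    // Population:   Ranges.insert(LowPC, HighPC, PcOffset);
    int64_t offsetFor(const FuncIntervals &Ranges, uint64_t Addr) {
      auto It = Ranges.find(Addr); // first interval ending after Addr
      return (It.valid() && It.start() <= Addr) ? *It : 0;
    }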
+// FIXME: Delete this structure.
+struct PatchLocation {
+ DIE::value_iterator I;
+
+ PatchLocation() = default;
+ PatchLocation(DIE::value_iterator I) : I(I) {}
+
+ void set(uint64_t New) const {
+ assert(I);
+ const auto &Old = *I;
+ assert(Old.getType() == DIEValue::isInteger);
+ *I = DIEValue(Old.getAttribute(), Old.getForm(), DIEInteger(New));
+ }
+
+ uint64_t get() const {
+ assert(I);
+ return I->getDIEInteger().getValue();
+ }
+};
+
+/// Stores all information relating to a compile unit, from its original
+/// instance in the object file to its brand-new cloned and generated DIE tree.
+class CompileUnit {
+public:
+ /// Information gathered about a DIE in the object file.
+ struct DIEInfo {
+ /// Address offset to apply to the described entity.
+ int64_t AddrAdjust;
+
+ /// ODR Declaration context.
+ DeclContext *Ctxt;
+
+ /// Cloned version of that DIE.
+ DIE *Clone;
+
+ /// The index of this DIE's parent.
+ uint32_t ParentIdx;
+
+ /// Is the DIE part of the linked output?
+ bool Keep : 1;
+
+ /// Was this DIE's entity found in the map?
+ bool InDebugMap : 1;
+
+ /// Is this a pure forward declaration we can strip?
+ bool Prune : 1;
+
+ /// Does this DIE transitively refer to an incomplete decl?
+ bool Incomplete : 1;
+ };
+
+ CompileUnit(DWARFUnit &OrigUnit, unsigned ID, bool CanUseODR,
+ StringRef ClangModuleName)
+ : OrigUnit(OrigUnit), ID(ID), Ranges(RangeAlloc),
+ ClangModuleName(ClangModuleName) {
+ Info.resize(OrigUnit.getNumDIEs());
+
+ auto CUDie = OrigUnit.getUnitDIE(false);
+ if (!CUDie) {
+ HasODR = false;
+ return;
+ }
+ if (auto Lang = dwarf::toUnsigned(CUDie.find(dwarf::DW_AT_language)))
+ HasODR = CanUseODR && (*Lang == dwarf::DW_LANG_C_plus_plus ||
+ *Lang == dwarf::DW_LANG_C_plus_plus_03 ||
+ *Lang == dwarf::DW_LANG_C_plus_plus_11 ||
+ *Lang == dwarf::DW_LANG_C_plus_plus_14 ||
+ *Lang == dwarf::DW_LANG_ObjC_plus_plus);
+ else
+ HasODR = false;
+ }
+
+ DWARFUnit &getOrigUnit() const { return OrigUnit; }
+
+ unsigned getUniqueID() const { return ID; }
+
+ void createOutputDIE() {
+ NewUnit.emplace(OrigUnit.getVersion(), OrigUnit.getAddressByteSize(),
+ OrigUnit.getUnitDIE().getTag());
+ }
+
+ DIE *getOutputUnitDIE() const {
+ if (NewUnit)
+ return &const_cast<BasicDIEUnit &>(*NewUnit).getUnitDie();
+ return nullptr;
+ }
+
+ bool hasODR() const { return HasODR; }
+ bool isClangModule() const { return !ClangModuleName.empty(); }
+ uint16_t getLanguage();
+
+ const std::string &getClangModuleName() const { return ClangModuleName; }
+
+ DIEInfo &getInfo(unsigned Idx) { return Info[Idx]; }
+ const DIEInfo &getInfo(unsigned Idx) const { return Info[Idx]; }
+
+ uint64_t getStartOffset() const { return StartOffset; }
+ uint64_t getNextUnitOffset() const { return NextUnitOffset; }
+ void setStartOffset(uint64_t DebugInfoSize) { StartOffset = DebugInfoSize; }
+
+ uint64_t getLowPc() const { return LowPc; }
+ uint64_t getHighPc() const { return HighPc; }
+ bool hasLabelAt(uint64_t Addr) const { return Labels.count(Addr); }
+
+ Optional<PatchLocation> getUnitRangesAttribute() const {
+ return UnitRangeAttribute;
+ }
+
+ const FunctionIntervals &getFunctionRanges() const { return Ranges; }
+
+ const std::vector<PatchLocation> &getRangesAttributes() const {
+ return RangeAttributes;
+ }
+
+ const std::vector<std::pair<PatchLocation, int64_t>> &
+ getLocationAttributes() const {
+ return LocationAttributes;
+ }
+
+ void setHasInterestingContent() { HasInterestingContent = true; }
+ bool hasInterestingContent() { return HasInterestingContent; }
+
+ /// Mark every DIE in this unit as kept. This function also
+ /// marks variables as InDebugMap so that they appear in the
+ /// reconstructed accelerator tables.
+ void markEverythingAsKept();
+
+ /// Compute the end offset for this unit. Must be called after the CU's DIEs
+ /// have been cloned. \returns the next unit offset (which is also the
+ /// current debug_info section size).
+ uint64_t computeNextUnitOffset();
+
+ /// Keep track of a forward reference to DIE \p Die in \p RefUnit by \p
+ /// Attr. The attribute should be fixed up later to point to the absolute
+ /// offset of \p Die in the debug_info section or to the canonical offset of
+ /// \p Ctxt if it is non-null.
+ void noteForwardReference(DIE *Die, const CompileUnit *RefUnit,
+ DeclContext *Ctxt, PatchLocation Attr);
+
+ /// Apply all fixups recorded by noteForwardReference().
+ void fixupForwardReferences();
+
+ /// Add the low_pc of a label that is relocated by applying
+ /// offset \p PCOffset.
+ void addLabelLowPc(uint64_t LabelLowPc, int64_t PcOffset);
+
+ /// Add a function range [\p LowPC, \p HighPC) that is relocated by applying
+ /// offset \p PCOffset.
+ void addFunctionRange(uint64_t LowPC, uint64_t HighPC, int64_t PCOffset);
+
+ /// Keep track of a DW_AT_ranges attribute that we will need to patch up later.
+ void noteRangeAttribute(const DIE &Die, PatchLocation Attr);
+
+ /// Keep track of a location attribute pointing to a location list in the
+ /// debug_loc section.
+ void noteLocationAttribute(PatchLocation Attr, int64_t PcOffset);
+
+ /// Add a namespace accelerator entry for \a Die with \a Name.
+ void addNamespaceAccelerator(const DIE *Die, DwarfStringPoolEntryRef Name);
+
+ /// Add a name accelerator entry for \a Die with \a Name.
+ void addNameAccelerator(const DIE *Die, DwarfStringPoolEntryRef Name,
+ bool SkipPubnamesSection = false);
+
+ /// Add various accelerator entries for \p Die with \p Name. \p Name must
+ /// be an Objective-C selector.
+ void addObjCAccelerator(const DIE *Die, DwarfStringPoolEntryRef Name,
+ bool SkipPubnamesSection = false);
+
+ /// Add a type accelerator entry for \p Die with \p Name.
+ void addTypeAccelerator(const DIE *Die, DwarfStringPoolEntryRef Name,
+ bool ObjcClassImplementation,
+ uint32_t QualifiedNameHash);
+
+ struct AccelInfo {
+ /// Name of the entry.
+ DwarfStringPoolEntryRef Name;
+
+ /// DIE this entry describes.
+ const DIE *Die;
+
+ /// Hash of the fully qualified name.
+ uint32_t QualifiedNameHash;
+
+ /// Emit this entry only in the apple_* sections.
+ bool SkipPubSection;
+
+ /// Is this an ObjC class implementation?
+ bool ObjcClassImplementation;
+
+ AccelInfo(DwarfStringPoolEntryRef Name, const DIE *Die,
+ bool SkipPubSection = false)
+ : Name(Name), Die(Die), SkipPubSection(SkipPubSection) {}
+
+ AccelInfo(DwarfStringPoolEntryRef Name, const DIE *Die,
+ uint32_t QualifiedNameHash, bool ObjCClassIsImplementation)
+ : Name(Name), Die(Die), QualifiedNameHash(QualifiedNameHash),
+ SkipPubSection(false),
+ ObjcClassImplementation(ObjCClassIsImplementation) {}
+ };
+
+ const std::vector<AccelInfo> &getPubnames() const { return Pubnames; }
+ const std::vector<AccelInfo> &getPubtypes() const { return Pubtypes; }
+ const std::vector<AccelInfo> &getNamespaces() const { return Namespaces; }
+ const std::vector<AccelInfo> &getObjC() const { return ObjC; }
+
+ /// Get the full path for file \a FileNum in the line table
+ StringRef getResolvedPath(unsigned FileNum) {
+ if (FileNum >= ResolvedPaths.size())
+ return StringRef();
+ return ResolvedPaths[FileNum];
+ }
+
+ /// Set the fully resolved path for the line-table's file \a FileNum
+ /// to \a Path.
+ void setResolvedPath(unsigned FileNum, StringRef Path) {
+ if (ResolvedPaths.size() <= FileNum)
+ ResolvedPaths.resize(FileNum + 1);
+ ResolvedPaths[FileNum] = Path;
+ }
+
+ MCSymbol *getLabelBegin() { return LabelBegin; }
+ void setLabelBegin(MCSymbol *S) { LabelBegin = S; }
+
+private:
+ DWARFUnit &OrigUnit;
+ unsigned ID;
+ std::vector<DIEInfo> Info; ///< DIE info indexed by DIE index.
+ Optional<BasicDIEUnit> NewUnit;
+ MCSymbol *LabelBegin = nullptr;
+
+ uint64_t StartOffset;
+ uint64_t NextUnitOffset;
+
+ uint64_t LowPc = std::numeric_limits<uint64_t>::max();
+ uint64_t HighPc = 0;
+
+ /// A list of attributes to fixup with the absolute offset of
+ /// a DIE in the debug_info section.
+ ///
+ /// The offsets for the attributes in this array couldn't be set while
+ /// cloning because, for cross-CU forward references, the target DIE's offset
+ /// isn't known when you emit the reference attribute.
+ std::vector<
+ std::tuple<DIE *, const CompileUnit *, DeclContext *, PatchLocation>>
+ ForwardDIEReferences;
+
+ FunctionIntervals::Allocator RangeAlloc;
+
+ /// The ranges in that interval map are the PC ranges for
+ /// functions in this unit, associated with the PC offset to apply
+ /// to the addresses to get the linked address.
+ FunctionIntervals Ranges;
+
+ /// The DW_AT_low_pc of each DW_TAG_label.
+ SmallDenseMap<uint64_t, uint64_t, 1> Labels;
+
+ /// DW_AT_ranges attributes to patch after we have gathered
+ /// all the unit's function addresses.
+ /// @{
+ std::vector<PatchLocation> RangeAttributes;
+ Optional<PatchLocation> UnitRangeAttribute;
+ /// @}
+
+ /// Location attributes that need to be transferred from the
+ /// original debug_loc section to the linked one. They are stored
+ /// along with the PC offset that is to be applied to their
+ /// function's address.
+ std::vector<std::pair<PatchLocation, int64_t>> LocationAttributes;
+
+ /// Accelerator entries for the unit, both for the pub*
+ /// sections and the apple* ones.
+ /// @{
+ std::vector<AccelInfo> Pubnames;
+ std::vector<AccelInfo> Pubtypes;
+ std::vector<AccelInfo> Namespaces;
+ std::vector<AccelInfo> ObjC;
+ /// @}
+
+ /// Cached resolved paths from the line table.
+ /// Note, the StringRefs here point into the intern (uniquing) string pool.
+ /// This means that a StringRef returned here doesn't need to then be uniqued
+ /// for the purposes of getting a unique address for each string.
+ std::vector<StringRef> ResolvedPaths;
+
+ /// Is this unit subject to the ODR rule?
+ bool HasODR;
+
+ /// Did a DIE actually contain a valid reloc?
+ bool HasInterestingContent;
+
+ /// The DW_AT_language of this unit.
+ uint16_t Language = 0;
+
+ /// If this is a Clang module, this holds the module's name.
+ std::string ClangModuleName;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H
diff --git a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
new file mode 100644
index 000000000000..db40254bf600
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
@@ -0,0 +1,169 @@
+//===- DWARFLinkerDeclContext.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DWARFLINKER_DWARFLINKERDECLCONTEXT_H
+#define LLVM_DWARFLINKER_DWARFLINKERDECLCONTEXT_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/NonRelocatableStringpool.h"
+#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+
+struct DeclMapInfo;
+
+/// Small helper that resolves and caches file paths. This helps reduce the
+/// number of calls to realpath, which is expensive. We assume the inputs are
+/// files, and cache the realpath of their parent. This way we can quickly
+/// resolve different files under the same path.
+class CachedPathResolver {
+public:
+ /// Resolve a path by calling realpath and cache its result. The returned
+ /// StringRef is interned in the given \p StringPool.
+ StringRef resolve(std::string Path, NonRelocatableStringpool &StringPool) {
+ StringRef FileName = sys::path::filename(Path);
+ SmallString<256> ParentPath = sys::path::parent_path(Path);
+
+ // If the ParentPath has not yet been resolved, resolve and cache it for
+ // future look-ups.
+ if (!ResolvedPaths.count(ParentPath)) {
+ SmallString<256> RealPath;
+ sys::fs::real_path(ParentPath, RealPath);
+ ResolvedPaths.insert({ParentPath, StringRef(RealPath).str()});
+ }
+
+ // Join the file name again with the resolved path.
+ SmallString<256> ResolvedPath(ResolvedPaths[ParentPath]);
+ sys::path::append(ResolvedPath, FileName);
+ return StringPool.internString(ResolvedPath);
+ }
+
+private:
+ StringMap<std::string> ResolvedPaths;
+};
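The caching idea stands on its own: canonicalize each parent directory once, then re-join file names. An illustrative stand-alone version using std::filesystem (not the class above, which interns into the string pool):

    #include <filesystem>
    #include <string>
    #include <system_error>
    #include <unordered_map>

    namespace fs = std::filesystem;

    // N files sharing a directory cost one expensive resolution + N cheap joins.
    struct PathCache {
      std::unordered_map<std::string, fs::path> Dirs;

      fs::path resolve(const fs::path &File) {
        std::string Parent = File.parent_path().string();
        auto It = Dirs.find(Parent);
        if (It == Dirs.end()) {
          std::error_code EC;
          It = Dirs.emplace(Parent, fs::weakly_canonical(Parent, EC)).first;
        }
        return It->second / File.filename();
      }
    };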
+
+/// A DeclContext is a named program scope that is used for ODR uniquing of
+/// types.
+///
+/// The set of DeclContext for the ODR-subject parts of a Dwarf link is
+/// expanded (and uniqued) with each new object file processed. We need to
+/// determine the context of each DIE in a linked object file to see if the
+/// corresponding type has already been emitted.
+///
+/// The contexts are conceptually organized as a tree (e.g. a function scope is
+/// contained in a namespace scope that contains other scopes), but
+/// storing/accessing them in an actual tree is too inefficient: we need to be
+/// able to very quickly query a context for a given child context by name.
+/// Storing a StringMap in each DeclContext would be too space inefficient.
+///
+/// The solution here is to give each DeclContext a link to its parent (this
+/// allows walking up the tree), but to query the existence of a specific
+/// DeclContext using a separate DenseMap keyed on the hash of the fully
+/// qualified name of the context.
+class DeclContext {
+public:
+ using Map = DenseSet<DeclContext *, DeclMapInfo>;
+
+ DeclContext() : DefinedInClangModule(0), Parent(*this) {}
+
+ DeclContext(unsigned Hash, uint32_t Line, uint32_t ByteSize, uint16_t Tag,
+ StringRef Name, StringRef File, const DeclContext &Parent,
+ DWARFDie LastSeenDIE = DWARFDie(), unsigned CUId = 0)
+ : QualifiedNameHash(Hash), Line(Line), ByteSize(ByteSize), Tag(Tag),
+ DefinedInClangModule(0), Name(Name), File(File), Parent(Parent),
+ LastSeenDIE(LastSeenDIE), LastSeenCompileUnitID(CUId) {}
+
+ uint32_t getQualifiedNameHash() const { return QualifiedNameHash; }
+
+ bool setLastSeenDIE(CompileUnit &U, const DWARFDie &Die);
+
+ uint32_t getCanonicalDIEOffset() const { return CanonicalDIEOffset; }
+ void setCanonicalDIEOffset(uint32_t Offset) { CanonicalDIEOffset = Offset; }
+
+ bool isDefinedInClangModule() const { return DefinedInClangModule; }
+ void setDefinedInClangModule(bool Val) { DefinedInClangModule = Val; }
+
+ uint16_t getTag() const { return Tag; }
+ StringRef getName() const { return Name; }
+
+private:
+ friend DeclMapInfo;
+
+ unsigned QualifiedNameHash = 0;
+ uint32_t Line = 0;
+ uint32_t ByteSize = 0;
+ uint16_t Tag = dwarf::DW_TAG_compile_unit;
+ unsigned DefinedInClangModule : 1;
+ StringRef Name;
+ StringRef File;
+ const DeclContext &Parent;
+ DWARFDie LastSeenDIE;
+ uint32_t LastSeenCompileUnitID = 0;
+ uint32_t CanonicalDIEOffset = 0;
+};
+
+/// This class gives a tree-like API to the DenseMap that stores the
+/// DeclContext objects. It holds the BumpPtrAllocator where these objects will
+/// be allocated.
+class DeclContextTree {
+public:
+ /// Get the child of \a Context described by \a DIE in \a Unit. The
+ /// required strings will be interned in \a StringPool.
+ /// \returns The child DeclContext along with one bit that is set if
+ /// this context is invalid.
+ ///
+ /// An invalid context means it shouldn't be considered for uniquing, but its
+ /// not returning null, because some children of that context might be
+ /// uniquing candidates.
+ ///
+ /// FIXME: The invalid bit along the return value is to emulate some
+ /// dsymutil-classic functionality.
+ PointerIntPair<DeclContext *, 1>
+ getChildDeclContext(DeclContext &Context, const DWARFDie &DIE,
+ CompileUnit &Unit, UniquingStringPool &StringPool,
+ bool InClangModule);
+
+ DeclContext &getRoot() { return Root; }
+
+private:
+ BumpPtrAllocator Allocator;
+ DeclContext Root;
+ DeclContext::Map Contexts;
+
+ /// Cache resolved paths from the line table.
+ CachedPathResolver PathResolver;
+};
+
+/// Info type for the DenseMap storing the DeclContext pointers.
+struct DeclMapInfo : private DenseMapInfo<DeclContext *> {
+ using DenseMapInfo<DeclContext *>::getEmptyKey;
+ using DenseMapInfo<DeclContext *>::getTombstoneKey;
+
+ static unsigned getHashValue(const DeclContext *Ctxt) {
+ return Ctxt->QualifiedNameHash;
+ }
+
+ static bool isEqual(const DeclContext *LHS, const DeclContext *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return RHS == LHS;
+ return LHS->QualifiedNameHash == RHS->QualifiedNameHash &&
+ LHS->Line == RHS->Line && LHS->ByteSize == RHS->ByteSize &&
+ LHS->Name.data() == RHS->Name.data() &&
+ LHS->File.data() == RHS->File.data() &&
+ LHS->Parent.QualifiedNameHash == RHS->Parent.QualifiedNameHash;
+ }
+};
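isEqual above compares Name.data() and File.data() as raw pointers, which is sound only because every name is interned in a single pool: equal strings share one buffer, so pointer identity substitutes for content comparison. A stand-alone sketch of that invariant:

    #include <cassert>
    #include <string>
    #include <unordered_set>

    // Toy intern pool: each distinct string is stored exactly once.
    struct Pool {
      std::unordered_set<std::string> Strings;
      const char *intern(const std::string &S) {
        return Strings.insert(S).first->c_str();
      }
    };

    int main() {
      Pool P;
      const char *A = P.intern("std::vector");
      const char *B = P.intern("std::vector");
      assert(A == B); // same buffer, so a pointer compare replaces strcmp
    }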
+
+} // end namespace llvm
+
+#endif // LLVM_DWARFLINKER_DWARFLINKERDECLCONTEXT_H
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
index a43ce20edde6..3b103c227708 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
@@ -43,7 +43,7 @@ class GlobalTypeTableBuilder : public TypeCollection {
/// Contains a list of all records indexed by TypeIndex.toArrayIndex().
SmallVector<ArrayRef<uint8_t>, 2> SeenRecords;
- /// Contains a list of all hash values inexed by TypeIndex.toArrayIndex().
+ /// Contains a list of all hash values indexed by TypeIndex.toArrayIndex().
SmallVector<GloballyHashedType, 2> SeenHashes;
public:
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index 1aafa3ca9f1d..1fcef9dd06c8 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -79,7 +79,7 @@ public:
uint32_t Offset = 0;
uint16_t Segment = 0;
uint16_t Length = 0;
- ThunkOrdinal Thunk;
+ ThunkOrdinal Thunk = ThunkOrdinal::Standard;
StringRef Name;
ArrayRef<uint8_t> VariantData;
@@ -159,7 +159,7 @@ public:
struct DecodedAnnotation {
StringRef Name;
ArrayRef<uint8_t> Bytes;
- BinaryAnnotationsOpCode OpCode;
+ BinaryAnnotationsOpCode OpCode = BinaryAnnotationsOpCode::Invalid;
uint32_t U1 = 0;
uint32_t U2 = 0;
int32_t S1 = 0;
@@ -407,21 +407,21 @@ public:
: SymbolRecord(SymbolRecordKind::LocalSym), RecordOffset(RecordOffset) {}
TypeIndex Type;
- LocalSymFlags Flags;
+ LocalSymFlags Flags = LocalSymFlags::None;
StringRef Name;
uint32_t RecordOffset = 0;
};
struct LocalVariableAddrRange {
- uint32_t OffsetStart;
- uint16_t ISectStart;
- uint16_t Range;
+ uint32_t OffsetStart = 0;
+ uint16_t ISectStart = 0;
+ uint16_t Range = 0;
};
struct LocalVariableAddrGap {
- uint16_t GapStartOffset;
- uint16_t Range;
+ uint16_t GapStartOffset = 0;
+ uint16_t Range = 0;
};
enum : uint16_t { MaxDefRange = 0xf000 };
@@ -628,7 +628,7 @@ public:
uint32_t CodeOffset = 0;
uint16_t Segment = 0;
- ProcSymFlags Flags;
+ ProcSymFlags Flags = ProcSymFlags::None;
StringRef Name;
uint32_t RecordOffset = 0;
@@ -670,7 +670,7 @@ public:
: SymbolRecord(SymbolRecordKind::ExportSym), RecordOffset(RecordOffset) {}
uint16_t Ordinal = 0;
- ExportFlags Flags;
+ ExportFlags Flags = ExportFlags::None;
StringRef Name;
uint32_t RecordOffset = 0;
@@ -686,7 +686,7 @@ public:
TypeIndex Index;
uint32_t ModFilenameOffset = 0;
- LocalSymFlags Flags;
+ LocalSymFlags Flags = LocalSymFlags::None;
StringRef Name;
uint32_t RecordOffset = 0;
@@ -700,7 +700,7 @@ public:
: SymbolRecord(SymbolRecordKind::Compile2Sym),
RecordOffset(RecordOffset) {}
- CompileSym2Flags Flags;
+ CompileSym2Flags Flags = CompileSym2Flags::None;
CPUType Machine;
uint16_t VersionFrontendMajor = 0;
uint16_t VersionFrontendMinor = 0;
@@ -726,7 +726,7 @@ public:
: SymbolRecord(SymbolRecordKind::Compile3Sym),
RecordOffset(RecordOffset) {}
- CompileSym3Flags Flags;
+ CompileSym3Flags Flags = CompileSym3Flags::None;
CPUType Machine;
uint16_t VersionFrontendMajor = 0;
uint16_t VersionFrontendMinor = 0;
@@ -771,7 +771,7 @@ public:
uint32_t BytesOfCalleeSavedRegisters = 0;
uint32_t OffsetOfExceptionHandler = 0;
uint16_t SectionIdOfExceptionHandler = 0;
- FrameProcedureOptions Flags;
+ FrameProcedureOptions Flags = FrameProcedureOptions::None;
/// Extract the register this frame uses to refer to local variables.
RegisterId getLocalFramePtrReg(CPUType CPU) const {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
index b147dd6c3d05..35f5c0561138 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
@@ -144,7 +144,7 @@ public:
ModifierOptions getModifiers() const { return Modifiers; }
TypeIndex ModifiedType;
- ModifierOptions Modifiers;
+ ModifierOptions Modifiers = ModifierOptions::None;
};
// LF_PROCEDURE
@@ -168,7 +168,7 @@ public:
TypeIndex ReturnType;
CallingConvention CallConv;
FunctionOptions Options;
- uint16_t ParameterCount;
+ uint16_t ParameterCount = 0;
TypeIndex ArgumentList;
};
@@ -202,9 +202,9 @@ public:
TypeIndex ThisType;
CallingConvention CallConv;
FunctionOptions Options;
- uint16_t ParameterCount;
+ uint16_t ParameterCount = 0;
TypeIndex ArgumentList;
- int32_t ThisPointerAdjustment;
+ int32_t ThisPointerAdjustment = 0;
};
// LF_LABEL
@@ -351,7 +351,7 @@ public:
}
TypeIndex ReferentType;
- uint32_t Attrs;
+ uint32_t Attrs = 0;
Optional<MemberPointerInfo> MemberInfo;
void setAttrs(PointerKind PK, PointerMode PM, PointerOptions PO,
@@ -414,7 +414,7 @@ public:
TypeIndex ElementType;
TypeIndex IndexType;
- uint64_t Size;
+ uint64_t Size = 0;
StringRef Name;
};
@@ -459,7 +459,7 @@ public:
StringRef getName() const { return Name; }
StringRef getUniqueName() const { return UniqueName; }
- uint16_t MemberCount;
+ uint16_t MemberCount = 0;
ClassOptions Options;
TypeIndex FieldList;
StringRef Name;
@@ -496,7 +496,7 @@ public:
TypeIndex DerivationList;
TypeIndex VTableShape;
- uint64_t Size;
+ uint64_t Size = 0;
};
// LF_UNION
@@ -517,7 +517,7 @@ struct UnionRecord : public TagRecord {
uint64_t getSize() const { return Size; }
- uint64_t Size;
+ uint64_t Size = 0;
};
// LF_ENUM
@@ -550,8 +550,8 @@ public:
uint8_t getBitSize() const { return BitSize; }
TypeIndex Type;
- uint8_t BitSize;
- uint8_t BitOffset;
+ uint8_t BitSize = 0;
+ uint8_t BitOffset = 0;
};
// LF_VTSHAPE
@@ -592,7 +592,7 @@ public:
StringRef getName() const { return Name; }
GUID Guid;
- uint32_t Age;
+ uint32_t Age = 0;
StringRef Name;
};
@@ -644,7 +644,7 @@ public:
TypeIndex UDT;
TypeIndex SourceFile;
- uint32_t LineNumber;
+ uint32_t LineNumber = 0;
};
// LF_UDT_MOD_SRC_LINE
@@ -664,8 +664,8 @@ public:
TypeIndex UDT;
TypeIndex SourceFile;
- uint32_t LineNumber;
- uint16_t Module;
+ uint32_t LineNumber = 0;
+ uint16_t Module = 0;
};
// LF_BUILDINFO
@@ -717,7 +717,7 @@ public:
TypeIndex CompleteClass;
TypeIndex OverriddenVFTable;
- uint32_t VFPtrOffset;
+ uint32_t VFPtrOffset = 0;
std::vector<StringRef> MethodNames;
};
@@ -749,7 +749,7 @@ public:
TypeIndex Type;
MemberAttributes Attrs;
- int32_t VFTableOffset;
+ int32_t VFTableOffset = 0;
StringRef Name;
};
@@ -780,7 +780,7 @@ public:
TypeIndex getMethodList() const { return MethodList; }
StringRef getName() const { return Name; }
- uint16_t NumOverloads;
+ uint16_t NumOverloads = 0;
TypeIndex MethodList;
StringRef Name;
};
@@ -806,7 +806,7 @@ public:
MemberAttributes Attrs;
TypeIndex Type;
- uint64_t FieldOffset;
+ uint64_t FieldOffset = 0;
StringRef Name;
};
@@ -883,7 +883,7 @@ public:
MemberAttributes Attrs;
TypeIndex Type;
- uint64_t Offset;
+ uint64_t Offset = 0;
};
// LF_VBCLASS, LF_IVBCLASS
@@ -911,8 +911,8 @@ public:
MemberAttributes Attrs;
TypeIndex BaseType;
TypeIndex VBPtrType;
- uint64_t VBPtrOffset;
- uint64_t VTableIndex;
+ uint64_t VBPtrOffset = 0;
+ uint64_t VTableIndex = 0;
};
/// LF_INDEX - Used to chain two large LF_FIELDLIST or LF_METHODLIST records
@@ -941,9 +941,9 @@ public:
uint32_t getSignature() const { return Signature; }
StringRef getPrecompFilePath() const { return PrecompFilePath; }
- uint32_t StartTypeIndex;
- uint32_t TypesCount;
- uint32_t Signature;
+ uint32_t StartTypeIndex = 0;
+ uint32_t TypesCount = 0;
+ uint32_t Signature = 0;
StringRef PrecompFilePath;
};
@@ -955,7 +955,7 @@ public:
uint32_t getSignature() const { return Signature; }
- uint32_t Signature;
+ uint32_t Signature = 0;
};
} // end namespace codeview
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
index e84704d99ddc..19492b93681c 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
@@ -12,16 +12,35 @@
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
namespace llvm {
- namespace codeview {
- /// Given an arbitrary codeview type, determine if it is an LF_STRUCTURE,
- /// LF_CLASS, LF_INTERFACE, LF_UNION, or LF_ENUM with the forward ref class
- /// option.
- bool isUdtForwardRef(CVType CVT);
-
- /// Given a CVType which is assumed to be an LF_MODIFIER, return the
- /// TypeIndex of the type that the LF_MODIFIER modifies.
- TypeIndex getModifiedType(const CVType &CVT);
+namespace codeview {
+
+/// Given an arbitrary codeview type, determine if it is an LF_STRUCTURE,
+/// LF_CLASS, LF_INTERFACE, LF_UNION, or LF_ENUM with the forward ref class
+/// option.
+bool isUdtForwardRef(CVType CVT);
+
+/// Given a CVType which is assumed to be an LF_MODIFIER, return the
+/// TypeIndex of the type that the LF_MODIFIER modifies.
+TypeIndex getModifiedType(const CVType &CVT);
+
+/// Return true if this record should be in the IPI stream of a PDB. In an
+/// object file, these record kinds will appear mixed into the .debug$T section.
+inline bool isIdRecord(TypeLeafKind K) {
+ switch (K) {
+ case TypeLeafKind::LF_FUNC_ID:
+ case TypeLeafKind::LF_MFUNC_ID:
+ case TypeLeafKind::LF_STRING_ID:
+ case TypeLeafKind::LF_SUBSTR_LIST:
+ case TypeLeafKind::LF_BUILDINFO:
+ case TypeLeafKind::LF_UDT_SRC_LINE:
+ case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
+ return true;
+ default:
+ return false;
}
}
+} // namespace codeview
+} // namespace llvm
+
#endif
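The new isIdRecord() helper makes the TPI/IPI split explicit: the listed leaf kinds belong in a PDB's IPI stream, while everything else stays in the TPI stream. A minimal sketch of how a type-stream consumer might use it (the routing function and bucket vectors are hypothetical):

    // Sketch: partition a mixed .debug$T record stream into TPI and IPI sets.
    #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
    #include <vector>

    using namespace llvm::codeview;

    void routeRecord(const CVType &Rec, std::vector<CVType> &Tpi,
                     std::vector<CVType> &Ipi) {
      if (isIdRecord(Rec.kind()))
        Ipi.push_back(Rec); // LF_FUNC_ID, LF_STRING_ID, LF_BUILDINFO, ...
      else
        Tpi.push_back(Rec); // ordinary type records stay in TPI
    }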
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
index 2d5f9f3c7658..7a728c2508cc 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
@@ -17,6 +17,7 @@
namespace llvm {
class raw_ostream;
+class DWARFObject;
struct DWARFAddressRange {
uint64_t LowPC;
@@ -26,7 +27,9 @@ struct DWARFAddressRange {
DWARFAddressRange() = default;
/// Used for unit testing.
- DWARFAddressRange(uint64_t LowPC, uint64_t HighPC, uint64_t SectionIndex = 0)
+ DWARFAddressRange(
+ uint64_t LowPC, uint64_t HighPC,
+ uint64_t SectionIndex = object::SectionedAddress::UndefSection)
: LowPC(LowPC), HighPC(HighPC), SectionIndex(SectionIndex) {}
/// Returns true if LowPC is smaller or equal to HighPC. This accounts for
@@ -42,15 +45,20 @@ struct DWARFAddressRange {
return LowPC < RHS.HighPC && RHS.LowPC < HighPC;
}
- void dump(raw_ostream &OS, uint32_t AddressSize,
- DIDumpOptions DumpOpts = {}) const;
+ void dump(raw_ostream &OS, uint32_t AddressSize, DIDumpOptions DumpOpts = {},
+ const DWARFObject *Obj = nullptr) const;
};
-static inline bool operator<(const DWARFAddressRange &LHS,
- const DWARFAddressRange &RHS) {
+inline bool operator<(const DWARFAddressRange &LHS,
+ const DWARFAddressRange &RHS) {
return std::tie(LHS.LowPC, LHS.HighPC) < std::tie(RHS.LowPC, RHS.HighPC);
}
+inline bool operator==(const DWARFAddressRange &LHS,
+ const DWARFAddressRange &RHS) {
+ return std::tie(LHS.LowPC, LHS.HighPC) == std::tie(RHS.LowPC, RHS.HighPC);
+}
+
raw_ostream &operator<<(raw_ostream &OS, const DWARFAddressRange &R);
/// DWARFAddressRangesVector - represents a set of absolute address ranges.
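Note that the new operator== mirrors the existing operator<: both compare only the PC bounds, so SectionIndex does not participate. A minimal sketch of the consequence:

    // Sketch: two ranges that differ only in section index compare equal.
    #include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
    #include <cassert>

    void equalityIgnoresSection() {
      llvm::DWARFAddressRange A(0x1000, 0x2000, /*SectionIndex=*/0);
      llvm::DWARFAddressRange B(0x1000, 0x2000, /*SectionIndex=*/1);
      assert(A == B && !(A < B) && !(B < A));
    }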
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index fae163622edb..a2a10d23433f 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -75,7 +75,7 @@ class DWARFContext : public DIContext {
DWARFUnitVector DWOUnits;
std::unique_ptr<DWARFDebugAbbrev> AbbrevDWO;
- std::unique_ptr<DWARFDebugLoclists> LocDWO;
+ std::unique_ptr<DWARFDebugMacro> MacroDWO;
/// The maximum DWARF version of all units.
unsigned MaxVersion = 0;
@@ -260,9 +260,6 @@ public:
/// Get a pointer to the parsed dwo abbreviations object.
const DWARFDebugAbbrev *getDebugAbbrevDWO();
- /// Get a pointer to the parsed DebugLoc object.
- const DWARFDebugLoclists *getDebugLocDWO();
-
/// Get a pointer to the parsed DebugAranges object.
const DWARFDebugAranges *getDebugAranges();
@@ -275,6 +272,9 @@ public:
/// Get a pointer to the parsed DebugMacro object.
const DWARFDebugMacro *getDebugMacro();
+ /// Get a pointer to the parsed DebugMacroDWO object.
+ const DWARFDebugMacro *getDebugMacroDWO();
+
/// Get a reference to the parsed accelerator table object.
const DWARFDebugNames &getDebugNames();
@@ -298,7 +298,7 @@ public:
/// Report any recoverable parsing problems using the callback.
Expected<const DWARFDebugLine::LineTable *>
getLineTableForUnit(DWARFUnit *U,
- std::function<void(Error)> RecoverableErrorCallback);
+ function_ref<void(Error)> RecoverableErrorCallback);
DataExtractor getStringExtractor() const {
return DataExtractor(DObj->getStrSection(), false, 0);
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
index 980724c525d2..6f7ddb2ef421 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
@@ -55,8 +55,6 @@ public:
/// reflect the absolute address of this pointer.
Optional<uint64_t> getEncodedPointer(uint64_t *Offset, uint8_t Encoding,
uint64_t AbsPosOffset = 0) const;
-
- size_t size() const { return Section == nullptr ? 0 : Section->Data.size(); }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
index 172f1d2c9dbe..89b15d263580 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
@@ -21,7 +21,7 @@ class DWARFContext;
class DWARFDebugAranges {
public:
void generate(DWARFContext *CTX);
- uint32_t findAddress(uint64_t Address) const;
+ uint64_t findAddress(uint64_t Address) const;
private:
void clear();
@@ -33,7 +33,7 @@ private:
struct Range {
explicit Range(uint64_t LowPC = -1ULL, uint64_t HighPC = -1ULL,
- uint32_t CUOffset = -1U)
+ uint64_t CUOffset = -1ULL)
: LowPC(LowPC), Length(HighPC - LowPC), CUOffset(CUOffset) {}
void setHighPC(uint64_t HighPC) {
@@ -54,8 +54,8 @@ private:
}
uint64_t LowPC; /// Start of address range.
- uint32_t Length; /// End of address range (not including this address).
- uint32_t CUOffset; /// Offset of the compile unit or die.
+ uint64_t Length; /// End of address range (not including this address).
+ uint64_t CUOffset; /// Offset of the compile unit or die.
};
struct RangeEndpoint {
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index c2be8304ad84..d5b6c72c0461 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -280,11 +280,10 @@ public:
void clear();
/// Parse prologue and all rows.
- Error parse(
- DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
- const DWARFContext &Ctx, const DWARFUnit *U,
- std::function<void(Error)> RecoverableErrorCallback,
- raw_ostream *OS = nullptr);
+ Error parse(DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
+ const DWARFContext &Ctx, const DWARFUnit *U,
+ function_ref<void(Error)> RecoverableErrorCallback,
+ raw_ostream *OS = nullptr);
using RowVector = std::vector<Row>;
using RowIter = RowVector::const_iterator;
@@ -309,10 +308,10 @@ public:
};
const LineTable *getLineTable(uint64_t Offset) const;
- Expected<const LineTable *> getOrParseLineTable(
- DWARFDataExtractor &DebugLineData, uint64_t Offset,
- const DWARFContext &Ctx, const DWARFUnit *U,
- std::function<void(Error)> RecoverableErrorCallback);
+ Expected<const LineTable *>
+ getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset,
+ const DWARFContext &Ctx, const DWARFUnit *U,
+ function_ref<void(Error)> RecoverableErrorCallback);
/// Helper to allow for parsing of an entire .debug_line section in sequence.
class SectionParser {
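Switching the recoverable-error callback from std::function to function_ref avoids a heap-allocating copy of the callable; function_ref is a non-owning view that is only valid for the duration of the call, which is all these parsers need. A minimal sketch of a caller, with the extractor, context, and unit assumed to come from elsewhere:

    // Sketch: a plain lambda now binds to the callback with no allocation.
    #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"

    using namespace llvm;

    const DWARFDebugLine::LineTable *
    getTableOrNull(DWARFDebugLine &Line, DWARFDataExtractor &Data,
                   uint64_t Offset, const DWARFContext &Ctx,
                   const DWARFUnit *U) {
      Expected<const DWARFDebugLine::LineTable *> LT =
          Line.getOrParseLineTable(Data, Offset, Ctx, U, [](Error E) {
            consumeError(std::move(E)); // recoverable; parsing continues
          });
      if (!LT) {
        consumeError(LT.takeError());
        return nullptr;
      }
      return *LT;
    }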
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index c79d98e34f6e..3b141304f85f 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include <cstdint>
@@ -21,30 +22,73 @@ class DWARFUnit;
class MCRegisterInfo;
class raw_ostream;
-class DWARFDebugLoc {
+/// A single location within a location list. Entries are stored in the DWARF5
+/// form even if they originally come from a DWARF<=4 location list.
+struct DWARFLocationEntry {
+ /// The entry kind (DW_LLE_***).
+ uint8_t Kind;
+
+ /// The first value of the location entry (if applicable).
+ uint64_t Value0;
+
+ /// The second value of the location entry (if applicable).
+ uint64_t Value1;
+
+ /// The index of the section this entry is relative to (if applicable).
+ uint64_t SectionIndex;
+
+ /// The location expression itself (if applicable).
+ SmallVector<uint8_t, 4> Loc;
+};
+
+/// An abstract base class for various kinds of location tables (.debug_loc,
+/// .debug_loclists, and their dwo variants).
+class DWARFLocationTable {
public:
- /// A single location within a location list.
- struct Entry {
- /// The beginning address of the instruction range.
- uint64_t Begin;
- /// The ending address of the instruction range.
- uint64_t End;
- /// The location of the variable within the specified range.
- SmallVector<uint8_t, 4> Loc;
- };
+ DWARFLocationTable(DWARFDataExtractor Data) : Data(std::move(Data)) {}
+ virtual ~DWARFLocationTable() = default;
+
+ /// Call the user-provided callback for each entry (including the end-of-list
+ /// entry) in the location list starting at \p Offset. The callback can return
+ /// false to terminate the iteration early. Returns an error if it was unable
+ /// to parse the entire location list correctly. Upon successful termination
+ /// \p Offset will be updated to point past the end of the list.
+ virtual Error visitLocationList(
+ uint64_t *Offset,
+ function_ref<bool(const DWARFLocationEntry &)> Callback) const = 0;
+
+ /// Dump the location list at the given \p Offset. The function returns true
+ /// iff it has successfully reached the end of the list. This means that one
+ /// can attempt to parse another list after the current one (\p Offset will be
+ /// updated to point past the end of the current list).
+ bool dumpLocationList(uint64_t *Offset, raw_ostream &OS,
+ Optional<object::SectionedAddress> BaseAddr,
+ const MCRegisterInfo *MRI, const DWARFObject &Obj,
+ DWARFUnit *U, DIDumpOptions DumpOpts,
+ unsigned Indent) const;
+
+ Error visitAbsoluteLocationList(
+ uint64_t Offset, Optional<object::SectionedAddress> BaseAddr,
+ std::function<Optional<object::SectionedAddress>(uint32_t)> LookupAddr,
+ function_ref<bool(Expected<DWARFLocationExpression>)> Callback) const;
+
+protected:
+ DWARFDataExtractor Data;
+
+ virtual void dumpRawEntry(const DWARFLocationEntry &Entry, raw_ostream &OS,
+ unsigned Indent, DIDumpOptions DumpOpts,
+ const DWARFObject &Obj) const = 0;
+};
+class DWARFDebugLoc final : public DWARFLocationTable {
+public:
/// A list of locations that contain one variable.
struct LocationList {
/// The beginning offset where this location list is stored in the debug_loc
/// section.
uint64_t Offset;
/// All the locations in which the variable is stored.
- SmallVector<Entry, 2> Entries;
- /// Dump this list on OS.
- void dump(raw_ostream &OS, uint64_t BaseAddress, bool IsLittleEndian,
- unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U,
- DIDumpOptions DumpOpts,
- unsigned Indent) const;
+ SmallVector<DWARFLocationEntry, 2> Entries;
};
private:
@@ -54,67 +98,46 @@ private:
/// the locations in which the variable is stored.
LocationLists Locations;
- unsigned AddressSize;
-
- bool IsLittleEndian;
-
public:
+ DWARFDebugLoc(DWARFDataExtractor Data)
+ : DWARFLocationTable(std::move(Data)) {}
+
/// Print the location lists found within the debug_loc section.
- void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo, DIDumpOptions DumpOpts,
+ void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo,
+ const DWARFObject &Obj, DIDumpOptions DumpOpts,
Optional<uint64_t> Offset) const;
- /// Parse the debug_loc section accessible via the 'data' parameter using the
- /// address size also given in 'data' to interpret the address ranges.
- void parse(const DWARFDataExtractor &data);
-
- /// Return the location list at the given offset or nullptr.
- LocationList const *getLocationListAtOffset(uint64_t Offset) const;
+ Error visitLocationList(
+ uint64_t *Offset,
+ function_ref<bool(const DWARFLocationEntry &)> Callback) const override;
- Expected<LocationList>
- parseOneLocationList(const DWARFDataExtractor &Data, uint64_t *Offset);
+protected:
+ void dumpRawEntry(const DWARFLocationEntry &Entry, raw_ostream &OS,
+ unsigned Indent, DIDumpOptions DumpOpts,
+ const DWARFObject &Obj) const override;
};
-class DWARFDebugLoclists {
+class DWARFDebugLoclists final : public DWARFLocationTable {
public:
- struct Entry {
- uint8_t Kind;
- uint64_t Offset;
- uint64_t Value0;
- uint64_t Value1;
- SmallVector<uint8_t, 4> Loc;
- void dump(raw_ostream &OS, uint64_t &BaseAddr, bool IsLittleEndian,
- unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U,
- DIDumpOptions DumpOpts, unsigned Indent, size_t MaxEncodingStringLength) const;
- };
+ DWARFDebugLoclists(DWARFDataExtractor Data, uint16_t Version)
+ : DWARFLocationTable(std::move(Data)), Version(Version) {}
- struct LocationList {
- uint64_t Offset;
- SmallVector<Entry, 2> Entries;
- void dump(raw_ostream &OS, uint64_t BaseAddr, bool IsLittleEndian,
- unsigned AddressSize, const MCRegisterInfo *RegInfo,
- DWARFUnit *U, DIDumpOptions DumpOpts, unsigned Indent) const;
- };
+ Error visitLocationList(
+ uint64_t *Offset,
+ function_ref<bool(const DWARFLocationEntry &)> Callback) const override;
-private:
- using LocationLists = SmallVector<LocationList, 4>;
+ /// Dump all location lists within the given range.
+ void dumpRange(uint64_t StartOffset, uint64_t Size, raw_ostream &OS,
+ const MCRegisterInfo *MRI, const DWARFObject &Obj,
+ DIDumpOptions DumpOpts);
- LocationLists Locations;
+protected:
+ void dumpRawEntry(const DWARFLocationEntry &Entry, raw_ostream &OS,
+ unsigned Indent, DIDumpOptions DumpOpts,
+ const DWARFObject &Obj) const override;
- unsigned AddressSize;
-
- bool IsLittleEndian;
-
-public:
- void parse(DataExtractor data, uint64_t Offset, uint64_t EndOffset, uint16_t Version);
- void dump(raw_ostream &OS, uint64_t BaseAddr, const MCRegisterInfo *RegInfo,
- DIDumpOptions DumpOpts, Optional<uint64_t> Offset) const;
-
- /// Return the location list at the given offset or nullptr.
- LocationList const *getLocationListAtOffset(uint64_t Offset) const;
-
- static Expected<LocationList> parseOneLocationList(const DataExtractor &Data,
- uint64_t *Offset,
- unsigned Version);
+private:
+ uint16_t Version;
};
} // end namespace llvm
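The restructuring above replaces two parallel container classes with a single DWARFLocationTable interface whose visitLocationList() streams DWARF5-shaped entries to a callback, letting .debug_loc and .debug_loclists share dumping and resolution code. A minimal sketch against the interface exactly as declared above:

    // Sketch: count the entries of one location list, including the
    // terminating end-of-list entry.
    #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"

    using namespace llvm;

    Expected<unsigned> countEntries(const DWARFLocationTable &Table,
                                    uint64_t Offset) {
      unsigned N = 0;
      if (Error E = Table.visitLocationList(
              &Offset, [&](const DWARFLocationEntry &) {
                ++N;
                return true; // keep iterating
              }))
        return std::move(E);
      return N; // Offset now points past the end of the list
    }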
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
index a6c125990ca7..7880bcdf6881 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
@@ -42,7 +42,7 @@ class DWARFDebugMacro {
using MacroList = SmallVector<Entry, 4>;
/// A list of all the macro entries in the debug_macinfo section.
- MacroList Macros;
+ std::vector<MacroList> MacroLists;
public:
DWARFDebugMacro() = default;
@@ -54,7 +54,7 @@ public:
void parse(DataExtractor data);
/// Return whether the section has any entries.
- bool empty() const { return Macros.empty(); }
+ bool empty() const { return MacroLists.empty(); }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
index 952c41e188c7..88e5432851d6 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
@@ -47,6 +47,12 @@ class DWARFDebugRnglist : public DWARFListType<RangeListEntry> {
public:
/// Build a DWARFAddressRangesVector from a rangelist.
DWARFAddressRangesVector
+ getAbsoluteRanges(Optional<object::SectionedAddress> BaseAddr,
+ function_ref<Optional<object::SectionedAddress>(uint32_t)>
+ LookupPooledAddress) const;
+
+ /// Build a DWARFAddressRangesVector from a rangelist.
+ DWARFAddressRangesVector
getAbsoluteRanges(llvm::Optional<object::SectionedAddress> BaseAddr,
DWARFUnit &U) const;
};
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
index f7f08b4a499d..158bd82edee0 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -18,6 +18,7 @@
#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
#include "llvm/DebugInfo/DWARF/DWARFAttribute.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -188,6 +189,7 @@ public:
///
/// \returns an optional absolute section offset value for the attribute.
Optional<uint64_t> getRangesBaseAttribute() const;
+ Optional<uint64_t> getLocBaseAttribute() const;
/// Get the DW_AT_high_pc attribute value as an address.
///
@@ -230,6 +232,9 @@ public:
bool addressRangeContainsAddress(const uint64_t Address) const;
+ Expected<DWARFLocationExpressionsVector>
+ getLocations(dwarf::Attribute Attr) const;
+
/// If a DIE represents a subprogram (or inlined subroutine), returns its
/// mangled name (or short name, if mangled is missing). This name may be
/// fetched from specification or abstract origin for this subprogram.
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFLocationExpression.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFLocationExpression.h
new file mode 100644
index 000000000000..35aa1a78e129
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFLocationExpression.h
@@ -0,0 +1,49 @@
+//===- DWARFLocationExpression.h --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARF_DWARFLOCATIONEXPRESSION_H
+#define LLVM_DEBUGINFO_DWARF_DWARFLOCATIONEXPRESSION_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
+
+namespace llvm {
+
+class raw_ostream;
+
+/// Represents a single DWARF expression, whose value is location-dependent.
+/// Typically used in DW_AT_location attributes to describe the location of
+/// objects.
+struct DWARFLocationExpression {
+ /// The address range in which this expression is valid. None denotes a
+ /// default entry which is valid in addresses not covered by other location
+ /// expressions, or everywhere if there are no other expressions.
+ Optional<DWARFAddressRange> Range;
+
+ /// The expression itself.
+ SmallVector<uint8_t, 4> Expr;
+};
+
+inline bool operator==(const DWARFLocationExpression &L,
+ const DWARFLocationExpression &R) {
+ return L.Range == R.Range && L.Expr == R.Expr;
+}
+
+inline bool operator!=(const DWARFLocationExpression &L,
+ const DWARFLocationExpression &R) {
+ return !(L == R);
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const DWARFLocationExpression &Loc);
+
+/// Represents a set of absolute location expressions.
+using DWARFLocationExpressionsVector = std::vector<DWARFLocationExpression>;
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_DWARF_DWARFLOCATIONEXPRESSION_H
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h
index 88fe3f434edc..fbcde7d7cd78 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h
@@ -48,6 +48,7 @@ public:
virtual const DWARFSection &getRangesSection() const { return Dummy; }
virtual const DWARFSection &getRnglistsSection() const { return Dummy; }
virtual StringRef getMacinfoSection() const { return ""; }
+ virtual StringRef getMacinfoDWOSection() const { return ""; }
virtual const DWARFSection &getPubnamesSection() const { return Dummy; }
virtual const DWARFSection &getPubtypesSection() const { return Dummy; }
virtual const DWARFSection &getGnuPubnamesSection() const { return Dummy; }
@@ -60,6 +61,7 @@ public:
virtual StringRef getAbbrevDWOSection() const { return ""; }
virtual const DWARFSection &getLineDWOSection() const { return Dummy; }
virtual const DWARFSection &getLocDWOSection() const { return Dummy; }
+ virtual const DWARFSection &getLoclistsDWOSection() const { return Dummy; }
virtual StringRef getStrDWOSection() const { return ""; }
virtual const DWARFSection &getStrOffsetsDWOSection() const {
return Dummy;
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 51de114a3506..b2ddb7e36b0c 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
@@ -199,17 +200,17 @@ class DWARFUnit {
const DWARFDebugAbbrev *Abbrev;
const DWARFSection *RangeSection;
uint64_t RangeSectionBase;
- /// We either keep track of the location list section or its data, depending
- /// on whether we are handling a split DWARF section or not.
- union {
- const DWARFSection *LocSection;
- StringRef LocSectionData;
- };
+ const DWARFSection *LocSection;
+ uint64_t LocSectionBase;
+
+ /// Location table of this unit.
+ std::unique_ptr<DWARFLocationTable> LocTable;
+
const DWARFSection &LineSection;
StringRef StringSection;
const DWARFSection &StringOffsetSection;
const DWARFSection *AddrOffsetSection;
- uint32_t AddrOffsetSectionBase = 0;
+ Optional<uint64_t> AddrOffsetSectionBase;
bool isLittleEndian;
bool IsDWO;
const DWARFUnitVector &UnitVector;
@@ -220,6 +221,7 @@ class DWARFUnit {
/// A table of range lists (DWARF v5 and later).
Optional<DWARFDebugRnglistTable> RngListTable;
+ Optional<DWARFListTableHeader> LoclistTableHeader;
mutable const DWARFAbbreviationDeclarationSet *Abbrevs;
llvm::Optional<object::SectionedAddress> BaseAddr;
@@ -274,8 +276,6 @@ public:
bool isDWOUnit() const { return IsDWO; }
DWARFContext& getContext() const { return Context; }
const DWARFSection &getInfoSection() const { return InfoSection; }
- const DWARFSection *getLocSection() const { return LocSection; }
- StringRef getLocSectionData() const { return LocSectionData; }
uint64_t getOffset() const { return Header.getOffset(); }
const dwarf::FormParams &getFormParams() const {
return Header.getFormParams();
@@ -308,6 +308,14 @@ public:
RangeSection = RS;
RangeSectionBase = Base;
}
+ void setLocSection(const DWARFSection *LS, uint64_t Base) {
+ LocSection = LS;
+ LocSectionBase = Base;
+ }
+
+ uint64_t getLocSectionBase() const {
+ return LocSectionBase;
+ }
Optional<object::SectionedAddress>
getAddrOffsetSectionItem(uint32_t Index) const;
@@ -319,6 +327,8 @@ public:
return DataExtractor(StringSection, false, 0);
}
+ const DWARFLocationTable &getLocationTable() { return *LocTable; }
+
/// Extract the range list referenced by this compile unit from the
/// .debug_ranges section. If the extraction is unsuccessful, an error
/// is returned. Successful extraction requires that the compile unit
@@ -417,13 +427,25 @@ public:
/// an entry in the rangelist table's offset array and is supplied by
/// DW_FORM_rnglistx.
Optional<uint64_t> getRnglistOffset(uint32_t Index) {
- if (RngListTable)
- return RngListTable->getOffsetEntry(Index);
+ if (!RngListTable)
+ return None;
+ if (Optional<uint64_t> Off = RngListTable->getOffsetEntry(Index))
+ return *Off + RangeSectionBase;
return None;
}
+ Optional<uint64_t> getLoclistOffset(uint32_t Index) {
+ if (!LoclistTableHeader)
+ return None;
+ if (Optional<uint64_t> Off = LoclistTableHeader->getOffsetEntry(Index))
+ return *Off + getLocSectionBase();
+ return None;
+ }
Expected<DWARFAddressRangesVector> collectAddressRanges();
+ Expected<DWARFLocationExpressionsVector>
+ findLoclistFromOffset(uint64_t Offset);
+
/// Returns subprogram DIE with address range encompassing the provided
/// address. The pointer is alive as long as parsed compile unit DIEs are not
/// cleared.
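getRnglistOffset() and the new getLoclistOffset() now fold the table base into the value they return, so an index coming from DW_FORM_rnglistx or DW_FORM_loclistx resolves directly to an absolute section offset. A minimal sketch combining this with the new findLoclistFromOffset() (the error text is illustrative):

    // Sketch: resolve a DW_FORM_loclistx index and materialize the list.
    #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
    #include "llvm/Support/Error.h"

    using namespace llvm;

    Expected<DWARFLocationExpressionsVector>
    resolveLoclistIndex(DWARFUnit &U, uint32_t Index) {
      Optional<uint64_t> Off = U.getLoclistOffset(Index);
      if (!Off)
        return createStringError(std::errc::invalid_argument,
                                 "no loclist table entry for index %u", Index);
      return U.findLoclistFromOffset(*Off); // base already added in
    }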
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index 63e18bb2ecd5..893cfc1eb07c 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
#include "llvm/DebugInfo/GSYM/Range.h"
#include "llvm/DebugInfo/GSYM/StringTable.h"
#include <tuple>
@@ -21,6 +22,7 @@ namespace llvm {
class raw_ostream;
namespace gsym {
+class GsymReader;
/// Function information in GSYM files encodes information for one contiguous
/// address range. If a function has discontiguous address ranges, they will
/// need to be encoded using multiple FunctionInfo objects.
@@ -30,7 +32,7 @@ namespace gsym {
/// The function information gets the function start address as an argument
/// to the FunctionInfo::decode(...) function. This information is calculated
/// from the GSYM header and an address offset from the GSYM address offsets
-/// table. The encoded FunctionInfo information must be alinged to a 4 byte
+/// table. The encoded FunctionInfo information must be aligned to a 4 byte
/// boundary.
///
/// The encoded data for a FunctionInfo starts with fixed data that all
@@ -140,6 +142,33 @@ struct FunctionInfo {
/// function info that was successfully written into the stream.
llvm::Expected<uint64_t> encode(FileWriter &O) const;
+
+ /// Lookup an address within a FunctionInfo object's data stream.
+ ///
+ /// Instead of decoding an entire FunctionInfo object when doing lookups,
+ /// we can decode only the information we need from the FunctionInfo's data
+ /// for the specific address. The lookup result information is returned as
+ /// a LookupResult.
+ ///
+ /// \param Data The binary stream to read the data from. This object must
+ /// have the data for the object starting at offset zero. The data
+ /// can contain more data than needed.
+ ///
+ /// \param GR The GSYM reader that contains the string and file table that
+ /// will be used to fill in information in the returned result.
+ ///
+ /// \param FuncAddr The function start address decoded from the GsymReader.
+ ///
+ /// \param Addr The address to lookup.
+ ///
+ /// \returns A LookupResult or an error describing the issue that was
+ /// encountered during decoding. An error should only be returned if the
+ /// address is not contained in the FunctionInfo or if the data is corrupted.
+ static llvm::Expected<LookupResult> lookup(DataExtractor &Data,
+ const GsymReader &GR,
+ uint64_t FuncAddr,
+ uint64_t Addr);
+
uint64_t startAddress() const { return Range.Start; }
uint64_t endAddress() const { return Range.End; }
uint64_t size() const { return Range.size(); }
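A minimal sketch of the new one-shot lookup, with the extractor, reader, and addresses assumed to be supplied by the GSYM address-table machinery:

    // Sketch: decode only what the lookup needs from a FunctionInfo stream.
    #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::gsym;

    void printFuncName(DataExtractor &Data, const GsymReader &GR,
                       uint64_t FuncAddr, uint64_t Addr, raw_ostream &OS) {
      Expected<LookupResult> LR =
          FunctionInfo::lookup(Data, GR, FuncAddr, Addr);
      if (!LR) {
        consumeError(LR.takeError());
        return;
      }
      OS << LR->FuncName << "\n"; // concrete function containing Addr
    }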
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index 12c8187132ba..e61eb678c82e 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -82,15 +82,15 @@ class FileWriter;
/// The resulting GSYM size is smaller and causes fewer pages to be touched
/// during address lookups when the address table is smaller. The size of the
/// address offsets in the address table is specified in the header in
-/// Header.AddrOffSize. The first offset in the address table is alinged to
-/// Header.AddrOffSize alignement to ensure efficient access when loaded into
+/// Header.AddrOffSize. The first offset in the address table is aligned to
+/// Header.AddrOffSize alignment to ensure efficient access when loaded into
/// memory.
///
/// FUNCTION INFO OFFSETS TABLE
///
/// The function info offsets table immediately follows the address table and
/// consists of Header.NumAddresses 32 bit file offsets: one for each address
-/// in the address table. This data is algined to a 4 byte boundary. The
+/// in the address table. This data is aligned to a 4 byte boundary. The
/// offsets in this table are the relative offsets from the start offset of the
/// GSYM header and point to the function info data for each address in the
/// address table. Keeping this data separate from the address table helps to
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 113bcee9c9a3..5ba13f846798 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -1,9 +1,8 @@
//===- GsymReader.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -94,28 +93,45 @@ public:
/// Get the full function info for an address.
///
+ /// This should be called when a client will store a copy of the complete
+ /// FunctionInfo for a given address. For one-off lookups, use the lookup()
+ /// function below.
+ ///
+ /// Symbolication server processes might want to parse the entire function
+ /// info for a given address and cache it if the process stays around to
+ /// service many symbolication addresses, like for parsing profiling
+ /// information.
+ ///
/// \param Addr A virtual address from the original object file to lookup.
+ ///
/// \returns An expected FunctionInfo that contains the function info object
/// or an error object that indicates reason for failing to lookup the
- /// address,
+ /// address.
llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
+ /// Lookup an address in a GSYM.
+ ///
+ /// Lookup just the information needed for a specific address \a Addr. This
+ /// function is faster than calling getFunctionInfo() as it will only return
+ /// information that pertains to \a Addr and allows the parsing to skip any
+ /// extra information encoded for other addresses. For example the line table
+ /// parsing can stop when a matching LineEntry has been found, and the
+ /// InlineInfo can stop parsing early once a match has been found and also
+ /// skip information that doesn't match. This avoids memory allocations and
+ /// is much faster for lookups.
+ ///
+ /// \param Addr A virtual address from the original object file to lookup.
+ /// \returns An expected LookupResult that contains only the information
+ /// needed for the current address, or an error object that indicates reason
+ /// for failing to lookup the address.
+ llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
+
/// Get a string from the string table.
///
/// \param Offset The string table offset for the string to retrieve.
/// \returns The string from the string table.
StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
-protected:
- /// Gets an address from the address table.
- ///
- /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
- ///
- /// \param Index A index into the address table.
- /// \returns A resolved virtual address for adddress in the address table
- /// or llvm::None if Index is out of bounds.
- Optional<uint64_t> getAddress(size_t Index) const;
-
/// Get the file entry for the supplied file index.
///
/// Used to convert any file indexes in the FunctionInfo data back into
@@ -131,6 +147,16 @@ protected:
return llvm::None;
}
+protected:
+ /// Gets an address from the address table.
+ ///
+ /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
+ ///
+ /// \param Index An index into the address table.
+ /// \returns A resolved virtual address for the address in the address table
+ /// or llvm::None if Index is out of bounds.
+ Optional<uint64_t> getAddress(size_t Index) const;
+
/// Get an appropriate address info offsets array.
///
/// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
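A minimal sketch contrasting the two entry points described above (the wrapper names are hypothetical):

    // Sketch: lookup() for one-off symbolication, getFunctionInfo() when the
    // full record will be cached and reused.
    #include "llvm/DebugInfo/GSYM/GsymReader.h"

    using namespace llvm;
    using namespace llvm::gsym;

    Expected<LookupResult> symbolizeOnce(const GsymReader &GR, uint64_t Addr) {
      return GR.lookup(Addr); // parses only the rows covering Addr
    }

    Expected<FunctionInfo> symbolizeAndCache(const GsymReader &GR,
                                             uint64_t Addr) {
      return GR.getFunctionInfo(Addr); // decodes the complete record
    }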
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
index 48fd9a7c1308..3b95e3e050bd 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -10,6 +10,8 @@
#define LLVM_DEBUGINFO_GSYM_INLINEINFO_H
#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
#include "llvm/DebugInfo/GSYM/Range.h"
#include "llvm/Support/Error.h"
#include <stdint.h>
@@ -21,6 +23,7 @@ class raw_ostream;
namespace gsym {
+class GsymReader;
/// Inline information stores the name of the inline function along with
/// an array of address ranges. It also stores the call file and call line
/// that called this inline function. This allows us to unwind inline call
@@ -74,6 +77,52 @@ struct InlineInfo {
using InlineArray = std::vector<const InlineInfo *>;
+ /// Lookup a single address within the inline info data.
+ ///
+ /// Clients have the option to decode an entire InlineInfo object (using
+ /// InlineInfo::decode() ) or just find the matching inline info using this
+ /// function. The benefit of using this function is that only the information
+ /// needed for the lookup will be extracted, other info can be skipped and
+ /// parsing can stop as soon as the deepest match is found. This allows
+ /// symbolication tools to be fast and efficient and avoid allocation costs
+ /// when doing lookups.
+ ///
+ /// This function will augment the SourceLocations array \a SrcLocs with any
+ /// inline information that pertains to \a Addr. If no inline information
+ /// exists for \a Addr, then \a SrcLocs will be left untouched. If there is
+ /// inline information for \a Addr, then \a SrcLocs will be modified to
+ /// contain the deepest inline function's SourceLocation at index zero
+ /// in the array and proceed up to the concrete function source file and
+ /// line at the end of the array.
+ ///
+ /// \param GR The GSYM reader that contains the string and file table that
+ /// will be used to fill in the source locations.
+ ///
+ /// \param Data The binary stream to read the data from. This object must
+ /// have the data for the LineTable object starting at offset zero. The data
+ /// can contain more data than needed.
+ ///
+ /// \param BaseAddr The base address to use when decoding the line table.
+ /// This will be the FunctionInfo's start address and will be used to
+ /// decode the correct addresses for the inline information.
+ ///
+ /// \param Addr The address to lookup.
+ ///
+ /// \param SrcLocs The inline source locations that matches \a Addr. This
+ /// array must be initialized with the matching line entry
+ /// from the line table upon entry. The name of the concrete
+ /// function must be supplied since it will get pushed to
+ /// the last SourceLocation entry and the inline information
+ /// will fill in the source file and line from the inline
+ /// information.
+ ///
+ /// \returns An error if the inline information is corrupt, or
+ /// Error::success() for all other cases, even when no information
+ /// is added to \a SrcLocs.
+ static llvm::Error lookup(const GsymReader &GR, DataExtractor &Data,
+ uint64_t BaseAddr, uint64_t Addr,
+ SourceLocations &SrcLocs);
+
/// Lookup an address in the InlineInfo object
///
/// This function is used to symbolicate an inline call stack and can
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/LineTable.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/LineTable.h
index 3cdbccb08ced..22668e39d94c 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/LineTable.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/LineTable.h
@@ -119,8 +119,25 @@ class LineTable {
typedef std::vector<gsym::LineEntry> Collection;
Collection Lines; ///< All line entries in the line table.
public:
- static LineEntry lookup(DataExtractor &Data, uint64_t BaseAddr,
- uint64_t Addr);
+ /// Lookup a single address within a line table's data.
+ ///
+ /// Clients have the option to decode an entire line table using
+ /// LineTable::decode() or just find a single matching entry using this
+ /// function. The benefit of using this function is that parsed LineEntry
+ /// objects that do not match will not be stored in an array. This will avoid
+ /// memory allocation costs and parsing can stop once a match has been found.
+ ///
+ /// \param Data The binary stream to read the data from. This object must
+ /// have the data for the LineTable object starting at offset zero. The data
+ /// can contain more data than needed.
+ ///
+ /// \param BaseAddr The base address to use when decoding the line table.
+ /// This will be the FunctionInfo's start address and will be used to
+ /// initialize the line table row prior to parsing any opcodes.
+ ///
+ /// \returns A LineEntry object if a match is found, or an error otherwise.
+ static Expected<LineEntry> lookup(DataExtractor &Data, uint64_t BaseAddr,
+ uint64_t Addr);
/// Decode a LineTable object from a binary data stream.
///
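Because lookup() now returns Expected<LineEntry>, a corrupt table or an address with no matching row surfaces as an Error the caller must consume. A minimal sketch, assuming LineEntry's usual Line member:

    // Sketch: turn the Expected result into an optional line number.
    #include "llvm/ADT/Optional.h"
    #include "llvm/DebugInfo/GSYM/LineTable.h"

    using namespace llvm;
    using namespace llvm::gsym;

    Optional<uint32_t> lineForAddr(DataExtractor &Data, uint64_t BaseAddr,
                                   uint64_t Addr) {
      Expected<LineEntry> Row = LineTable::lookup(Data, BaseAddr, Addr);
      if (!Row) {
        consumeError(Row.takeError());
        return None;
      }
      return Row->Line;
    }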
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
new file mode 100644
index 000000000000..746fd36208e1
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -0,0 +1,61 @@
+//===- LookupResult.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
+#define LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
+
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/ADT/StringRef.h"
+#include <inttypes.h>
+#include <vector>
+
+namespace llvm {
+class raw_ostream;
+namespace gsym {
+struct FileEntry;
+
+struct SourceLocation {
+ StringRef Name; ///< Function or symbol name.
+ StringRef Dir; ///< Line entry source file directory path.
+ StringRef Base; ///< Line entry source file basename.
+ uint32_t Line = 0; ///< Source file line number.
+};
+
+inline bool operator==(const SourceLocation &LHS, const SourceLocation &RHS) {
+ return LHS.Name == RHS.Name && LHS.Dir == RHS.Dir &&
+ LHS.Base == RHS.Base && LHS.Line == RHS.Line;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SourceLocation &R);
+
+using SourceLocations = std::vector<SourceLocation>;
+
+
+struct LookupResult {
+ uint64_t LookupAddr = 0; ///< The address that this lookup pertains to.
+ AddressRange FuncRange; ///< The concrete function address range.
+ StringRef FuncName; ///< The concrete function name that contains LookupAddr.
+ /// The source locations that match this address. This information will only
+ /// be filled in if the FunctionInfo contains a line table. If an address is
+ /// for a concrete function with no inlined functions, this array will have
+ /// one entry. If an address points to an inline function, there will be one
+ /// SourceLocation for each inlined function with the last entry pointing to
+ /// the concrete function itself. This allows one address to generate
+ /// multiple locations and allows unwinding of inline call stacks. The
+ /// deepest inline function will appear at index zero in the source locations
+ /// array, and the concrete function will appear at the end of the array.
+ SourceLocations Locations;
+ std::string getSourceFile(uint32_t Index) const;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const LookupResult &R);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
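A minimal sketch of consuming the Locations array laid out above, from the deepest inline frame at index zero out to the concrete function at the end:

    // Sketch: print an inline call stack from a LookupResult.
    #include "llvm/DebugInfo/GSYM/LookupResult.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::gsym;

    void printFrames(const LookupResult &LR, raw_ostream &OS) {
      for (uint32_t I = 0,
                    N = static_cast<uint32_t>(LR.Locations.size());
           I != N; ++I) {
        const SourceLocation &SL = LR.Locations[I];
        OS << "  frame " << I << ": " << SL.Name << " @ "
           << LR.getSourceFile(I) << ":" << SL.Line << "\n";
      }
    }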
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/Range.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/Range.h
index 37cfec713f26..49e316eae3cf 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/Range.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/Range.h
@@ -61,6 +61,14 @@ struct AddressRange {
void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
void encode(FileWriter &O, uint64_t BaseAddr) const;
/// @}
+
+ /// Skip an address range object in the specified data at the specified
+ /// offset.
+ ///
+ /// \param Data The binary stream to read the data from.
+ ///
+ /// \param Offset The byte offset within \a Data.
+ static void skip(DataExtractor &Data, uint64_t &Offset);
};
raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R);
@@ -100,6 +108,16 @@ public:
void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
void encode(FileWriter &O, uint64_t BaseAddr) const;
/// @}
+
+ /// Skip an address range object in the specified data at the specified
+ /// offset.
+ ///
+ /// \param Data The binary stream to read the data from.
+ ///
+ /// \param Offset The byte offset within \a Data.
+ ///
+ /// \returns The number of address ranges that were skipped.
+ static uint64_t skip(DataExtractor &Data, uint64_t &Offset);
};
raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR);
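A minimal sketch of the new skip() helpers, which advance an offset past encoded ranges without building an AddressRanges container:

    // Sketch: measure how many ranges an encoded blob holds, allocation-free.
    #include "llvm/DebugInfo/GSYM/Range.h"
    #include "llvm/Support/DataExtractor.h"

    using namespace llvm;
    using namespace llvm::gsym;

    uint64_t countEncodedRanges(DataExtractor &Data) {
      uint64_t Offset = 0;
      // Returns the number of ranges skipped; Offset ends past the object.
      return AddressRanges::skip(Data, Offset);
    }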
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
index 568f0c98c559..7b7337f532d8 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
@@ -25,9 +25,9 @@ class DbiModuleDescriptor {
friend class DbiStreamBuilder;
public:
- DbiModuleDescriptor();
- DbiModuleDescriptor(const DbiModuleDescriptor &Info);
- ~DbiModuleDescriptor();
+ DbiModuleDescriptor() = default;
+ DbiModuleDescriptor(const DbiModuleDescriptor &Info) = default;
+ DbiModuleDescriptor &operator=(const DbiModuleDescriptor &Info) = default;
static Error initialize(BinaryStreamRef Stream, DbiModuleDescriptor &Info);
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
index 14223273c898..5fb13ad30e91 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
@@ -39,6 +39,7 @@ public:
DbiModuleSourceFilesIterator(const DbiModuleList &Modules, uint32_t Modi,
uint16_t Filei);
DbiModuleSourceFilesIterator() = default;
+ DbiModuleSourceFilesIterator(const DbiModuleSourceFilesIterator &R) = default;
DbiModuleSourceFilesIterator &
operator=(const DbiModuleSourceFilesIterator &R) = default;
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
index aa38417bcf4c..95c0a89551ed 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
@@ -56,6 +56,7 @@ public:
}
}
+ HashTableIterator(const HashTableIterator &R) = default;
HashTableIterator &operator=(const HashTableIterator &R) {
Map = R.Map;
return *this;
diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 11599fc1797d..8bfa5432b811 100644
--- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -16,6 +16,7 @@
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cstdint>
@@ -44,6 +45,7 @@ public:
std::vector<std::string> DsymHints;
std::string FallbackDebugPath;
std::string DWPName;
+ std::vector<std::string> DebugFileDirectory;
};
LLVMSymbolizer() = default;
@@ -98,6 +100,9 @@ private:
ObjectFile *lookUpDebuglinkObject(const std::string &Path,
const ObjectFile *Obj,
const std::string &ArchName);
+ ObjectFile *lookUpBuildIDObject(const std::string &Path,
+ const ELFObjectFileBase *Obj,
+ const std::string &ArchName);
/// Returns pair of pointers to object and debug object.
Expected<ObjectPair> getOrCreateObjectPair(const std::string &Path,
diff --git a/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h b/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 7784e842bfeb..376e0efea423 100644
--- a/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -607,48 +607,12 @@ public:
}
};
-class NodeOrString {
- const void *First;
- const void *Second;
-
-public:
- /* implicit */ NodeOrString(StringView Str) {
- const char *FirstChar = Str.begin();
- const char *SecondChar = Str.end();
- if (SecondChar == nullptr) {
- assert(FirstChar == SecondChar);
- ++FirstChar, ++SecondChar;
- }
- First = static_cast<const void *>(FirstChar);
- Second = static_cast<const void *>(SecondChar);
- }
-
- /* implicit */ NodeOrString(Node *N)
- : First(static_cast<const void *>(N)), Second(nullptr) {}
- NodeOrString() : First(nullptr), Second(nullptr) {}
-
- bool isString() const { return Second && First; }
- bool isNode() const { return First && !Second; }
- bool isEmpty() const { return !First && !Second; }
-
- StringView asString() const {
- assert(isString());
- return StringView(static_cast<const char *>(First),
- static_cast<const char *>(Second));
- }
-
- const Node *asNode() const {
- assert(isNode());
- return static_cast<const Node *>(First);
- }
-};
-
class ArrayType final : public Node {
const Node *Base;
- NodeOrString Dimension;
+ Node *Dimension;
public:
- ArrayType(const Node *Base_, NodeOrString Dimension_)
+ ArrayType(const Node *Base_, Node *Dimension_)
: Node(KArrayType,
/*RHSComponentCache=*/Cache::Yes,
/*ArrayCache=*/Cache::Yes),
@@ -665,10 +629,8 @@ public:
if (S.back() != ']')
S += " ";
S += "[";
- if (Dimension.isString())
- S += Dimension.asString();
- else if (Dimension.isNode())
- Dimension.asNode()->print(S);
+ if (Dimension)
+ Dimension->print(S);
S += "]";
Base->printRight(S);
}
@@ -934,10 +896,10 @@ public:
class VectorType final : public Node {
const Node *BaseType;
- const NodeOrString Dimension;
+ const Node *Dimension;
public:
- VectorType(const Node *BaseType_, NodeOrString Dimension_)
+ VectorType(const Node *BaseType_, Node *Dimension_)
: Node(KVectorType), BaseType(BaseType_),
Dimension(Dimension_) {}
@@ -946,19 +908,17 @@ public:
void printLeft(OutputStream &S) const override {
BaseType->print(S);
S += " vector[";
- if (Dimension.isNode())
- Dimension.asNode()->print(S);
- else if (Dimension.isString())
- S += Dimension.asString();
+ if (Dimension)
+ Dimension->print(S);
S += "]";
}
};
class PixelVectorType final : public Node {
- const NodeOrString Dimension;
+ const Node *Dimension;
public:
- PixelVectorType(NodeOrString Dimension_)
+ PixelVectorType(const Node *Dimension_)
: Node(KPixelVectorType), Dimension(Dimension_) {}
template<typename Fn> void match(Fn F) const { F(Dimension); }
@@ -966,7 +926,7 @@ public:
void printLeft(OutputStream &S) const override {
// FIXME: This should demangle as "vector pixel".
S += "pixel vector[";
- S += Dimension.asString();
+ Dimension->print(S);
S += "]";
}
};
@@ -2213,10 +2173,10 @@ class PODSmallVector {
static_assert(std::is_pod<T>::value,
"T is required to be a plain old data type");
- T* First;
- T* Last;
- T* Cap;
- T Inline[N];
+ T* First = nullptr;
+ T* Last = nullptr;
+ T* Cap = nullptr;
+ T Inline[N] = {0};
bool isInline() const { return First == Inline; }
@@ -3548,7 +3508,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseVectorType() {
if (!consumeIf("Dv"))
return nullptr;
if (look() >= '1' && look() <= '9') {
- StringView DimensionNumber = parseNumber();
+ Node *DimensionNumber = make<NameType>(parseNumber());
+ if (!DimensionNumber)
+ return nullptr;
if (!consumeIf('_'))
return nullptr;
if (consumeIf('p'))
@@ -3573,7 +3535,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseVectorType() {
Node *ElemType = getDerived().parseType();
if (!ElemType)
return nullptr;
- return make<VectorType>(ElemType, StringView());
+ return make<VectorType>(ElemType, /*Dimension=*/nullptr);
}
// <decltype> ::= Dt <expression> E # decltype of an id-expression or class member access (C++0x)
@@ -3599,10 +3561,12 @@ Node *AbstractManglingParser<Derived, Alloc>::parseArrayType() {
if (!consumeIf('A'))
return nullptr;
- NodeOrString Dimension;
+ Node *Dimension = nullptr;
if (std::isdigit(look())) {
- Dimension = parseNumber();
+ Dimension = make<NameType>(parseNumber());
+ if (!Dimension)
+ return nullptr;
if (!consumeIf('_'))
return nullptr;
} else if (!consumeIf('_')) {
diff --git a/contrib/llvm-project/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/contrib/llvm-project/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
index 81b279fe237d..ec40eec5a05e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/contrib/llvm-project/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -508,7 +508,7 @@ struct CustomTypeNode : public TypeNode {
void outputPre(OutputStream &OS, OutputFlags Flags) const override;
void outputPost(OutputStream &OS, OutputFlags Flags) const override;
- IdentifierNode *Identifier;
+ IdentifierNode *Identifier = nullptr;
};
struct NodeArrayNode : public Node {
@@ -584,7 +584,7 @@ struct SpecialTableSymbolNode : public SymbolNode {
void output(OutputStream &OS, OutputFlags Flags) const override;
QualifiedNameNode *TargetName = nullptr;
- Qualifiers Quals;
+ Qualifiers Quals = Qualifiers::Q_None;
};
struct LocalStaticGuardVariableNode : public SymbolNode {
diff --git a/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h b/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h
index ec23859af46a..04e1936ebbe7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h
+++ b/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h
@@ -25,9 +25,9 @@ DEMANGLE_NAMESPACE_BEGIN
// Stream that AST nodes write their string representation into after the AST
// has been parsed.
class OutputStream {
- char *Buffer;
- size_t CurrentPosition;
- size_t BufferCapacity;
+ char *Buffer = nullptr;
+ size_t CurrentPosition = 0;
+ size_t BufferCapacity = 0;
// Ensure there is at least n more positions in buffer.
void grow(size_t N) {
@@ -137,7 +137,7 @@ public:
char *getBuffer() { return Buffer; }
char *getBufferEnd() { return Buffer + CurrentPosition - 1; }
- size_t getBufferCapacity() { return BufferCapacity; }
+ size_t getBufferCapacity() const { return BufferCapacity; }
};
template <class T> class SwapAndRestore {
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index b531127cf892..fa04653fa7bd 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -72,6 +72,7 @@ public:
: Target(&Target), Offset(Offset), Addend(Addend), K(K) {}
OffsetT getOffset() const { return Offset; }
+ void setOffset(OffsetT Offset) { this->Offset = Offset; }
Kind getKind() const { return K; }
void setKind(Kind K) { this->K = K; }
bool isRelocation() const { return K >= FirstRelocation; }
@@ -130,7 +131,6 @@ private:
uint64_t IsAbsolute : 1;
};
-using BlockOrdinal = unsigned;
using SectionOrdinal = unsigned;
/// An Addressable with content and edges.
@@ -139,10 +139,9 @@ class Block : public Addressable {
private:
/// Create a zero-fill defined addressable.
- Block(Section &Parent, BlockOrdinal Ordinal, JITTargetAddress Size,
- JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset)
- : Addressable(Address, true), Parent(Parent), Size(Size),
- Ordinal(Ordinal) {
+ Block(Section &Parent, JITTargetAddress Size, JITTargetAddress Address,
+ uint64_t Alignment, uint64_t AlignmentOffset)
+ : Addressable(Address, true), Parent(Parent), Size(Size) {
assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2");
assert(AlignmentOffset < Alignment &&
"Alignment offset cannot exceed alignment");
@@ -153,10 +152,10 @@ private:
}
/// Create a defined addressable for the given content.
- Block(Section &Parent, BlockOrdinal Ordinal, StringRef Content,
- JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset)
+ Block(Section &Parent, StringRef Content, JITTargetAddress Address,
+ uint64_t Alignment, uint64_t AlignmentOffset)
: Addressable(Address, true), Parent(Parent), Data(Content.data()),
- Size(Content.size()), Ordinal(Ordinal) {
+ Size(Content.size()) {
assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2");
assert(AlignmentOffset < Alignment &&
"Alignment offset cannot exceed alignment");
@@ -179,9 +178,6 @@ public:
/// Return the parent section for this block.
Section &getSection() const { return Parent; }
- /// Return the ordinal for this block.
- BlockOrdinal getOrdinal() const { return Ordinal; }
-
/// Returns true if this is a zero-fill block.
///
/// If true, getSize is callable but getContent is not (the content is
@@ -208,15 +204,32 @@ public:
/// Get the alignment for this content.
uint64_t getAlignment() const { return 1ull << P2Align; }
+ /// Set the alignment for this content.
+ void setAlignment(uint64_t Alignment) {
+ assert(isPowerOf2_64(Alignment) && "Alignment must be a power of two");
+ P2Align = Alignment ? countTrailingZeros(Alignment) : 0;
+ }
+
/// Get the alignment offset for this content.
uint64_t getAlignmentOffset() const { return AlignmentOffset; }
+ /// Set the alignment offset for this content.
+ void setAlignmentOffset(uint64_t AlignmentOffset) {
+ assert(AlignmentOffset < (1ull << P2Align) &&
+ "Alignment offset can't exceed alignment");
+ this->AlignmentOffset = AlignmentOffset;
+ }
+
/// Add an edge to this block.
void addEdge(Edge::Kind K, Edge::OffsetT Offset, Symbol &Target,
Edge::AddendT Addend) {
Edges.push_back(Edge(K, Offset, Target, Addend));
}
+ /// Add an edge by copying an existing one. This is typically used when
+ /// moving edges between blocks.
+ void addEdge(const Edge &E) { Edges.push_back(E); }
+
/// Return the list of edges attached to this content.
iterator_range<edge_iterator> edges() {
return make_range(Edges.begin(), Edges.end());
@@ -233,6 +246,10 @@ public:
/// Returns true if the list of edges is empty.
bool edges_empty() const { return Edges.empty(); }
+ /// Remove the edge pointed to by the given iterator.
+ /// Invalidates all iterators that point to or past the given one.
+ void removeEdge(const_edge_iterator I) { Edges.erase(I); }
+
private:
static constexpr uint64_t MaxAlignmentOffset = (1ULL << 57) - 1;
@@ -241,7 +258,6 @@ private:
Section &Parent;
const char *Data = nullptr;
size_t Size = 0;
- BlockOrdinal Ordinal = 0;
std::vector<Edge> Edges;
};
@@ -287,6 +303,7 @@ private:
JITTargetAddress Size, Linkage L, Scope S, bool IsLive,
bool IsCallable)
: Name(Name), Base(&Base), Offset(Offset), Size(Size) {
+ assert(Offset <= MaxOffset && "Offset out of range");
setLinkage(L);
setScope(S);
setLive(IsLive);
@@ -307,14 +324,14 @@ private:
}
static Symbol &constructExternal(void *SymStorage, Addressable &Base,
- StringRef Name, JITTargetAddress Size) {
+ StringRef Name, JITTargetAddress Size,
+ Linkage L) {
assert(SymStorage && "Storage cannot be null");
assert(!Base.isDefined() &&
"Cannot create external symbol from defined block");
assert(!Name.empty() && "External symbol name cannot be empty");
auto *Sym = reinterpret_cast<Symbol *>(SymStorage);
- new (Sym) Symbol(Base, 0, Name, Size, Linkage::Strong, Scope::Default,
- false, false);
+ new (Sym) Symbol(Base, 0, Name, Size, L, Scope::Default, false, false);
return *Sym;
}
@@ -334,6 +351,7 @@ private:
JITTargetAddress Size, bool IsCallable,
bool IsLive) {
assert(SymStorage && "Storage cannot be null");
+ assert(Offset < Base.getSize() && "Symbol offset is outside block");
auto *Sym = reinterpret_cast<Symbol *>(SymStorage);
new (Sym) Symbol(Base, Offset, StringRef(), Size, Linkage::Strong,
Scope::Local, IsLive, IsCallable);
@@ -345,6 +363,7 @@ private:
JITTargetAddress Size, Linkage L, Scope S,
bool IsLive, bool IsCallable) {
assert(SymStorage && "Storage cannot be null");
+ assert(Offset < Base.getSize() && "Symbol offset is outside block");
assert(!Name.empty() && "Name cannot be empty");
auto *Sym = reinterpret_cast<Symbol *>(SymStorage);
new (Sym) Symbol(Base, Offset, Name, Size, L, S, IsLive, IsCallable);
@@ -458,7 +477,7 @@ public:
/// Set the linkage for this Symbol.
void setLinkage(Linkage L) {
- assert((L == Linkage::Strong || (Base->isDefined() && !Name.empty())) &&
+ assert((L == Linkage::Strong || (!Base->isAbsolute() && !Name.empty())) &&
"Linkage can only be applied to defined named symbols");
this->L = static_cast<uint8_t>(L);
}
@@ -484,6 +503,13 @@ private:
// note: Size and IsCallable fields left unchanged.
}
+ void setBlock(Block &B) { Base = &B; }
+
+ void setOffset(uint64_t NewOffset) {
+ assert(NewOffset <= MaxOffset && "Offset out of range");
+ Offset = NewOffset;
+ }
+
static constexpr uint64_t MaxOffset = (1ULL << 59) - 1;
// FIXME: A char* or SymbolStringPtr may pack better.
@@ -532,6 +558,16 @@ public:
/// Returns the ordinal for this section.
SectionOrdinal getOrdinal() const { return SecOrdinal; }
+ /// Returns an iterator over the blocks defined in this section.
+ iterator_range<block_iterator> blocks() {
+ return make_range(Blocks.begin(), Blocks.end());
+ }
+
+ /// Returns an iterator over the blocks defined in this section.
+ iterator_range<const_block_iterator> blocks() const {
+ return make_range(Blocks.begin(), Blocks.end());
+ }
+
/// Returns an iterator over the symbols defined in this section.
iterator_range<symbol_iterator> symbols() {
return make_range(Symbols.begin(), Symbols.end());
@@ -545,12 +581,6 @@ public:
/// Return the number of symbols in this section.
SymbolSet::size_type symbols_size() { return Symbols.size(); }
- /// Return true if this section contains no symbols.
- bool symbols_empty() const { return Symbols.empty(); }
-
- /// Returns the ordinal for the next block.
- BlockOrdinal getNextBlockOrdinal() { return NextBlockOrdinal++; }
-
private:
void addSymbol(Symbol &Sym) {
assert(!Symbols.count(&Sym) && "Symbol is already in this section");
@@ -562,10 +592,20 @@ private:
Symbols.erase(&Sym);
}
+ void addBlock(Block &B) {
+ assert(!Blocks.count(&B) && "Block is already in this section");
+ Blocks.insert(&B);
+ }
+
+ void removeBlock(Block &B) {
+ assert(Blocks.count(&B) && "Block is not in this section");
+ Blocks.erase(&B);
+ }
+
StringRef Name;
sys::Memory::ProtectionFlags Prot;
SectionOrdinal SecOrdinal = 0;
- BlockOrdinal NextBlockOrdinal = 0;
+ BlockSet Blocks;
SymbolSet Symbols;
};
@@ -575,21 +615,21 @@ class SectionRange {
public:
SectionRange() = default;
SectionRange(const Section &Sec) {
- if (Sec.symbols_empty())
+ if (llvm::empty(Sec.blocks()))
return;
- First = Last = *Sec.symbols().begin();
- for (auto *Sym : Sec.symbols()) {
- if (Sym->getAddress() < First->getAddress())
- First = Sym;
- if (Sym->getAddress() > Last->getAddress())
- Last = Sym;
+ First = Last = *Sec.blocks().begin();
+ for (auto *B : Sec.blocks()) {
+ if (B->getAddress() < First->getAddress())
+ First = B;
+ if (B->getAddress() > Last->getAddress())
+ Last = B;
}
}
- Symbol *getFirstSymbol() const {
+ Block *getFirstBlock() const {
assert((!Last || First) && "First can not be null if end is non-null");
return First;
}
- Symbol *getLastSymbol() const {
+ Block *getLastBlock() const {
assert((First || !Last) && "Last can not be null if start is non-null");
return Last;
}
@@ -598,17 +638,16 @@ public:
return !First;
}
JITTargetAddress getStart() const {
- return First ? First->getBlock().getAddress() : 0;
+ return First ? First->getAddress() : 0;
}
JITTargetAddress getEnd() const {
- return Last ? Last->getBlock().getAddress() + Last->getBlock().getSize()
- : 0;
+ return Last ? Last->getAddress() + Last->getSize() : 0;
}
uint64_t getSize() const { return getEnd() - getStart(); }
private:
- Symbol *First = nullptr;
- Symbol *Last = nullptr;
+ Block *First = nullptr;
+ Block *Last = nullptr;
};
class LinkGraph {
@@ -633,12 +672,11 @@ private:
template <typename... ArgTs> Block &createBlock(ArgTs &&... Args) {
Block *B = reinterpret_cast<Block *>(Allocator.Allocate<Block>());
new (B) Block(std::forward<ArgTs>(Args)...);
- Blocks.insert(B);
+ B->getSection().addBlock(*B);
return *B;
}
void destroyBlock(Block &B) {
- Blocks.erase(&B);
B.~Block();
Allocator.Deallocate(&B);
}
@@ -648,69 +686,102 @@ private:
Allocator.Deallocate(&S);
}
+ static iterator_range<Section::block_iterator> getSectionBlocks(Section &S) {
+ return S.blocks();
+ }
+
+ static iterator_range<Section::const_block_iterator>
+ getSectionConstBlocks(Section &S) {
+ return S.blocks();
+ }
+
+ static iterator_range<Section::symbol_iterator>
+ getSectionSymbols(Section &S) {
+ return S.symbols();
+ }
+
+ static iterator_range<Section::const_symbol_iterator>
+ getSectionConstSymbols(Section &S) {
+ return S.symbols();
+ }
+
public:
using external_symbol_iterator = ExternalSymbolSet::iterator;
- using block_iterator = BlockSet::iterator;
-
using section_iterator = pointee_iterator<SectionList::iterator>;
using const_section_iterator = pointee_iterator<SectionList::const_iterator>;
- template <typename SectionItrT, typename SymbolItrT, typename T>
- class defined_symbol_iterator_impl
+ template <typename OuterItrT, typename InnerItrT, typename T,
+ iterator_range<InnerItrT> getInnerRange(
+ typename OuterItrT::reference)>
+ class nested_collection_iterator
: public iterator_facade_base<
- defined_symbol_iterator_impl<SectionItrT, SymbolItrT, T>,
+ nested_collection_iterator<OuterItrT, InnerItrT, T, getInnerRange>,
std::forward_iterator_tag, T> {
public:
- defined_symbol_iterator_impl() = default;
+ nested_collection_iterator() = default;
- defined_symbol_iterator_impl(SectionItrT SecI, SectionItrT SecE)
- : SecI(SecI), SecE(SecE),
- SymI(SecI != SecE ? SecI->symbols().begin() : SymbolItrT()) {
- moveToNextSymbolOrEnd();
+ nested_collection_iterator(OuterItrT OuterI, OuterItrT OuterE)
+ : OuterI(OuterI), OuterE(OuterE),
+ InnerI(getInnerBegin(OuterI, OuterE)) {
+ moveToNonEmptyInnerOrEnd();
}
- bool operator==(const defined_symbol_iterator_impl &RHS) const {
- return (SecI == RHS.SecI) && (SymI == RHS.SymI);
+ bool operator==(const nested_collection_iterator &RHS) const {
+ return (OuterI == RHS.OuterI) && (InnerI == RHS.InnerI);
}
T operator*() const {
- assert(SymI != SecI->symbols().end() && "Dereferencing end?");
- return *SymI;
+ assert(InnerI != getInnerRange(*OuterI).end() && "Dereferencing end?");
+ return *InnerI;
}
- defined_symbol_iterator_impl operator++() {
- ++SymI;
- moveToNextSymbolOrEnd();
+ nested_collection_iterator operator++() {
+ ++InnerI;
+ moveToNonEmptyInnerOrEnd();
return *this;
}
private:
- void moveToNextSymbolOrEnd() {
- while (SecI != SecE && SymI == SecI->symbols().end()) {
- ++SecI;
- SymI = SecI == SecE ? SymbolItrT() : SecI->symbols().begin();
+ static InnerItrT getInnerBegin(OuterItrT OuterI, OuterItrT OuterE) {
+ return OuterI != OuterE ? getInnerRange(*OuterI).begin() : InnerItrT();
+ }
+
+ void moveToNonEmptyInnerOrEnd() {
+ while (OuterI != OuterE && InnerI == getInnerRange(*OuterI).end()) {
+ ++OuterI;
+ InnerI = getInnerBegin(OuterI, OuterE);
}
}
- SectionItrT SecI, SecE;
- SymbolItrT SymI;
+ OuterItrT OuterI, OuterE;
+ InnerItrT InnerI;
};
using defined_symbol_iterator =
- defined_symbol_iterator_impl<const_section_iterator,
- Section::symbol_iterator, Symbol *>;
+ nested_collection_iterator<const_section_iterator,
+ Section::symbol_iterator, Symbol *,
+ getSectionSymbols>;
+
+ using const_defined_symbol_iterator =
+ nested_collection_iterator<const_section_iterator,
+ Section::const_symbol_iterator, const Symbol *,
+ getSectionConstSymbols>;
+
+ using block_iterator = nested_collection_iterator<const_section_iterator,
+ Section::block_iterator,
+ Block *, getSectionBlocks>;
- using const_defined_symbol_iterator = defined_symbol_iterator_impl<
- const_section_iterator, Section::const_symbol_iterator, const Symbol *>;
+ using const_block_iterator =
+ nested_collection_iterator<const_section_iterator,
+ Section::const_block_iterator, const Block *,
+ getSectionConstBlocks>;
LinkGraph(std::string Name, unsigned PointerSize,
support::endianness Endianness)
: Name(std::move(Name)), PointerSize(PointerSize),
Endianness(Endianness) {}
- ~LinkGraph();
-
/// Returns the name of this graph (usually the name of the original
/// underlying MemoryBuffer).
const std::string &getName() { return Name; }
@@ -732,23 +803,56 @@ public:
Block &createContentBlock(Section &Parent, StringRef Content,
uint64_t Address, uint64_t Alignment,
uint64_t AlignmentOffset) {
- return createBlock(Parent, Parent.getNextBlockOrdinal(), Content, Address,
- Alignment, AlignmentOffset);
+ return createBlock(Parent, Content, Address, Alignment, AlignmentOffset);
}
/// Create a zero-fill block.
Block &createZeroFillBlock(Section &Parent, uint64_t Size, uint64_t Address,
uint64_t Alignment, uint64_t AlignmentOffset) {
- return createBlock(Parent, Parent.getNextBlockOrdinal(), Size, Address,
- Alignment, AlignmentOffset);
+ return createBlock(Parent, Size, Address, Alignment, AlignmentOffset);
}
+ /// Cache type for the splitBlock function.
+ using SplitBlockCache = Optional<SmallVector<Symbol *, 8>>;
+
+ /// Splits block B at the given index which must be greater than zero.
+ /// If SplitIndex == B.getSize() then this function is a no-op and returns B.
+ /// If SplitIndex < B.getSize() then this function returns a new block
+ /// covering the range [ 0, SplitIndex ), and B is modified to cover the range
+ /// [ SplitIndex, B.getSize() ).
+ ///
+ /// The optional Cache parameter can be used to speed up repeated calls to
+ /// splitBlock for a single block. If the value is None the cache will be
+ /// treated as uninitialized and splitBlock will populate it. Otherwise it
+ /// is assumed to contain the list of Symbols pointing at B, sorted in
+ /// descending order of offset.
+ ///
+ /// Notes:
+ ///
+ /// 1. The newly introduced block will be added to the section containing
+ ///    B. Blocks no longer carry ordinals, so no re-numbering is required
+ ///    after a split.
+ ///
+ /// 2. The cache is not automatically updated if new symbols are introduced
+ ///    between calls to splitBlock. Any newly introduced symbols may be
+ ///    added to the cache manually (descending offset order must be
+ ///    preserved), or the cache can be set to None and rebuilt by
+ ///    splitBlock on the next call.
+ Block &splitBlock(Block &B, size_t SplitIndex,
+ SplitBlockCache *Cache = nullptr);
+
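// A minimal usage sketch for splitBlock, assuming a LinkGraph G containing a
// content block B of at least 8 bytes (G and B are hypothetical names):
//
//   LinkGraph::SplitBlockCache Cache = None;
//   // Carve the first 4 bytes off into a new block. NewB covers the old
//   // range [ 0, 4 ); B is narrowed to the remaining bytes.
//   Block &NewB = G.splitBlock(B, 4, &Cache);
//   // Further splits of B can reuse Cache to avoid re-scanning B's symbols.
//   Block &NewB2 = G.splitBlock(B, 2, &Cache);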
/// Add an external symbol.
/// Some formats (e.g. ELF) allow Symbols to have sizes. For Symbols whose
/// size is not known, you should substitute '0'.
- Symbol &addExternalSymbol(StringRef Name, uint64_t Size) {
- auto &Sym = Symbol::constructExternal(
- Allocator.Allocate<Symbol>(), createAddressable(0, false), Name, Size);
+ /// For external symbols Linkage determines whether the symbol must be
+ /// present during lookup: Externals with strong linkage must be found or
+ /// an error will be emitted. Externals with weak linkage are permitted to
+ /// be undefined, in which case they are assigned a value of 0.
+ Symbol &addExternalSymbol(StringRef Name, uint64_t Size, Linkage L) {
+ auto &Sym =
+ Symbol::constructExternal(Allocator.Allocate<Symbol>(),
+ createAddressable(0, false), Name, Size, L);
ExternalSymbols.insert(&Sym);
return Sym;
}
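// Usage sketch, assuming a LinkGraph G (hypothetical name):
//
//   // Strong external: the link fails if "_printf" cannot be found.
//   Symbol &PrintfSym = G.addExternalSymbol("_printf", 0, Linkage::Strong);
//   // Weak external: resolves to address 0 if not found during lookup.
//   Symbol &HookSym = G.addExternalSymbol("_my_hook", 0, Linkage::Weak);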
@@ -769,9 +873,8 @@ public:
uint64_t Alignment, bool IsLive) {
auto &Sym = Symbol::constructCommon(
Allocator.Allocate<Symbol>(),
- createBlock(Section, Section.getNextBlockOrdinal(), Address, Size,
- Alignment, 0),
- Name, Size, S, IsLive);
+ createBlock(Section, Size, Address, Alignment, 0), Name, Size, S,
+ IsLive);
Section.addSymbol(Sym);
return Sym;
}
@@ -811,6 +914,16 @@ public:
return nullptr;
}
+ iterator_range<block_iterator> blocks() {
+ return make_range(block_iterator(Sections.begin(), Sections.end()),
+ block_iterator(Sections.end(), Sections.end()));
+ }
+
+ iterator_range<const_block_iterator> blocks() const {
+ return make_range(const_block_iterator(Sections.begin(), Sections.end()),
+ const_block_iterator(Sections.end(), Sections.end()));
+ }
+
iterator_range<external_symbol_iterator> external_symbols() {
return make_range(ExternalSymbols.begin(), ExternalSymbols.end());
}
@@ -830,10 +943,6 @@ public:
const_defined_symbol_iterator(Sections.end(), Sections.end()));
}
- iterator_range<block_iterator> blocks() {
- return make_range(Blocks.begin(), Blocks.end());
- }
-
/// Turn a defined symbol into an external one.
void makeExternal(Symbol &Sym) {
if (Sym.getAddressable().isAbsolute()) {
@@ -881,7 +990,7 @@ public:
/// Remove a block.
void removeBlock(Block &B) {
- Blocks.erase(&B);
+ B.getSection().removeBlock(B);
destroyBlock(B);
}
@@ -902,12 +1011,150 @@ private:
std::string Name;
unsigned PointerSize;
support::endianness Endianness;
- BlockSet Blocks;
SectionList Sections;
ExternalSymbolSet ExternalSymbols;
ExternalSymbolSet AbsoluteSymbols;
};
+/// Enables easy lookup of blocks by addresses.
+class BlockAddressMap {
+public:
+ using AddrToBlockMap = std::map<JITTargetAddress, Block *>;
+ using const_iterator = AddrToBlockMap::const_iterator;
+
+ /// A block predicate that includes all blocks.
+ static bool includeAllBlocks(const Block &B) { return true; }
+
+ /// A block predicate that includes only blocks with non-null addresses.
+ static bool includeNonNull(const Block &B) { return B.getAddress(); }
+
+ BlockAddressMap() = default;
+
+ /// Add a block to the map. Returns an error if the block overlaps with any
+ /// existing block.
+ template <typename PredFn = decltype(includeAllBlocks)>
+ Error addBlock(Block &B, PredFn Pred = includeAllBlocks) {
+ if (!Pred(B))
+ return Error::success();
+
+ auto I = AddrToBlock.upper_bound(B.getAddress());
+
+ // If we're not at the end of the map, check for overlap with the next
+ // element.
+ if (I != AddrToBlock.end()) {
+ if (B.getAddress() + B.getSize() > I->second->getAddress())
+ return overlapError(B, *I->second);
+ }
+
+ // If we're not at the start of the map, check for overlap with the previous
+ // element.
+ if (I != AddrToBlock.begin()) {
+ auto &PrevBlock = *std::prev(I)->second;
+ if (PrevBlock.getAddress() + PrevBlock.getSize() > B.getAddress())
+ return overlapError(B, PrevBlock);
+ }
+
+ AddrToBlock.insert(I, std::make_pair(B.getAddress(), &B));
+ return Error::success();
+ }
+
+ /// Add a block to the map without checking for overlap with existing blocks.
+ /// The client is responsible for ensuring that the block added does not
+ /// overlap with any existing block.
+ void addBlockWithoutChecking(Block &B) { AddrToBlock[B.getAddress()] = &B; }
+
+ /// Add a range of blocks to the map. Returns an error if any block in the
+ /// range overlaps with any other block in the range, or with any existing
+ /// block in the map.
+ template <typename BlockPtrRange,
+ typename PredFn = decltype(includeAllBlocks)>
+ Error addBlocks(BlockPtrRange &&Blocks, PredFn Pred = includeAllBlocks) {
+ for (auto *B : Blocks)
+ if (auto Err = addBlock(*B, Pred))
+ return Err;
+ return Error::success();
+ }
+
+ /// Add a range of blocks to the map without checking for overlap with
+ /// existing blocks. The client is responsible for ensuring that the blocks
+ /// added do not overlap with any existing block or with each other.
+ template <typename BlockPtrRange>
+ void addBlocksWithoutChecking(BlockPtrRange &&Blocks) {
+ for (auto *B : Blocks)
+ addBlockWithoutChecking(*B);
+ }
+
+ /// Iterates over (Address, Block*) pairs in ascending order of address.
+ const_iterator begin() const { return AddrToBlock.begin(); }
+ const_iterator end() const { return AddrToBlock.end(); }
+
+ /// Returns the block starting at the given address, or nullptr if no such
+ /// block exists.
+ Block *getBlockAt(JITTargetAddress Addr) const {
+ auto I = AddrToBlock.find(Addr);
+ if (I == AddrToBlock.end())
+ return nullptr;
+ return I->second;
+ }
+
+ /// Returns the block covering the given address, or nullptr if no such block
+ /// exists.
+ Block *getBlockCovering(JITTargetAddress Addr) const {
+ auto I = AddrToBlock.upper_bound(Addr);
+ if (I == AddrToBlock.begin())
+ return nullptr;
+ auto *B = std::prev(I)->second;
+ if (Addr < B->getAddress() + B->getSize())
+ return B;
+ return nullptr;
+ }
+
+private:
+ Error overlapError(Block &NewBlock, Block &ExistingBlock) {
+ auto NewBlockEnd = NewBlock.getAddress() + NewBlock.getSize();
+ auto ExistingBlockEnd =
+ ExistingBlock.getAddress() + ExistingBlock.getSize();
+ return make_error<JITLinkError>(
+ "Block at " +
+ formatv("{0:x16} -- {1:x16}", NewBlock.getAddress(), NewBlockEnd) +
+ " overlaps " +
+ formatv("{0:x16} -- {1:x16}", ExistingBlock.getAddress(),
+ ExistingBlockEnd));
+ }
+
+ AddrToBlockMap AddrToBlock;
+};
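// Usage sketch, assuming a populated LinkGraph G and an address Addr
// (hypothetical names):
//
//   BlockAddressMap AddrMap;
//   if (auto Err = AddrMap.addBlocks(G.blocks(),
//                                    BlockAddressMap::includeNonNull))
//     return Err; // Overlapping blocks are reported as an error.
//   if (Block *B = AddrMap.getBlockCovering(Addr))
//     ; // B's range [getAddress(), getAddress() + getSize()) contains Addr.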
+
+/// A map of addresses to Symbols.
+class SymbolAddressMap {
+public:
+ using SymbolVector = SmallVector<Symbol *, 1>;
+
+ /// Add a symbol to the SymbolAddressMap.
+ void addSymbol(Symbol &Sym) {
+ AddrToSymbols[Sym.getAddress()].push_back(&Sym);
+ }
+
+ /// Add all symbols in a given range to the SymbolAddressMap.
+ template <typename SymbolPtrCollection>
+ void addSymbols(SymbolPtrCollection &&Symbols) {
+ for (auto *Sym : Symbols)
+ addSymbol(*Sym);
+ }
+
+ /// Returns the list of symbols that start at the given address, or nullptr if
+ /// no such symbols exist.
+ const SymbolVector *getSymbolsAt(JITTargetAddress Addr) const {
+ auto I = AddrToSymbols.find(Addr);
+ if (I == AddrToSymbols.end())
+ return nullptr;
+ return &I->second;
+ }
+
+private:
+ std::map<JITTargetAddress, SymbolVector> AddrToSymbols;
+};
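// Usage sketch, assuming the same hypothetical G and Addr as above:
//
//   SymbolAddressMap SymMap;
//   SymMap.addSymbols(G.defined_symbols());
//   if (const auto *Syms = SymMap.getSymbolsAt(Addr))
//     for (Symbol *Sym : *Syms)
//       ; // Visits every symbol that starts exactly at Addr.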
+
/// A function for mutating LinkGraphs.
using LinkGraphPassFunction = std::function<Error(LinkGraph &)>;
@@ -943,6 +1190,14 @@ struct PassConfiguration {
LinkGraphPassList PostFixupPasses;
};
+/// Flags for symbol lookup.
+///
+/// FIXME: These basically duplicate orc::SymbolLookupFlags -- We should merge
+/// the two types once we have an OrcSupport library.
+enum class SymbolLookupFlags { RequiredSymbol, WeaklyReferencedSymbol };
+
+raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LF);
+
/// A map of symbol names to resolved addresses.
using AsyncLookupResult = DenseMap<StringRef, JITEvaluatedSymbol>;
@@ -977,6 +1232,8 @@ createLookupContinuation(Continuation Cont) {
/// Holds context for a single jitLink invocation.
class JITLinkContext {
public:
+ using LookupMap = DenseMap<StringRef, SymbolLookupFlags>;
+
/// Destroy a JITLinkContext.
virtual ~JITLinkContext();
@@ -994,7 +1251,7 @@ public:
/// Called by JITLink to resolve external symbols. This method is passed a
/// lookup continuation which it must call with a result to continue the
/// linking process.
- virtual void lookup(const DenseSet<StringRef> &Symbols,
+ virtual void lookup(const LookupMap &Symbols,
std::unique_ptr<JITLinkAsyncLookupContinuation> LC) = 0;
/// Called by JITLink once all defined symbols in the graph have been assigned
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITSymbol.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITSymbol.h
index c0f1ca4b9876..7a2a6cfa5203 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITSymbol.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/JITSymbol.h
@@ -41,6 +41,11 @@ class SymbolRef;
using JITTargetAddress = uint64_t;
/// Convert a JITTargetAddress to a pointer.
+///
+/// Note: This is a raw cast of the address bit pattern to the given pointer
+/// type. When casting to a function pointer in order to execute JIT'd code
+/// jitTargetAddressToFunction should be preferred, as it will also perform
+/// pointer signing on targets that require it.
template <typename T> T jitTargetAddressToPointer(JITTargetAddress Addr) {
static_assert(std::is_pointer<T>::value, "T must be a pointer type");
uintptr_t IntPtr = static_cast<uintptr_t>(Addr);
@@ -48,6 +53,19 @@ template <typename T> T jitTargetAddressToPointer(JITTargetAddress Addr) {
return reinterpret_cast<T>(IntPtr);
}
+/// Convert a JITTargetAddress to a callable function pointer.
+///
+/// Casts the given address to a callable function pointer. This operation
+/// will perform pointer signing for platforms that require it (e.g. arm64e).
+template <typename T> T jitTargetAddressToFunction(JITTargetAddress Addr) {
+ static_assert(
+ std::is_pointer<T>::value &&
+ std::is_function<typename std::remove_pointer<T>::type>::value,
+ "T must be a function pointer type");
+ return jitTargetAddressToPointer<T>(Addr);
+}
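// Usage sketch, assuming Addr holds the address of a JIT'd function with the
// signature int(int) (Addr is a hypothetical name):
//
//   auto *Entry = jitTargetAddressToFunction<int (*)(int)>(Addr);
//   int Result = Entry(42);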
+
+/// Convert a pointer to a JITTargetAddress.
template <typename T> JITTargetAddress pointerToJITTargetAddress(T *Ptr) {
return static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(Ptr));
}
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index 4f22a4c38796..d0a9ca5c0580 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -45,8 +45,11 @@ using VModuleKey = uint64_t;
// efficiency).
using SymbolNameSet = DenseSet<SymbolStringPtr>;
+/// A vector of symbol names.
+using SymbolNameVector = std::vector<SymbolStringPtr>;
+
/// A map from symbol names (as SymbolStringPtrs) to JITSymbols
-/// (address/flags pairs).
+/// (address/flags pairs).
using SymbolMap = DenseMap<SymbolStringPtr, JITEvaluatedSymbol>;
/// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags.
@@ -55,8 +58,244 @@ using SymbolFlagsMap = DenseMap<SymbolStringPtr, JITSymbolFlags>;
/// A map from JITDylibs to sets of symbols.
using SymbolDependenceMap = DenseMap<JITDylib *, SymbolNameSet>;
-/// A list of (JITDylib*, bool) pairs.
-using JITDylibSearchList = std::vector<std::pair<JITDylib *, bool>>;
+/// Lookup flags that apply to each dylib in the search order for a lookup.
+///
+/// If MatchExportedSymbolsOnly is used (the default) for a given dylib, then
+/// only symbols in that dylib's interface (its exported symbols) will be
+/// searched. If MatchAllSymbols is used then symbols with hidden visibility
+/// will match as well.
+enum class JITDylibLookupFlags { MatchExportedSymbolsOnly, MatchAllSymbols };
+
+/// Lookup flags that apply to each symbol in a lookup.
+///
+/// If RequiredSymbol is used (the default) for a given symbol then that symbol
+/// must be found during the lookup or the lookup will fail returning a
+/// SymbolNotFound error. If WeaklyReferencedSymbol is used and the given
+/// symbol is not found then the query will continue, and the missing symbol
+/// will simply be absent from the result (assuming the rest of the
+/// lookup succeeds).
+enum class SymbolLookupFlags { RequiredSymbol, WeaklyReferencedSymbol };
+
+/// Describes the kind of lookup being performed. The lookup kind is passed to
+/// symbol generators (if they're invoked) to help them determine what
+/// definitions to generate.
+///
+/// Static -- Lookup is being performed as-if at static link time (e.g.
+/// generators representing static archives should pull in new
+/// definitions).
+///
+/// DLSym -- Lookup is being performed as-if at runtime (e.g. generators
+/// representing static archives should not pull in new definitions).
+enum class LookupKind { Static, DLSym };
+
+/// A list of (JITDylib*, JITDylibLookupFlags) pairs to be used as a search
+/// order during symbol lookup.
+using JITDylibSearchOrder =
+ std::vector<std::pair<JITDylib *, JITDylibLookupFlags>>;
+
+/// Convenience function for creating a search order from an ArrayRef of
+/// JITDylib*, all with the same flags.
+inline JITDylibSearchOrder makeJITDylibSearchOrder(
+ ArrayRef<JITDylib *> JDs,
+ JITDylibLookupFlags Flags = JITDylibLookupFlags::MatchExportedSymbolsOnly) {
+ JITDylibSearchOrder O;
+ O.reserve(JDs.size());
+ for (auto *JD : JDs)
+ O.push_back(std::make_pair(JD, Flags));
+ return O;
+}
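// Usage sketch, assuming JITDylibs JD1 and JD2 (hypothetical names):
//
//   // Search JD1 then JD2, matching exported symbols only (the default).
//   auto Order = makeJITDylibSearchOrder({&JD1, &JD2});
//   // Search JD1 alone, but include non-exported symbols too.
//   auto PrivateOrder = makeJITDylibSearchOrder(
//       {&JD1}, JITDylibLookupFlags::MatchAllSymbols);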
+
+/// A set of symbols to look up, each associated with a SymbolLookupFlags
+/// value.
+///
+/// This class is backed by a vector and optimized for fast insertion,
+/// deletion and iteration. It does not guarantee a stable order between
+/// operations, and will not automatically detect duplicate elements (they
+/// can be manually checked by calling the containsDuplicates method).
+class SymbolLookupSet {
+public:
+ using value_type = std::pair<SymbolStringPtr, SymbolLookupFlags>;
+ using UnderlyingVector = std::vector<value_type>;
+ using iterator = UnderlyingVector::iterator;
+ using const_iterator = UnderlyingVector::const_iterator;
+
+ SymbolLookupSet() = default;
+
+ explicit SymbolLookupSet(
+ SymbolStringPtr Name,
+ SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) {
+ add(std::move(Name), Flags);
+ }
+
+ /// Construct a SymbolLookupSet from an initializer list of SymbolStringPtrs.
+ explicit SymbolLookupSet(
+ std::initializer_list<SymbolStringPtr> Names,
+ SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) {
+ Symbols.reserve(Names.size());
+ for (auto &Name : Names)
+ add(std::move(Name), Flags);
+ }
+
+ /// Construct a SymbolLookupSet from a SymbolNameSet with the given
+ /// Flags used for each value.
+ explicit SymbolLookupSet(
+ const SymbolNameSet &Names,
+ SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) {
+ Symbols.reserve(Names.size());
+ for (const auto &Name : Names)
+ add(Name, Flags);
+ }
+
+ /// Construct a SymbolLookupSet from a vector of symbols with the given Flags
+ /// used for each value.
+ /// If the ArrayRef contains duplicates it is up to the client to remove these
+ /// before using this instance for lookup.
+ explicit SymbolLookupSet(
+ ArrayRef<SymbolStringPtr> Names,
+ SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) {
+ Symbols.reserve(Names.size());
+ for (const auto &Name : Names)
+ add(Name, Flags);
+ }
+
+ /// Add an element to the set. The client is responsible for checking that
+ /// duplicates are not added.
+ void add(SymbolStringPtr Name,
+ SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) {
+ Symbols.push_back(std::make_pair(std::move(Name), Flags));
+ }
+
+ bool empty() const { return Symbols.empty(); }
+ UnderlyingVector::size_type size() const { return Symbols.size(); }
+ iterator begin() { return Symbols.begin(); }
+ iterator end() { return Symbols.end(); }
+ const_iterator begin() const { return Symbols.begin(); }
+ const_iterator end() const { return Symbols.end(); }
+
+ /// Removes the Ith element of the vector, replacing it with the last element.
+ void remove(UnderlyingVector::size_type I) {
+ std::swap(Symbols[I], Symbols.back());
+ Symbols.pop_back();
+ }
+
+ /// Removes the element pointed to by the given iterator. This iterator and
+ /// all subsequent ones (including end()) are invalidated.
+ void remove(iterator I) { remove(I - begin()); }
+
+ /// Removes all elements matching the given predicate, which must be callable
+ /// as bool(const SymbolStringPtr &, SymbolLookupFlags Flags).
+ template <typename PredFn> void remove_if(PredFn &&Pred) {
+ UnderlyingVector::size_type I = 0;
+ while (I != Symbols.size()) {
+ const auto &Name = Symbols[I].first;
+ auto Flags = Symbols[I].second;
+ if (Pred(Name, Flags))
+ remove(I);
+ else
+ ++I;
+ }
+ }
+
+ /// Loop over the elements of this SymbolLookupSet, applying the Body function
+ /// to each one. Body must be callable as
+ /// bool(const SymbolStringPtr &, SymbolLookupFlags).
+ /// If Body returns true then the element just passed in is removed from the
+ /// set. If Body returns false then the element is retained.
+ template <typename BodyFn>
+ auto forEachWithRemoval(BodyFn &&Body) -> typename std::enable_if<
+ std::is_same<decltype(Body(std::declval<const SymbolStringPtr &>(),
+ std::declval<SymbolLookupFlags>())),
+ bool>::value>::type {
+ UnderlyingVector::size_type I = 0;
+ while (I != Symbols.size()) {
+ const auto &Name = Symbols[I].first;
+ auto Flags = Symbols[I].second;
+ if (Body(Name, Flags))
+ remove(I);
+ else
+ ++I;
+ }
+ }
+
+ /// Loop over the elements of this SymbolLookupSet, applying the Body function
+ /// to each one. Body must be callable as
+ /// Expected<bool>(const SymbolStringPtr &, SymbolLookupFlags).
+ /// If Body returns a failure value, the loop exits immediately. If Body
+ /// returns true then the element just passed in is removed from the set. If
+ /// Body returns false then the element is retained.
+ template <typename BodyFn>
+ auto forEachWithRemoval(BodyFn &&Body) -> typename std::enable_if<
+ std::is_same<decltype(Body(std::declval<const SymbolStringPtr &>(),
+ std::declval<SymbolLookupFlags>())),
+ Expected<bool>>::value,
+ Error>::type {
+ UnderlyingVector::size_type I = 0;
+ while (I != Symbols.size()) {
+ const auto &Name = Symbols[I].first;
+ auto Flags = Symbols[I].second;
+ auto Remove = Body(Name, Flags);
+ if (!Remove)
+ return Remove.takeError();
+ if (*Remove)
+ remove(I);
+ else
+ ++I;
+ }
+ return Error::success();
+ }
+
+ /// Construct a SymbolNameVector from this instance by dropping the Flags
+ /// values.
+ SymbolNameVector getSymbolNames() const {
+ SymbolNameVector Names;
+ Names.reserve(Symbols.size());
+ for (auto &KV : Symbols)
+ Names.push_back(KV.first);
+ return Names;
+ }
+
+ /// Sort the lookup set by pointer value. This sort is fast but sensitive to
+ /// allocation order and so should not be used where a consistent order is
+ /// required.
+ void sortByAddress() {
+ llvm::sort(Symbols, [](const value_type &LHS, const value_type &RHS) {
+ return LHS.first < RHS.first;
+ });
+ }
+
+ /// Sort the lookup set lexicographically. This sort is slow but the order
+ /// is unaffected by allocation order.
+ void sortByName() {
+ llvm::sort(Symbols, [](const value_type &LHS, const value_type &RHS) {
+ return *LHS.first < *RHS.first;
+ });
+ }
+
+ /// Remove any duplicate elements. If a SymbolLookupSet is not duplicate-free
+ /// by construction, this method can be used to turn it into a proper set.
+ void removeDuplicates() {
+ sortByAddress();
+ auto LastI = std::unique(Symbols.begin(), Symbols.end());
+ Symbols.erase(LastI, Symbols.end());
+ }
+
+#ifndef NDEBUG
+ /// Returns true if this set contains any duplicates. This should only be used
+ /// in assertions.
+ bool containsDuplicates() {
+ if (Symbols.size() < 2)
+ return false;
+ sortByAddress();
+ for (UnderlyingVector::size_type I = 1; I != Symbols.size(); ++I)
+ if (Symbols[I].first == Symbols[I - 1].first)
+ return true;
+    return false;
+ }
+#endif
+
+private:
+ UnderlyingVector Symbols;
+};
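// Usage sketch, assuming an ExecutionSession ES whose intern method is used
// to obtain SymbolStringPtrs (ES is a hypothetical name):
//
//   SymbolLookupSet LookupSet;
//   LookupSet.add(ES.intern("main")); // Required by default.
//   LookupSet.add(ES.intern("__optional_hook"),
//                 SymbolLookupFlags::WeaklyReferencedSymbol);
//   LookupSet.removeDuplicates(); // Only needed if duplicates are possible.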
struct SymbolAliasMapEntry {
SymbolAliasMapEntry() = default;
@@ -76,6 +315,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym);
/// Render a SymbolNameSet.
raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols);
+/// Render a SymbolNameVector.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolNameVector &Symbols);
+
/// Render a SymbolFlagsMap entry.
raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap::value_type &KV);
@@ -98,8 +340,25 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolDependenceMap &Deps);
/// Render a MaterializationUnit.
raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU);
-/// Render a JITDylibSearchList.
-raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs);
+/// Render a JITDylibLookupFlags instance.
+raw_ostream &operator<<(raw_ostream &OS,
+ const JITDylibLookupFlags &JDLookupFlags);
+
+/// Render a SymbolLookupFlags instance.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LookupFlags);
+
+/// Render a LookupKind instance.
+raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K);
+
+/// Render a SymbolLookupSet entry.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupSet::value_type &KV);
+
+/// Render a SymbolLookupSet.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupSet &LookupSet);
+
+/// Render a JITDylibSearchOrder.
+raw_ostream &operator<<(raw_ostream &OS,
+ const JITDylibSearchOrder &SearchOrder);
/// Render a SymbolAliasMap.
raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases);
@@ -107,6 +366,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases);
/// Render a SymbolState.
raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S);
+/// Render a LookupKind.
+raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K);
+
/// Callback to notify client that symbols have been resolved.
using SymbolsResolvedCallback = unique_function<void(Expected<SymbolMap>)>;
@@ -139,12 +401,13 @@ public:
static char ID;
SymbolsNotFound(SymbolNameSet Symbols);
+ SymbolsNotFound(SymbolNameVector Symbols);
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
- const SymbolNameSet &getSymbols() const { return Symbols; }
+ const SymbolNameVector &getSymbols() const { return Symbols; }
private:
- SymbolNameSet Symbols;
+ SymbolNameVector Symbols;
};
/// Used to notify clients that a set of symbols could not be removed.
@@ -376,7 +639,8 @@ public:
/// Note: Care must be taken that no sets of aliases form a cycle, as such
/// a cycle will result in a deadlock when any symbol in the cycle is
/// resolved.
- ReExportsMaterializationUnit(JITDylib *SourceJD, bool MatchNonExported,
+ ReExportsMaterializationUnit(JITDylib *SourceJD,
+ JITDylibLookupFlags SourceJDLookupFlags,
SymbolAliasMap Aliases, VModuleKey K);
StringRef getName() const override;
@@ -387,7 +651,7 @@ private:
static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases);
JITDylib *SourceJD = nullptr;
- bool MatchNonExported = false;
+ JITDylibLookupFlags SourceJDLookupFlags;
SymbolAliasMap Aliases;
};
@@ -405,25 +669,26 @@ private:
inline std::unique_ptr<ReExportsMaterializationUnit>
symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) {
return std::make_unique<ReExportsMaterializationUnit>(
- nullptr, true, std::move(Aliases), std::move(K));
+ nullptr, JITDylibLookupFlags::MatchAllSymbols, std::move(Aliases),
+ std::move(K));
}
/// Create a materialization unit for re-exporting symbols from another JITDylib
/// with alternative names/flags.
-/// If MatchNonExported is true then non-exported symbols from SourceJD can be
-/// re-exported. If it is false, attempts to re-export a non-exported symbol
-/// will result in a "symbol not found" error.
+/// SourceJD will be searched using the given JITDylibLookupFlags.
inline std::unique_ptr<ReExportsMaterializationUnit>
reexports(JITDylib &SourceJD, SymbolAliasMap Aliases,
- bool MatchNonExported = false, VModuleKey K = VModuleKey()) {
+ JITDylibLookupFlags SourceJDLookupFlags =
+ JITDylibLookupFlags::MatchExportedSymbolsOnly,
+ VModuleKey K = VModuleKey()) {
return std::make_unique<ReExportsMaterializationUnit>(
- &SourceJD, MatchNonExported, std::move(Aliases), std::move(K));
+ &SourceJD, SourceJDLookupFlags, std::move(Aliases), std::move(K));
}
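// Usage sketch, assuming an ExecutionSession ES and JITDylibs SrcJD and
// DstJD (hypothetical names):
//
//   SymbolAliasMap Aliases;
//   Aliases[ES.intern("bar")] =
//       SymbolAliasMapEntry(ES.intern("foo"), JITSymbolFlags::Exported);
//   // Define "bar" in DstJD as a re-export of "foo" from SrcJD.
//   if (auto Err = DstJD.define(reexports(SrcJD, std::move(Aliases))))
//     return Err;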
/// Build a SymbolAliasMap for the common case where you want to re-export
/// symbols from another JITDylib with the same linkage/flags.
Expected<SymbolAliasMap>
-buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols);
+buildSimpleReexportsAAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols);
/// Represents the state that a symbol has reached during materialization.
enum class SymbolState : uint8_t {
@@ -448,7 +713,7 @@ public:
/// Create a query for the given symbols. The NotifyComplete
/// callback will be called once all queried symbols reach the given
/// minimum state.
- AsynchronousSymbolQuery(const SymbolNameSet &Symbols,
+ AsynchronousSymbolQuery(const SymbolLookupSet &Symbols,
SymbolState RequiredState,
SymbolsResolvedCallback NotifyComplete);
@@ -456,6 +721,15 @@ public:
void notifySymbolMetRequiredState(const SymbolStringPtr &Name,
JITEvaluatedSymbol Sym);
+ /// Remove a symbol from the query. This is used to drop weakly referenced
+ /// symbols that are not found.
+ void dropSymbol(const SymbolStringPtr &Name) {
+ assert(ResolvedSymbols.count(Name) &&
+ "Redundant removal of weakly-referenced symbol");
+ ResolvedSymbols.erase(Name);
+ --OutstandingSymbolsCount;
+ }
+
/// Returns true if all symbols covered by this query have been
/// resolved.
bool isComplete() const { return OutstandingSymbolsCount == 0; }
@@ -497,11 +771,21 @@ class JITDylib {
friend class ExecutionSession;
friend class MaterializationResponsibility;
public:
+ /// Definition generators can be attached to JITDylibs to generate new
+ /// definitions for otherwise unresolved symbols during lookup.
class DefinitionGenerator {
public:
virtual ~DefinitionGenerator();
- virtual Expected<SymbolNameSet>
- tryToGenerate(JITDylib &Parent, const SymbolNameSet &Names) = 0;
+
+ /// DefinitionGenerators should override this method to insert new
+ /// definitions into the parent JITDylib. K specifies the kind of this
+ /// lookup. JD specifies the target JITDylib being searched, and
+ /// JDLookupFlags specifies whether the search should match against
+ /// hidden symbols. Finally, Symbols describes the set of unresolved
+ /// symbols and their associated lookup flags.
+ virtual Error tryToGenerate(LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags,
+ const SymbolLookupSet &LookupSet) = 0;
};
using AsynchronousSymbolQuerySet =
@@ -552,18 +836,20 @@ public:
/// as the first in the search order (instead of this dylib) ensures that
/// definitions within this dylib resolve to the lazy-compiling stubs,
/// rather than immediately materializing the definitions in this dylib.
- void setSearchOrder(JITDylibSearchList NewSearchOrder,
- bool SearchThisJITDylibFirst = true,
- bool MatchNonExportedInThisDylib = true);
+ void setSearchOrder(JITDylibSearchOrder NewSearchOrder,
+ bool SearchThisJITDylibFirst = true);
/// Add the given JITDylib to the search order for definitions in this
/// JITDylib.
- void addToSearchOrder(JITDylib &JD, bool MatcNonExported = false);
+ void addToSearchOrder(JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags =
+ JITDylibLookupFlags::MatchExportedSymbolsOnly);
/// Replace OldJD with NewJD in the search order if OldJD is present.
/// Otherwise this operation is a no-op.
void replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD,
- bool MatchNonExported = false);
+ JITDylibLookupFlags JDLookupFlags =
+ JITDylibLookupFlags::MatchExportedSymbolsOnly);
/// Remove the given JITDylib from the search order for this JITDylib if it is
/// present. Otherwise this operation is a no-op.
@@ -572,7 +858,7 @@ public:
/// Do something with the search order (run under the session lock).
template <typename Func>
auto withSearchOrderDo(Func &&F)
- -> decltype(F(std::declval<const JITDylibSearchList &>()));
+ -> decltype(F(std::declval<const JITDylibSearchOrder &>()));
/// Define all symbols provided by the materialization unit to be part of this
/// JITDylib.
@@ -605,8 +891,11 @@ public:
Error remove(const SymbolNameSet &Names);
/// Search the given JITDylib for the symbols in Symbols. If found, store
- /// the flags for each symbol in Flags. Returns any unresolved symbols.
- Expected<SymbolFlagsMap> lookupFlags(const SymbolNameSet &Names);
+ /// the flags for each symbol in the returned map. If any required symbols
+ /// are not found then an error will be returned.
+ Expected<SymbolFlagsMap> lookupFlags(LookupKind K,
+ JITDylibLookupFlags JDLookupFlags,
+ SymbolLookupSet LookupSet);
/// Dump current JITDylib state to OS.
void dump(raw_ostream &OS);
@@ -709,20 +998,23 @@ private:
Error defineImpl(MaterializationUnit &MU);
- Expected<SymbolNameSet> lookupFlagsImpl(SymbolFlagsMap &Flags,
- const SymbolNameSet &Names);
+ void lookupFlagsImpl(SymbolFlagsMap &Result, LookupKind K,
+ JITDylibLookupFlags JDLookupFlags,
+ SymbolLookupSet &Unresolved);
- Error lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, bool MatchNonExported,
- MaterializationUnitList &MUs);
+ Error lodgeQuery(MaterializationUnitList &MUs,
+ std::shared_ptr<AsynchronousSymbolQuery> &Q, LookupKind K,
+ JITDylibLookupFlags JDLookupFlags,
+ SymbolLookupSet &Unresolved);
- Error lodgeQueryImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, bool MatchNonExported,
- MaterializationUnitList &MUs);
+ Error lodgeQueryImpl(MaterializationUnitList &MUs,
+ std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ LookupKind K, JITDylibLookupFlags JDLookupFlags,
+ SymbolLookupSet &Unresolved);
bool lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
- SymbolNameSet &Unresolved);
+ SymbolLookupSet &Unresolved);
void detachQueryHelper(AsynchronousSymbolQuery &Q,
const SymbolNameSet &QuerySymbols);
@@ -754,7 +1046,7 @@ private:
UnmaterializedInfosMap UnmaterializedInfos;
MaterializingInfosMap MaterializingInfos;
std::vector<std::unique_ptr<DefinitionGenerator>> DefGenerators;
- JITDylibSearchList SearchOrder;
+ JITDylibSearchOrder SearchOrder;
};
/// An ExecutionSession represents a running JIT program.
@@ -787,10 +1079,6 @@ public:
return F();
}
- /// Get the "main" JITDylib, which is created automatically on construction of
- /// the ExecutionSession.
- JITDylib &getMainJITDylib();
-
/// Return a pointer to the "name" JITDylib.
/// Ownership of JITDylib remains within Execution Session
JITDylib *getJITDylibByName(StringRef Name);
@@ -800,8 +1088,7 @@ public:
/// The JITDylib Name is required to be unique. Clients should verify that
/// names are not being re-used (e.g. by calling getJITDylibByName) if names
/// are based on user input.
- JITDylib &createJITDylib(std::string Name,
- bool AddToMainDylibSearchOrder = true);
+ JITDylib &createJITDylib(std::string Name);
/// Allocate a module key for a new module to add to the JIT.
VModuleKey allocateVModule() {
@@ -863,8 +1150,9 @@ public:
/// dependent symbols for this query (e.g. it is being made by a top-level
/// client to get an address to call) then the value NoDependenciesToRegister
/// can be used.
- void lookup(const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols,
- SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete,
+ void lookup(LookupKind K, const JITDylibSearchOrder &SearchOrder,
+ SymbolLookupSet Symbols, SymbolState RequiredState,
+ SymbolsResolvedCallback NotifyComplete,
RegisterDependenciesFunction RegisterDependencies);
/// Blocking version of lookup above. Returns the resolved symbol map.
@@ -874,8 +1162,9 @@ public:
/// or an error occurs. If WaitUntilReady is false and an error occurs
/// after resolution, the function will return a success value, but the
/// error will be reported via reportErrors.
- Expected<SymbolMap> lookup(const JITDylibSearchList &SearchOrder,
- const SymbolNameSet &Symbols,
+ Expected<SymbolMap> lookup(const JITDylibSearchOrder &SearchOrder,
+ const SymbolLookupSet &Symbols,
+ LookupKind K = LookupKind::Static,
SymbolState RequiredState = SymbolState::Ready,
RegisterDependenciesFunction RegisterDependencies =
NoDependenciesToRegister);
@@ -883,7 +1172,7 @@ public:
/// Convenience version of blocking lookup.
/// Searches each of the JITDylibs in the search order in turn for the given
/// symbol.
- Expected<JITEvaluatedSymbol> lookup(const JITDylibSearchList &SearchOrder,
+ Expected<JITEvaluatedSymbol> lookup(const JITDylibSearchOrder &SearchOrder,
SymbolStringPtr Symbol);
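// Usage sketch, assuming an ExecutionSession ES and a JITDylib JD
// (hypothetical names):
//
//   auto Sym = ES.lookup(makeJITDylibSearchOrder({&JD}), ES.intern("main"));
//   if (!Sym)
//     return Sym.takeError();
//   JITTargetAddress MainAddr = Sym->getAddress();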
/// Convenience version of blocking lookup.
@@ -951,7 +1240,7 @@ GeneratorT &JITDylib::addGenerator(std::unique_ptr<GeneratorT> DefGenerator) {
template <typename Func>
auto JITDylib::withSearchOrderDo(Func &&F)
- -> decltype(F(std::declval<const JITDylibSearchList &>())) {
+ -> decltype(F(std::declval<const JITDylibSearchOrder &>())) {
return ES.runSessionLocked([&]() { return F(SearchOrder); });
}
@@ -988,7 +1277,7 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU) {
});
}
-/// ReexportsGenerator can be used with JITDylib::setGenerator to automatically
+/// ReexportsGenerator can be used with JITDylib::addGenerator to automatically
/// re-export a subset of the source JITDylib's symbols in the target.
class ReexportsGenerator : public JITDylib::DefinitionGenerator {
public:
@@ -997,15 +1286,17 @@ public:
/// Create a reexports generator. If an Allow predicate is passed, only
/// symbols for which the predicate returns true will be reexported. If no
/// Allow predicate is passed, all symbols will be reexported.
- ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false,
+ ReexportsGenerator(JITDylib &SourceJD,
+ JITDylibLookupFlags SourceJDLookupFlags,
SymbolPredicate Allow = SymbolPredicate());
- Expected<SymbolNameSet> tryToGenerate(JITDylib &JD,
- const SymbolNameSet &Names) override;
+ Error tryToGenerate(LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags,
+ const SymbolLookupSet &LookupSet) override;
private:
JITDylib &SourceJD;
- bool MatchNonExported = false;
+ JITDylibLookupFlags SourceJDLookupFlags;
SymbolPredicate Allow;
};
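// Usage sketch, assuming JITDylibs SourceJD and TargetJD (hypothetical
// names):
//
//   TargetJD.addGenerator(std::make_unique<ReexportsGenerator>(
//       SourceJD, JITDylibLookupFlags::MatchExportedSymbolsOnly));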
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h
new file mode 100644
index 000000000000..b2ef29d65ffe
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h
@@ -0,0 +1,58 @@
+//===----- DebugUtils.h - Utilities for debugging ORC JITs ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for debugging ORC-based JITs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_DEBUGUTILS_H
+#define LLVM_EXECUTIONENGINE_ORC_DEBUGUTILS_H
+
+#include "llvm/Support/Error.h"
+#include <memory>
+#include <string>
+
+namespace llvm {
+
+class MemoryBuffer;
+
+namespace orc {
+
+/// A function object that can be used as an ObjectTransformLayer transform
+/// to dump object files to disk at a specified path.
+class DumpObjects {
+public:
+ /// Construct a DumpObjects transform that will dump objects to disk.
+ ///
+ /// @param DumpDir specifies the path to write dumped objects to. DumpDir may
+ /// be empty, in which case files will be dumped to the working directory. If
+ /// DumpDir is non-empty then any trailing separators will be discarded.
+ ///
+ /// @param IdentifierOverride specifies a file name stem to use when dumping
+ /// objects. If empty, each MemoryBuffer's identifier will be used (with a .o
+ /// suffix added if not already present). If an identifier override is
+ /// supplied it will be used instead (since all buffers will use the same
+ /// identifier, the resulting files will be named <ident>.o, <ident>.2.o,
+ /// <ident>.3.o, and so on). IdentifierOverride should not contain an
+ /// extension, as a .o suffix will be added by DumpObjects.
+ DumpObjects(std::string DumpDir = "", std::string IdentifierOverride = "");
+
+ /// Dumps the given buffer to disk.
+ Expected<std::unique_ptr<MemoryBuffer>>
+ operator()(std::unique_ptr<MemoryBuffer> Obj);
+
+private:
+ StringRef getBufferIdentifier(MemoryBuffer &B);
+ std::string DumpDir;
+ std::string IdentifierOverride;
+};
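// Usage sketch, assuming an LLJIT instance J whose object transform layer
// exposes a setTransform method (J is a hypothetical name, and setTransform
// is assumed here rather than shown in this patch):
//
//   J->getObjTransformLayer().setTransform(
//       DumpObjects("/tmp/jit-objs", "myjit"));
//   // Subsequent objects are written as myjit.o, myjit.2.o, ...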
+
+} // End namespace orc
+} // End namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_DEBUGUTILS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
index cf0a428662ef..f7255c5af845 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -40,6 +40,17 @@ namespace orc {
class ObjectLayer;
+/// Run a main function, returning the result.
+///
+/// If the optional ProgramName argument is given then it will be inserted
+/// before the strings in Args as the first argument to the called function.
+///
+/// It is legal to have an empty argument list and no program name; however,
+/// many main functions expect at least a name argument and will fail if none
+/// is provided.
+int runAsMain(int (*Main)(int, char *[]), ArrayRef<std::string> Args,
+ Optional<StringRef> ProgramName = None);
+
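// Usage sketch, assuming MainAddr holds the address of a JIT'd main function
// (MainAddr is a hypothetical name):
//
//   auto *MainFn =
//       jitTargetAddressToFunction<int (*)(int, char *[])>(MainAddr);
//   int RC = runAsMain(MainFn, {"input.txt"}, StringRef("prog"));
//   // The callee sees argc == 2: {"prog", "input.txt"}.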
/// This iterator provides a convenient way to iterate over the elements
/// of an llvm.global_ctors/llvm.global_dtors instance.
///
@@ -242,7 +253,7 @@ public:
/// passes the 'Allow' predicate will be added to the JITDylib.
class DynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator {
public:
- using SymbolPredicate = std::function<bool(SymbolStringPtr)>;
+ using SymbolPredicate = std::function<bool(const SymbolStringPtr &)>;
/// Create a DynamicLibrarySearchGenerator that searches for symbols in the
/// given sys::DynamicLibrary.
@@ -268,8 +279,9 @@ public:
return Load(nullptr, GlobalPrefix, std::move(Allow));
}
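// Usage sketch: reflect the host process's own exported symbols into a
// JITDylib JD, assuming a DataLayout DL (JD and DL are hypothetical names):
//
//   auto Gen = DynamicLibrarySearchGenerator::GetForCurrentProcess(
//       DL.getGlobalPrefix());
//   if (!Gen)
//     return Gen.takeError();
//   JD.addGenerator(std::move(*Gen));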
- Expected<SymbolNameSet> tryToGenerate(JITDylib &JD,
- const SymbolNameSet &Names) override;
+ Error tryToGenerate(LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags,
+ const SymbolLookupSet &Symbols) override;
private:
sys::DynamicLibrary Dylib;
@@ -292,13 +304,14 @@ public:
Load(ObjectLayer &L, const char *FileName);
/// Try to create a StaticLibrarySearchGenerator from the given memory buffer.
- /// Thhis call will succeed if the buffer contains a valid archive, otherwise
+ /// This call will succeed if the buffer contains a valid archive, otherwise
/// it will return an error.
static Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
Create(ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer);
- Expected<SymbolNameSet> tryToGenerate(JITDylib &JD,
- const SymbolNameSet &Names) override;
+ Error tryToGenerate(LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags,
+ const SymbolLookupSet &Symbols) override;
private:
StaticLibraryDefinitionGenerator(ObjectLayer &L,
@@ -307,8 +320,7 @@ private:
ObjectLayer &L;
std::unique_ptr<MemoryBuffer> ArchiveBuffer;
- object::Archive Archive;
- size_t UnrealizedObjects = 0;
+ std::unique_ptr<object::Archive> Archive;
};
} // end namespace orc
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index a7ed5372d1e4..a9ab3a630a64 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -84,7 +84,7 @@ public:
return std::move(LTP);
}
- /// Get a free trampoline. Returns an error if one can not be provide (e.g.
+ /// Get a free trampoline. Returns an error if one can not be provided (e.g.
/// because the pool is empty and can not be grown).
Expected<JITTargetAddress> getTrampoline() override {
std::lock_guard<std::mutex> Lock(LTPMutex);
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
index bcbd72e68f15..4f2f55770996 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
@@ -79,18 +79,30 @@ public:
return *this;
}
+ /// Get the relocation model.
+ const Optional<Reloc::Model> &getRelocationModel() const { return RM; }
+
/// Set the code model.
JITTargetMachineBuilder &setCodeModel(Optional<CodeModel::Model> CM) {
this->CM = std::move(CM);
return *this;
}
+ /// Get the code model.
+ const Optional<CodeModel::Model> &getCodeModel() const { return CM; }
+
/// Set the LLVM CodeGen optimization level.
JITTargetMachineBuilder &setCodeGenOptLevel(CodeGenOpt::Level OptLevel) {
this->OptLevel = OptLevel;
return *this;
}
+ /// Set subtarget features.
+ JITTargetMachineBuilder &setFeatures(StringRef FeatureString) {
+ Features = SubtargetFeatures(FeatureString);
+ return *this;
+ }
+
/// Add subtarget features.
JITTargetMachineBuilder &
addFeatures(const std::vector<std::string> &FeatureVec);
@@ -101,6 +113,17 @@ public:
/// Access subtarget features.
const SubtargetFeatures &getFeatures() const { return Features; }
+ /// Set TargetOptions.
+ ///
+ /// Note: This operation will overwrite any previously configured options,
+ /// including EmulatedTLS and ExplicitEmulatedTLS which
+ /// the JITTargetMachineBuilder sets by default. Clients are responsible
+ /// for re-enabling these overwritten options.
+ JITTargetMachineBuilder &setOptions(TargetOptions Options) {
+ this->Options = std::move(Options);
+ return *this;
+ }
+
/// Access TargetOptions.
TargetOptions &getOptions() { return Options; }
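A sketch of the new setters and getters, assuming host detection succeeds. Because setOptions() replaces the defaults (see the note above), the current options are copied, adjusted, and written back rather than built from scratch; the "+avx2" feature string and the exception-model tweak are placeholders.

    auto JTMB = cantFail(JITTargetMachineBuilder::detectHost());
    JTMB.setFeatures("+avx2");              // replaces any prior feature string
    TargetOptions Opts = JTMB.getOptions(); // copy, so defaults are preserved
    Opts.ExceptionModel = ExceptionHandling::None; // hypothetical tweak
    JTMB.setOptions(std::move(Opts));       // overwrites, as documented above
    auto TM = cantFail(JTMB.createTargetMachine());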
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
index b1e47d77557c..c048ff3d5522 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -117,6 +117,9 @@ public:
/// Returns a reference to the ObjLinkingLayer
ObjectLayer &getObjLinkingLayer() { return *ObjLinkingLayer; }
+ /// Returns a reference to the object transform layer.
+ ObjectTransformLayer &getObjTransformLayer() { return ObjTransformLayer; }
+
protected:
static std::unique_ptr<ObjectLayer>
createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES);
@@ -140,6 +143,7 @@ protected:
std::unique_ptr<ThreadPool> CompileThreads;
std::unique_ptr<ObjectLayer> ObjLinkingLayer;
+ ObjectTransformLayer ObjTransformLayer;
std::unique_ptr<IRCompileLayer> CompileLayer;
CtorDtorRunner CtorRunner, DtorRunner;
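A sketch of using the new accessor on an existing LLJIT instance J: install a transform that observes each emitted object before it reaches the linking layer (pass-through only; any rewriting function with the same signature would do).

    J->getObjTransformLayer().setTransform(
        [](std::unique_ptr<MemoryBuffer> Obj)
            -> Expected<std::unique_ptr<MemoryBuffer>> {
          dbgs() << "emitting object, " << Obj->getBufferSize() << " bytes\n";
          return std::move(Obj);
        });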
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
index caf8e707516d..50d25f18891e 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -79,7 +79,7 @@ public:
/// Construct an ObjectLinkingLayer with the given NotifyLoaded,
/// and NotifyEmitted functors.
ObjectLinkingLayer(ExecutionSession &ES,
- jitlink::JITLinkMemoryManager &MemMgr);
+ std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr);
/// Destruct an ObjectLinkingLayer.
~ObjectLinkingLayer();
@@ -145,7 +145,7 @@ private:
Error removeAllModules();
mutable std::mutex LayerMutex;
- jitlink::JITLinkMemoryManager &MemMgr;
+ std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr;
bool OverrideObjectFlags = false;
bool AutoClaimObjectSymbols = false;
ReturnObjectBufferFunction ReturnObjectBuffer;
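With this signature change the layer owns its memory manager outright; a construction sketch using jitlink's in-process manager:

    auto ObjLayer = std::make_unique<ObjectLinkingLayer>(
        ES, std::make_unique<jitlink::InProcessMemoryManager>());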
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
index eac1cc3e097a..bf989cc8677c 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
@@ -29,11 +29,15 @@ public:
std::unique_ptr<MemoryBuffer>)>;
ObjectTransformLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
- TransformFunction Transform);
+ TransformFunction Transform = TransformFunction());
void emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) override;
+ void setTransform(TransformFunction Transform) {
+ this->Transform = std::move(Transform);
+ }
+
private:
ObjectLayer &BaseLayer;
TransformFunction Transform;
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
index 38246bc480b6..2e58ddd75d31 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -29,7 +29,7 @@ namespace orc {
/// Generic ORC ABI support.
///
-/// This class can be substituted as the target architecure support class for
+/// This class can be substituted as the target architecture support class for
/// ORC templates that require one (e.g. IndirectStubsManagers). It does not
/// support lazy JITing however, and any attempt to use that functionality
/// will result in execution of an llvm_unreachable.
@@ -71,7 +71,7 @@ public:
};
/// Provide information about stub blocks generated by the
-/// makeIndirectStubsBlock function.
+/// makeIndirectStubsBlock function.
template <unsigned StubSizeVal> class GenericIndirectStubsInfo {
public:
const static unsigned StubSize = StubSizeVal;
@@ -95,13 +95,13 @@ public:
unsigned getNumStubs() const { return NumStubs; }
/// Get a pointer to the stub at the given index, which must be in
- /// the range 0 .. getNumStubs() - 1.
+ /// the range 0 .. getNumStubs() - 1.
void *getStub(unsigned Idx) const {
return static_cast<char *>(StubsMem.base()) + Idx * StubSize;
}
/// Get a pointer to the implementation-pointer at the given index,
- /// which must be in the range 0 .. getNumStubs() - 1.
+ /// which must be in the range 0 .. getNumStubs() - 1.
void **getPtr(unsigned Idx) const {
char *PtrsBase = static_cast<char *>(StubsMem.base()) + NumStubs * StubSize;
return reinterpret_cast<void **>(PtrsBase) + Idx;
@@ -123,19 +123,19 @@ public:
using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr,
void *TrampolineId);
- /// Write the resolver code into the given memory. The user is be
- /// responsible for allocating the memory and setting permissions.
+ /// Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
void *CallbackMgr);
- /// Write the requsted number of trampolines into the given memory,
- /// which must be big enough to hold 1 pointer, plus NumTrampolines
- /// trampolines.
+ /// Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
unsigned NumTrampolines);
/// Emit at least MinStubs worth of indirect call stubs, rounded out to
- /// the nearest page size.
+ /// the nearest page size.
///
/// E.g. Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k
/// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
@@ -154,14 +154,14 @@ public:
using IndirectStubsInfo = GenericIndirectStubsInfo<8>;
- /// Write the requsted number of trampolines into the given memory,
- /// which must be big enough to hold 1 pointer, plus NumTrampolines
- /// trampolines.
+ /// Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
unsigned NumTrampolines);
/// Emit at least MinStubs worth of indirect call stubs, rounded out to
- /// the nearest page size.
+ /// the nearest page size.
///
/// E.g. Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k
/// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
@@ -180,8 +180,8 @@ public:
using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr,
void *TrampolineId);
- /// Write the resolver code into the given memory. The user is be
- /// responsible for allocating the memory and setting permissions.
+ /// Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
void *CallbackMgr);
};
@@ -196,8 +196,8 @@ public:
using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr,
void *TrampolineId);
- /// Write the resolver code into the given memory. The user is be
- /// responsible for allocating the memory and setting permissions.
+ /// Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
void *CallbackMgr);
};
@@ -216,19 +216,19 @@ public:
using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr,
void *TrampolineId);
- /// Write the resolver code into the given memory. The user is be
- /// responsible for allocating the memory and setting permissions.
+ /// Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
void *CallbackMgr);
- /// Write the requsted number of trampolines into the given memory,
- /// which must be big enough to hold 1 pointer, plus NumTrampolines
- /// trampolines.
+ /// Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
unsigned NumTrampolines);
/// Emit at least MinStubs worth of indirect call stubs, rounded out to
- /// the nearest page size.
+ /// the nearest page size.
///
/// E.g. Asking for 4 stubs on i386, where stubs are 8-bytes, with 4k
/// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
@@ -249,16 +249,16 @@ public:
using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr,
void *TrampolineId);
- /// @brief Write the requsted number of trampolines into the given memory,
- /// which must be big enough to hold 1 pointer, plus NumTrampolines
- /// trampolines.
+ /// Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,unsigned NumTrampolines);
- /// @brief Write the resolver code into the given memory. The user is be
- /// responsible for allocating the memory and setting permissions.
+ /// Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,void *CallbackMgr, bool isBigEndian);
- /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
- /// the nearest page size.
+ /// Emit at least MinStubs worth of indirect call stubs, rounded out to
+ /// the nearest page size.
///
/// E.g. Asking for 4 stubs on Mips32, where stubs are 8-bytes, with 4k
/// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
@@ -291,17 +291,17 @@ public:
using IndirectStubsInfo = GenericIndirectStubsInfo<32>;
using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr,
void *TrampolineId);
- /// @brief Write the resolver code into the given memory. The user is be
- /// responsible for allocating the memory and setting permissions.
+ /// Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,void *CallbackMgr);
- /// @brief Write the requsted number of trampolines into the given memory,
- /// which must be big enough to hold 1 pointer, plus NumTrampolines
- /// trampolines.
+ /// Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,unsigned NumTrampolines);
- /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
- /// the nearest page size.
+ /// Emit at least MinStubs worth of indirect call stubs, rounded out to
+ /// the nearest page size.
///
/// E.g. Asking for 4 stubs on Mips64, where stubs are 8-bytes, with 4k
/// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h
index e5d6a3eca85f..61e2e49a872a 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h
@@ -14,6 +14,8 @@
#define LLVM_EXECUTIONENGINE_ORC_ORCERROR_H
#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
#include <system_error>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
index e7b598d8f812..3ff5a5f6e90e 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
@@ -16,8 +16,8 @@
#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/Orc/RPCUtils.h"
-#include "llvm/ExecutionEngine/Orc/RawByteChannel.h"
+#include "llvm/ExecutionEngine/Orc/RPC/RPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h"
namespace llvm {
namespace orc {
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h
index 752a0a34e0a1..9c69a84f4c67 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h
@@ -1,4 +1,4 @@
-//===- llvm/ExecutionEngine/Orc/RPCSerialization.h --------------*- C++ -*-===//
+//===- llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,7 +9,7 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_RPCSERIALIZATION_H
#define LLVM_EXECUTIONENGINE_ORC_RPCSERIALIZATION_H
-#include "OrcError.h"
+#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/Support/thread.h"
#include <map>
#include <mutex>
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCUtils.h
index ee9c2cc69c30..ed09363dcecc 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCUtils.h
@@ -23,7 +23,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
-#include "llvm/ExecutionEngine/Orc/RPCSerialization.h"
+#include "llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
#include <future>
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RawByteChannel.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h
index 46b7c59450e6..50e26f8449df 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h
@@ -1,4 +1,4 @@
-//===- llvm/ExecutionEngine/Orc/RawByteChannel.h ----------------*- C++ -*-===//
+//===- llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,7 +10,7 @@
#define LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/ExecutionEngine/Orc/RPCSerialization.h"
+#include "llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstdint>
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index c5106cf09ecc..091394795c0b 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -54,6 +54,8 @@ public:
RTDyldObjectLinkingLayer(ExecutionSession &ES,
GetMemoryManagerFunction GetMemoryManager);
+ ~RTDyldObjectLinkingLayer();
+
/// Emit the object.
void emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) override;
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
index 766a6b070f12..f6b86bb23167 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
@@ -100,23 +100,27 @@ private:
SymbolsInJD.insert(ImplSymbolName);
}
- DEBUG_WITH_TYPE("orc", for (auto &I
- : SpeculativeLookUpImpls) {
- llvm::dbgs() << "\n In " << I.first->getName() << " JITDylib ";
- for (auto &N : I.second)
- llvm::dbgs() << "\n Likely Symbol : " << N;
+ DEBUG_WITH_TYPE("orc", {
+ for (auto &I : SpeculativeLookUpImpls) {
+ llvm::dbgs() << "\n In " << I.first->getName() << " JITDylib ";
+ for (auto &N : I.second)
+ llvm::dbgs() << "\n Likely Symbol : " << N;
+ }
});
// for a given symbol, there may be no symbol qualified for speculatively
// compile try to fix this before jumping to this code if possible.
for (auto &LookupPair : SpeculativeLookUpImpls)
- ES.lookup(JITDylibSearchList({{LookupPair.first, true}}),
- LookupPair.second, SymbolState::Ready,
- [this](Expected<SymbolMap> Result) {
- if (auto Err = Result.takeError())
- ES.reportError(std::move(Err));
- },
- NoDependenciesToRegister);
+ ES.lookup(
+ LookupKind::Static,
+ makeJITDylibSearchOrder(LookupPair.first,
+ JITDylibLookupFlags::MatchAllSymbols),
+ SymbolLookupSet(LookupPair.second), SymbolState::Ready,
+ [this](Expected<SymbolMap> Result) {
+ if (auto Err = Result.takeError())
+ ES.reportError(std::move(Err));
+ },
+ NoDependenciesToRegister);
}
public:
@@ -151,8 +155,11 @@ public:
this->getES().reportError(ReadySymbol.takeError());
};
// Include non-exported symbols also.
- ES.lookup(JITDylibSearchList({{JD, true}}), SymbolNameSet({Target}),
- SymbolState::Ready, OnReadyFixUp, NoDependenciesToRegister);
+ ES.lookup(
+ LookupKind::Static,
+ makeJITDylibSearchOrder(JD, JITDylibLookupFlags::MatchAllSymbols),
+ SymbolLookupSet(Target, SymbolLookupFlags::WeaklyReferencedSymbol),
+ SymbolState::Ready, OnReadyFixUp, NoDependenciesToRegister);
}
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
new file mode 100644
index 000000000000..2f9a5ee71e67
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -0,0 +1,109 @@
+//===- OMPConstants.h - OpenMP related constants and helpers -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines constants and helpers used when dealing with OpenMP.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPENMP_CONSTANTS_H
+#define LLVM_OPENMP_CONSTANTS_H
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+class Type;
+class Module;
+class StructType;
+class PointerType;
+class FunctionType;
+
+namespace omp {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
+/// IDs for all OpenMP directives.
+enum class Directive {
+#define OMP_DIRECTIVE(Enum, ...) Enum,
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+};
+
+/// Make the enum values available in the llvm::omp namespace. This allows us to
+/// write something like OMPD_parallel if we have a `using namespace omp`. At
+/// the same time we do not lose the strong type guarantees of the enum class,
+/// that is we cannot pass an unsigned as Directive without an explicit cast.
+#define OMP_DIRECTIVE(Enum, ...) constexpr auto Enum = omp::Directive::Enum;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
+/// IDs for all omp runtime library (RTL) functions.
+enum class RuntimeFunction {
+#define OMP_RTL(Enum, ...) Enum,
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+};
+
+#define OMP_RTL(Enum, ...) constexpr auto Enum = omp::RuntimeFunction::Enum;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
+/// IDs for the different proc bind kinds.
+enum class ProcBindKind {
+#define OMP_PROC_BIND_KIND(Enum, Str, Value) Enum = Value,
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+};
+
+#define OMP_PROC_BIND_KIND(Enum, ...) \
+ constexpr auto Enum = omp::ProcBindKind::Enum;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
+/// IDs for all omp runtime library ident_t flag encodings (see
+/// their definition in openmp/runtime/src/kmp.h).
+enum class IdentFlag {
+#define OMP_IDENT_FLAG(Enum, Str, Value) Enum = Value,
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ LLVM_MARK_AS_BITMASK_ENUM(0x7FFFFFFF)
+};
+
+#define OMP_IDENT_FLAG(Enum, ...) constexpr auto Enum = omp::IdentFlag::Enum;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
+/// Parse \p Str and return the directive it matches or OMPD_unknown if none.
+Directive getOpenMPDirectiveKind(StringRef Str);
+
+/// Return a textual representation of the directive \p D.
+StringRef getOpenMPDirectiveName(Directive D);
+
+/// Forward declarations for LLVM-IR types (simple, function and structure) are
+/// generated below. Their names are defined and used in OpenMP/OMPKinds.def.
+/// Here we provide the forward declarations, the initializeTypes function will
+/// provide the values.
+///
+///{
+namespace types {
+
+#define OMP_TYPE(VarName, InitValue) extern Type *VarName;
+#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
+ extern FunctionType *VarName; \
+ extern PointerType *VarName##Ptr;
+#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
+ extern StructType *VarName; \
+ extern PointerType *VarName##Ptr;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
+/// Helper to initialize all types defined in OpenMP/OMPKinds.def.
+void initializeTypes(Module &M);
+
+/// Helper to uninitialize all types defined in OpenMP/OMPKinds.def.
+void uninitializeTypes();
+
+} // namespace types
+///}
+
+} // end namespace omp
+
+} // end namespace llvm
+
+#endif // LLVM_OPENMP_CONSTANTS_H
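A small sketch of the resulting API (assumes `using namespace llvm;`), showing directive parsing and the bitmask-enum behavior of IdentFlag:

    omp::Directive D = omp::getOpenMPDirectiveKind("parallel"); // OMPD_parallel
    assert(omp::getOpenMPDirectiveName(D) == "parallel");
    omp::IdentFlag F =
        omp::OMP_IDENT_FLAG_KMPC | omp::OMP_IDENT_FLAG_BARRIER_EXPL;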
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
new file mode 100644
index 000000000000..e1e1d5a30f3c
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -0,0 +1,250 @@
+//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the OpenMPIRBuilder class and helpers used as a convenient
+// way to create LLVM instructions for OpenMP directives.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPENMP_IR_IRBUILDER_H
+#define LLVM_OPENMP_IR_IRBUILDER_H
+
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+
+namespace llvm {
+
+/// An interface to create LLVM-IR for OpenMP directives.
+///
+/// Each OpenMP directive has a corresponding public generator method.
+class OpenMPIRBuilder {
+public:
+ /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
+ /// not have an effect on \p M (see initialize).
+ OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
+
+ /// Initialize the internal state; this will put structure types and
+ /// potentially other helpers into the underlying module. Must be called
+ /// before any other method and only once!
+ void initialize();
+
+ /// Add attributes known for \p FnID to \p Fn.
+ void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
+
+ /// Type used throughout for insertion points.
+ using InsertPointTy = IRBuilder<>::InsertPoint;
+
+ /// Callback type for variable finalization (think destructors).
+ ///
+ /// \param CodeGenIP is the insertion point at which the finalization code
+ /// should be placed.
+ ///
+ /// A finalize callback knows about all objects that need finalization (e.g.,
+ /// destruction) when the scope of the currently generated construct is left
+ /// at the time, and location, at which the callback is invoked.
+ using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
+
+ struct FinalizationInfo {
+ /// The finalization callback provided by the last in-flight invocation of
+ /// CreateXXXX for the directive of kind DK.
+ FinalizeCallbackTy FiniCB;
+
+ /// The directive kind of the innermost directive that has an associated
+ /// region which might require finalization when it is left.
+ omp::Directive DK;
+
+ /// Flag to indicate if the directive is cancellable.
+ bool IsCancellable;
+ };
+
+ /// Push a finalization callback on the finalization stack.
+ ///
+ /// NOTE: Temporary solution until Clang CG is gone.
+ void pushFinalizationCB(const FinalizationInfo &FI) {
+ FinalizationStack.push_back(FI);
+ }
+
+ /// Pop the last finalization callback from the finalization stack.
+ ///
+ /// NOTE: Temporary solution until Clang CG is gone.
+ void popFinalizationCB() { FinalizationStack.pop_back(); }
+
+ /// Callback type for body (=inner region) code generation
+ ///
+ /// The callback takes code locations as arguments, each describing a
+ /// location at which code might need to be generated or a location that is
+ /// the target of control transfer.
+ ///
+ /// \param AllocaIP is the insertion point at which new alloca instructions
+ /// should be placed.
+ /// \param CodeGenIP is the insertion point at which the body code should be
+ /// placed.
+ /// \param ContinuationBB is the basic block target to leave the body.
+ ///
+ /// Note that all blocks pointed to by the arguments have terminators.
+ using BodyGenCallbackTy =
+ function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationBB)>;
+
+ /// Callback type for variable privatization (think copy & default
+ /// constructor).
+ ///
+ /// \param AllocaIP is the insertion point at which new alloca instructions
+ /// should be placed.
+ /// \param CodeGenIP is the insertion point at which the privatization code
+ /// should be placed.
+ /// \param Val The value being copied/created.
+ /// \param ReplVal The replacement value, thus a copy or newly created version
+ /// of \p Val.
+ ///
+ /// \returns The new insertion point where code generation continues and
+ /// \p ReplVal the replacement of \p Val.
+ using PrivatizeCallbackTy = function_ref<InsertPointTy(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Val,
+ Value *&ReplVal)>;
+
+ /// Description of a LLVM-IR insertion point (IP) and a debug/source location
+ /// (filename, line, column, ...).
+ struct LocationDescription {
+ template <typename T, typename U>
+ LocationDescription(const IRBuilder<T, U> &IRB)
+ : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
+ LocationDescription(const InsertPointTy &IP) : IP(IP) {}
+ LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
+ : IP(IP), DL(DL) {}
+ InsertPointTy IP;
+ DebugLoc DL;
+ };
+
+ /// Emitter methods for OpenMP directives.
+ ///
+ ///{
+
+ /// Generator for '#omp barrier'
+ ///
+ /// \param Loc The location where the barrier directive was encountered.
+ /// \param DK The kind of directive that caused the barrier.
+ /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
+ /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
+ /// should be checked and acted upon.
+ ///
+ /// \returns The insertion point after the barrier.
+ InsertPointTy CreateBarrier(const LocationDescription &Loc, omp::Directive DK,
+ bool ForceSimpleCall = false,
+ bool CheckCancelFlag = true);
+
+ /// Generator for '#omp cancel'
+ ///
+ /// \param Loc The location where the directive was encountered.
+ /// \param IfCondition The evaluated 'if' clause expression, if any.
+ /// \param CanceledDirective The kind of directive that is canceled.
+ ///
+ /// \returns The insertion point after the cancel.
+ InsertPointTy CreateCancel(const LocationDescription &Loc,
+ Value *IfCondition,
+ omp::Directive CanceledDirective);
+
+ /// Generator for '#omp parallel'
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param BodyGenCB Callback that will generate the region code.
+ /// \param PrivCB Callback to copy a given variable (think copy constructor).
+ /// \param FiniCB Callback to finalize variable copies.
+ /// \param IfCondition The evaluated 'if' clause expression, if any.
+ /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
+ /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
+ /// \param IsCancellable Flag to indicate a cancellable parallel region.
+ ///
+ /// \returns The insertion position *after* the parallel.
+ IRBuilder<>::InsertPoint
+ CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
+ PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB,
+ Value *IfCondition, Value *NumThreads,
+ omp::ProcBindKind ProcBind, bool IsCancellable);
+
+ ///}
+
+private:
+ /// Update the internal location to \p Loc.
+ bool updateToLocation(const LocationDescription &Loc) {
+ Builder.restoreIP(Loc.IP);
+ Builder.SetCurrentDebugLocation(Loc.DL);
+ return Loc.IP.getBlock() != nullptr;
+ }
+
+ /// Return the function declaration for the runtime function with \p FnID.
+ Function *getOrCreateRuntimeFunction(omp::RuntimeFunction FnID);
+
+ /// Return the (LLVM-IR) string describing the source location \p LocStr.
+ Constant *getOrCreateSrcLocStr(StringRef LocStr);
+
+ /// Return the (LLVM-IR) string describing the default source location.
+ Constant *getOrCreateDefaultSrcLocStr();
+
+ /// Return the (LLVM-IR) string describing the source location \p Loc.
+ Constant *getOrCreateSrcLocStr(const LocationDescription &Loc);
+
+ /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
+ Value *getOrCreateIdent(Constant *SrcLocStr,
+ omp::IdentFlag Flags = omp::IdentFlag(0));
+
+ /// Generate control flow and cleanup for cancellation.
+ ///
+ /// \param CancelFlag Flag indicating if the cancellation is performed.
+ /// \param CanceledDirective The kind of directive that is canceled.
+ void emitCancelationCheckImpl(Value *CancelFlag,
+ omp::Directive CanceledDirective);
+
+ /// Generate a barrier runtime call.
+ ///
+ /// \param Loc The location at which the request originated and is fulfilled.
+ /// \param DK The directive which caused the barrier
+ /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
+ /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
+ /// should be checked and acted upon.
+ ///
+ /// \returns The insertion point after the barrier.
+ InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
+ omp::Directive DK, bool ForceSimpleCall,
+ bool CheckCancelFlag);
+
+ /// The finalization stack made up of finalize callbacks currently in-flight,
+ /// wrapped into FinalizationInfo objects that also reference the finalization
+ /// target block and the kind of cancellable directive.
+ SmallVector<FinalizationInfo, 8> FinalizationStack;
+
+ /// Return true if the last entry in the finalization stack is of kind \p DK
+ /// and cancellable.
+ bool isLastFinalizationInfoCancellable(omp::Directive DK) {
+ return !FinalizationStack.empty() &&
+ FinalizationStack.back().IsCancellable &&
+ FinalizationStack.back().DK == DK;
+ }
+
+ /// Return the current thread ID.
+ ///
+ /// \param Ident The ident (ident_t*) describing the query origin.
+ Value *getOrCreateThreadID(Value *Ident);
+
+ /// The underlying LLVM-IR module
+ Module &M;
+
+ /// The LLVM-IR Builder used to create IR.
+ IRBuilder<> Builder;
+
+ /// Map to remember source location strings
+ StringMap<Constant *> SrcLocStrMap;
+
+ /// Map to remember existing ident_t*.
+ DenseMap<std::pair<Constant *, uint64_t>, GlobalVariable *> IdentMap;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_OPENMP_IR_IRBUILDER_H
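A minimal sketch of driving the builder, assuming Builder is an IRBuilder<> already positioned inside a function of module M:

    OpenMPIRBuilder OMPBuilder(M);
    OMPBuilder.initialize(); // once, before any generator method
    OpenMPIRBuilder::LocationDescription Loc(Builder);
    Builder.restoreIP(OMPBuilder.CreateBarrier(Loc, omp::OMPD_barrier));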
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
new file mode 100644
index 000000000000..3ec27e5c08a8
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -0,0 +1,289 @@
+//===--- OMPKinds.def - OpenMP directives, clauses, rt-calls -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the list of supported OpenMP directives, clauses, runtime
+/// calls, and other things that need to be listed in enums.
+///
+//===----------------------------------------------------------------------===//
+
+/// OpenMP Directives and combined directives
+///
+///{
+
+#ifndef OMP_DIRECTIVE
+#define OMP_DIRECTIVE(Enum, Str)
+#endif
+
+#define __OMP_DIRECTIVE_EXT(Name, Str) OMP_DIRECTIVE(OMPD_##Name, Str)
+#define __OMP_DIRECTIVE(Name) __OMP_DIRECTIVE_EXT(Name, #Name)
+
+__OMP_DIRECTIVE(threadprivate)
+__OMP_DIRECTIVE(parallel)
+__OMP_DIRECTIVE(task)
+__OMP_DIRECTIVE(simd)
+__OMP_DIRECTIVE(for)
+__OMP_DIRECTIVE(sections)
+__OMP_DIRECTIVE(section)
+__OMP_DIRECTIVE(single)
+__OMP_DIRECTIVE(master)
+__OMP_DIRECTIVE(critical)
+__OMP_DIRECTIVE(taskyield)
+__OMP_DIRECTIVE(barrier)
+__OMP_DIRECTIVE(taskwait)
+__OMP_DIRECTIVE(taskgroup)
+__OMP_DIRECTIVE(flush)
+__OMP_DIRECTIVE(ordered)
+__OMP_DIRECTIVE(atomic)
+__OMP_DIRECTIVE(target)
+__OMP_DIRECTIVE(teams)
+__OMP_DIRECTIVE(cancel)
+__OMP_DIRECTIVE(requires)
+__OMP_DIRECTIVE_EXT(target_data, "target data")
+__OMP_DIRECTIVE_EXT(target_enter_data, "target enter data")
+__OMP_DIRECTIVE_EXT(target_exit_data, "target exit data")
+__OMP_DIRECTIVE_EXT(target_parallel, "target parallel")
+__OMP_DIRECTIVE_EXT(target_parallel_for, "target parallel for")
+__OMP_DIRECTIVE_EXT(target_update, "target update")
+__OMP_DIRECTIVE_EXT(parallel_for, "parallel for")
+__OMP_DIRECTIVE_EXT(parallel_for_simd, "parallel for simd")
+__OMP_DIRECTIVE_EXT(parallel_master, "parallel master")
+__OMP_DIRECTIVE_EXT(parallel_sections, "parallel sections")
+__OMP_DIRECTIVE_EXT(for_simd, "for simd")
+__OMP_DIRECTIVE_EXT(cancellation_point, "cancellation point")
+__OMP_DIRECTIVE_EXT(declare_reduction, "declare reduction")
+__OMP_DIRECTIVE_EXT(declare_mapper, "declare mapper")
+__OMP_DIRECTIVE_EXT(declare_simd, "declare simd")
+__OMP_DIRECTIVE(taskloop)
+__OMP_DIRECTIVE_EXT(taskloop_simd, "taskloop simd")
+__OMP_DIRECTIVE(distribute)
+__OMP_DIRECTIVE_EXT(declare_target, "declare target")
+__OMP_DIRECTIVE_EXT(end_declare_target, "end declare target")
+__OMP_DIRECTIVE_EXT(distribute_parallel_for, "distribute parallel for")
+__OMP_DIRECTIVE_EXT(distribute_parallel_for_simd,
+ "distribute parallel for simd")
+__OMP_DIRECTIVE_EXT(distribute_simd, "distribute simd")
+__OMP_DIRECTIVE_EXT(target_parallel_for_simd, "target parallel for simd")
+__OMP_DIRECTIVE_EXT(target_simd, "target simd")
+__OMP_DIRECTIVE_EXT(teams_distribute, "teams distribute")
+__OMP_DIRECTIVE_EXT(teams_distribute_simd, "teams distribute simd")
+__OMP_DIRECTIVE_EXT(teams_distribute_parallel_for_simd,
+ "teams distribute parallel for simd")
+__OMP_DIRECTIVE_EXT(teams_distribute_parallel_for,
+ "teams distribute parallel for")
+__OMP_DIRECTIVE_EXT(target_teams, "target teams")
+__OMP_DIRECTIVE_EXT(target_teams_distribute, "target teams distribute")
+__OMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for,
+ "target teams distribute parallel for")
+__OMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for_simd,
+ "target teams distribute parallel for simd")
+__OMP_DIRECTIVE_EXT(target_teams_distribute_simd,
+ "target teams distribute simd")
+__OMP_DIRECTIVE(allocate)
+__OMP_DIRECTIVE_EXT(declare_variant, "declare variant")
+__OMP_DIRECTIVE_EXT(master_taskloop, "master taskloop")
+__OMP_DIRECTIVE_EXT(parallel_master_taskloop, "parallel master taskloop")
+__OMP_DIRECTIVE_EXT(master_taskloop_simd, "master taskloop simd")
+__OMP_DIRECTIVE_EXT(parallel_master_taskloop_simd,
+ "parallel master taskloop simd")
+
+// Has to be the last because Clang implicitly expects it to be.
+__OMP_DIRECTIVE(unknown)
+
+#undef __OMP_DIRECTIVE_EXT
+#undef __OMP_DIRECTIVE
+#undef OMP_DIRECTIVE
+
+///}
+
+/// Types used in runtime structs or runtime functions
+///
+///{
+
+#ifndef OMP_TYPE
+#define OMP_TYPE(VarName, InitValue)
+#endif
+
+#define __OMP_TYPE(VarName) OMP_TYPE(VarName, Type::get##VarName##Ty(Ctx))
+
+__OMP_TYPE(Void)
+__OMP_TYPE(Int8)
+__OMP_TYPE(Int32)
+__OMP_TYPE(Int8Ptr)
+__OMP_TYPE(Int32Ptr)
+
+#undef __OMP_TYPE
+#undef OMP_TYPE
+
+///}
+
+/// Struct and function types
+///
+///{
+
+#ifndef OMP_STRUCT_TYPE
+#define OMP_STRUCT_TYPE(VarName, StructName, ...)
+#endif
+
+#define __OMP_STRUCT_TYPE(VarName, Name, ...) \
+ OMP_STRUCT_TYPE(VarName, "struct." #Name, __VA_ARGS__)
+
+__OMP_STRUCT_TYPE(Ident, ident_t, Int32, Int32, Int32, Int32, Int8Ptr)
+
+#undef __OMP_STRUCT_TYPE
+#undef OMP_STRUCT_TYPE
+
+#ifndef OMP_FUNCTION_TYPE
+#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)
+#endif
+
+#define __OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
+ OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, __VA_ARGS__)
+
+__OMP_FUNCTION_TYPE(ParallelTask, true, Void, Int32Ptr, Int32Ptr)
+
+#undef __OMP_FUNCTION_TYPE
+#undef OMP_FUNCTION_TYPE
+
+///}
+
+/// Runtime library functions (and their attributes)
+///
+///{
+
+#ifndef OMP_RTL
+#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)
+#endif
+
+#define __OMP_RTL(Name, IsVarArg, ReturnType, ...) \
+ OMP_RTL(OMPRTL_##Name, #Name, IsVarArg, ReturnType, __VA_ARGS__)
+
+__OMP_RTL(__kmpc_barrier, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_cancel, false, Int32, IdentPtr, Int32, Int32)
+__OMP_RTL(__kmpc_cancel_barrier, false, Int32, IdentPtr, Int32)
+__OMP_RTL(__kmpc_global_thread_num, false, Int32, IdentPtr)
+__OMP_RTL(__kmpc_fork_call, true, Void, IdentPtr, Int32, ParallelTaskPtr)
+__OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32,
+ /* Int */ Int32)
+__OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32)
+__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
+
+__OMP_RTL(omp_get_thread_num, false, Int32, )
+
+#undef __OMP_RTL
+#undef OMP_RTL
+
+#define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind)
+#define AttributeSet(...) \
+ AttributeSet::get(Ctx, ArrayRef<Attribute>({__VA_ARGS__}))
+
+#ifndef OMP_ATTRS_SET
+#define OMP_ATTRS_SET(VarName, AttrSet)
+#endif
+
+#define __OMP_ATTRS_SET(VarName, AttrSet) OMP_ATTRS_SET(VarName, AttrSet)
+
+__OMP_ATTRS_SET(GetterAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly),
+ EnumAttr(NoSync), EnumAttr(NoFree))
+ : AttributeSet(EnumAttr(NoUnwind)))
+
+#undef __OMP_ATTRS_SET
+#undef OMP_ATTRS_SET
+
+#ifndef OMP_RTL_ATTRS
+#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)
+#endif
+
+#define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets) \
+ OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets)
+
+__OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)),
+ AttributeSet(), {})
+
+__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {})
+
+#undef __OMP_RTL_ATTRS
+#undef OMP_RTL_ATTRS
+#undef AttributeSet
+#undef EnumAttr
+
+///}
+
+/// KMP ident_t bit flags
+///
+/// In accordance with the values in `openmp/runtime/src/kmp.h`.
+///
+///{
+
+#ifndef OMP_IDENT_FLAG
+#define OMP_IDENT_FLAG(Enum, Str, Value)
+#endif
+
+#define __OMP_IDENT_FLAG(Name, Value) \
+ OMP_IDENT_FLAG(OMP_IDENT_FLAG_##Name, #Name, Value)
+
+__OMP_IDENT_FLAG(KMPC, 0x02)
+__OMP_IDENT_FLAG(BARRIER_EXPL, 0x20)
+__OMP_IDENT_FLAG(BARRIER_IMPL, 0x0040)
+__OMP_IDENT_FLAG(BARRIER_IMPL_MASK, 0x01C0)
+__OMP_IDENT_FLAG(BARRIER_IMPL_FOR, 0x0040)
+__OMP_IDENT_FLAG(BARRIER_IMPL_SECTIONS, 0x00C0)
+__OMP_IDENT_FLAG(BARRIER_IMPL_SINGLE, 0x0140)
+__OMP_IDENT_FLAG(BARRIER_IMPL_WORKSHARE, 0x01C0)
+
+#undef __OMP_IDENT_FLAG
+#undef OMP_IDENT_FLAG
+
+///}
+
+/// KMP cancel kind
+///
+///{
+
+#ifndef OMP_CANCEL_KIND
+#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)
+#endif
+
+#define __OMP_CANCEL_KIND(Name, Value) \
+ OMP_CANCEL_KIND(OMP_CANCEL_KIND_##Name, #Name, OMPD_##Name, Value)
+
+__OMP_CANCEL_KIND(parallel, 1)
+__OMP_CANCEL_KIND(for, 2)
+__OMP_CANCEL_KIND(sections, 3)
+__OMP_CANCEL_KIND(taskgroup, 4)
+
+#undef __OMP_CANCEL_KIND
+#undef OMP_CANCEL_KIND
+
+///}
+
+/// Proc bind kinds
+///
+///{
+
+#ifndef OMP_PROC_BIND_KIND
+#define OMP_PROC_BIND_KIND(Enum, Str, Value)
+#endif
+
+#define __OMP_PROC_BIND_KIND(Name, Value) \
+ OMP_PROC_BIND_KIND(OMP_PROC_BIND_##Name, #Name, Value)
+
+__OMP_PROC_BIND_KIND(master, 2)
+__OMP_PROC_BIND_KIND(close, 3)
+__OMP_PROC_BIND_KIND(spread, 4)
+__OMP_PROC_BIND_KIND(default, 6)
+__OMP_PROC_BIND_KIND(unknown, 7)
+
+#undef __OMP_PROC_BIND_KIND
+#undef OMP_PROC_BIND_KIND
+
+///}
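As with other LLVM .def files, a consumer defines the macros it cares about before including the file; a hypothetical consumer printing every directive's textual form:

    #define OMP_DIRECTIVE(Enum, Str) llvm::outs() << Str << "\n";
    #include "llvm/Frontend/OpenMP/OMPKinds.def"
    // OMP_DIRECTIVE is #undef'd by the .def file itself after expansion.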
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Argument.h b/contrib/llvm-project/llvm/include/llvm/IR/Argument.h
index 5f514b9c47d2..244878bd3155 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Argument.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Argument.h
@@ -76,8 +76,13 @@ public:
bool hasByValOrInAllocaAttr() const;
/// If this is a byval or inalloca argument, return its alignment.
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use getParamAlign() instead.
unsigned getParamAlignment() const;
+ /// If this is a byval or inalloca argument, return its alignment.
+ MaybeAlign getParamAlign() const;
+
/// If this is a byval argument, return its type.
Type *getParamByValType() const;
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td b/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td
index 153046d2311c..5d4a5f6743b7 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td
@@ -220,6 +220,7 @@ def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">;
def NoNansFPMath : StrBoolAttr<"no-nans-fp-math">;
def UnsafeFPMath : StrBoolAttr<"unsafe-fp-math">;
def NoJumpTables : StrBoolAttr<"no-jump-tables">;
+def NoInlineLineTables : StrBoolAttr<"no-inline-line-tables">;
def ProfileSampleAccurate : StrBoolAttr<"profile-sample-accurate">;
class CompatRule<string F> {
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/AutoUpgrade.h b/contrib/llvm-project/llvm/include/llvm/IR/AutoUpgrade.h
index 66f38e5b55d1..42f50cc991de 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/AutoUpgrade.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/AutoUpgrade.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/StringRef.h"
namespace llvm {
+ class AttrBuilder;
class CallInst;
class Constant;
class Function;
@@ -91,6 +92,10 @@ namespace llvm {
/// pointers.
std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple);
+ /// Upgrade function attributes "no-frame-pointer-elim" and
+ /// "no-frame-pointer-elim-non-leaf" to "frame-pointer".
+ void UpgradeFramePointerAttributes(AttrBuilder &B);
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/CallSite.h b/contrib/llvm-project/llvm/include/llvm/IR/CallSite.h
index 13b1ae8d0e32..0e957c4797e8 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/CallSite.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/CallSite.h
@@ -45,7 +45,7 @@
namespace llvm {
namespace Intrinsic {
-enum ID : unsigned;
+typedef unsigned ID;
}
template <typename FunTy = const Function, typename BBTy = const BasicBlock,
@@ -693,6 +693,18 @@ private:
User::op_iterator getCallee() const;
};
+/// Establish a view to a call site for examination.
+class ImmutableCallSite : public CallSiteBase<> {
+public:
+ ImmutableCallSite() = default;
+ ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {}
+ ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {}
+ ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {}
+ explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {}
+ explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {}
+ ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
+};
+
/// AbstractCallSite
///
/// An abstract call site is a wrapper that allows to treat direct,
@@ -765,6 +777,13 @@ public:
/// as well as the callee of the abstract call site.
AbstractCallSite(const Use *U);
+ /// Add operand uses of \p ICS that represent callback uses into \p CBUses.
+ ///
+ /// All uses added to \p CBUses can be used to create abstract call sites for
+ /// which AbstractCallSite::isCallbackCall() will return true.
+ static void getCallbackUses(ImmutableCallSite ICS,
+ SmallVectorImpl<const Use *> &CBUses);
+
/// Conversion operator to conveniently check for a valid/initialized ACS.
explicit operator bool() const { return (bool)CS; }
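A sketch of the new getCallbackUses helper, assuming I is a call instruction that carries callback metadata:

    ImmutableCallSite ICS(&I);
    SmallVector<const Use *, 4> CallbackUses;
    AbstractCallSite::getCallbackUses(ICS, CallbackUses);
    for (const Use *U : CallbackUses) {
      AbstractCallSite ACS(U);
      assert(ACS.isCallbackCall());
    }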
@@ -850,7 +869,7 @@ public:
/// callee of this ACS. Only valid for callback calls!
int getCallArgOperandNoForCallee() const {
assert(isCallbackCall());
- assert(CI.ParameterEncoding.size() && CI.ParameterEncoding[0] > 0);
+ assert(CI.ParameterEncoding.size() && CI.ParameterEncoding[0] >= 0);
return CI.ParameterEncoding[0];
}
@@ -902,18 +921,6 @@ template <> struct DenseMapInfo<CallSite> {
}
};
-/// Establish a view to a call site for examination.
-class ImmutableCallSite : public CallSiteBase<> {
-public:
- ImmutableCallSite() = default;
- ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {}
- ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {}
- ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {}
- explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {}
- explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {}
- ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
-};
-
} // end namespace llvm
#endif // LLVM_IR_CALLSITE_H
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/CallingConv.h b/contrib/llvm-project/llvm/include/llvm/IR/CallingConv.h
index c1c979c2e2ab..d0906de3ea4e 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/CallingConv.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/CallingConv.h
@@ -80,6 +80,12 @@ namespace CallingConv {
/// be performed.
Tail = 18,
+ /// Special calling convention on Windows for calling the Control
+ /// Guard Check ICall funtion. The function takes exactly one argument
+ /// (address of the target function) passed in the first argument register,
+ /// and has no return value. All register values are preserved.
+ CFGuard_Check = 19,
+
// Target - This is the start of the target-specific calling conventions,
// e.g. fastcall and thiscall on X86.
FirstTargetCC = 64,
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Constant.h b/contrib/llvm-project/llvm/include/llvm/IR/Constant.h
index 2b6a6e4141b9..174e7364c524 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Constant.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Constant.h
@@ -53,6 +53,10 @@ public:
/// Returns true if the value is one.
bool isOneValue() const;
+ /// Return true if the value is not the one value, or,
+ /// for vectors, does not contain one value elements.
+ bool isNotOneValue() const;
+
/// Return true if this is the value that would be returned by
/// getAllOnesValue.
bool isAllOnesValue() const;
@@ -64,7 +68,8 @@ public:
/// Return true if the value is negative zero or null value.
bool isZeroValue() const;
- /// Return true if the value is not the smallest signed value.
+ /// Return true if the value is not the smallest signed value, or,
+ /// for vectors, does not contain smallest signed value elements.
bool isNotMinSignedValue() const;
/// Return true if the value is the smallest signed value.
@@ -128,9 +133,10 @@ public:
Constant *getAggregateElement(unsigned Elt) const;
Constant *getAggregateElement(Constant *Elt) const;
- /// If this is a splat vector constant, meaning that all of the elements have
- /// the same value, return that value. Otherwise return 0.
- Constant *getSplatValue() const;
+ /// If all elements of the vector constant have the same value, return that
+ /// value. Otherwise, return nullptr. Ignore undefined elements by setting
+ /// AllowUndefs to true.
+ Constant *getSplatValue(bool AllowUndefs = false) const;
/// If C is a constant integer then return its value, otherwise C must be a
/// vector of constant integers, all equal, and the common value is returned.
@@ -188,6 +194,10 @@ public:
return const_cast<Constant*>(
static_cast<const Constant *>(this)->stripPointerCasts());
}
+
+ /// Try to replace undefined constant C or undefined elements in C with
+ /// Replacement. If no changes are made, the constant C is returned.
+ static Constant *replaceUndefsWith(Constant *C, Constant *Replacement);
};
} // end namespace llvm
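A sketch combining the two additions, assuming C points to a vector constant that may contain undef lanes:

    if (Constant *Splat = C->getSplatValue(/*AllowUndefs=*/true))
      C = Constant::replaceUndefsWith(C, Splat); // fill undef lanes with the splat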
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/ConstantRange.h b/contrib/llvm-project/llvm/include/llvm/IR/ConstantRange.h
index 964f9e8e9bc9..e6bac8a5f933 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/ConstantRange.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/ConstantRange.h
@@ -327,6 +327,14 @@ public:
const ConstantRange &Other) const;
/// Return a new range representing the possible values resulting
+ /// from an application of the specified overflowing binary operator to a
+ /// left hand side of this range and a right hand side of \p Other given
+ /// the provided knowledge about lack of wrapping \p NoWrapKind.
+ ConstantRange overflowingBinaryOp(Instruction::BinaryOps BinOp,
+ const ConstantRange &Other,
+ unsigned NoWrapKind) const;
+
+ /// Return a new range representing the possible values resulting
/// from an addition of a value in this range and a value in \p Other.
ConstantRange add(const ConstantRange &Other) const;
@@ -343,6 +351,14 @@ public:
ConstantRange sub(const ConstantRange &Other) const;
/// Return a new range representing the possible values resulting
+ /// from a subtraction with wrap type \p NoWrapKind of a value in this
+ /// range and a value in \p Other.
+ /// If the result range is disjoint, the preferred range is determined by the
+ /// \p PreferredRangeType.
+ ConstantRange subWithNoWrap(const ConstantRange &Other, unsigned NoWrapKind,
+ PreferredRangeType RangeType = Smallest) const;
+
+ /// Return a new range representing the possible values resulting
/// from a multiplication of a value in this range and a value in \p Other,
/// treating both this and \p Other as unsigned ranges.
ConstantRange multiply(const ConstantRange &Other) const;
@@ -418,6 +434,20 @@ public:
/// Perform a signed saturating subtraction of two constant ranges.
ConstantRange ssub_sat(const ConstantRange &Other) const;
+ /// Perform an unsigned saturating multiplication of two constant ranges.
+ ConstantRange umul_sat(const ConstantRange &Other) const;
+
+ /// Perform a signed saturating multiplication of two constant ranges.
+ ConstantRange smul_sat(const ConstantRange &Other) const;
+
+ /// Perform an unsigned saturating left shift of this constant range by a
+ /// value in \p Other.
+ ConstantRange ushl_sat(const ConstantRange &Other) const;
+
+ /// Perform a signed saturating left shift of this constant range by a
+ /// value in \p Other.
+ ConstantRange sshl_sat(const ConstantRange &Other) const;
+
/// Return a new range that is the logical not of the current set.
ConstantRange inverse() const;
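A sketch of the generic entry point, assuming LHS and RHS are the ranges of the operands of an add known not to wrap in the signed sense (nsw):

    ConstantRange R = LHS.overflowingBinaryOp(
        Instruction::Add, RHS, OverflowingBinaryOperator::NoSignedWrap);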
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Constants.h b/contrib/llvm-project/llvm/include/llvm/IR/Constants.h
index ca56e8b9328c..262ab439df65 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Constants.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Constants.h
@@ -522,9 +522,10 @@ public:
return cast<VectorType>(Value::getType());
}
- /// If this is a splat constant, meaning that all of the elements have the
- /// same value, return that value. Otherwise return NULL.
- Constant *getSplatValue() const;
+ /// If all elements of the vector constant have the same value, return that
+ /// value. Otherwise, return nullptr. Ignore undefined elements by setting
+ /// AllowUndefs to true.
+ Constant *getSplatValue(bool AllowUndefs = false) const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
@@ -1250,7 +1251,7 @@ public:
/// which would take a ConstantExpr parameter, but that would have spread
/// implementation details of ConstantExpr outside of Constants.cpp, which
/// would make it harder to remove ConstantExprs altogether.
- Instruction *getAsInstruction();
+ Instruction *getAsInstruction() const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/ConstrainedOps.def b/contrib/llvm-project/llvm/include/llvm/IR/ConstrainedOps.def
new file mode 100644
index 000000000000..7e24684ca654
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/IR/ConstrainedOps.def
@@ -0,0 +1,86 @@
+//===- llvm/IR/ConstrainedOps.def - Constrained intrinsics ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines properties of constrained intrinsics, in particular corresponding
+// floating point operations and DAG nodes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTRUCTION
+#define INSTRUCTION(N,A,R,I,D)
+#endif
+
+// In most cases an intrinsic function is handled like an instruction.
+#ifndef FUNCTION
+#define FUNCTION INSTRUCTION
+#endif
+
+// Likewise for compare instructions.
+#ifndef CMP_INSTRUCTION
+#define CMP_INSTRUCTION INSTRUCTION
+#endif
+
+// Arguments of the entries are:
+// - Instruction or intrinsic function name.
+// - Number of original instruction/intrinsic arguments.
+// - 1 if the corresponding constrained intrinsic has a rounding mode argument.
+// - Name of the constrained intrinsic to represent this instruction/function.
+// - DAG node corresponding to the constrained intrinsic, without the STRICT_
+//   prefix.
+
+// These are definitions for instructions that are converted into constrained
+// intrinsics.
+//
+INSTRUCTION(FAdd, 2, 1, experimental_constrained_fadd, FADD)
+INSTRUCTION(FSub, 2, 1, experimental_constrained_fsub, FSUB)
+INSTRUCTION(FMul, 2, 1, experimental_constrained_fmul, FMUL)
+INSTRUCTION(FDiv, 2, 1, experimental_constrained_fdiv, FDIV)
+INSTRUCTION(FRem, 2, 1, experimental_constrained_frem, FREM)
+INSTRUCTION(FPExt, 1, 0, experimental_constrained_fpext, FP_EXTEND)
+INSTRUCTION(SIToFP, 1, 1, experimental_constrained_sitofp, SINT_TO_FP)
+INSTRUCTION(UIToFP, 1, 1, experimental_constrained_uitofp, UINT_TO_FP)
+INSTRUCTION(FPToSI, 1, 0, experimental_constrained_fptosi, FP_TO_SINT)
+INSTRUCTION(FPToUI, 1, 0, experimental_constrained_fptoui, FP_TO_UINT)
+INSTRUCTION(FPTrunc, 1, 1, experimental_constrained_fptrunc, FP_ROUND)
+
+// These are definitions for compare instructions (signaling and quiet version).
+// Both of these match to FCmp / SETCC.
+CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmp, FSETCC)
+CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmps, FSETCCS)
+
+// These are definitions for intrinsic functions that are converted into
+// constrained intrinsics.
+//
+FUNCTION(ceil, 1, 0, experimental_constrained_ceil, FCEIL)
+FUNCTION(cos, 1, 1, experimental_constrained_cos, FCOS)
+FUNCTION(exp, 1, 1, experimental_constrained_exp, FEXP)
+FUNCTION(exp2, 1, 1, experimental_constrained_exp2, FEXP2)
+FUNCTION(floor, 1, 0, experimental_constrained_floor, FFLOOR)
+FUNCTION(fma, 3, 1, experimental_constrained_fma, FMA)
+FUNCTION(log, 1, 1, experimental_constrained_log, FLOG)
+FUNCTION(log10, 1, 1, experimental_constrained_log10, FLOG10)
+FUNCTION(log2, 1, 1, experimental_constrained_log2, FLOG2)
+FUNCTION(lrint, 1, 1, experimental_constrained_lrint, LRINT)
+FUNCTION(llrint, 1, 1, experimental_constrained_llrint, LLRINT)
+FUNCTION(lround, 1, 0, experimental_constrained_lround, LROUND)
+FUNCTION(llround, 1, 0, experimental_constrained_llround, LLROUND)
+FUNCTION(maxnum, 2, 0, experimental_constrained_maxnum, FMAXNUM)
+FUNCTION(minnum, 2, 0, experimental_constrained_minnum, FMINNUM)
+FUNCTION(maximum, 2, 0, experimental_constrained_maximum, FMAXIMUM)
+FUNCTION(minimum, 2, 0, experimental_constrained_minimum, FMINIMUM)
+FUNCTION(nearbyint, 1, 1, experimental_constrained_nearbyint, FNEARBYINT)
+FUNCTION(pow, 2, 1, experimental_constrained_pow, FPOW)
+FUNCTION(powi, 2, 1, experimental_constrained_powi, FPOWI)
+FUNCTION(rint, 1, 1, experimental_constrained_rint, FRINT)
+FUNCTION(round, 1, 0, experimental_constrained_round, FROUND)
+FUNCTION(sin, 1, 1, experimental_constrained_sin, FSIN)
+FUNCTION(sqrt, 1, 1, experimental_constrained_sqrt, FSQRT)
+FUNCTION(trunc, 1, 0, experimental_constrained_trunc, FTRUNC)
+
+#undef INSTRUCTION
+#undef FUNCTION
+#undef CMP_INSTRUCTION
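// Consumer sketch (annotation, mirroring the IRBuilder switch later in this
// patch): because FUNCTION and CMP_INSTRUCTION default to INSTRUCTION above,
// one macro definition covers every entry; ROUND_MODE is the third column.
static bool hasRoundingModeOperand(llvm::Intrinsic::ID ID) {
  switch (ID) {
  default:
    return false;
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)                   \
  case llvm::Intrinsic::INTRINSIC:                                             \
    return ROUND_MODE;
#include "llvm/IR/ConstrainedOps.def"
  }
}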
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/DIBuilder.h b/contrib/llvm-project/llvm/include/llvm/IR/DIBuilder.h
index ad9a35b55414..f7c242554f6a 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/DIBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/DIBuilder.h
@@ -237,8 +237,10 @@ namespace llvm {
/// \param File File where this type is defined.
/// \param LineNo Line number.
/// \param Context The surrounding context for the typedef.
+ /// \param AlignInBits Alignment. (optional)
DIDerivedType *createTypedef(DIType *Ty, StringRef Name, DIFile *File,
- unsigned LineNo, DIScope *Context);
+ unsigned LineNo, DIScope *Context,
+ uint32_t AlignInBits = 0);
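// Usage sketch (annotation; DIB, IntTy, File, and Scope are assumed caller
// state): the new trailing parameter lets a typedef carry explicit alignment,
// e.g. for an alignas'd typedef; omitting it (0) preserves the old behavior.
llvm::DIDerivedType *TD =
    DIB.createTypedef(IntTy, "aligned_int_t", File, /*LineNo=*/42, Scope,
                      /*AlignInBits=*/64);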
/// Create debugging information entry for a 'friend'.
DIDerivedType *createFriend(DIType *Ty, DIType *FriendTy);
@@ -572,7 +574,7 @@ namespace llvm {
/// \param File File where this variable is defined.
/// \param LineNo Line number.
/// \param Ty Variable Type.
- /// \param isLocalToUnit Boolean flag indicate whether this variable is
+  /// \param IsLocalToUnit Boolean flag indicating whether this variable is
/// externally visible or not.
/// \param Expr The location of the global relative to the attached
/// GlobalVariable.
@@ -581,16 +583,16 @@ namespace llvm {
/// specified)
DIGlobalVariableExpression *createGlobalVariableExpression(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DIType *Ty, bool isLocalToUnit,
+ unsigned LineNo, DIType *Ty, bool IsLocalToUnit, bool isDefined = true,
DIExpression *Expr = nullptr, MDNode *Decl = nullptr,
- MDTuple *templateParams = nullptr, uint32_t AlignInBits = 0);
+ MDTuple *TemplateParams = nullptr, uint32_t AlignInBits = 0);
/// Identical to createGlobalVariable
/// except that the resulting DbgNode is temporary and meant to be RAUWed.
DIGlobalVariable *createTempGlobalVariableFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DIType *Ty, bool isLocalToUnit, MDNode *Decl = nullptr,
- MDTuple *templateParams = nullptr, uint32_t AlignInBits = 0);
+ unsigned LineNo, DIType *Ty, bool IsLocalToUnit, MDNode *Decl = nullptr,
+        MDTuple *TemplateParams = nullptr, uint32_t AlignInBits = 0);
/// Create a new descriptor for an auto variable. This is a local variable
/// that is not a subprogram parameter.
@@ -732,11 +734,11 @@ namespace llvm {
/// A space-separated shell-quoted list of -D macro
/// definitions as they would appear on a command line.
/// \param IncludePath The path to the module map file.
- /// \param ISysRoot The clang system root (value of -isysroot).
+ /// \param SysRoot The clang system root (value of -isysroot).
DIModule *createModule(DIScope *Scope, StringRef Name,
StringRef ConfigurationMacros,
StringRef IncludePath,
- StringRef ISysRoot);
+ StringRef SysRoot);
/// This creates a descriptor for a lexical block with a new file
/// attached. This merely extends the existing
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoFlags.def b/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoFlags.def
index f90c580f10ef..df375b6c68e8 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoFlags.def
+++ b/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoFlags.def
@@ -50,7 +50,6 @@ HANDLE_DI_FLAG((3 << 16), VirtualInheritance)
HANDLE_DI_FLAG((1 << 18), IntroducedVirtual)
HANDLE_DI_FLAG((1 << 19), BitField)
HANDLE_DI_FLAG((1 << 20), NoReturn)
-HANDLE_DI_FLAG((1 << 21), ArgumentNotModified)
HANDLE_DI_FLAG((1 << 22), TypePassByValue)
HANDLE_DI_FLAG((1 << 23), TypePassByReference)
HANDLE_DI_FLAG((1 << 24), EnumClass)
@@ -88,11 +87,15 @@ HANDLE_DISP_FLAG((1u << 5), Pure)
HANDLE_DISP_FLAG((1u << 6), Elemental)
HANDLE_DISP_FLAG((1u << 7), Recursive)
HANDLE_DISP_FLAG((1u << 8), MainSubprogram)
+// This flag may also be used in the future, when adding support for
+// defaulted functions.
+HANDLE_DISP_FLAG((1u << 9), Deleted)
+HANDLE_DISP_FLAG((1u << 11), ObjCDirect)
#ifdef DISP_FLAG_LARGEST_NEEDED
// Intended to be used with ADT/BitmaskEnum.h.
// NOTE: Always must be equal to largest flag, check this when adding new flags.
-HANDLE_DISP_FLAG((1 << 8), Largest)
+HANDLE_DISP_FLAG((1 << 11), Largest)
#undef DISP_FLAG_LARGEST_NEEDED
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoMetadata.h b/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoMetadata.h
index 28a59576b7c6..d6bfe504dd94 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -1758,6 +1758,13 @@ public:
bool isPure() const { return getSPFlags() & SPFlagPure; }
bool isElemental() const { return getSPFlags() & SPFlagElemental; }
bool isRecursive() const { return getSPFlags() & SPFlagRecursive; }
+ bool isObjCDirect() const { return getSPFlags() & SPFlagObjCDirect; }
+
+  /// Check if this is a deleted member function.
+ ///
+ /// Return true if this subprogram is a C++11 special
+ /// member function declared deleted.
+ bool isDeleted() const { return getSPFlags() & SPFlagDeleted; }
/// Check if this is reference-qualified.
///
@@ -2077,34 +2084,34 @@ class DIModule : public DIScope {
static DIModule *getImpl(LLVMContext &Context, DIScope *Scope,
StringRef Name, StringRef ConfigurationMacros,
- StringRef IncludePath, StringRef ISysRoot,
+ StringRef IncludePath, StringRef SysRoot,
StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, ConfigurationMacros),
getCanonicalMDString(Context, IncludePath),
- getCanonicalMDString(Context, ISysRoot),
+ getCanonicalMDString(Context, SysRoot),
Storage, ShouldCreate);
}
static DIModule *getImpl(LLVMContext &Context, Metadata *Scope,
MDString *Name, MDString *ConfigurationMacros,
- MDString *IncludePath, MDString *ISysRoot,
+ MDString *IncludePath, MDString *SysRoot,
StorageType Storage, bool ShouldCreate = true);
TempDIModule cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(),
getConfigurationMacros(), getIncludePath(),
- getISysRoot());
+ getSysRoot());
}
public:
DEFINE_MDNODE_GET(DIModule, (DIScope *Scope, StringRef Name,
StringRef ConfigurationMacros, StringRef IncludePath,
- StringRef ISysRoot),
- (Scope, Name, ConfigurationMacros, IncludePath, ISysRoot))
+ StringRef SysRoot),
+ (Scope, Name, ConfigurationMacros, IncludePath, SysRoot))
DEFINE_MDNODE_GET(DIModule,
(Metadata *Scope, MDString *Name, MDString *ConfigurationMacros,
- MDString *IncludePath, MDString *ISysRoot),
- (Scope, Name, ConfigurationMacros, IncludePath, ISysRoot))
+ MDString *IncludePath, MDString *SysRoot),
+ (Scope, Name, ConfigurationMacros, IncludePath, SysRoot))
TempDIModule clone() const { return cloneImpl(); }
@@ -2112,13 +2119,13 @@ public:
StringRef getName() const { return getStringOperand(1); }
StringRef getConfigurationMacros() const { return getStringOperand(2); }
StringRef getIncludePath() const { return getStringOperand(3); }
- StringRef getISysRoot() const { return getStringOperand(4); }
+ StringRef getSysRoot() const { return getStringOperand(4); }
Metadata *getRawScope() const { return getOperand(0); }
MDString *getRawName() const { return getOperandAs<MDString>(1); }
MDString *getRawConfigurationMacros() const { return getOperandAs<MDString>(2); }
MDString *getRawIncludePath() const { return getOperandAs<MDString>(3); }
- MDString *getRawISysRoot() const { return getOperandAs<MDString>(4); }
+ MDString *getRawSysRoot() const { return getOperandAs<MDString>(4); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DIModuleKind;
@@ -2545,6 +2552,16 @@ public:
return 0;
}
+ using ExtOps = std::array<uint64_t, 6>;
+
+ /// Returns the ops for a zero- or sign-extension in a DIExpression.
+ static ExtOps getExtOps(unsigned FromSize, unsigned ToSize, bool Signed);
+
+ /// Append a zero- or sign-extension to \p Expr. Converts the expression to a
+ /// stack value if it isn't one already.
+ static DIExpression *appendExt(const DIExpression *Expr, unsigned FromSize,
+ unsigned ToSize, bool Signed);
+
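// Usage sketch (annotation; Expr is an assumed existing DIExpression):
// describe a value that is only 32 bits wide in its machine location but
// sign-extends to the variable's 64-bit type; per the doc comment above, the
// helper converts Expr to a stack value first if needed.
llvm::DIExpression *Wide =
    llvm::DIExpression::appendExt(Expr, /*FromSize=*/32, /*ToSize=*/64,
                                  /*Signed=*/true);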
/// Check if fragments overlap between a pair of FragmentInfos.
static bool fragmentsOverlap(const FragmentInfo &A, const FragmentInfo &B) {
return fragmentCmp(A, B) == 0;
@@ -2809,11 +2826,6 @@ public:
bool isArtificial() const { return getFlags() & FlagArtificial; }
bool isObjectPointer() const { return getFlags() & FlagObjectPointer; }
- /// Check that an argument is unmodified.
- bool isNotModified() const { return getFlags() & FlagArgumentNotModified; }
- /// Set the flag if an argument is unmodified.
- void setIsNotModified() { Flags |= FlagArgumentNotModified; }
-
/// Check that a location is valid for this variable.
///
/// Check that \c DL exists, is in the same subprogram, and has the same
@@ -3247,6 +3259,89 @@ public:
}
};
+/// Identifies a unique instance of a variable.
+///
+/// Storage for identifying a potentially inlined instance of a variable,
+/// or a fragment thereof. This guarantees that exactly one variable instance
+/// may be identified by this class, even when that variable is a fragment of
+/// an aggregate variable and/or there is another inlined instance of the same
+/// source code variable nearby.
+/// This class does not necessarily uniquely identify that variable: two
+/// DebugVariables with different parameters may refer to the same variable
+/// instance, but one DebugVariable never refers to multiple variable
+/// instances.
+class DebugVariable {
+ using FragmentInfo = DIExpression::FragmentInfo;
+
+ const DILocalVariable *Variable;
+ Optional<FragmentInfo> Fragment;
+ const DILocation *InlinedAt;
+
+  /// Fragment that will overlap all other fragments. Used as the default
+  /// when the caller demands a fragment.
+ static const FragmentInfo DefaultFragment;
+
+public:
+ DebugVariable(const DILocalVariable *Var, Optional<FragmentInfo> FragmentInfo,
+ const DILocation *InlinedAt)
+ : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {}
+
+ DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr,
+ const DILocation *InlinedAt)
+ : Variable(Var),
+ Fragment(DIExpr ? DIExpr->getFragmentInfo() : NoneType()),
+ InlinedAt(InlinedAt) {}
+
+ const DILocalVariable *getVariable() const { return Variable; }
+ const Optional<FragmentInfo> getFragment() const { return Fragment; }
+ const DILocation *getInlinedAt() const { return InlinedAt; }
+
+ const FragmentInfo getFragmentOrDefault() const {
+ return Fragment.getValueOr(DefaultFragment);
+ }
+
+ static bool isDefaultFragment(const FragmentInfo F) {
+ return F == DefaultFragment;
+ }
+
+ bool operator==(const DebugVariable &Other) const {
+ return std::tie(Variable, Fragment, InlinedAt) ==
+ std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
+ }
+
+ bool operator<(const DebugVariable &Other) const {
+ return std::tie(Variable, Fragment, InlinedAt) <
+ std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
+ }
+};
+
+template <> struct DenseMapInfo<DebugVariable> {
+ using FragmentInfo = DIExpression::FragmentInfo;
+
+  /// Empty key: no real key is ever generated with a null DILocalVariable.
+ static inline DebugVariable getEmptyKey() {
+ return DebugVariable(nullptr, NoneType(), nullptr);
+ }
+
+  /// Tombstone key: unlike the empty key, the Optional fragment is engaged.
+ static inline DebugVariable getTombstoneKey() {
+ return DebugVariable(nullptr, {{0, 0}}, nullptr);
+ }
+
+ static unsigned getHashValue(const DebugVariable &D) {
+ unsigned HV = 0;
+ const Optional<FragmentInfo> Fragment = D.getFragment();
+ if (Fragment)
+ HV = DenseMapInfo<FragmentInfo>::getHashValue(*Fragment);
+
+ return hash_combine(D.getVariable(), HV, D.getInlinedAt());
+ }
+
+ static bool isEqual(const DebugVariable &A, const DebugVariable &B) {
+ return A == B;
+ }
+};
+
} // end namespace llvm
#undef DEFINE_MDNODE_GET_UNPACK_IMPL
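// Usage sketch (annotation, assumed caller code): with the DenseMapInfo
// specialization above, DebugVariable can key a map that groups
// llvm.dbg.value intrinsics by the unique (variable, fragment, inlined-at)
// instance they describe.
static void groupByInstance(
    llvm::ArrayRef<llvm::DbgValueInst *> DbgValues,
    llvm::DenseMap<llvm::DebugVariable,
                   llvm::SmallVector<llvm::DbgValueInst *, 4>> &InstancesOf) {
  for (llvm::DbgValueInst *DVI : DbgValues) {
    llvm::DebugVariable Key(DVI->getVariable(), DVI->getExpression(),
                            DVI->getDebugLoc().getInlinedAt());
    InstancesOf[Key].push_back(DVI);
  }
}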
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Dominators.h b/contrib/llvm-project/llvm/include/llvm/IR/Dominators.h
index fef1c6abf8c2..6a14785a6cc3 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Dominators.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Dominators.h
@@ -262,9 +262,7 @@ class DominatorTreeWrapperPass : public FunctionPass {
public:
static char ID;
- DominatorTreeWrapperPass() : FunctionPass(ID) {
- initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ DominatorTreeWrapperPass();
DominatorTree &getDomTree() { return DT; }
const DominatorTree &getDomTree() const { return DT; }
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/FPEnv.h b/contrib/llvm-project/llvm/include/llvm/IR/FPEnv.h
new file mode 100644
index 000000000000..a1e0665d4112
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/IR/FPEnv.h
@@ -0,0 +1,70 @@
+//===- FPEnv.h ---- FP Environment ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// This file contains the declarations of entities that describe the floating
+/// point environment, and of related functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_FLOATINGPOINT_H
+#define LLVM_IR_FLOATINGPOINT_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include <stdint.h>
+
+namespace llvm {
+
+namespace fp {
+
+/// Rounding mode used for floating point operations.
+///
+/// Each of these values corresponds to some metadata argument value of a
+/// constrained floating point intrinsic. See the LLVM Language Reference Manual
+/// for details.
+enum RoundingMode : uint8_t {
+ rmDynamic, ///< This corresponds to "fpround.dynamic".
+ rmToNearest, ///< This corresponds to "fpround.tonearest".
+ rmDownward, ///< This corresponds to "fpround.downward".
+ rmUpward, ///< This corresponds to "fpround.upward".
+ rmTowardZero ///< This corresponds to "fpround.tozero".
+};
+
+/// Exception behavior used for floating point operations.
+///
+/// Each of these values corresponds to some metadata argument value of a
+/// constrained floating point intrinsic. See the LLVM Language Reference Manual
+/// for details.
+enum ExceptionBehavior : uint8_t {
+ ebIgnore, ///< This corresponds to "fpexcept.ignore".
+ ebMayTrap, ///< This corresponds to "fpexcept.maytrap".
+ ebStrict ///< This corresponds to "fpexcept.strict".
+};
+
+} // end namespace fp
+
+/// Returns a valid RoundingMode enumerator when given a string
+/// that is valid as input in constrained intrinsic rounding mode
+/// metadata.
+Optional<fp::RoundingMode> StrToRoundingMode(StringRef);
+
+/// For any RoundingMode enumerator, returns a string valid as input in
+/// constrained intrinsic rounding mode metadata.
+Optional<StringRef> RoundingModeToStr(fp::RoundingMode);
+
+/// Returns a valid ExceptionBehavior enumerator when given a string
+/// valid as input in constrained intrinsic exception behavior metadata.
+Optional<fp::ExceptionBehavior> StrToExceptionBehavior(StringRef);
+
+/// For any ExceptionBehavior enumerator, returns a string valid as
+/// input in constrained intrinsic exception behavior metadata.
+Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior);
+
+} // end namespace llvm
+#endif
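// Usage sketch (annotation, not part of the patch): the free functions
// round-trip between the metadata spelling and the enum, returning None for
// unrecognized input, so parsers can validate and printers can re-emit the
// strings without hard-coding them.
static bool isToNearest(llvm::StringRef MDStr) {
  llvm::Optional<llvm::fp::RoundingMode> RM = llvm::StrToRoundingMode(MDStr);
  return RM && *RM == llvm::fp::rmToNearest;
}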
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Function.h b/contrib/llvm-project/llvm/include/llvm/IR/Function.h
index d586a9460d2b..d9cbcc63fa62 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Function.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Function.h
@@ -43,7 +43,7 @@
namespace llvm {
namespace Intrinsic {
-enum ID : unsigned;
+typedef unsigned ID;
}
class AssemblyAnnotationWriter;
@@ -435,12 +435,18 @@ public:
void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes);
/// Extract the alignment for a call or parameter (0=unknown).
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use getParamAlign() instead.
unsigned getParamAlignment(unsigned ArgNo) const {
- if (const auto MA = AttributeSets.getParamAlignment(ArgNo))
+ if (const auto MA = getParamAlign(ArgNo))
return MA->value();
return 0;
}
+ MaybeAlign getParamAlign(unsigned ArgNo) const {
+ return AttributeSets.getParamAlignment(ArgNo);
+ }
+
/// Extract the byval type for a parameter.
Type *getParamByValType(unsigned ArgNo) const {
Type *Ty = AttributeSets.getParamByValType(ArgNo);
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/GlobalValue.h b/contrib/llvm-project/llvm/include/llvm/IR/GlobalValue.h
index 2209881dbda6..0171356914d6 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/GlobalValue.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/GlobalValue.h
@@ -38,7 +38,7 @@ class GlobalObject;
class Module;
namespace Intrinsic {
- enum ID : unsigned;
+typedef unsigned ID;
} // end namespace Intrinsic
class GlobalValue : public Constant {
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h
index d1ddb75cde9b..a6252b298001 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h
@@ -97,8 +97,8 @@ protected:
FastMathFlags FMF;
bool IsFPConstrained;
- ConstrainedFPIntrinsic::ExceptionBehavior DefaultConstrainedExcept;
- ConstrainedFPIntrinsic::RoundingMode DefaultConstrainedRounding;
+ fp::ExceptionBehavior DefaultConstrainedExcept;
+ fp::RoundingMode DefaultConstrainedRounding;
ArrayRef<OperandBundleDef> DefaultOperandBundles;
@@ -106,8 +106,8 @@ public:
IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: Context(context), DefaultFPMathTag(FPMathTag), IsFPConstrained(false),
- DefaultConstrainedExcept(ConstrainedFPIntrinsic::ebStrict),
- DefaultConstrainedRounding(ConstrainedFPIntrinsic::rmDynamic),
+ DefaultConstrainedExcept(fp::ebStrict),
+ DefaultConstrainedRounding(fp::rmDynamic),
DefaultOperandBundles(OpBundles) {
ClearInsertionPoint();
}
@@ -234,27 +234,39 @@ public:
bool getIsFPConstrained() { return IsFPConstrained; }
/// Set the exception handling to be used with constrained floating point
- void setDefaultConstrainedExcept(
- ConstrainedFPIntrinsic::ExceptionBehavior NewExcept) {
+ void setDefaultConstrainedExcept(fp::ExceptionBehavior NewExcept) {
DefaultConstrainedExcept = NewExcept;
}
/// Set the rounding mode handling to be used with constrained floating point
- void setDefaultConstrainedRounding(
- ConstrainedFPIntrinsic::RoundingMode NewRounding) {
+ void setDefaultConstrainedRounding(fp::RoundingMode NewRounding) {
DefaultConstrainedRounding = NewRounding;
}
/// Get the exception handling used with constrained floating point
- ConstrainedFPIntrinsic::ExceptionBehavior getDefaultConstrainedExcept() {
+ fp::ExceptionBehavior getDefaultConstrainedExcept() {
return DefaultConstrainedExcept;
}
/// Get the rounding mode handling used with constrained floating point
- ConstrainedFPIntrinsic::RoundingMode getDefaultConstrainedRounding() {
+ fp::RoundingMode getDefaultConstrainedRounding() {
return DefaultConstrainedRounding;
}
+ void setConstrainedFPFunctionAttr() {
+ assert(BB && "Must have a basic block to set any function attributes!");
+
+ Function *F = BB->getParent();
+ if (!F->hasFnAttribute(Attribute::StrictFP)) {
+ F->addFnAttr(Attribute::StrictFP);
+ }
+ }
+
+ void setConstrainedFPCallAttr(CallInst *I) {
+ if (!I->hasFnAttr(Attribute::StrictFP))
+ I->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
+ }
+
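// Setup sketch (annotation; BB is an assumed insertion block): once the
// builder is flipped into constrained mode, these defaults feed
// getConstrainedFPRounding/getConstrainedFPExcept below, and
// setConstrainedFPCallAttr() tags every emitted call strictfp.
IRBuilder<> Builder(BB);
Builder.setIsFPConstrained(true);
Builder.setDefaultConstrainedExcept(fp::ebStrict);
Builder.setDefaultConstrainedRounding(fp::rmDynamic);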
//===--------------------------------------------------------------------===//
// RAII helpers.
//===--------------------------------------------------------------------===//
@@ -437,15 +449,15 @@ public:
/// If the pointer isn't an i8*, it will be converted. If a TBAA tag is
/// specified, it will be added to the instruction. Likewise with alias.scope
/// and noalias tags.
- CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = nullptr,
- MDNode *ScopeTag = nullptr,
+ CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size,
+ MaybeAlign Align, bool isVolatile = false,
+ MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr,
MDNode *NoAliasTag = nullptr) {
return CreateMemSet(Ptr, Val, getInt64(Size), Align, isVolatile,
TBAATag, ScopeTag, NoAliasTag);
}
- CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
+ CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, MaybeAlign Align,
bool isVolatile = false, MDNode *TBAATag = nullptr,
MDNode *ScopeTag = nullptr,
MDNode *NoAliasTag = nullptr);
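// Call-site sketch (annotation; B and Dst assumed): alignment now travels as
// MaybeAlign, so "unknown alignment" is spelled MaybeAlign() rather than 0.
B.CreateMemSet(Dst, B.getInt8(0), /*Size=*/64, MaybeAlign(16));
B.CreateMemSet(Dst, B.getInt8(0), /*Size=*/64, MaybeAlign()); // unknown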
@@ -456,19 +468,45 @@ public:
/// If the pointer isn't an i8*, it will be converted. If a TBAA tag is
/// specified, it will be added to the instruction. Likewise with alias.scope
/// and noalias tags.
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use the version that takes Align instead of this one.
+ LLVM_ATTRIBUTE_DEPRECATED(
+ CallInst *CreateElementUnorderedAtomicMemSet(
+ Value *Ptr, Value *Val, uint64_t Size, unsigned Alignment,
+ uint32_t ElementSize, MDNode *TBAATag = nullptr,
+ MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr),
+ "Use the version that takes Align instead of this one") {
+ return CreateElementUnorderedAtomicMemSet(Ptr, Val, getInt64(Size),
+ Align(Alignment), ElementSize,
+ TBAATag, ScopeTag, NoAliasTag);
+ }
+
CallInst *CreateElementUnorderedAtomicMemSet(Value *Ptr, Value *Val,
- uint64_t Size, unsigned Align,
+ uint64_t Size, Align Alignment,
uint32_t ElementSize,
MDNode *TBAATag = nullptr,
MDNode *ScopeTag = nullptr,
MDNode *NoAliasTag = nullptr) {
- return CreateElementUnorderedAtomicMemSet(Ptr, Val, getInt64(Size), Align,
+ return CreateElementUnorderedAtomicMemSet(Ptr, Val, getInt64(Size),
+ Align(Alignment), ElementSize,
+ TBAATag, ScopeTag, NoAliasTag);
+ }
+
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use the version that takes Align instead of this one.
+ LLVM_ATTRIBUTE_DEPRECATED(
+ CallInst *CreateElementUnorderedAtomicMemSet(
+ Value *Ptr, Value *Val, Value *Size, unsigned Alignment,
+ uint32_t ElementSize, MDNode *TBAATag = nullptr,
+ MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr),
+ "Use the version that takes Align instead of this one") {
+ return CreateElementUnorderedAtomicMemSet(Ptr, Val, Size, Align(Alignment),
ElementSize, TBAATag, ScopeTag,
NoAliasTag);
}
CallInst *CreateElementUnorderedAtomicMemSet(Value *Ptr, Value *Val,
- Value *Size, unsigned Align,
+ Value *Size, Align Alignment,
uint32_t ElementSize,
MDNode *TBAATag = nullptr,
MDNode *ScopeTag = nullptr,
@@ -479,8 +517,23 @@ public:
/// If the pointers aren't i8*, they will be converted. If a TBAA tag is
/// specified, it will be added to the instruction. Likewise with alias.scope
/// and noalias tags.
- CallInst *CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src,
- unsigned SrcAlign, uint64_t Size,
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use the version that takes MaybeAlign instead of this one.
+ LLVM_ATTRIBUTE_DEPRECATED(
+ CallInst *CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src,
+ unsigned SrcAlign, uint64_t Size,
+ bool isVolatile = false, MDNode *TBAATag = nullptr,
+ MDNode *TBAAStructTag = nullptr,
+ MDNode *ScopeTag = nullptr,
+ MDNode *NoAliasTag = nullptr),
+ "Use the version that takes MaybeAlign instead") {
+ return CreateMemCpy(Dst, MaybeAlign(DstAlign), Src, MaybeAlign(SrcAlign),
+ getInt64(Size), isVolatile, TBAATag, TBAAStructTag,
+ ScopeTag, NoAliasTag);
+ }
+
+ CallInst *CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src,
+ MaybeAlign SrcAlign, uint64_t Size,
bool isVolatile = false, MDNode *TBAATag = nullptr,
MDNode *TBAAStructTag = nullptr,
MDNode *ScopeTag = nullptr,
@@ -490,8 +543,18 @@ public:
NoAliasTag);
}
- CallInst *CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src,
- unsigned SrcAlign, Value *Size,
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use the version that takes MaybeAlign instead of this one.
+ LLVM_ATTRIBUTE_DEPRECATED(
+ CallInst *CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src,
+ unsigned SrcAlign, Value *Size,
+ bool isVolatile = false, MDNode *TBAATag = nullptr,
+ MDNode *TBAAStructTag = nullptr,
+ MDNode *ScopeTag = nullptr,
+ MDNode *NoAliasTag = nullptr),
+ "Use the version that takes MaybeAlign instead");
+ CallInst *CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src,
+ MaybeAlign SrcAlign, Value *Size,
bool isVolatile = false, MDNode *TBAATag = nullptr,
MDNode *TBAAStructTag = nullptr,
MDNode *ScopeTag = nullptr,
@@ -527,16 +590,40 @@ public:
/// If the pointers aren't i8*, they will be converted. If a TBAA tag is
/// specified, it will be added to the instruction. Likewise with alias.scope
/// and noalias tags.
- CallInst *CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
- uint64_t Size, bool isVolatile = false,
- MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr,
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use the version that takes MaybeAlign instead of this one.
+ LLVM_ATTRIBUTE_DEPRECATED(
+ CallInst *CreateMemMove(
+ Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
+ uint64_t Size, bool isVolatile = false, MDNode *TBAATag = nullptr,
+ MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr),
+ "Use the version that takes MaybeAlign") {
+ return CreateMemMove(Dst, MaybeAlign(DstAlign), Src, MaybeAlign(SrcAlign),
+ getInt64(Size), isVolatile, TBAATag, ScopeTag,
+ NoAliasTag);
+ }
+ CallInst *CreateMemMove(Value *Dst, MaybeAlign DstAlign, Value *Src,
+ MaybeAlign SrcAlign, uint64_t Size,
+ bool isVolatile = false, MDNode *TBAATag = nullptr,
+ MDNode *ScopeTag = nullptr,
MDNode *NoAliasTag = nullptr) {
- return CreateMemMove(Dst, DstAlign, Src, SrcAlign, getInt64(Size), isVolatile,
- TBAATag, ScopeTag, NoAliasTag);
- }
-
- CallInst *CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
- Value *Size, bool isVolatile = false, MDNode *TBAATag = nullptr,
+ return CreateMemMove(Dst, DstAlign, Src, SrcAlign, getInt64(Size),
+ isVolatile, TBAATag, ScopeTag, NoAliasTag);
+ }
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use the version that takes MaybeAlign instead of this one.
+ LLVM_ATTRIBUTE_DEPRECATED(
+ CallInst *CreateMemMove(
+ Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
+ Value *Size, bool isVolatile = false, MDNode *TBAATag = nullptr,
+ MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr),
+ "Use the version that takes MaybeAlign") {
+ return CreateMemMove(Dst, MaybeAlign(DstAlign), Src, MaybeAlign(SrcAlign),
+ Size, isVolatile, TBAATag, ScopeTag, NoAliasTag);
+ }
+ CallInst *CreateMemMove(Value *Dst, MaybeAlign DstAlign, Value *Src,
+ MaybeAlign SrcAlign, Value *Size,
+ bool isVolatile = false, MDNode *TBAATag = nullptr,
MDNode *ScopeTag = nullptr,
MDNode *NoAliasTag = nullptr);
@@ -1082,38 +1169,44 @@ private:
return (LC && RC) ? Insert(Folder.CreateBinOp(Opc, LC, RC), Name) : nullptr;
}
- Value *getConstrainedFPRounding(
- Optional<ConstrainedFPIntrinsic::RoundingMode> Rounding) {
- ConstrainedFPIntrinsic::RoundingMode UseRounding =
- DefaultConstrainedRounding;
+ Value *getConstrainedFPRounding(Optional<fp::RoundingMode> Rounding) {
+ fp::RoundingMode UseRounding = DefaultConstrainedRounding;
if (Rounding.hasValue())
UseRounding = Rounding.getValue();
- Optional<StringRef> RoundingStr =
- ConstrainedFPIntrinsic::RoundingModeToStr(UseRounding);
+ Optional<StringRef> RoundingStr = RoundingModeToStr(UseRounding);
assert(RoundingStr.hasValue() && "Garbage strict rounding mode!");
auto *RoundingMDS = MDString::get(Context, RoundingStr.getValue());
return MetadataAsValue::get(Context, RoundingMDS);
}
- Value *getConstrainedFPExcept(
- Optional<ConstrainedFPIntrinsic::ExceptionBehavior> Except) {
- ConstrainedFPIntrinsic::ExceptionBehavior UseExcept =
- DefaultConstrainedExcept;
+ Value *getConstrainedFPExcept(Optional<fp::ExceptionBehavior> Except) {
+ fp::ExceptionBehavior UseExcept = DefaultConstrainedExcept;
if (Except.hasValue())
UseExcept = Except.getValue();
- Optional<StringRef> ExceptStr =
- ConstrainedFPIntrinsic::ExceptionBehaviorToStr(UseExcept);
+ Optional<StringRef> ExceptStr = ExceptionBehaviorToStr(UseExcept);
assert(ExceptStr.hasValue() && "Garbage strict exception behavior!");
auto *ExceptMDS = MDString::get(Context, ExceptStr.getValue());
return MetadataAsValue::get(Context, ExceptMDS);
}
+ Value *getConstrainedFPPredicate(CmpInst::Predicate Predicate) {
+ assert(CmpInst::isFPPredicate(Predicate) &&
+ Predicate != CmpInst::FCMP_FALSE &&
+ Predicate != CmpInst::FCMP_TRUE &&
+ "Invalid constrained FP comparison predicate!");
+
+ StringRef PredicateStr = CmpInst::getPredicateName(Predicate);
+ auto *PredicateMDS = MDString::get(Context, PredicateStr);
+
+ return MetadataAsValue::get(Context, PredicateMDS);
+ }
+
public:
Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
bool HasNUW = false, bool HasNSW = false) {
@@ -1468,8 +1561,8 @@ public:
CallInst *CreateConstrainedFPBinOp(
Intrinsic::ID ID, Value *L, Value *R, Instruction *FMFSource = nullptr,
const Twine &Name = "", MDNode *FPMathTag = nullptr,
- Optional<ConstrainedFPIntrinsic::RoundingMode> Rounding = None,
- Optional<ConstrainedFPIntrinsic::ExceptionBehavior> Except = None) {
+ Optional<fp::RoundingMode> Rounding = None,
+ Optional<fp::ExceptionBehavior> Except = None) {
Value *RoundingV = getConstrainedFPRounding(Rounding);
Value *ExceptV = getConstrainedFPExcept(Except);
@@ -1479,6 +1572,7 @@ public:
CallInst *C = CreateIntrinsic(ID, {L->getType()},
{L, R, RoundingV, ExceptV}, nullptr, Name);
+ setConstrainedFPCallAttr(C);
setFPAttrs(C, FPMathTag, UseFMF);
return C;
}
@@ -1608,22 +1702,40 @@ public:
/// Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")'
/// correctly, instead of converting the string to 'bool' for the isVolatile
/// parameter.
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use the version that takes MaybeAlign instead of this one.
LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
const char *Name) {
+ return CreateAlignedLoad(Ty, Ptr, MaybeAlign(Align), Name);
+ }
+ LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align,
+ const char *Name) {
LoadInst *LI = CreateLoad(Ty, Ptr, Name);
- LI->setAlignment(MaybeAlign(Align));
+ LI->setAlignment(Align);
return LI;
}
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use the version that takes MaybeAlign instead of this one.
LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
const Twine &Name = "") {
+ return CreateAlignedLoad(Ty, Ptr, MaybeAlign(Align), Name);
+ }
+ LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align,
+ const Twine &Name = "") {
LoadInst *LI = CreateLoad(Ty, Ptr, Name);
- LI->setAlignment(MaybeAlign(Align));
+ LI->setAlignment(Align);
return LI;
}
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use the version that takes MaybeAlign instead of this one.
LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
bool isVolatile, const Twine &Name = "") {
+ return CreateAlignedLoad(Ty, Ptr, MaybeAlign(Align), isVolatile, Name);
+ }
+ LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align,
+ bool isVolatile, const Twine &Name = "") {
LoadInst *LI = CreateLoad(Ty, Ptr, isVolatile, Name);
- LI->setAlignment(MaybeAlign(Align));
+ LI->setAlignment(Align);
return LI;
}
@@ -1644,6 +1756,23 @@ public:
return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
Align, isVolatile, Name);
}
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateAlignedLoad(Value *Ptr, MaybeAlign Align, const char *Name) {
+ return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
+ Align, Name);
+ }
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateAlignedLoad(Value *Ptr, MaybeAlign Align,
+ const Twine &Name = "") {
+ return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
+ Align, Name);
+ }
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateAlignedLoad(Value *Ptr, MaybeAlign Align, bool isVolatile,
+ const Twine &Name = "") {
+ return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
+ Align, isVolatile, Name);
+ }
StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align,
bool isVolatile = false) {
@@ -1651,7 +1780,10 @@ public:
SI->setAlignment(MaybeAlign(Align));
return SI;
}
-
+ StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align,
+ bool isVolatile = false) {
+ return CreateAlignedStore(Val, Ptr, Align ? Align->value() : 0, isVolatile);
+ }
FenceInst *CreateFence(AtomicOrdering Ordering,
SyncScope::ID SSID = SyncScope::System,
const Twine &Name = "") {
@@ -1927,10 +2059,16 @@ public:
}
Value *CreateUIToFP(Value *V, Type *DestTy, const Twine &Name = ""){
+ if (IsFPConstrained)
+ return CreateConstrainedFPCast(Intrinsic::experimental_constrained_uitofp,
+ V, DestTy, nullptr, Name);
return CreateCast(Instruction::UIToFP, V, DestTy, Name);
}
Value *CreateSIToFP(Value *V, Type *DestTy, const Twine &Name = ""){
+ if (IsFPConstrained)
+ return CreateConstrainedFPCast(Intrinsic::experimental_constrained_sitofp,
+ V, DestTy, nullptr, Name);
return CreateCast(Instruction::SIToFP, V, DestTy, Name);
}
@@ -2062,8 +2200,8 @@ public:
Intrinsic::ID ID, Value *V, Type *DestTy,
Instruction *FMFSource = nullptr, const Twine &Name = "",
MDNode *FPMathTag = nullptr,
- Optional<ConstrainedFPIntrinsic::RoundingMode> Rounding = None,
- Optional<ConstrainedFPIntrinsic::ExceptionBehavior> Except = None) {
+ Optional<fp::RoundingMode> Rounding = None,
+ Optional<fp::ExceptionBehavior> Except = None) {
Value *ExceptV = getConstrainedFPExcept(Except);
FastMathFlags UseFMF = FMF;
@@ -2071,19 +2209,26 @@ public:
UseFMF = FMFSource->getFastMathFlags();
CallInst *C;
+ bool HasRoundingMD = false;
switch (ID) {
- default: {
+ default:
+ break;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC: \
+ HasRoundingMD = ROUND_MODE; \
+ break;
+#include "llvm/IR/ConstrainedOps.def"
+ }
+ if (HasRoundingMD) {
Value *RoundingV = getConstrainedFPRounding(Rounding);
C = CreateIntrinsic(ID, {DestTy, V->getType()}, {V, RoundingV, ExceptV},
nullptr, Name);
- } break;
- case Intrinsic::experimental_constrained_fpext:
- case Intrinsic::experimental_constrained_fptoui:
- case Intrinsic::experimental_constrained_fptosi:
+ } else
C = CreateIntrinsic(ID, {DestTy, V->getType()}, {V, ExceptV}, nullptr,
Name);
- break;
- }
+
+ setConstrainedFPCallAttr(C);
+
if (isa<FPMathOperator>(C))
setFPAttrs(C, FPMathTag, UseFMF);
return C;
@@ -2216,14 +2361,49 @@ public:
return Insert(new ICmpInst(P, LHS, RHS), Name);
}
+ // Create a quiet floating-point comparison (i.e. one that raises an FP
+ // exception only in the case where an input is a signaling NaN).
+ // Note that this differs from CreateFCmpS only if IsFPConstrained is true.
Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+ if (IsFPConstrained)
+ return CreateConstrainedFPCmp(Intrinsic::experimental_constrained_fcmp,
+ P, LHS, RHS, Name);
+
+ if (auto *LC = dyn_cast<Constant>(LHS))
+ if (auto *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateFCmp(P, LC, RC), Name);
+ return Insert(setFPAttrs(new FCmpInst(P, LHS, RHS), FPMathTag, FMF), Name);
+ }
+
+ // Create a signaling floating-point comparison (i.e. one that raises an FP
+ // exception whenever an input is any NaN, signaling or quiet).
+ // Note that this differs from CreateFCmp only if IsFPConstrained is true.
+ Value *CreateFCmpS(CmpInst::Predicate P, Value *LHS, Value *RHS,
+ const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+ if (IsFPConstrained)
+ return CreateConstrainedFPCmp(Intrinsic::experimental_constrained_fcmps,
+ P, LHS, RHS, Name);
+
if (auto *LC = dyn_cast<Constant>(LHS))
if (auto *RC = dyn_cast<Constant>(RHS))
return Insert(Folder.CreateFCmp(P, LC, RC), Name);
return Insert(setFPAttrs(new FCmpInst(P, LHS, RHS), FPMathTag, FMF), Name);
}
+ CallInst *CreateConstrainedFPCmp(
+ Intrinsic::ID ID, CmpInst::Predicate P, Value *L, Value *R,
+ const Twine &Name = "",
+ Optional<fp::ExceptionBehavior> Except = None) {
+ Value *PredicateV = getConstrainedFPPredicate(P);
+ Value *ExceptV = getConstrainedFPExcept(Except);
+
+ CallInst *C = CreateIntrinsic(ID, {L->getType()},
+ {L, R, PredicateV, ExceptV}, nullptr, Name);
+ setConstrainedFPCallAttr(C);
+ return C;
+ }
+
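// Comparison sketch (annotation; Builder, A, and B assumed): in constrained
// mode these two differ -- CreateFCmp lowers to
// experimental_constrained_fcmp (quiet) and CreateFCmpS to
// experimental_constrained_fcmps (signaling); otherwise both produce an
// ordinary fcmp instruction.
Value *Quiet = Builder.CreateFCmp(CmpInst::FCMP_OLT, A, B);
Value *Signaling = Builder.CreateFCmpS(CmpInst::FCMP_OLT, A, B);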
//===--------------------------------------------------------------------===//
// Instruction creation methods: Other Instructions
//===--------------------------------------------------------------------===//
@@ -2240,6 +2420,8 @@ public:
ArrayRef<Value *> Args = None, const Twine &Name = "",
MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles);
+ if (IsFPConstrained)
+ setConstrainedFPCallAttr(CI);
if (isa<FPMathOperator>(CI))
setFPAttrs(CI, FPMathTag, FMF);
return Insert(CI, Name);
@@ -2249,6 +2431,8 @@ public:
ArrayRef<OperandBundleDef> OpBundles,
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(FTy, Callee, Args, OpBundles);
+ if (IsFPConstrained)
+ setConstrainedFPCallAttr(CI);
if (isa<FPMathOperator>(CI))
setFPAttrs(CI, FPMathTag, FMF);
return Insert(CI, Name);
@@ -2284,6 +2468,33 @@ public:
Args, OpBundles, Name, FPMathTag);
}
+ CallInst *CreateConstrainedFPCall(
+ Function *Callee, ArrayRef<Value *> Args, const Twine &Name = "",
+ Optional<fp::RoundingMode> Rounding = None,
+ Optional<fp::ExceptionBehavior> Except = None) {
+ llvm::SmallVector<Value *, 6> UseArgs;
+
+ for (auto *OneArg : Args)
+ UseArgs.push_back(OneArg);
+ bool HasRoundingMD = false;
+ switch (Callee->getIntrinsicID()) {
+ default:
+ break;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC: \
+ HasRoundingMD = ROUND_MODE; \
+ break;
+#include "llvm/IR/ConstrainedOps.def"
+ }
+ if (HasRoundingMD)
+ UseArgs.push_back(getConstrainedFPRounding(Rounding));
+ UseArgs.push_back(getConstrainedFPExcept(Except));
+
+ CallInst *C = CreateCall(Callee, UseArgs, Name);
+ setConstrainedFPCallAttr(C);
+ return C;
+ }
+
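// Call sketch (annotation; SqrtFn is assumed to be the declaration of
// llvm.experimental.constrained.sqrt for X's type): the rounding operand is
// appended only because ConstrainedOps.def marks sqrt as carrying a rounding
// mode; the exception operand is always appended.
CallInst *C = Builder.CreateConstrainedFPCall(SqrtFn, {X});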
Value *CreateSelect(Value *C, Value *True, Value *False,
const Twine &Name = "", Instruction *MDFrom = nullptr) {
if (auto *CC = dyn_cast<Constant>(C))
@@ -2370,6 +2581,10 @@ public:
return Insert(LandingPadInst::Create(Ty, NumClauses), Name);
}
+ Value *CreateFreeze(Value *V, const Twine &Name = "") {
+ return Insert(new FreezeInst(V), Name);
+ }
+
//===--------------------------------------------------------------------===//
// Utility creation methods
//===--------------------------------------------------------------------===//
@@ -2499,8 +2714,9 @@ public:
return V;
}
- Value *CreatePreserveArrayAccessIndex(Value *Base, unsigned Dimension,
- unsigned LastIndex, MDNode *DbgInfo) {
+ Value *CreatePreserveArrayAccessIndex(Type *ElTy, Value *Base,
+ unsigned Dimension, unsigned LastIndex,
+ MDNode *DbgInfo) {
assert(isa<PointerType>(Base->getType()) &&
"Invalid Base ptr type for preserve.array.access.index.");
auto *BaseType = Base->getType();
@@ -2513,7 +2729,7 @@ public:
IdxList.push_back(LastIndexV);
Type *ResultType =
- GetElementPtrInst::getGEPReturnType(Base, IdxList);
+ GetElementPtrInst::getGEPReturnType(ElTy, Base, IdxList);
Module *M = BB->getParent()->getParent();
Function *FnPreserveArrayAccessIndex = Intrinsic::getDeclaration(
@@ -2547,8 +2763,9 @@ public:
return Fn;
}
- Value *CreatePreserveStructAccessIndex(Value *Base, unsigned Index,
- unsigned FieldIndex, MDNode *DbgInfo) {
+ Value *CreatePreserveStructAccessIndex(Type *ElTy, Value *Base,
+ unsigned Index, unsigned FieldIndex,
+ MDNode *DbgInfo) {
assert(isa<PointerType>(Base->getType()) &&
"Invalid Base ptr type for preserve.struct.access.index.");
auto *BaseType = Base->getType();
@@ -2556,7 +2773,7 @@ public:
Value *GEPIndex = getInt32(Index);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
Type *ResultType =
- GetElementPtrInst::getGEPReturnType(Base, {Zero, GEPIndex});
+ GetElementPtrInst::getGEPReturnType(ElTy, Base, {Zero, GEPIndex});
Module *M = BB->getParent()->getParent();
Function *FnPreserveStructAccessIndex = Intrinsic::getDeclaration(
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IRPrintingPasses.h b/contrib/llvm-project/llvm/include/llvm/IR/IRPrintingPasses.h
index 3be9449c1a93..230db988f737 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IRPrintingPasses.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IRPrintingPasses.h
@@ -23,7 +23,6 @@
namespace llvm {
class Pass;
-class BasicBlockPass;
class Function;
class FunctionPass;
class Module;
@@ -43,11 +42,6 @@ ModulePass *createPrintModulePass(raw_ostream &OS,
FunctionPass *createPrintFunctionPass(raw_ostream &OS,
const std::string &Banner = "");
-/// Create and return a pass that writes the BB to the specified
-/// \c raw_ostream.
-BasicBlockPass *createPrintBasicBlockPass(raw_ostream &OS,
- const std::string &Banner = "");
-
/// Print out a name of an LLVM value without any prefixes.
///
/// The name is surrounded with ""'s and escaped if it has any special or
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/InstVisitor.h b/contrib/llvm-project/llvm/include/llvm/IR/InstVisitor.h
index fbeb2caf14e6..6168c877a2be 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/InstVisitor.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/InstVisitor.h
@@ -199,6 +199,7 @@ public:
RetTy visitFuncletPadInst(FuncletPadInst &I) { DELEGATE(Instruction); }
RetTy visitCleanupPadInst(CleanupPadInst &I) { DELEGATE(FuncletPadInst); }
RetTy visitCatchPadInst(CatchPadInst &I) { DELEGATE(FuncletPadInst); }
+ RetTy visitFreezeInst(FreezeInst &I) { DELEGATE(Instruction); }
// Handle the special instrinsic instruction classes.
RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgVariableIntrinsic);}
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/InstrTypes.h b/contrib/llvm-project/llvm/include/llvm/IR/InstrTypes.h
index 7fb94e9d8c22..b2cdd58a5046 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/InstrTypes.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/InstrTypes.h
@@ -47,7 +47,7 @@
namespace llvm {
namespace Intrinsic {
-enum ID : unsigned;
+typedef unsigned ID;
}
//===----------------------------------------------------------------------===//
@@ -1039,6 +1039,11 @@ struct OperandBundleUse {
return getTagID() == LLVMContext::OB_funclet;
}
+ /// Return true if this is a "cfguardtarget" operand bundle.
+ bool isCFGuardTargetOperandBundle() const {
+ return getTagID() == LLVMContext::OB_cfguardtarget;
+ }
+
private:
/// Pointer to an entry in LLVMContextImpl::getOrInsertBundleTag.
StringMapEntry<uint32_t> *Tag;
@@ -1267,6 +1272,19 @@ public:
return isArgOperand(&UI.getUse());
}
+  /// Given a use for an arg operand, get the arg operand number that
+ /// corresponds to it.
+ unsigned getArgOperandNo(const Use *U) const {
+ assert(isArgOperand(U) && "Arg operand # out of range!");
+ return U - arg_begin();
+ }
+
+  /// Given a value use iterator, return the arg operand number corresponding
+  /// to it. The iterator must actually correspond to an arg operand.
+ unsigned getArgOperandNo(Value::const_user_iterator UI) const {
+ return getArgOperandNo(&UI.getUse());
+ }
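// Usage sketch (annotation, assumed caller code): recover the argument index
// from a Use while walking a value's users, then fetch the operand back.
static unsigned argIndexOf(const llvm::CallBase &CB, const llvm::Use &U) {
  unsigned ArgNo = CB.getArgOperandNo(&U); // asserts U is an arg operand
  assert(CB.getArgOperand(ArgNo) == U.get());
  return ArgNo;
}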
+
/// Returns true if this CallSite passes the given Value* as an argument to
/// the called function.
bool hasArgument(const Value *V) const {
@@ -1567,19 +1585,31 @@ public:
}
/// Extract the alignment of the return value.
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use getRetAlign() instead.
unsigned getRetAlignment() const {
if (const auto MA = Attrs.getRetAlignment())
return MA->value();
return 0;
}
+ /// Extract the alignment of the return value.
+ MaybeAlign getRetAlign() const { return Attrs.getRetAlignment(); }
+
/// Extract the alignment for a call or parameter (0=unknown).
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use getParamAlign() instead.
unsigned getParamAlignment(unsigned ArgNo) const {
if (const auto MA = Attrs.getParamAlignment(ArgNo))
return MA->value();
return 0;
}
+ /// Extract the alignment for a call or parameter (0=unknown).
+ MaybeAlign getParamAlign(unsigned ArgNo) const {
+ return Attrs.getParamAlignment(ArgNo);
+ }
+
/// Extract the byval type for a call or parameter.
Type *getParamByValType(unsigned ArgNo) const {
Type *Ty = Attrs.getParamByValType(ArgNo);
@@ -1917,7 +1947,7 @@ public:
/// Is the function attribute S disallowed by some operand bundle on
/// this operand bundle user?
bool isFnAttrDisallowedByOpBundle(StringRef S) const {
- // Operand bundles only possibly disallow readnone, readonly and argmenonly
+ // Operand bundles only possibly disallow readnone, readonly and argmemonly
// attributes. All String attributes are fine.
return false;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Instruction.def b/contrib/llvm-project/llvm/include/llvm/IR/Instruction.def
index 41cdf613ad64..a5ad92f58f94 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Instruction.def
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Instruction.def
@@ -217,7 +217,8 @@ HANDLE_OTHER_INST(63, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
HANDLE_OTHER_INST(64, ExtractValue, ExtractValueInst)// extract from aggregate
HANDLE_OTHER_INST(65, InsertValue, InsertValueInst) // insert into aggregate
HANDLE_OTHER_INST(66, LandingPad, LandingPadInst) // Landing pad instruction.
- LAST_OTHER_INST(66)
+HANDLE_OTHER_INST(67, Freeze, FreezeInst) // Freeze instruction.
+ LAST_OTHER_INST(67)
#undef FIRST_TERM_INST
#undef HANDLE_TERM_INST
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h b/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h
index 803f6977b32c..3bfa0e4afc39 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h
@@ -129,7 +129,7 @@ public:
bool isUnaryOp() const { return isUnaryOp(getOpcode()); }
bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
bool isIntDivRem() const { return isIntDivRem(getOpcode()); }
- bool isShift() { return isShift(getOpcode()); }
+ bool isShift() const { return isShift(getOpcode()); }
bool isCast() const { return isCast(getOpcode()); }
bool isFuncletPad() const { return isFuncletPad(getOpcode()); }
bool isExceptionalTerminator() const {
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h b/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h
index fa980df03ef0..b73d5274238c 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h
@@ -78,9 +78,9 @@ public:
AllocaInst(Type *Ty, unsigned AddrSpace,
const Twine &Name, BasicBlock *InsertAtEnd);
- AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, unsigned Align,
+ AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, MaybeAlign Align,
const Twine &Name = "", Instruction *InsertBefore = nullptr);
- AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, unsigned Align,
+ AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, MaybeAlign Align,
const Twine &Name, BasicBlock *InsertAtEnd);
/// Return true if there is an allocation size parameter to the allocation
@@ -239,14 +239,20 @@ public:
}
/// Return the alignment of the access that is being performed.
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use getAlign() instead.
unsigned getAlignment() const {
- if (const auto MA =
- decodeMaybeAlign((getSubclassDataFromInstruction() >> 1) & 31))
+ if (const auto MA = getAlign())
return MA->value();
return 0;
}
- void setAlignment(MaybeAlign Align);
+ /// Return the alignment of the access that is being performed.
+ MaybeAlign getAlign() const {
+ return decodeMaybeAlign((getSubclassDataFromInstruction() >> 1) & 31);
+ }
+
+ void setAlignment(MaybeAlign Alignment);
/// Returns the ordering constraint of this load instruction.
AtomicOrdering getOrdering() const {
@@ -365,14 +371,19 @@ public:
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
/// Return the alignment of the access that is being performed
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use getAlign() instead.
unsigned getAlignment() const {
- if (const auto MA =
- decodeMaybeAlign((getSubclassDataFromInstruction() >> 1) & 31))
+ if (const auto MA = getAlign())
return MA->value();
return 0;
}
- void setAlignment(MaybeAlign Align);
+ MaybeAlign getAlign() const {
+ return decodeMaybeAlign((getSubclassDataFromInstruction() >> 1) & 31);
+ }
+
+ void setAlignment(MaybeAlign Alignment);
/// Returns the ordering constraint of this store instruction.
AtomicOrdering getOrdering() const {
@@ -1039,11 +1050,6 @@ public:
/// Returns the pointer type returned by the GEP
/// instruction, which may be a vector of pointers.
- static Type *getGEPReturnType(Value *Ptr, ArrayRef<Value *> IdxList) {
- return getGEPReturnType(
- cast<PointerType>(Ptr->getType()->getScalarType())->getElementType(),
- Ptr, IdxList);
- }
static Type *getGEPReturnType(Type *ElTy, Value *Ptr,
ArrayRef<Value *> IdxList) {
Type *PtrTy = PointerType::get(checkGEPType(getIndexedType(ElTy, IdxList)),
@@ -4142,13 +4148,9 @@ CallBrInst::CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest,
ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, int NumOperands,
const Twine &NameStr, BasicBlock *InsertAtEnd)
- : CallBase(
- cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType())
- ->getReturnType(),
- Instruction::CallBr,
- OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands,
- InsertAtEnd) {
+ : CallBase(Ty->getReturnType(), Instruction::CallBr,
+ OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands,
+ InsertAtEnd) {
init(Ty, Func, DefaultDest, IndirectDests, Args, Bundles, NameStr);
}
@@ -5299,6 +5301,35 @@ inline unsigned getLoadStoreAddressSpace(Value *I) {
return cast<StoreInst>(I)->getPointerAddressSpace();
}
+//===----------------------------------------------------------------------===//
+// FreezeInst Class
+//===----------------------------------------------------------------------===//
+
+/// This class represents a freeze function that returns a random concrete
+/// value if its operand is either a poison value or an undef value.
+class FreezeInst : public UnaryInstruction {
+protected:
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+
+ /// Clone an identical FreezeInst
+ FreezeInst *cloneImpl() const;
+
+public:
+ explicit FreezeInst(Value *S,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr);
+ FreezeInst(Value *S, const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Freeze;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
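// Usage sketch (annotation; Builder and MaybePoison assumed): freeze pins the
// operand to one arbitrary-but-fixed value, so using the result -- e.g. in a
// branch condition -- is well defined even when the input is poison or undef.
llvm::Value *Frozen = Builder.CreateFreeze(MaybePoison, "frozen");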
} // end namespace llvm
#endif // LLVM_IR_INSTRUCTIONS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h
index c989b4a2e72a..42a5564a4488 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h
@@ -25,6 +25,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/FPEnv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
@@ -208,83 +209,28 @@ namespace llvm {
/// This is the common base class for constrained floating point intrinsics.
class ConstrainedFPIntrinsic : public IntrinsicInst {
public:
- /// Specifies the rounding mode to be assumed. This is only used when
- /// when constrained floating point is enabled. See the LLVM Language
- /// Reference Manual for details.
- enum RoundingMode : uint8_t {
- rmDynamic, ///< This corresponds to "fpround.dynamic".
- rmToNearest, ///< This corresponds to "fpround.tonearest".
- rmDownward, ///< This corresponds to "fpround.downward".
- rmUpward, ///< This corresponds to "fpround.upward".
- rmTowardZero ///< This corresponds to "fpround.tozero".
- };
-
- /// Specifies the required exception behavior. This is only used when
- /// when constrained floating point is used. See the LLVM Language
- /// Reference Manual for details.
- enum ExceptionBehavior : uint8_t {
- ebIgnore, ///< This corresponds to "fpexcept.ignore".
- ebMayTrap, ///< This corresponds to "fpexcept.maytrap".
- ebStrict ///< This corresponds to "fpexcept.strict".
- };
-
bool isUnaryOp() const;
bool isTernaryOp() const;
- Optional<RoundingMode> getRoundingMode() const;
- Optional<ExceptionBehavior> getExceptionBehavior() const;
-
- /// Returns a valid RoundingMode enumerator when given a string
- /// that is valid as input in constrained intrinsic rounding mode
- /// metadata.
- static Optional<RoundingMode> StrToRoundingMode(StringRef);
-
- /// For any RoundingMode enumerator, returns a string valid as input in
- /// constrained intrinsic rounding mode metadata.
- static Optional<StringRef> RoundingModeToStr(RoundingMode);
+ Optional<fp::RoundingMode> getRoundingMode() const;
+ Optional<fp::ExceptionBehavior> getExceptionBehavior() const;
- /// Returns a valid ExceptionBehavior enumerator when given a string
- /// valid as input in constrained intrinsic exception behavior metadata.
- static Optional<ExceptionBehavior> StrToExceptionBehavior(StringRef);
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I);
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
- /// For any ExceptionBehavior enumerator, returns a string valid as
- /// input in constrained intrinsic exception behavior metadata.
- static Optional<StringRef> ExceptionBehaviorToStr(ExceptionBehavior);
+ /// Constrained floating point compare intrinsics.
+ class ConstrainedFPCmpIntrinsic : public ConstrainedFPIntrinsic {
+ public:
+ FCmpInst::Predicate getPredicate() const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
- case Intrinsic::experimental_constrained_fadd:
- case Intrinsic::experimental_constrained_fsub:
- case Intrinsic::experimental_constrained_fmul:
- case Intrinsic::experimental_constrained_fdiv:
- case Intrinsic::experimental_constrained_frem:
- case Intrinsic::experimental_constrained_fma:
- case Intrinsic::experimental_constrained_fptosi:
- case Intrinsic::experimental_constrained_fptoui:
- case Intrinsic::experimental_constrained_fptrunc:
- case Intrinsic::experimental_constrained_fpext:
- case Intrinsic::experimental_constrained_sqrt:
- case Intrinsic::experimental_constrained_pow:
- case Intrinsic::experimental_constrained_powi:
- case Intrinsic::experimental_constrained_sin:
- case Intrinsic::experimental_constrained_cos:
- case Intrinsic::experimental_constrained_exp:
- case Intrinsic::experimental_constrained_exp2:
- case Intrinsic::experimental_constrained_log:
- case Intrinsic::experimental_constrained_log10:
- case Intrinsic::experimental_constrained_log2:
- case Intrinsic::experimental_constrained_lrint:
- case Intrinsic::experimental_constrained_llrint:
- case Intrinsic::experimental_constrained_rint:
- case Intrinsic::experimental_constrained_nearbyint:
- case Intrinsic::experimental_constrained_maxnum:
- case Intrinsic::experimental_constrained_minnum:
- case Intrinsic::experimental_constrained_ceil:
- case Intrinsic::experimental_constrained_floor:
- case Intrinsic::experimental_constrained_lround:
- case Intrinsic::experimental_constrained_llround:
- case Intrinsic::experimental_constrained_round:
- case Intrinsic::experimental_constrained_trunc:
+ case Intrinsic::experimental_constrained_fcmp:
+ case Intrinsic::experimental_constrained_fcmps:
return true;
default: return false;
}
@@ -402,7 +348,10 @@ namespace llvm {
return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
}
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use getDestAlign() instead.
unsigned getDestAlignment() const { return getParamAlignment(ARG_DEST); }
+ MaybeAlign getDestAlign() const { return getParamAlign(ARG_DEST); }
/// Set the specified arguments of the instruction.
void setDest(Value *Ptr) {
@@ -411,11 +360,21 @@ namespace llvm {
setArgOperand(ARG_DEST, Ptr);
}
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use the version that takes MaybeAlign instead of this one.
void setDestAlignment(unsigned Alignment) {
+ setDestAlignment(MaybeAlign(Alignment));
+ }
+ void setDestAlignment(MaybeAlign Alignment) {
+ removeParamAttr(ARG_DEST, Attribute::Alignment);
+ if (Alignment)
+ addParamAttr(ARG_DEST,
+ Attribute::getWithAlignment(getContext(), *Alignment));
+ }
+ void setDestAlignment(Align Alignment) {
removeParamAttr(ARG_DEST, Attribute::Alignment);
- if (Alignment > 0)
- addParamAttr(ARG_DEST, Attribute::getWithAlignment(getContext(),
- Align(Alignment)));
+ addParamAttr(ARG_DEST,
+ Attribute::getWithAlignment(getContext(), Alignment));
}
void setLength(Value *L) {
@@ -450,22 +409,37 @@ namespace llvm {
return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
}
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use getSourceAlign() instead.
unsigned getSourceAlignment() const {
return BaseCL::getParamAlignment(ARG_SOURCE);
}
+ MaybeAlign getSourceAlign() const {
+ return BaseCL::getParamAlign(ARG_SOURCE);
+ }
+
void setSource(Value *Ptr) {
assert(getRawSource()->getType() == Ptr->getType() &&
"setSource called with pointer of wrong type!");
BaseCL::setArgOperand(ARG_SOURCE, Ptr);
}
+ /// FIXME: Remove this function once transition to Align is over.
+ /// Use the version that takes MaybeAlign instead of this one.
void setSourceAlignment(unsigned Alignment) {
+ setSourceAlignment(MaybeAlign(Alignment));
+ }
+ void setSourceAlignment(MaybeAlign Alignment) {
+ BaseCL::removeParamAttr(ARG_SOURCE, Attribute::Alignment);
+ if (Alignment)
+ BaseCL::addParamAttr(ARG_SOURCE, Attribute::getWithAlignment(
+ BaseCL::getContext(), *Alignment));
+ }
+ void setSourceAlignment(Align Alignment) {
BaseCL::removeParamAttr(ARG_SOURCE, Attribute::Alignment);
- if (Alignment > 0)
- BaseCL::addParamAttr(ARG_SOURCE,
- Attribute::getWithAlignment(BaseCL::getContext(),
- Align(Alignment)));
+ BaseCL::addParamAttr(ARG_SOURCE, Attribute::getWithAlignment(
+ BaseCL::getContext(), Alignment));
}
};
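
A minimal sketch of how a pass would consume the reworked constrained-FP API in the hunk above. The Optional<fp::...> getters and the ConstrainedFPCmpIntrinsic subclass are taken from this diff; the fp::rmToNearest and fp::ebStrict spellings are assumed to match the enumerators that moved into FPEnv.h, and needsRoundingFixup is a hypothetical helper, not part of the patch.

    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/FPEnv.h"
    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    static bool needsRoundingFixup(const Instruction &I) {
      // dyn_cast routes through the classof() hooks shown in the diff.
      const auto *CFP = dyn_cast<ConstrainedFPIntrinsic>(&I);
      if (!CFP)
        return false;
      // Both getters now return Optional<> of the FPEnv.h enums.
      Optional<fp::RoundingMode> RM = CFP->getRoundingMode();
      Optional<fp::ExceptionBehavior> EB = CFP->getExceptionBehavior();
      if (const auto *Cmp = dyn_cast<ConstrainedFPCmpIntrinsic>(CFP))
        (void)Cmp->getPredicate(); // new subclass covers fcmp and fcmps
      return RM && *RM != fp::rmToNearest && EB && *EB == fp::ebStrict;
    }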
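
A companion sketch for the Align transition in the mem-transfer intrinsics above. The Align/MaybeAlign overloads and both accessors come straight from this diff; retagAlignments and the 16-byte figure are illustrative.

    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    // Illustrative only; MCI is assumed to be a valid memcpy intrinsic.
    static void retagAlignments(MemCpyInst *MCI) {
      MCI->setDestAlignment(Align(16));          // new overload: known alignment
      MCI->setSourceAlignment(MaybeAlign());     // new overload: unknown alignment
      unsigned Legacy = MCI->getDestAlignment(); // deprecated, still present
      MaybeAlign Preferred = MCI->getDestAlign();
      (void)Legacy; (void)Preferred;
    }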
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.h b/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.h
index 9e4ebd915afc..58e7725fc0df 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.h
@@ -33,14 +33,17 @@ class AttributeList;
/// function known by LLVM. The enum values are returned by
/// Function::getIntrinsicID().
namespace Intrinsic {
- enum ID : unsigned {
- not_intrinsic = 0, // Must be zero
+ // Intrinsic ID type. This is an opaque typedef to facilitate splitting up
+ // the enum into target-specific enums.
+ typedef unsigned ID;
- // Get the intrinsic enums generated from Intrinsics.td
+ enum IndependentIntrinsics : unsigned {
+ not_intrinsic = 0, // Must be zero
+
+ // Get the intrinsic enums generated from Intrinsics.td
#define GET_INTRINSIC_ENUM_VALUES
#include "llvm/IR/IntrinsicEnums.inc"
#undef GET_INTRINSIC_ENUM_VALUES
- , num_intrinsics
};
/// Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
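
Because ID is now an opaque unsigned typedef rather than a closed enum, callers that compare or switch on intrinsic IDs keep compiling unchanged; a small sketch (isAnyIntrinsic is hypothetical):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Intrinsics.h"
    using namespace llvm;

    static bool isAnyIntrinsic(const Function &F) {
      Intrinsic::ID IID = F.getIntrinsicID(); // plain unsigned after this change
      return IID != Intrinsic::not_intrinsic; // not_intrinsic is still zero
    }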
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td
index 7a0263f88c2a..865e4ccc9bc4 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td
@@ -582,12 +582,6 @@ def int_maximum : Intrinsic<[llvm_anyfloat_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
-// NOTE: these are internal interfaces.
-def int_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
-def int_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
-def int_sigsetjmp : Intrinsic<[llvm_i32_ty] , [llvm_ptr_ty, llvm_i32_ty]>;
-def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
-
// Internal interface for object size checking
def int_objectsize : Intrinsic<[llvm_anyint_ty],
[llvm_anyptr_ty, llvm_i1_ty,
@@ -640,6 +634,16 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
[ llvm_anyfloat_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_sitofp : Intrinsic<[ llvm_anyfloat_ty ],
+ [ llvm_anyint_ty,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+
+ def int_experimental_constrained_uitofp : Intrinsic<[ llvm_anyfloat_ty ],
+ [ llvm_anyint_ty,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+
def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty,
@@ -714,20 +718,24 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
def int_experimental_constrained_maxnum : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
- llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_minnum : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
- llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_maximum : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_minimum : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty ]>;
def int_experimental_constrained_ceil : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
- llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_floor : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
- llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_lround : Intrinsic<[ llvm_anyint_ty ],
[ llvm_anyfloat_ty,
@@ -737,14 +745,22 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
llvm_metadata_ty ]>;
def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
- llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_trunc : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
- llvm_metadata_ty,
llvm_metadata_ty ]>;
+
+ // Constrained floating-point comparison (quiet and signaling variants).
+ // Third operand is the predicate represented as a metadata string.
+ def int_experimental_constrained_fcmp
+ : Intrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+ [ llvm_anyfloat_ty, LLVMMatchType<0>,
+ llvm_metadata_ty, llvm_metadata_ty ]>;
+ def int_experimental_constrained_fcmps
+ : Intrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+ [ llvm_anyfloat_ty, LLVMMatchType<0>,
+ llvm_metadata_ty, llvm_metadata_ty ]>;
}
-// FIXME: Add intrinsic for fcmp.
// FIXME: Consider maybe adding intrinsics for sitofp, uitofp.
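
A hedged sketch of emitting the new constrained compare from C++. The intrinsic name and its metadata operands follow the .td entry above; emitConstrainedOEQ and its arguments are illustrative, and the "oeq"/"fpexcept.strict" strings assume the usual LangRef spellings for constrained intrinsics.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    static CallInst *emitConstrainedOEQ(Module &M, IRBuilder<> &Builder,
                                        Value *LHS, Value *RHS) {
      LLVMContext &Ctx = M.getContext();
      // Overloaded on the floating-point operand type only.
      Function *Fcmp = Intrinsic::getDeclaration(
          &M, Intrinsic::experimental_constrained_fcmp, {LHS->getType()});
      Value *Args[] = {
          LHS, RHS,
          MetadataAsValue::get(Ctx, MDString::get(Ctx, "oeq")),
          MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.strict"))};
      return Builder.CreateCall(Fcmp, Args); // yields i1 (or a vector of i1)
    }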
//===------------------------- Expect Intrinsics --------------------------===//
@@ -914,6 +930,14 @@ def int_umul_fix : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>;
+def int_sdiv_fix : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
+
+def int_udiv_fix : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
+
//===------------------- Fixed Point Saturation Arithmetic Intrinsics ----------------===//
//
def int_smul_fix_sat : Intrinsic<[llvm_anyint_ty],
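
As a rough reference model for the new fixed-point division intrinsics added above: with scale s, operands and result all carry s fractional bits, so the dividend is widened and pre-shifted before a truncating divide. A sketch of the i32 signed case (overflow and the exact rounding rule are glossed over here; the LangRef is authoritative):

    #include <cstdint>

    // Rough reference for llvm.sdiv.fix.i32: both inputs and the result are
    // fixed-point values with 'Scale' fractional bits.
    static int32_t sdiv_fix_i32(int32_t A, int32_t B, unsigned Scale) {
      int64_t Wide = (int64_t)A << Scale; // pre-scale the dividend
      return (int32_t)(Wide / B);         // truncating divide, same scale out
    }
    // Example with Scale = 16: 1.5 / 0.5 -> 3.0
    //   sdiv_fix_i32(98304 /*1.5*/, 32768 /*0.5*/, 16) == 196608 /*3.0*/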
@@ -1070,7 +1094,7 @@ def int_experimental_guard : Intrinsic<[], [llvm_i1_ty, llvm_vararg_ty],
// Supports widenable conditions for guards represented as explicit branches.
def int_experimental_widenable_condition : Intrinsic<[llvm_i1_ty], [],
- [IntrInaccessibleMemOnly, IntrWillReturn]>;
+ [IntrInaccessibleMemOnly, IntrWillReturn, IntrSpeculatable]>;
// NOP: calls/invokes to this intrinsic are removed by codegen
def int_donothing : Intrinsic<[], [], [IntrNoMem, IntrWillReturn]>;
@@ -1232,6 +1256,42 @@ let IntrProperties = [IntrNoMem, IntrWillReturn] in {
[llvm_anyvector_ty]>;
}
+//===----- Matrix intrinsics ---------------------------------------------===//
+
+def int_matrix_transpose : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable,
+ IntrWillReturn, ImmArg<1>, ImmArg<2>]>;
+
+def int_matrix_multiply : Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty,
+ llvm_anyvector_ty,
+ llvm_i32_ty,
+ llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable,
+ IntrWillReturn, ImmArg<2>, ImmArg<3>,
+ ImmArg<4>]>;
+
+def int_matrix_columnwise_load : Intrinsic<[llvm_anyvector_ty],
+ [LLVMAnyPointerType<LLVMMatchType<0>>,
+ llvm_i32_ty,
+ llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrReadMem, IntrWillReturn,
+ ImmArg<2>, ImmArg<3>]>;
+
+def int_matrix_columnwise_store : Intrinsic<[],
+ [llvm_anyvector_ty,
+ LLVMAnyPointerType<LLVMMatchType<0>>,
+ llvm_i32_ty,
+ llvm_i32_ty,
+ llvm_i32_ty],
+ [WriteOnly<1>, IntrWillReturn,
+ ImmArg<3>, ImmArg<4>]>;
+
//===---------- Intrinsics to control hardware supported loops ----------===//
// Specify that the value given is the number of iterations that the next loop
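
A hedged sketch of driving the new matrix intrinsics from C++. The enum name Intrinsic::matrix_multiply and the flattened-vector operands follow the definitions above; emitMatMul2x2 and its arguments are illustrative, and a square 2x2 shape is used so the exact ordering of the three dimension immediates does not matter.

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    using namespace llvm;

    // A and B hold 2x2 float matrices flattened into <4 x float>.
    static Value *emitMatMul2x2(Module &M, IRBuilder<> &Builder, Value *A,
                                Value *B) {
      Type *VecTy = VectorType::get(Builder.getFloatTy(), 4);
      Function *MatMul = Intrinsic::getDeclaration(
          &M, Intrinsic::matrix_multiply, {VecTy, VecTy, VecTy});
      // All three dimension immediates are 2 for a square multiply.
      return Builder.CreateCall(
          MatMul, {A, B, Builder.getInt32(2), Builder.getInt32(2),
                   Builder.getInt32(2)});
    }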
@@ -1254,7 +1314,9 @@ def int_loop_decrement :
// maximum number of elements processed in an iteration). Return the remaining
// number of iterations still to be executed. This is effectively a sub which
// can be used with a phi, icmp and br to control the number of iterations
-// executed, as usual.
+// executed, as usual. Optimisations are allowed to treat it as a plain sub,
+// and it is analysable by SCEV, so it is the backend's responsibility to
+// handle cases where it may be optimised.
def int_loop_decrement_reg :
Intrinsic<[llvm_anyint_ty],
[llvm_anyint_ty, llvm_anyint_ty], [IntrNoDuplicate]>;
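
A one-line reference model for the comment above (names are illustrative): the intrinsic behaves as a subtraction of the per-iteration element count from the remaining trip count, feeding the loop's phi, icmp and br.

    #include <cstdint>

    // What optimisers may assume llvm.loop.decrement.reg computes; the
    // backend may later turn the enclosing loop into hardware-loop form.
    static uint32_t loop_decrement_reg(uint32_t Remaining, uint32_t Step) {
      return Remaining - Step;
    }
    // Typical shape: do { ...; N = loop_decrement_reg(N, Step); } while (N != 0);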
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAArch64.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAArch64.td
index db01700f409f..27a2550d1857 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -33,6 +33,9 @@ def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
def int_aarch64_fjcvtzs : Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+def int_aarch64_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_aarch64_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+
//===----------------------------------------------------------------------===//
// HINT
@@ -443,6 +446,10 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
def int_aarch64_neon_fmlsl : AdvSIMD_FP16FML_Intrinsic;
def int_aarch64_neon_fmlal2 : AdvSIMD_FP16FML_Intrinsic;
def int_aarch64_neon_fmlsl2 : AdvSIMD_FP16FML_Intrinsic;
+
+ // v8.3-A Floating-point complex add
+ def int_aarch64_neon_vcadd_rot90 : AdvSIMD_2VectorArg_Intrinsic;
+ def int_aarch64_neon_vcadd_rot270 : AdvSIMD_2VectorArg_Intrinsic;
}
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
@@ -761,6 +768,20 @@ def llvm_nxv4f32_ty : LLVMType<nxv4f32>;
def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
+
+ class AdvSIMD_1Vec_PredLoad_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerTo<0>],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ class AdvSIMD_1Vec_PredStore_Intrinsic
+ : Intrinsic<[],
+ [llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerTo<0>],
+ [IntrArgMemOnly, NoCapture<2>]>;
+
class AdvSIMD_Merged1VectorArg_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
@@ -768,6 +789,79 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
LLVMMatchType<0>],
[IntrNoMem]>;
+ class AdvSIMD_2VectorArgIndexed_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_3VectorArgIndexed_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_Pred1VectorArg_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_Pred2VectorArg_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_Pred3VectorArg_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_Compare_Intrinsic
+ : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty,
+ LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_CompareWide_Intrinsic
+ : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty,
+ llvm_nxv2i64_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_Saturating_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_SaturatingWithPattern_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
+
+ class AdvSIMD_SVE_Saturating_N_Intrinsic<LLVMType T>
+ : Intrinsic<[T],
+ [T, llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<LLVMType T>
+ : Intrinsic<[T],
+ [T, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
+
class AdvSIMD_SVE_CNT_Intrinsic
: Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
[LLVMVectorOfBitcastsToInt<0>,
@@ -775,16 +869,136 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
llvm_anyvector_ty],
[IntrNoMem]>;
+ class AdvSIMD_SVE_FP_Reduce_Intrinsic
+ : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_ReduceWithInit_Intrinsic
+ : Intrinsic<[LLVMVectorElementType<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMVectorElementType<0>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_FP_ReduceWithInit_Intrinsic
+ : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_ShiftByImm_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_ShiftWide_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ llvm_nxv2i64_ty],
+ [IntrNoMem]>;
+
class AdvSIMD_SVE_Unpack_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>],
[IntrNoMem]>;
+ class AdvSIMD_SVE_CADD_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_CMLA_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_CMLA_LANE_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_EXPA_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMVectorOfBitcastsToInt<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_FCVT_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_FCVTZS_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMVectorOfBitcastsToInt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_INSR_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMVectorElementType<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_PTRUE_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [llvm_i32_ty],
+ [IntrNoMem, ImmArg<0>]>;
+
class AdvSIMD_SVE_PUNPKHI_Intrinsic
: Intrinsic<[LLVMHalfElementsVectorType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
+ class AdvSIMD_SVE_SCALE_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMVectorOfBitcastsToInt<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_SCVTF_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_TSMUL_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMVectorOfBitcastsToInt<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_CNTB_Intrinsic
+ : Intrinsic<[llvm_i64_ty],
+ [llvm_i32_ty],
+ [IntrNoMem, ImmArg<0>]>;
+
+ class AdvSIMD_SVE_CNTP_Intrinsic
+ : Intrinsic<[llvm_i64_ty],
+ [llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
class AdvSIMD_SVE_DOT_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
@@ -800,6 +1014,70 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
llvm_i32_ty],
[IntrNoMem]>;
+ class AdvSIMD_SVE_PTEST_Intrinsic
+ : Intrinsic<[llvm_i1_ty],
+ [llvm_anyvector_ty,
+ LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_TBL_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMVectorOfBitcastsToInt<0>],
+ [IntrNoMem]>;
+
+ class SVE2_3VectorArg_Long_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMSubdivide2VectorType<0>,
+ LLVMSubdivide2VectorType<0>],
+ [IntrNoMem]>;
+
+ class SVE2_3VectorArgIndexed_Long_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMSubdivide2VectorType<0>,
+ LLVMSubdivide2VectorType<0>,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+
+ class SVE2_1VectorArg_Narrowing_Intrinsic
+ : Intrinsic<[LLVMSubdivide2VectorType<0>],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+ class SVE2_Merged1VectorArg_Narrowing_Intrinsic
+ : Intrinsic<[LLVMSubdivide2VectorType<0>],
+ [LLVMSubdivide2VectorType<0>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+ class SVE2_2VectorArg_Narrowing_Intrinsic
+ : Intrinsic<
+ [LLVMSubdivide2VectorType<0>],
+ [llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class SVE2_Merged2VectorArg_Narrowing_Intrinsic
+ : Intrinsic<
+ [LLVMSubdivide2VectorType<0>],
+ [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class SVE2_1VectorArg_Imm_Narrowing_Intrinsic
+ : Intrinsic<[LLVMSubdivide2VectorType<0>],
+ [llvm_anyvector_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
+
+ class SVE2_2VectorArg_Imm_Narrowing_Intrinsic
+ : Intrinsic<[LLVMSubdivide2VectorType<0>],
+ [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty,
+ llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
+
+ // NOTE: There is no relationship between these intrinsics beyond an attempt
+ // to reuse currently identical class definitions.
+ class AdvSIMD_SVE_LOGB_Intrinsic : AdvSIMD_SVE_CNT_Intrinsic;
+
  // This class of intrinsics is not intended to be useful within LLVM IR but
  // is instead here to support some of the more rigid parts of the ACLE.
class Builtin_SVCVT<string name, LLVMType OUT, LLVMType IN>
@@ -812,10 +1090,132 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
+class AdvSIMD_SVE_Reduce_Intrinsic
+ : Intrinsic<[LLVMVectorElementType<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+class AdvSIMD_SVE_SADDV_Reduce_Intrinsic
+ : Intrinsic<[llvm_i64_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+class AdvSIMD_SVE_WHILE_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyint_ty, LLVMMatchType<1>],
+ [IntrNoMem]>;
+
+class AdvSIMD_GatherLoad_64bitOffset_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerToElt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i64_ty>
+ ],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+class AdvSIMD_GatherLoad_32bitOffset_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerToElt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
+ ],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+class AdvSIMD_GatherLoad_VectorBase_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty,
+ llvm_i64_ty
+ ],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+class AdvSIMD_ScatterStore_64bitOffset_Intrinsic
+ : Intrinsic<[],
+ [
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerToElt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i64_ty>
+ ],
+ [IntrWriteMem, IntrArgMemOnly]>;
+
+class AdvSIMD_ScatterStore_32bitOffset_Intrinsic
+ : Intrinsic<[],
+ [
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerToElt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
+ ],
+ [IntrWriteMem, IntrArgMemOnly]>;
+
+class AdvSIMD_ScatterStore_VectorBase_Intrinsic
+ : Intrinsic<[],
+ [
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty, llvm_i64_ty
+ ],
+ [IntrWriteMem, IntrArgMemOnly, ImmArg<3>]>;
+
+//
+// Loads
+//
+
+def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
+
+//
+// Stores
+//
+
+def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic;
+
//
// Integer arithmetic
//
+def int_aarch64_sve_add : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_subr : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_mul : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_smulh : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umulh : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_sdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_udiv : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_udivr : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_smax : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umax : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_smin : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umin : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sabd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uabd : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_mad : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_msb : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_mla : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_mls : AdvSIMD_Pred3VectorArg_Intrinsic;
+
+def int_aarch64_sve_saddv : AdvSIMD_SVE_SADDV_Reduce_Intrinsic;
+def int_aarch64_sve_uaddv : AdvSIMD_SVE_SADDV_Reduce_Intrinsic;
+
+def int_aarch64_sve_smaxv : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_umaxv : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_sminv : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_uminv : AdvSIMD_SVE_Reduce_Intrinsic;
+
+def int_aarch64_sve_orv : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_eorv : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_andv : AdvSIMD_SVE_Reduce_Intrinsic;
+
def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic;
def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic;
@@ -825,32 +1225,480 @@ def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic;
def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+// Shifts
+
+def int_aarch64_sve_asr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_asr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
+def int_aarch64_sve_asrd : AdvSIMD_SVE_ShiftByImm_Intrinsic;
+def int_aarch64_sve_insr : AdvSIMD_SVE_INSR_Intrinsic;
+def int_aarch64_sve_lsl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_lsl_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
+def int_aarch64_sve_lsr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_lsr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
+
+//
+// Integer comparisons
+//
+
+def int_aarch64_sve_cmpeq : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_cmpge : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_cmpgt : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_cmphi : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_cmphs : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_cmpne : AdvSIMD_SVE_Compare_Intrinsic;
+
+def int_aarch64_sve_cmpeq_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
+def int_aarch64_sve_cmpge_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
+def int_aarch64_sve_cmpgt_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
+def int_aarch64_sve_cmphi_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
+def int_aarch64_sve_cmphs_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
+def int_aarch64_sve_cmple_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
+def int_aarch64_sve_cmplo_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
+def int_aarch64_sve_cmpls_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
+def int_aarch64_sve_cmplt_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
+def int_aarch64_sve_cmpne_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
+
//
// Counting bits
//
+def int_aarch64_sve_cls : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_clz : AdvSIMD_Merged1VectorArg_Intrinsic;
def int_aarch64_sve_cnt : AdvSIMD_SVE_CNT_Intrinsic;
//
+// Counting elements
+//
+
+def int_aarch64_sve_cntb : AdvSIMD_SVE_CNTB_Intrinsic;
+def int_aarch64_sve_cnth : AdvSIMD_SVE_CNTB_Intrinsic;
+def int_aarch64_sve_cntw : AdvSIMD_SVE_CNTB_Intrinsic;
+def int_aarch64_sve_cntd : AdvSIMD_SVE_CNTB_Intrinsic;
+
+def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic;
+
+//
+// Saturating scalar arithmetic
+//
+
+def int_aarch64_sve_sqdech : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqdecw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqdecd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqdecp : AdvSIMD_SVE_Saturating_Intrinsic;
+
+def int_aarch64_sve_sqdecb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqdecb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqdech_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqdech_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqdecw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqdecw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqdecd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqdecd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqdecp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqdecp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
+
+def int_aarch64_sve_sqinch : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqincw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqincd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_sqincp : AdvSIMD_SVE_Saturating_Intrinsic;
+
+def int_aarch64_sve_sqincb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqincb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqinch_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqinch_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqincw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqincw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqincd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqincd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_sqincp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_sqincp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
+
+def int_aarch64_sve_uqdech : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqdecw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqdecd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqdecp : AdvSIMD_SVE_Saturating_Intrinsic;
+
+def int_aarch64_sve_uqdecb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqdecb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqdech_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqdech_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqdecw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqdecw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqdecd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqdecd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqdecp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqdecp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
+
+def int_aarch64_sve_uqinch : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqincw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqincd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
+def int_aarch64_sve_uqincp : AdvSIMD_SVE_Saturating_Intrinsic;
+
+def int_aarch64_sve_uqincb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqincb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqinch_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqinch_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqincw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqincw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqincd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqincd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
+def int_aarch64_sve_uqincp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
+def int_aarch64_sve_uqincp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
+
+//
+// Reversal
+//
+
+def int_aarch64_sve_rbit : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_revb : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_revh : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_revw : AdvSIMD_Merged1VectorArg_Intrinsic;
+
+//
// Permutations and selection
//
+def int_aarch64_sve_clasta : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_clasta_n : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
+def int_aarch64_sve_clastb : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_clastb_n : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
+def int_aarch64_sve_compact : AdvSIMD_Pred1VectorArg_Intrinsic;
+def int_aarch64_sve_ext : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_lasta : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_lastb : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_rev : AdvSIMD_1VectorArg_Intrinsic;
+def int_aarch64_sve_splice : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_sunpkhi : AdvSIMD_SVE_Unpack_Intrinsic;
def int_aarch64_sve_sunpklo : AdvSIMD_SVE_Unpack_Intrinsic;
-
+def int_aarch64_sve_tbl : AdvSIMD_SVE_TBL_Intrinsic;
+def int_aarch64_sve_trn1 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_trn2 : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_uunpkhi : AdvSIMD_SVE_Unpack_Intrinsic;
def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic;
+def int_aarch64_sve_uzp1 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_uzp2 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_zip1 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_zip2 : AdvSIMD_2VectorArg_Intrinsic;
+
+//
+// Logical operations
+//
+
+def int_aarch64_sve_and : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_bic : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_cnot : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_eor : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_not : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_orr : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+//
+// Conversion
+//
+
+def int_aarch64_sve_sxtb : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_sxth : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_sxtw : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_uxtb : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_uxth : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_uxtw : AdvSIMD_Merged1VectorArg_Intrinsic;
+
+//
+// While comparisons
+//
+
+def int_aarch64_sve_whilele : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilelo : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilels : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilelt : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilege : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilegt : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilehs : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilehi : AdvSIMD_SVE_WHILE_Intrinsic;
+
+//
+// Floating-point arithmetic
+//
+
+def int_aarch64_sve_fabd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fabs : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_fadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fcadd : AdvSIMD_SVE_CADD_Intrinsic;
+def int_aarch64_sve_fcmla : AdvSIMD_SVE_CMLA_Intrinsic;
+def int_aarch64_sve_fcmla_lane : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
+def int_aarch64_sve_fdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fexpa_x : AdvSIMD_SVE_EXPA_Intrinsic;
+def int_aarch64_sve_fmad : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fmax : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmaxnm : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmin : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fminnm : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmla : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fmla_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmls : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fmls_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fmul : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmulx : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fneg : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fnmad : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmla : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmls : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_frecpe_x : AdvSIMD_1VectorArg_Intrinsic;
+def int_aarch64_sve_frecps_x : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_frecpx : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frinta : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frinti : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frintm : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frintn : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frintp : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frintx : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frintz : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frsqrte_x : AdvSIMD_1VectorArg_Intrinsic;
+def int_aarch64_sve_frsqrts_x : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_fscale : AdvSIMD_SVE_SCALE_Intrinsic;
+def int_aarch64_sve_fsqrt : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_fsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic;
+def int_aarch64_sve_ftssel_x : AdvSIMD_SVE_TSMUL_Intrinsic;
+
+//
+// Floating-point reductions
+//
+
+def int_aarch64_sve_fadda : AdvSIMD_SVE_FP_ReduceWithInit_Intrinsic;
+def int_aarch64_sve_faddv : AdvSIMD_SVE_FP_Reduce_Intrinsic;
+def int_aarch64_sve_fmaxv : AdvSIMD_SVE_FP_Reduce_Intrinsic;
+def int_aarch64_sve_fmaxnmv : AdvSIMD_SVE_FP_Reduce_Intrinsic;
+def int_aarch64_sve_fminv : AdvSIMD_SVE_FP_Reduce_Intrinsic;
+def int_aarch64_sve_fminnmv : AdvSIMD_SVE_FP_Reduce_Intrinsic;
+
+//
+// Floating-point conversions
+//
+
+def int_aarch64_sve_fcvt : AdvSIMD_SVE_FCVT_Intrinsic;
+def int_aarch64_sve_fcvtzs : AdvSIMD_SVE_FCVTZS_Intrinsic;
+def int_aarch64_sve_fcvtzu : AdvSIMD_SVE_FCVTZS_Intrinsic;
+def int_aarch64_sve_scvtf : AdvSIMD_SVE_SCVTF_Intrinsic;
+def int_aarch64_sve_ucvtf : AdvSIMD_SVE_SCVTF_Intrinsic;
//
// Floating-point comparisons
//
-def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_facge : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_facgt : AdvSIMD_SVE_Compare_Intrinsic;
+
+def int_aarch64_sve_fcmpeq : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_fcmpge : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_fcmpgt : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_fcmpne : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_fcmpuo : AdvSIMD_SVE_Compare_Intrinsic;
+
+def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<"svcvt_s32_f64_m", llvm_nxv4i32_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<"svcvt_s64_f16_m", llvm_nxv2i64_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<"svcvt_s64_f32_m", llvm_nxv2i64_ty, llvm_nxv4f32_ty>;
+
+def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<"svcvt_u32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<"svcvt_u32_f64_m", llvm_nxv4i32_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<"svcvt_u64_f16_m", llvm_nxv2i64_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzu_i64f32 : Builtin_SVCVT<"svcvt_u64_f32_m", llvm_nxv2i64_ty, llvm_nxv4f32_ty>;
+
+def int_aarch64_sve_fcvt_f16f32 : Builtin_SVCVT<"svcvt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvt_f16f64 : Builtin_SVCVT<"svcvt_f16_f64_m", llvm_nxv8f16_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvt_f32f64 : Builtin_SVCVT<"svcvt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>;
+
+def int_aarch64_sve_fcvt_f32f16 : Builtin_SVCVT<"svcvt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvt_f64f16 : Builtin_SVCVT<"svcvt_f64_f16_m", llvm_nxv2f64_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<"svcvt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv4f32_ty>;
+
+def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<"svcvtlt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<"svcvtlt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<"svcvtnt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<"svcvtnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>;
+
+def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<"svcvtx_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<"svcvtxnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>;
+
+def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_s32_m", llvm_nxv8f16_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_s64_m", llvm_nxv8f16_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_scvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_s64_m", llvm_nxv4f32_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_scvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_s32_m", llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
+
+def int_aarch64_sve_ucvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_u32_m", llvm_nxv8f16_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_ucvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_u64_m", llvm_nxv8f16_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_ucvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_u64_m", llvm_nxv4f32_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_ucvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_u32_m", llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
+
+//
+// Predicate creation
+//
+
+def int_aarch64_sve_ptrue : AdvSIMD_SVE_PTRUE_Intrinsic;
//
// Predicate operations
//
+def int_aarch64_sve_and_z : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_bic_z : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_eor_z : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_nand_z : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_nor_z : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_orn_z : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_orr_z : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_pfirst : AdvSIMD_Pred1VectorArg_Intrinsic;
+def int_aarch64_sve_pnext : AdvSIMD_Pred1VectorArg_Intrinsic;
def int_aarch64_sve_punpkhi : AdvSIMD_SVE_PUNPKHI_Intrinsic;
def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic;
+
+//
+// Testing predicates
+//
+
+def int_aarch64_sve_ptest_any : AdvSIMD_SVE_PTEST_Intrinsic;
+def int_aarch64_sve_ptest_first : AdvSIMD_SVE_PTEST_Intrinsic;
+def int_aarch64_sve_ptest_last : AdvSIMD_SVE_PTEST_Intrinsic;
+
+//
+// Gather loads:
+//
+
+// scalar + vector, 64 bit unscaled offsets
+def int_aarch64_sve_ld1_gather : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;
+
+// scalar + vector, 64 bit scaled offsets
+def int_aarch64_sve_ld1_gather_index : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;
+
+// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (uxtw)
+// extended to 64 bits
+def int_aarch64_sve_ld1_gather_sxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
+def int_aarch64_sve_ld1_gather_uxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
+
+// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended
+// to 64 bits
+def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
+def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
+
+// vector base + immediate index
+def int_aarch64_sve_ld1_gather_imm : AdvSIMD_GatherLoad_VectorBase_Intrinsic;
+
+//
+// Scatter stores:
+//
+
+// scalar + vector, 64 bit unscaled offsets
+def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_64bitOffset_Intrinsic;
+
+// scalar + vector, 64 bit scaled offsets
+def int_aarch64_sve_st1_scatter_index
+ : AdvSIMD_ScatterStore_64bitOffset_Intrinsic;
+
+// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (uxtw)
+// extended to 64 bits
+def int_aarch64_sve_st1_scatter_sxtw
+ : AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
+
+def int_aarch64_sve_st1_scatter_uxtw
+ : AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
+
+// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended
+// to 64 bits
+def int_aarch64_sve_st1_scatter_sxtw_index
+ : AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
+
+def int_aarch64_sve_st1_scatter_uxtw_index
+ : AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
+
+// vector base + immediate index
+def int_aarch64_sve_st1_scatter_imm : AdvSIMD_ScatterStore_VectorBase_Intrinsic;
+
+//
+// SVE2 - Non-widening pairwise arithmetic
+//
+
+def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fminp : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fminnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+//
+// SVE2 - Floating-point widening multiply-accumulate
+//
+
+def int_aarch64_sve_fmlalb : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_fmlalb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
+def int_aarch64_sve_fmlalt : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_fmlalt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
+def int_aarch64_sve_fmlslb : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_fmlslb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
+def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
+
+//
+// SVE2 - Floating-point integer binary logarithm
+//
+
+def int_aarch64_sve_flogb : AdvSIMD_SVE_LOGB_Intrinsic;
+
+//
+// SVE2 - Unary narrowing operations
+//
+
+def int_aarch64_sve_sqxtnb : SVE2_1VectorArg_Narrowing_Intrinsic;
+def int_aarch64_sve_sqxtnt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;
+def int_aarch64_sve_sqxtunb : SVE2_1VectorArg_Narrowing_Intrinsic;
+def int_aarch64_sve_sqxtunt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;
+def int_aarch64_sve_uqxtnb : SVE2_1VectorArg_Narrowing_Intrinsic;
+def int_aarch64_sve_uqxtnt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;
+
+//
+// SVE2 - Binary narrowing DSP operations
+//
+def int_aarch64_sve_addhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
+def int_aarch64_sve_addhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
+
+def int_aarch64_sve_raddhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
+def int_aarch64_sve_raddhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
+
+def int_aarch64_sve_subhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
+def int_aarch64_sve_subhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
+
+def int_aarch64_sve_rsubhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
+def int_aarch64_sve_rsubhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
+
+// Narrowing shift right
+def int_aarch64_sve_shrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_shrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_rshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_rshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - signed input/output
+def int_aarch64_sve_sqshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_sqrshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqrshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - unsigned input/output
+def int_aarch64_sve_uqshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_uqshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_uqrshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_uqrshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - signed input, unsigned output
+def int_aarch64_sve_sqshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_sqrshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqrshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
}
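
A hedged sketch of calling one of the predicated SVE intrinsics above from C++. The overload shape follows AdvSIMD_Pred2VectorArg_Intrinsic (one overloaded vector type, predicate first); emitSVEAdd is hypothetical, and Pg, A and B are assumed to already have matching scalable types such as <vscale x 4 x i1> and <vscale x 4 x i32>.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    using namespace llvm;

    static Value *emitSVEAdd(Module &M, IRBuilder<> &Builder, Value *Pg,
                             Value *A, Value *B) {
      // Overloaded only on the data vector type; the predicate type follows it.
      Function *Add = Intrinsic::getDeclaration(
          &M, Intrinsic::aarch64_sve_add, {A->getType()});
      return Builder.CreateCall(Add, {Pg, A, B}); // Pg gates the active lanes
    }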
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index ab6ee7f92dd1..07ca3a9229d6 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -218,7 +218,7 @@ def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
def int_amdgcn_div_scale : Intrinsic<
// 1st parameter: Numerator
// 2nd parameter: Denominator
- // 3rd parameter: Constant to select select between first and
+ // 3rd parameter: Constant to select between first and
// second. (0 = first, 1 = second).
[llvm_anyfloat_ty, llvm_i1_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsARM.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsARM.td
index e13da6157e04..518ad7079225 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -426,8 +426,6 @@ let IntrProperties = [IntrNoMem, Commutative] in {
def int_arm_neon_vhaddu : Neon_2Arg_Intrinsic;
def int_arm_neon_vrhadds : Neon_2Arg_Intrinsic;
def int_arm_neon_vrhaddu : Neon_2Arg_Intrinsic;
- def int_arm_neon_vqadds : Neon_2Arg_Intrinsic;
- def int_arm_neon_vqaddu : Neon_2Arg_Intrinsic;
def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic;
// Vector Multiply.
@@ -459,8 +457,6 @@ let IntrProperties = [IntrNoMem, Commutative] in {
// Vector Subtract.
def int_arm_neon_vhsubs : Neon_2Arg_Intrinsic;
def int_arm_neon_vhsubu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vqsubs : Neon_2Arg_Intrinsic;
-def int_arm_neon_vqsubu : Neon_2Arg_Intrinsic;
def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic;
// Vector Absolute Compare.
@@ -777,14 +773,352 @@ class Neon_Dot_Intrinsic
def int_arm_neon_udot : Neon_Dot_Intrinsic;
def int_arm_neon_sdot : Neon_Dot_Intrinsic;
-def int_arm_vctp8 : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_arm_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_arm_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_arm_vctp64 : Intrinsic<[llvm_v2i1_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_arm_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_arm_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+def int_arm_mve_vctp8 : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_arm_mve_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_arm_mve_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>;
+// vctp64 takes v4i1, to work around v2i1 not being a legal MVE type
+def int_arm_mve_vctp64 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+// v8.3-A Floating-point complex add
+def int_arm_neon_vcadd_rot90 : Neon_2Arg_Intrinsic;
+def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic;
// GNU eabi mcount
def int_arm_gnu_eabi_mcount : Intrinsic<[],
[],
[IntrReadMem, IntrWriteMem]>;
+def int_arm_mve_pred_i2v : Intrinsic<
+ [llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_arm_mve_pred_v2i : Intrinsic<
+ [llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+
+multiclass IntrinsicSignSuffix<list<LLVMType> rets, list<LLVMType> params = [],
+ list<IntrinsicProperty> props = [],
+ string name = "",
+ list<SDNodeProperty> sdprops = []> {
+ def _s: Intrinsic<rets, params, props, name, sdprops>;
+ def _u: Intrinsic<rets, params, props, name, sdprops>;
+}
+
+def int_arm_mve_min_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_max_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_abd_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+def int_arm_mve_add_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_and_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_bic_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_eor_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_orn_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_orr_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_sub_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_mul_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_mulh_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_qdmulh_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_rmulh_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_qrdmulh_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_mull_int_predicated: Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty /* unsigned */,
+ llvm_i32_ty /* top */, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_mull_poly_predicated: Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, llvm_anyvector_ty,
+ LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_qadd_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+def int_arm_mve_hadd_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+def int_arm_mve_rhadd_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+def int_arm_mve_qsub_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+def int_arm_mve_hsub_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+
+defm int_arm_mve_minv: IntrinsicSignSuffix<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
+defm int_arm_mve_maxv: IntrinsicSignSuffix<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
+
+multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,
+ LLVMType pred = llvm_anyvector_ty,
+ list<IntrinsicProperty> props = [IntrNoMem]> {
+ def "": Intrinsic<rets, params, props>;
+ def _predicated: Intrinsic<rets, params # [pred], props>;
+}
+multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params,
+ LLVMType pred = llvm_anyvector_ty,
+ list<IntrinsicProperty> props = [IntrNoMem]> {
+ def "": Intrinsic<rets, params, props>;
+ def _predicated: Intrinsic<rets, params # [pred,
+ !if(!eq(!cast<string>(rets[0]), "llvm_anyvector_ty"),
+ LLVMMatchType<0>, rets[0])], props>;
+}
+
+defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty],
+ [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty>;
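As a sketch of what these multiclasses expand to, the vcvt_narrow defm above is equivalent to the following two records (reconstructed for illustration; not lines from this patch):

    def int_arm_mve_vcvt_narrow
        : Intrinsic<[llvm_v8f16_ty],
                    [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
    def int_arm_mve_vcvt_narrow_predicated
        : Intrinsic<[llvm_v8f16_ty],
                    [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4i1_ty],
                    [IntrNoMem]>;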
+
+defm int_arm_mve_vldr_gather_base: MVEPredicated<
+ [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
+ llvm_anyvector_ty, [IntrReadMem]>;
+defm int_arm_mve_vldr_gather_base_wb: MVEPredicated<
+ [llvm_anyvector_ty, llvm_anyvector_ty],
+ [LLVMMatchType<1>, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem]>;
+defm int_arm_mve_vstr_scatter_base: MVEPredicated<
+ [], [llvm_anyvector_ty, llvm_i32_ty, llvm_anyvector_ty],
+ llvm_anyvector_ty, [IntrWriteMem]>;
+defm int_arm_mve_vstr_scatter_base_wb: MVEPredicated<
+ [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
+ llvm_anyvector_ty, [IntrWriteMem]>;
+
+// gather_offset takes three i32 parameters. The first is the size of
+// the memory element loaded, in bits. The second is a left bit shift
+// applied to each offset in the vector parameter (it must be either 0,
+// or correspond to the element size of the destination vector type).
+// The last is 1 to indicate zero extension (if the load is widening),
+// or 0 for sign extension.
+//
+// scatter_offset has the first two of those parameters, but since it
+// narrows rather than widens, it doesn't need the last one.
+defm int_arm_mve_vldr_gather_offset: MVEPredicated<
+ [llvm_anyvector_ty], [llvm_anyptr_ty, llvm_anyvector_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem]>;
+defm int_arm_mve_vstr_scatter_offset: MVEPredicated<
+ [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
+ llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem]>;
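A gather using the three i32 parameters described above — 16-bit memory elements, no offset shift, zero-extended into a v4i32 result — would look roughly like this (the mangled intrinsic name is an assumed specialisation):

    %g = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offsets, i32 16, i32 0, i32 1)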
+
+def int_arm_mve_shl_imm_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_shr_imm_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, // extra i32 is unsigned flag
+ llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+defm int_arm_mve_vqshl_imm: MVEPredicatedM<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
+defm int_arm_mve_vrshr_imm: MVEPredicatedM<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
+defm int_arm_mve_vqshlu_imm: MVEPredicatedM<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/]>;
+defm int_arm_mve_vshll_imm: MVEPredicatedM<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/,
+ llvm_i32_ty /*top-half*/]>;
+
+defm int_arm_mve_vsli: MVEPredicated<
+ [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
+defm int_arm_mve_vsri: MVEPredicated<
+ [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
+
+defm int_arm_mve_vshrn: MVEPredicated<
+ [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty,
+ llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/,
+ llvm_i32_ty /*unsigned-out*/, llvm_i32_ty /*unsigned-in*/,
+ llvm_i32_ty /*top-half*/]>;
+
+defm int_arm_mve_vshl_scalar: MVEPredicated<
+ [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/,
+ llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>;
+defm int_arm_mve_vshl_vector: MVEPredicatedM<
+ [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty /*shiftcounts*/,
+ llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>;
+
+// MVE scalar shifts.
+class ARM_MVE_qrshift_single<list<LLVMType> value,
+ list<LLVMType> saturate = []> :
+ Intrinsic<value, value # [llvm_i32_ty] # saturate, [IntrNoMem]>;
+multiclass ARM_MVE_qrshift<list<LLVMType> saturate = []> {
+ // Most of these shifts come in 32- and 64-bit versions. But only
+ // the 64-bit ones have the extra saturation argument (if any).
+ def "": ARM_MVE_qrshift_single<[llvm_i32_ty]>;
+ def l: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty], saturate>;
+}
+defm int_arm_mve_urshr: ARM_MVE_qrshift;
+defm int_arm_mve_uqshl: ARM_MVE_qrshift;
+defm int_arm_mve_srshr: ARM_MVE_qrshift;
+defm int_arm_mve_sqshl: ARM_MVE_qrshift;
+defm int_arm_mve_uqrshl: ARM_MVE_qrshift<[llvm_i32_ty]>;
+defm int_arm_mve_sqrshr: ARM_MVE_qrshift<[llvm_i32_ty]>;
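Sketch of one expansion (reconstructed for illustration): defm int_arm_mve_uqrshl yields a 32-bit shift and a 64-bit 'l' variant, the 64-bit value modelled as two i32 halves with the saturation operand appended:

    def int_arm_mve_uqrshl     // i32 (i32 value, i32 shift)
        : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    def int_arm_mve_uqrshll    // {i32, i32} (i32 lo, i32 hi, i32 shift, i32 sat)
        : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
                    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                    [IntrNoMem]>;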
+// LSLL and ASRL only have 64-bit versions, not 32.
+def int_arm_mve_lsll: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
+def int_arm_mve_asrl: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
+
+def int_arm_mve_vabd: Intrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
+ [IntrNoMem]>;
+def int_arm_mve_vadc: Intrinsic<
+ [llvm_anyvector_ty, llvm_i32_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_mve_vadc_predicated: Intrinsic<
+ [llvm_anyvector_ty, llvm_i32_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
+ llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
+def int_arm_mve_vmulh: Intrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
+ [IntrNoMem]>;
+def int_arm_mve_vqdmulh: Intrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+def int_arm_mve_vhadd: Intrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
+ [IntrNoMem]>;
+def int_arm_mve_vrhadd: Intrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
+ [IntrNoMem]>;
+def int_arm_mve_vhsub: Intrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
+ [IntrNoMem]>;
+def int_arm_mve_vrmulh: Intrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
+ [IntrNoMem]>;
+def int_arm_mve_vqrdmulh: Intrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+def int_arm_mve_vmull: Intrinsic<
+ [llvm_anyvector_ty],
+ [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty /* unsigned */,
+ llvm_i32_ty /* top */], [IntrNoMem]>;
+def int_arm_mve_vmull_poly: Intrinsic<
+ [llvm_anyvector_ty],
+ [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrNoMem]>;
+
+// Intrinsic with a predicated and a non-predicated case. The predicated case
+// has two additional parameters: inactive (the value for inactive lanes, can
+// be undef) and predicate.
+multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,
+ list<LLVMType> params, LLVMType inactive,
+ LLVMType predicate,
+ list<IntrinsicProperty> props = [IntrNoMem]> {
+ def "": Intrinsic<rets, flags # params, props>;
+ def _predicated: Intrinsic<rets, flags # [inactive] # params # [predicate],
+ props>;
+}
+
+// The first two parameters are compile-time constants:
+// * Halving: 0 means a halving (vhcaddq) instruction, 1 means a
+// non-halving (vcaddq) instruction. Note: the flag is inverted to
+// match the corresponding bit in the instruction encoding.
+// * Rotation angle: 0 means 90 deg, 1 means 180 deg.
+defm int_arm_mve_vcaddq : MVEMXPredicated<
+ [llvm_anyvector_ty],
+ [llvm_i32_ty, llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+ LLVMMatchType<0>, llvm_anyvector_ty>;
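For illustration, this defm expands under MVEMXPredicated into (reconstructed, not part of the patch):

    def int_arm_mve_vcaddq
        : Intrinsic<[llvm_anyvector_ty],
                    [llvm_i32_ty, llvm_i32_ty, LLVMMatchType<0>,
                     LLVMMatchType<0>], [IntrNoMem]>;
    def int_arm_mve_vcaddq_predicated
        : Intrinsic<[llvm_anyvector_ty],
                    [llvm_i32_ty, llvm_i32_ty, LLVMMatchType<0> /* inactive */,
                     LLVMMatchType<0>, LLVMMatchType<0>,
                     llvm_anyvector_ty /* predicate */], [IntrNoMem]>;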
+
+// The first operand of the following two intrinsics is the rotation angle
+// (must be a compile-time constant):
+// 0 - 0 deg
+// 1 - 90 deg
+// 2 - 180 deg
+// 3 - 270 deg
+defm int_arm_mve_vcmulq : MVEMXPredicated<
+ [llvm_anyvector_ty],
+ [llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+ LLVMMatchType<0>, llvm_anyvector_ty>;
+
+defm int_arm_mve_vcmlaq : MVEPredicated<
+ [llvm_anyvector_ty],
+ [llvm_i32_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ llvm_anyvector_ty>;
+
+def int_arm_mve_vld2q: Intrinsic<
+  [llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem]>;
+def int_arm_mve_vld4q: Intrinsic<
+  [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+  [llvm_anyptr_ty], [IntrReadMem]>;
+
+def int_arm_mve_vst2q: Intrinsic<
+  [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty],
+  [IntrWriteMem]>;
+def int_arm_mve_vst4q: Intrinsic<
+  [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
+   LLVMMatchType<1>, llvm_i32_ty],
+  [IntrWriteMem]>;
+
+// MVE vector absolute difference and accumulate across vector
+// The first operand is an 'unsigned' flag. The remaining operands are:
+// * accumulator
+// * first vector operand
+// * second vector operand
+// * mask (only in predicated versions)
+defm int_arm_mve_vabav: MVEPredicated<
+ [llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
+ llvm_anyvector_ty>;
+
+// The following three intrinsics are MVE vector reductions with two
+// vector operands.
+// The first 3 operands are boolean flags (must be compile-time constants):
+// * unsigned - the instruction operates on vectors of unsigned values and
+// unsigned scalars
+// * subtract - the instruction performs subtraction after multiplication of
+// lane pairs (e.g., vmlsdav vs vmladav)
+// * exchange - the instruction exchanges successive even and odd lanes
+// of the first operand before multiplication of lane pairs
+// (e.g., vmladavx vs vmladav)
+// The remaining operands are:
+// * accumulator
+// * first vector operand
+// * second vector operand
+// * mask (only in predicated versions)
+
+// Version with 32-bit result, vml{a,s}dav[a][x]
+defm int_arm_mve_vmldava: MVEPredicated<
+ [llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
+ llvm_anyvector_ty>;
+
+// Version with 64-bit result, vml{a,s}ldav[a][x]
+defm int_arm_mve_vmlldava: MVEPredicated<
+ [llvm_i32_ty, llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
+ llvm_anyvector_ty>;
+
+// Version with 72-bit rounded result, vrml{a,s}ldavh[a][x]
+defm int_arm_mve_vrmlldavha: MVEPredicated<
+ [llvm_i32_ty, llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
+ llvm_anyvector_ty>;
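Putting that flag scheme together, an unsigned, non-subtracting, non-exchanging vmladav accumulation over v8i16 would be emitted roughly as follows (the .v8i16 mangling suffix is assumed):

    ; operands: unsigned=1, subtract=0, exchange=0, accumulator, two vectors
    %r = call i32 @llvm.arm.mve.vmldava.v8i16(i32 1, i32 0, i32 0, i32 %acc, <8 x i16> %a, <8 x i16> %b)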
} // end TargetPrefix
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsNVVM.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 0483d965ba64..ec328d69a8dd 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -53,6 +53,10 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
string gft = Geom#":"#Frag#":"#ptx_elt_type;
string ft = frag#":"#ptx_elt_type;
list<LLVMType> regs = !cond(
+ // mma.sync.m8n8k4 uses smaller a/b fragments than wmma fp ops
+ !eq(gft,"m8n8k4:a:f16") : RepLLVMType<2, llvm_v2f16_ty>.ret,
+ !eq(gft,"m8n8k4:b:f16") : RepLLVMType<2, llvm_v2f16_ty>.ret,
+
// fp16 -> fp16/fp32 @ m16n16k16/m8n32k16/m32n8k16
// All currently supported geometries use the same fragment format,
// so we only need to consider {fragment, type}.
@@ -137,13 +141,19 @@ class MMA_SIGNATURE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
class WMMA_NAME_MMA<string ALayout, string BLayout, int Satfinite,
WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
string signature = MMA_SIGNATURE<A, B, C, D>.ret;
- string llvm = "llvm.nvvm.wmma."
- # A.geom
- # ".mma"
- # "." # ALayout
- # "." # BLayout
- # signature
- # !if(Satfinite, ".satfinite", "");
+ string llvm = !if(
+ !eq(A.geom, "m8n8k4"),
+ "llvm.nvvm.mma.m8n8k4"
+ # "." # ALayout
+ # "." # BLayout
+ # signature,
+ "llvm.nvvm.wmma."
+ # A.geom
+ # ".mma"
+ # "." # ALayout
+ # "." # BLayout
+ # signature
+ # !if(Satfinite, ".satfinite", ""));
string record = !subst(".", "_",
!subst("llvm.", "int_", llvm));
@@ -160,7 +170,7 @@ class MMA_OPS<list<string> Geom, list<string> TypeA, list<string> TypeB,
!foldl([]<list<WMMA_REGS>>, TypeA, t2, type_a, !listconcat(t2,
!foldl([]<list<WMMA_REGS>>, !if(!size(TypeB), TypeB, [type_a]), t3, type_b, !listconcat(t3,
!foldl([]<list<WMMA_REGS>>, TypeC, t4, type_c, !listconcat(t4,
- !foldl([]<list<WMMA_REGS>>, !if(!size(TypeC), TypeC, [type_c]), t5, type_d, !listconcat(t5,
+ !foldl([]<list<WMMA_REGS>>, !if(!size(TypeD), TypeD, [type_c]), t5, type_d, !listconcat(t5,
[[WMMA_REGS<geom, "a", type_a>,
WMMA_REGS<geom, "b", type_b>,
WMMA_REGS<geom, "c", type_c>,
@@ -185,19 +195,23 @@ class MMA_LDST_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
// drives generation of corresponding intrinsics and instructions.
class NVVM_MMA_OPS<int _ = 0> {
list<list<WMMA_REGS>> fp_mma_ops = MMA_OPS<
+ ["m8n8k4"],
+ ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret;
+ list<list<WMMA_REGS>> fp_wmma_ops = MMA_OPS<
["m16n16k16", "m32n8k16", "m8n32k16"],
["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret;
- list<list<WMMA_REGS>> int_mma_ops = MMA_OPS<
+ list<list<WMMA_REGS>> int_wmma_ops = MMA_OPS<
["m16n16k16", "m32n8k16", "m8n32k16"],
["s8", "u8"], [], ["s32"], []>.ret;
- list<list<WMMA_REGS>> subint_mma_ops = MMA_OPS<
+ list<list<WMMA_REGS>> subint_wmma_ops = MMA_OPS<
["m8n8k32"],
["s4", "u4"], [], ["s32"], []>.ret;
- list<list<WMMA_REGS>> bit_mma_ops = MMA_OPS<
+ list<list<WMMA_REGS>> bit_wmma_ops = MMA_OPS<
["m8n8k128"],
["b1"], [], ["s32"], []>.ret;
- list<list<WMMA_REGS>> all_mma_ops = !listconcat(fp_mma_ops, int_mma_ops,
- subint_mma_ops, bit_mma_ops);
+ list<list<WMMA_REGS>> all_mma_ops = !listconcat(
+ fp_mma_ops, fp_wmma_ops, int_wmma_ops,
+ subint_wmma_ops, bit_wmma_ops);
list<WMMA_REGS> ldst_ab_ops = MMA_LDST_OPS<
["m16n16k16", "m32n8k16", "m8n32k16"],
@@ -245,10 +259,25 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b
# ":" # frags[0].frag
;
string t = frags[0].ptx_elt_type;
+
+ // gcd is a shortcut used to identify instructions that depend on
+ // geom+frag_c+frag_d. Not all instances of this class have all fragments
+ // specified. If there are not enough fragments, the tail evaluates to '?'.
+ string gcd = frags[0].geom
+ # ":"
+ # !if(!eq(!size(frags), 4),
+ frags[2].ptx_elt_type # frags[3].ptx_elt_type,
+ "?");
list<int> ret = !cond(
// Sub-int MMA only supports fixed A/B layout.
// b1 does not support .satf.
!eq(mma#":"#satf, "b1:row:col:0") : [1],
+ // mma.m8n8k4 has no .satf modifier.
+ !and(!eq(frags[0].geom, "m8n8k4"),
+ !ne(satf, 0)): [],
+
+ // mma.m8n8k4 has no C=f32 D=f16 variant.
+ !eq(gcd, "m8n8k4:f32f16"): [],
!eq(mma, "s4:row:col") : [1],
!eq(mma, "u4:row:col") : [1],
!eq(mma, "s4:row:col") : [1],
@@ -4094,7 +4123,7 @@ class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
[IntrWriteMem, IntrArgMemOnly, WriteOnly<0>, NoCapture<0>],
WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>;
-// Create all load/store variants
+// Create all load/store variants
foreach layout = ["row", "col"] in {
foreach stride = [0, 1] in {
foreach frag = NVVM_MMA_OPS.all_ld_ops in
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsRISCV.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 60393189b830..2039ad1a26b8 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -10,59 +10,59 @@
//
//===----------------------------------------------------------------------===//
-let TargetPrefix = "riscv" in {
-
//===----------------------------------------------------------------------===//
// Atomics
-class MaskedAtomicRMW32Intrinsic
- : Intrinsic<[llvm_i32_ty],
- [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrArgMemOnly, NoCapture<0>, ImmArg<3>]>;
-
-class MaskedAtomicRMW32WithSextIntrinsic
- : Intrinsic<[llvm_i32_ty],
- [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
+// Atomic intrinsics have multiple versions for different access widths, which
+// all follow one of the following signatures (depending on how many arguments
+// they require). We carefully instantiate only specific versions of these for
+// specific integer widths, rather than using `llvm_anyint_ty`.
+//
+// In fact, as these intrinsics take `llvm_anyptr_ty`, the given names are the
+// canonical names, and the intrinsics used in the code will have a name
+// suffixed with the pointer type they are specialised for (denoted `<p>` in the
+// names below), in order to avoid type conflicts.
-def int_riscv_masked_atomicrmw_xchg_i32 : MaskedAtomicRMW32Intrinsic;
-def int_riscv_masked_atomicrmw_add_i32 : MaskedAtomicRMW32Intrinsic;
-def int_riscv_masked_atomicrmw_sub_i32 : MaskedAtomicRMW32Intrinsic;
-def int_riscv_masked_atomicrmw_nand_i32 : MaskedAtomicRMW32Intrinsic;
-def int_riscv_masked_atomicrmw_max_i32 : MaskedAtomicRMW32WithSextIntrinsic;
-def int_riscv_masked_atomicrmw_min_i32 : MaskedAtomicRMW32WithSextIntrinsic;
-def int_riscv_masked_atomicrmw_umax_i32 : MaskedAtomicRMW32Intrinsic;
-def int_riscv_masked_atomicrmw_umin_i32 : MaskedAtomicRMW32Intrinsic;
+let TargetPrefix = "riscv" in {
-def int_riscv_masked_cmpxchg_i32
- : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty],
- [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
+ // T @llvm.<name>.T.<p>(any*, T, T, T imm);
+ class MaskedAtomicRMWFourArg<LLVMType itype>
+ : Intrinsic<[itype], [llvm_anyptr_ty, itype, itype, itype],
+ [IntrArgMemOnly, NoCapture<0>, ImmArg<3>]>;
+ // T @llvm.<name>.T.<p>(any*, T, T, T, T imm);
+ class MaskedAtomicRMWFiveArg<LLVMType itype>
+ : Intrinsic<[itype], [llvm_anyptr_ty, itype, itype, itype, itype],
+ [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
-class MaskedAtomicRMW64Intrinsic
- : Intrinsic<[llvm_i64_ty],
- [llvm_anyptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
- [IntrArgMemOnly, NoCapture<0>, ImmArg<3>]>;
+ // We define 32-bit and 64-bit variants of the above, where T stands for i32
+ // or i64 respectively:
+ multiclass MaskedAtomicRMWFourArgIntrinsics {
+ // i32 @llvm.<name>.i32.<p>(any*, i32, i32, i32 imm);
+ def _i32 : MaskedAtomicRMWFourArg<llvm_i32_ty>;
+ // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64 imm);
+ def _i64 : MaskedAtomicRMWFourArg<llvm_i64_ty>;
+ }
-class MaskedAtomicRMW64WithSextIntrinsic
- : Intrinsic<[llvm_i64_ty],
- [llvm_anyptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty,
- llvm_i64_ty],
- [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
+ multiclass MaskedAtomicRMWFiveArgIntrinsics {
+ // i32 @llvm.<name>.i32.<p>(any*, i32, i32, i32, i32 imm);
+ def _i32 : MaskedAtomicRMWFiveArg<llvm_i32_ty>;
+ // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64, i64 imm);
+ def _i64 : MaskedAtomicRMWFiveArg<llvm_i64_ty>;
+ }
-def int_riscv_masked_atomicrmw_xchg_i64 : MaskedAtomicRMW64Intrinsic;
-def int_riscv_masked_atomicrmw_add_i64 : MaskedAtomicRMW64Intrinsic;
-def int_riscv_masked_atomicrmw_sub_i64 : MaskedAtomicRMW64Intrinsic;
-def int_riscv_masked_atomicrmw_nand_i64 : MaskedAtomicRMW64Intrinsic;
-def int_riscv_masked_atomicrmw_max_i64 : MaskedAtomicRMW64WithSextIntrinsic;
-def int_riscv_masked_atomicrmw_min_i64 : MaskedAtomicRMW64WithSextIntrinsic;
-def int_riscv_masked_atomicrmw_umax_i64 : MaskedAtomicRMW64Intrinsic;
-def int_riscv_masked_atomicrmw_umin_i64 : MaskedAtomicRMW64Intrinsic;
+ // @llvm.riscv.masked.atomicrmw.*.{i32,i64}.<p>(...)
+ defm int_riscv_masked_atomicrmw_xchg : MaskedAtomicRMWFourArgIntrinsics;
+ defm int_riscv_masked_atomicrmw_add : MaskedAtomicRMWFourArgIntrinsics;
+ defm int_riscv_masked_atomicrmw_sub : MaskedAtomicRMWFourArgIntrinsics;
+ defm int_riscv_masked_atomicrmw_nand : MaskedAtomicRMWFourArgIntrinsics;
+ // Signed min and max need an extra operand to do sign extension with.
+ defm int_riscv_masked_atomicrmw_max : MaskedAtomicRMWFiveArgIntrinsics;
+ defm int_riscv_masked_atomicrmw_min : MaskedAtomicRMWFiveArgIntrinsics;
+ // Unsigned min and max don't need the extra operand.
+ defm int_riscv_masked_atomicrmw_umax : MaskedAtomicRMWFourArgIntrinsics;
+ defm int_riscv_masked_atomicrmw_umin : MaskedAtomicRMWFourArgIntrinsics;
-def int_riscv_masked_cmpxchg_i64
- : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty, llvm_i64_ty, llvm_i64_ty,
- llvm_i64_ty, llvm_i64_ty],
- [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
+ // @llvm.riscv.masked.cmpxchg.{i32,i64}.<p>(...)
+ defm int_riscv_masked_cmpxchg : MaskedAtomicRMWFiveArgIntrinsics;
} // TargetPrefix = "riscv"
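To make the naming scheme concrete: the four-argument add variant, specialised for i32 at some i32 pointer type, would be called as below (the .p0i32 suffix follows the &lt;p&gt; convention described above and is illustrative; the last operand is the immediate flagged by ImmArg&lt;3&gt;):

    %old = call i32 @llvm.riscv.masked.atomicrmw.add.i32.p0i32(i32* %ptr, i32 %incr, i32 %mask, i32 5)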
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 810979b99934..e97700ad724a 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -112,6 +112,11 @@ def int_wasm_sub_saturate_unsigned :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
+def int_wasm_avgr_unsigned :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
def int_wasm_bitselect :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
@@ -132,6 +137,10 @@ def int_wasm_qfms :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
+def int_wasm_dot :
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_wasm_narrow_signed :
Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>],
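The new dot-product intrinsic above is fixed-type (two v8i16 inputs, one v4i32 result), so an illustrative call needs no mangling suffix:

    %d = call <4 x i32> @llvm.wasm.dot(<8 x i16> %a, <8 x i16> %b)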
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/LLVMContext.h b/contrib/llvm-project/llvm/include/llvm/IR/LLVMContext.h
index 91bd57dc5ac0..39d19b7cffd9 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/LLVMContext.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/LLVMContext.h
@@ -17,7 +17,6 @@
#include "llvm-c/Types.h"
#include "llvm/IR/DiagnosticHandler.h"
#include "llvm/Support/CBindingWrapping.h"
-#include "llvm/Support/Options.h"
#include <cstdint>
#include <memory>
#include <string>
@@ -85,6 +84,7 @@ public:
OB_deopt = 0, // "deopt"
OB_funclet = 1, // "funclet"
OB_gc_transition = 2, // "gc-transition"
+ OB_cfguardtarget = 3, // "cfguardtarget"
};
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
@@ -287,14 +287,6 @@ public:
void emitError(const Instruction *I, const Twine &ErrorStr);
void emitError(const Twine &ErrorStr);
- /// Query for a debug option's value.
- ///
- /// This function returns typed data populated from command line parsing.
- template <typename ValT, typename Base, ValT(Base::*Mem)>
- ValT getOption() const {
- return OptionRegistry::instance().template get<ValT, Base, Mem>();
- }
-
/// Access the object which can disable optional passes and individual
/// optimizations at compile time.
OptPassGate &getOptPassGate() const;
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManager.h b/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManager.h
index d6bb79ab6019..2b87143276b9 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManager.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManager.h
@@ -63,7 +63,7 @@ private:
PassManagerImpl *PM;
};
-/// FunctionPassManager manages FunctionPasses and BasicBlockPassManagers.
+/// FunctionPassManager manages FunctionPasses.
class FunctionPassManager : public PassManagerBase {
public:
/// FunctionPassManager ctor - This initializes the pass manager. It needs,
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManagers.h b/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManagers.h
index 72bc80fb5381..5044c1f6ed31 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManagers.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/LegacyPassManagers.h
@@ -53,10 +53,6 @@
// a place to implement common pass manager APIs. All pass managers derive from
// PMDataManager.
//
-// [o] class BBPassManager : public FunctionPass, public PMDataManager;
-//
-// BBPassManager manages BasicBlockPasses.
-//
// [o] class FunctionPassManager;
//
// This is a external interface used to manage FunctionPasses. This
@@ -103,7 +99,6 @@ enum PassDebuggingString {
EXECUTION_MSG, // "Executing Pass '" + PassName
MODIFICATION_MSG, // "Made Modification '" + PassName
FREEING_MSG, // " Freeing Pass '" + PassName
- ON_BASICBLOCK_MSG, // "' on BasicBlock '" + InstructionName + "'...\n"
ON_FUNCTION_MSG, // "' on Function '" + FunctionName + "'...\n"
ON_MODULE_MSG, // "' on Module '" + ModuleName + "'...\n"
ON_REGION_MSG, // "' on Region '" + Msg + "'...\n'"
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h b/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h
index f62b1e246cca..dda939b97575 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
@@ -641,26 +642,32 @@ public:
/// A collection of metadata nodes that might be associated with a
/// memory access used by the alias-analysis infrastructure.
struct AAMDNodes {
- explicit AAMDNodes(MDNode *T = nullptr, MDNode *S = nullptr,
- MDNode *N = nullptr)
- : TBAA(T), Scope(S), NoAlias(N) {}
+ explicit AAMDNodes() = default;
+ explicit AAMDNodes(MDNode *T, MDNode *TS, MDNode *S, MDNode *N)
+ : TBAA(T), TBAAStruct(TS), Scope(S), NoAlias(N) {}
bool operator==(const AAMDNodes &A) const {
- return TBAA == A.TBAA && Scope == A.Scope && NoAlias == A.NoAlias;
+ return TBAA == A.TBAA && TBAAStruct == A.TBAAStruct && Scope == A.Scope &&
+ NoAlias == A.NoAlias;
}
bool operator!=(const AAMDNodes &A) const { return !(*this == A); }
- explicit operator bool() const { return TBAA || Scope || NoAlias; }
+ explicit operator bool() const {
+ return TBAA || TBAAStruct || Scope || NoAlias;
+ }
/// The tag for type-based alias analysis.
- MDNode *TBAA;
+ MDNode *TBAA = nullptr;
+
+ /// The tag for type-based alias analysis (tbaa struct).
+ MDNode *TBAAStruct = nullptr;
/// The tag for alias scope specification (used with noalias).
- MDNode *Scope;
+ MDNode *Scope = nullptr;
/// The tag specifying the noalias scope.
- MDNode *NoAlias;
+ MDNode *NoAlias = nullptr;
/// Given two sets of AAMDNodes that apply to the same pointer,
/// give the best AAMDNodes that are compatible with both (i.e. a set of
@@ -670,6 +677,7 @@ struct AAMDNodes {
AAMDNodes intersect(const AAMDNodes &Other) {
AAMDNodes Result;
Result.TBAA = Other.TBAA == TBAA ? TBAA : nullptr;
+ Result.TBAAStruct = Other.TBAAStruct == TBAAStruct ? TBAAStruct : nullptr;
Result.Scope = Other.Scope == Scope ? Scope : nullptr;
Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr;
return Result;
@@ -681,16 +689,17 @@ template<>
struct DenseMapInfo<AAMDNodes> {
static inline AAMDNodes getEmptyKey() {
return AAMDNodes(DenseMapInfo<MDNode *>::getEmptyKey(),
- nullptr, nullptr);
+ nullptr, nullptr, nullptr);
}
static inline AAMDNodes getTombstoneKey() {
return AAMDNodes(DenseMapInfo<MDNode *>::getTombstoneKey(),
- nullptr, nullptr);
+ nullptr, nullptr, nullptr);
}
static unsigned getHashValue(const AAMDNodes &Val) {
return DenseMapInfo<MDNode *>::getHashValue(Val.TBAA) ^
+ DenseMapInfo<MDNode *>::getHashValue(Val.TBAAStruct) ^
DenseMapInfo<MDNode *>::getHashValue(Val.Scope) ^
DenseMapInfo<MDNode *>::getHashValue(Val.NoAlias);
}
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Module.h b/contrib/llvm-project/llvm/include/llvm/IR/Module.h
index 59331142766a..68cd583c136c 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Module.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Module.h
@@ -660,24 +660,8 @@ public:
concat_iterator<const GlobalObject, const_iterator,
const_global_iterator>;
- iterator_range<global_object_iterator> global_objects() {
- return concat<GlobalObject>(functions(), globals());
- }
- iterator_range<const_global_object_iterator> global_objects() const {
- return concat<const GlobalObject>(functions(), globals());
- }
-
- global_object_iterator global_object_begin() {
- return global_objects().begin();
- }
- global_object_iterator global_object_end() { return global_objects().end(); }
-
- const_global_object_iterator global_object_begin() const {
- return global_objects().begin();
- }
- const_global_object_iterator global_object_end() const {
- return global_objects().end();
- }
+ iterator_range<global_object_iterator> global_objects();
+ iterator_range<const_global_object_iterator> global_objects() const;
using global_value_iterator =
concat_iterator<GlobalValue, iterator, global_iterator, alias_iterator,
@@ -686,23 +670,8 @@ public:
concat_iterator<const GlobalValue, const_iterator, const_global_iterator,
const_alias_iterator, const_ifunc_iterator>;
- iterator_range<global_value_iterator> global_values() {
- return concat<GlobalValue>(functions(), globals(), aliases(), ifuncs());
- }
- iterator_range<const_global_value_iterator> global_values() const {
- return concat<const GlobalValue>(functions(), globals(), aliases(),
- ifuncs());
- }
-
- global_value_iterator global_value_begin() { return global_values().begin(); }
- global_value_iterator global_value_end() { return global_values().end(); }
-
- const_global_value_iterator global_value_begin() const {
- return global_values().begin();
- }
- const_global_value_iterator global_value_end() const {
- return global_values().end();
- }
+ iterator_range<global_value_iterator> global_values();
+ iterator_range<const_global_value_iterator> global_values() const;
/// @}
/// @name Named Metadata Iteration
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/ModuleSummaryIndex.h b/contrib/llvm-project/llvm/include/llvm/IR/ModuleSummaryIndex.h
index be60447abd87..aa4054c8409e 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -29,6 +29,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScaledNumber.h"
#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <array>
#include <cassert>
@@ -172,7 +173,7 @@ struct ValueInfo {
RefAndFlags.setInt(HaveGVs);
}
- operator bool() const { return getRef(); }
+ explicit operator bool() const { return getRef(); }
GlobalValue::GUID getGUID() const { return getRef()->first; }
const GlobalValue *getValue() const {
@@ -547,6 +548,8 @@ public:
// Indicate if the global value cannot be inlined.
unsigned NoInline : 1;
+ // Indicate if function should be always inlined.
+ unsigned AlwaysInline : 1;
};
/// Create an empty FunctionSummary (with specified call edges).
@@ -941,6 +944,11 @@ private:
/// considered live.
bool WithGlobalValueDeadStripping = false;
+ /// Indicates that summary-based attribute propagation has run and
+ /// GVarFlags::MaybeReadonly / GVarFlags::MaybeWriteonly are really
+ /// read/write only.
+ bool WithAttributePropagation = false;
+
/// Indicates that summary-based synthetic entry count propagation has run
bool HasSyntheticEntryCounts = false;
@@ -987,6 +995,13 @@ public:
: HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit), Saver(Alloc) {
}
+ // Current version for the module summary in bitcode files.
+ // The BitcodeSummaryVersion should be bumped whenever we introduce changes
+ // in the way some records are interpreted, like flags for instance.
+ // Note that incrementing this may require changes in both BitcodeReader.cpp
+ // and BitcodeWriter.cpp.
+ static constexpr uint64_t BitcodeSummaryVersion = 8;
+
bool haveGVs() const { return HaveGVs; }
gvsummary_iterator begin() { return GlobalValueMap.begin(); }
@@ -1065,6 +1080,18 @@ public:
WithGlobalValueDeadStripping = true;
}
+ bool withAttributePropagation() const { return WithAttributePropagation; }
+ void setWithAttributePropagation() {
+ WithAttributePropagation = true;
+ }
+
+ bool isReadOnly(const GlobalVarSummary *GVS) const {
+ return WithAttributePropagation && GVS->maybeReadOnly();
+ }
+ bool isWriteOnly(const GlobalVarSummary *GVS) const {
+ return WithAttributePropagation && GVS->maybeWriteOnly();
+ }
+
bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; }
void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; }
@@ -1241,9 +1268,11 @@ public:
}
/// Helper to obtain the unpromoted name for a global value (or the original
- /// name if not promoted).
+ /// name if not promoted). We split off the rightmost ".llvm.${hash}" suffix
+ /// because certain clients (not clang at the moment) can run two rounds of
+ /// ThinLTO optimization, and therefore two rounds of promotion.
static StringRef getOriginalNameBeforePromote(StringRef Name) {
- std::pair<StringRef, StringRef> Pair = Name.split(".llvm.");
+ std::pair<StringRef, StringRef> Pair = Name.rsplit(".llvm.");
return Pair.first;
}
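Worked example of the split-to-rsplit change: for a hypothetical twice-promoted symbol "foo.llvm.123.llvm.456", rsplit(".llvm.") returns "foo.llvm.123", undoing only the most recent promotion, whereas the old split(".llvm.") would have over-stripped it to "foo".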
@@ -1349,13 +1378,18 @@ public:
void dump() const;
/// Export summary to dot file for GraphViz.
- void exportToDot(raw_ostream& OS) const;
+ void
+ exportToDot(raw_ostream &OS,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) const;
/// Print out strongly connected components for debugging.
void dumpSCCs(raw_ostream &OS);
/// Analyze index and detect unmodified globals
void propagateAttributes(const DenseSet<GlobalValue::GUID> &PreservedSymbols);
+
+ /// Checks if we can import global variable from another module.
+ bool canImportGlobalVar(GlobalValueSummary *S, bool AnalyzeRefs) const;
};
/// GraphTraits definition to build SCC for the index
@@ -1427,15 +1461,6 @@ struct GraphTraits<ModuleSummaryIndex *> : public GraphTraits<ValueInfo> {
return ValueInfo(I->haveGVs(), &P);
}
};
-
-static inline bool canImportGlobalVar(GlobalValueSummary *S) {
- assert(isa<GlobalVarSummary>(S->getBaseObject()));
-
- // We don't import GV with references, because it can result
- // in promotion of local variables in the source module.
- return !GlobalValue::isInterposableLinkage(S->linkage()) &&
- !S->notEligibleToImport() && S->refs().empty();
-}
} // end namespace llvm
#endif // LLVM_IR_MODULESUMMARYINDEX_H
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/NoFolder.h b/contrib/llvm-project/llvm/include/llvm/IR/NoFolder.h
index 0e3c19f4947f..835236b1eac0 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/NoFolder.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/NoFolder.h
@@ -196,7 +196,7 @@ public:
}
Instruction *CreateFNeg(Constant *C) const {
- return BinaryOperator::CreateFNeg(C);
+ return UnaryOperator::CreateFNeg(C);
}
Instruction *CreateNot(Constant *C) const {
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Operator.h b/contrib/llvm-project/llvm/include/llvm/IR/Operator.h
index 037f5aed03ee..35e08d9215e2 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Operator.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Operator.h
@@ -66,6 +66,7 @@ public:
class OverflowingBinaryOperator : public Operator {
public:
enum {
+ AnyWrap = 0,
NoUnsignedWrap = (1 << 0),
NoSignedWrap = (1 << 1)
};
@@ -394,8 +395,12 @@ public:
return true;
case Instruction::PHI:
case Instruction::Select:
- case Instruction::Call:
- return V->getType()->isFPOrFPVectorTy();
+ case Instruction::Call: {
+ Type *Ty = V->getType();
+ while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty))
+ Ty = ArrTy->getElementType();
+ return Ty->isFPOrFPVectorTy();
+ }
default:
return false;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PassManager.h b/contrib/llvm-project/llvm/include/llvm/IR/PassManager.h
index 1e1f4a92f844..58591ab380cc 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/PassManager.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/PassManager.h
@@ -1165,9 +1165,9 @@ public:
/// Result proxy object for \c OuterAnalysisManagerProxy.
class Result {
public:
- explicit Result(const AnalysisManagerT &AM) : AM(&AM) {}
+ explicit Result(const AnalysisManagerT &OuterAM) : OuterAM(&OuterAM) {}
- const AnalysisManagerT &getManager() const { return *AM; }
+ const AnalysisManagerT &getManager() const { return *OuterAM; }
/// When invalidation occurs, remove any registered invalidation events.
bool invalidate(
@@ -1219,7 +1219,7 @@ public:
}
private:
- const AnalysisManagerT *AM;
+ const AnalysisManagerT *OuterAM;
/// A map from an outer analysis ID to the set of this IR-unit's analyses
/// which need to be invalidated.
@@ -1227,14 +1227,15 @@ public:
OuterAnalysisInvalidationMap;
};
- OuterAnalysisManagerProxy(const AnalysisManagerT &AM) : AM(&AM) {}
+ OuterAnalysisManagerProxy(const AnalysisManagerT &OuterAM)
+ : OuterAM(&OuterAM) {}
/// Run the analysis pass and create our proxy result object.
- /// Nothing to see here, it just forwards the \c AM reference into the
+ /// Nothing to see here, it just forwards the \c OuterAM reference into the
/// result.
Result run(IRUnitT &, AnalysisManager<IRUnitT, ExtraArgTs...> &,
ExtraArgTs...) {
- return Result(*AM);
+ return Result(*OuterAM);
}
private:
@@ -1243,7 +1244,7 @@ private:
static AnalysisKey Key;
- const AnalysisManagerT *AM;
+ const AnalysisManagerT *OuterAM;
};
template <typename AnalysisManagerT, typename IRUnitT, typename... ExtraArgTs>
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h b/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h
index 2851b24c05ae..6621fc9f819c 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h
@@ -35,6 +35,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Value.h"
@@ -365,7 +366,7 @@ inline api_pred_ty<is_negative> m_Negative(const APInt *&V) {
struct is_nonnegative {
bool isValue(const APInt &C) { return C.isNonNegative(); }
};
-/// Match an integer or vector of nonnegative values.
+/// Match an integer or vector of non-negative values.
/// For vectors, this includes constants with undefined elements.
inline cst_pred_ty<is_nonnegative> m_NonNegative() {
return cst_pred_ty<is_nonnegative>();
@@ -374,6 +375,28 @@ inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) {
return V;
}
+struct is_strictlypositive {
+ bool isValue(const APInt &C) { return C.isStrictlyPositive(); }
+};
+/// Match an integer or vector of strictly positive values.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() {
+ return cst_pred_ty<is_strictlypositive>();
+}
+inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) {
+ return V;
+}
+
+struct is_nonpositive {
+ bool isValue(const APInt &C) { return C.isNonPositive(); }
+};
+/// Match an integer or vector of non-positive values.
+/// For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<is_nonpositive> m_NonPositive() {
+ return cst_pred_ty<is_nonpositive>();
+}
+inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; }
+
struct is_one {
bool isValue(const APInt &C) { return C.isOneValue(); }
};
@@ -558,6 +581,8 @@ inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
/// Match a binary operator, capturing it if we match.
inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
+/// Match a 'with overflow' intrinsic, capturing it if we match.
+inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) {
+  return I;
+}
/// Match a ConstantInt, capturing the value if we match.
inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
@@ -1230,6 +1255,12 @@ m_SelectCst(const Cond &C) {
return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>());
}
+/// Matches FreezeInst.
+template <typename OpTy>
+inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) {
+ return OneOps_match<OpTy, Instruction::Freeze>(Op);
+}
+
/// Matches InsertElementInst.
template <typename Val_t, typename Elt_t, typename Idx_t>
inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>
@@ -1727,6 +1758,12 @@ struct m_Intrinsic_Ty<T0, T1, T2, T3> {
Argument_match<T3>>;
};
+template <typename T0, typename T1, typename T2, typename T3, typename T4>
+struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> {
+ using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty,
+ Argument_match<T4>>;
+};
+
/// Match intrinsic calls like this:
/// m_Intrinsic<Intrinsic::fabs>(m_Value(X))
template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() {
@@ -1757,6 +1794,15 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
}
+template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
+ typename T3, typename T4>
+inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty
+m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3,
+ const T4 &Op4) {
+ return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3),
+ m_Argument<4>(Op4));
+}
+
// Helper intrinsic matching specializations.
template <typename Opnd0>
inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) {
@@ -1937,6 +1983,25 @@ template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
return Signum_match<Val_t>(V);
}
+template <int Ind, typename Opnd_t> struct ExtractValue_match {
+ Opnd_t Val;
+ ExtractValue_match(const Opnd_t &V) : Val(V) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ if (auto *I = dyn_cast<ExtractValueInst>(V))
+ return I->getNumIndices() == 1 && I->getIndices()[0] == Ind &&
+ Val.match(I->getAggregateOperand());
+ return false;
+ }
+};
+
+/// Match a single index ExtractValue instruction.
+/// For example m_ExtractValue<1>(...)
+template <int Ind, typename Val_t>
+inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) {
+ return ExtractValue_match<Ind, Val_t>(V);
+}
+
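A sketch of how the new matchers compose in caller code (hypothetical usage; the helper name is invented):

    using namespace llvm::PatternMatch;
    Value *A, *B;
    // Match the overflow bit (aggregate element 1) of @llvm.uadd.with.overflow.
    if (match(V, m_ExtractValue<1>(
                     m_Intrinsic<Intrinsic::uadd_with_overflow>(m_Value(A),
                                                                m_Value(B)))))
      simplifyOverflowCheck(A, B); // invented helper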
} // end namespace PatternMatch
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/RemarkStreamer.h b/contrib/llvm-project/llvm/include/llvm/IR/RemarkStreamer.h
index 2abf6f99cb08..9ea12e8389f0 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/RemarkStreamer.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/RemarkStreamer.h
@@ -53,6 +53,8 @@ public:
Error setFilter(StringRef Filter);
/// Emit a diagnostic through the streamer.
void emit(const DiagnosticInfoOptimizationBase &Diag);
+ /// Check if the remarks also need to have associated metadata in a section.
+ bool needsSection() const;
};
template <typename ThisError>
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/RuntimeLibcalls.def b/contrib/llvm-project/llvm/include/llvm/IR/RuntimeLibcalls.def
index f6c74d497b18..fe2c32e3c975 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/contrib/llvm-project/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -23,7 +23,7 @@
// Declare the enumerator for each libcall, along with its default name. Some
// libcalls have different names on particular OSes or architectures. These
-// are set in InitLibcallNames() in TargetLoweringBase.cpp and/or by targets
+// are set in InitLibcalls() in TargetLoweringBase.cpp and/or by targets
// using TargetLoweringBase::setLibcallName()
#ifndef HANDLE_LIBCALL
#error "HANDLE_LIBCALL must be defined"
@@ -386,10 +386,6 @@ HANDLE_LIBCALL(UO_F32, "__unordsf2")
HANDLE_LIBCALL(UO_F64, "__unorddf2")
HANDLE_LIBCALL(UO_F128, "__unordtf2")
HANDLE_LIBCALL(UO_PPCF128, "__gcc_qunord")
-HANDLE_LIBCALL(O_F32, "__unordsf2")
-HANDLE_LIBCALL(O_F64, "__unorddf2")
-HANDLE_LIBCALL(O_F128, "__unordtf2")
-HANDLE_LIBCALL(O_PPCF128, "__gcc_qunord")
// Memory
HANDLE_LIBCALL(MEMCPY, "memcpy")
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/ValueHandle.h b/contrib/llvm-project/llvm/include/llvm/IR/ValueHandle.h
index 1135d796f7ed..50b7701f6716 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/ValueHandle.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/ValueHandle.h
@@ -171,6 +171,25 @@ template <> struct simplify_type<const WeakVH> {
static SimpleType getSimplifiedValue(const WeakVH &WVH) { return WVH; }
};
+// Specialize DenseMapInfo to allow WeakVH to participate in DenseMap.
+template <> struct DenseMapInfo<WeakVH> {
+ static inline WeakVH getEmptyKey() {
+ return WeakVH(DenseMapInfo<Value *>::getEmptyKey());
+ }
+
+ static inline WeakVH getTombstoneKey() {
+ return WeakVH(DenseMapInfo<Value *>::getTombstoneKey());
+ }
+
+ static unsigned getHashValue(const WeakVH &Val) {
+ return DenseMapInfo<Value *>::getHashValue(Val);
+ }
+
+ static bool isEqual(const WeakVH &LHS, const WeakVH &RHS) {
+ return DenseMapInfo<Value *>::isEqual(LHS, RHS);
+ }
+};
+
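With this specialization in place, WeakVH can key a DenseMap directly — a minimal sketch (assumes llvm/ADT/DenseMap.h; V is some llvm::Value*):

    llvm::DenseMap<llvm::WeakVH, unsigned> UseCounts;
    UseCounts[llvm::WeakVH(V)] += 1; // keys keep tracking RAUW/deletion of V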
/// Value handle that is nullable, but tries to track the Value.
///
/// This is a value handle that tries hard to point to a Value, even across
@@ -264,6 +283,7 @@ public:
#else
AssertingVH() : ThePtr(nullptr) {}
AssertingVH(ValueTy *P) : ThePtr(GetAsValue(P)) {}
+ AssertingVH(const AssertingVH<ValueTy> &) = default;
#endif
operator ValueTy*() const {
diff --git a/contrib/llvm-project/llvm/include/llvm/InitializePasses.h b/contrib/llvm-project/llvm/include/llvm/InitializePasses.h
index 49f69340c828..a5e1310e28b9 100644
--- a/contrib/llvm-project/llvm/include/llvm/InitializePasses.h
+++ b/contrib/llvm-project/llvm/include/llvm/InitializePasses.h
@@ -91,6 +91,8 @@ void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&);
void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&);
void initializeCFGPrinterLegacyPassPass(PassRegistry&);
void initializeCFGSimplifyPassPass(PassRegistry&);
+void initializeCFGuardPass(PassRegistry&);
+void initializeCFGuardLongjmpPass(PassRegistry&);
void initializeCFGViewerLegacyPassPass(PassRegistry&);
void initializeCFIInstrInserterPass(PassRegistry&);
void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
@@ -178,6 +180,7 @@ void initializeIndVarSimplifyLegacyPassPass(PassRegistry&);
void initializeIndirectBrExpandPassPass(PassRegistry&);
void initializeInferAddressSpacesPass(PassRegistry&);
void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&);
+void initializeInjectTLIMappingsLegacyPass(PassRegistry &);
void initializeInlineCostAnalysisPass(PassRegistry&);
void initializeInstCountPass(PassRegistry&);
void initializeInstNamerPass(PassRegistry&);
@@ -252,6 +255,7 @@ void initializeLowerIntrinsicsPass(PassRegistry&);
void initializeLowerInvokeLegacyPassPass(PassRegistry&);
void initializeLowerSwitchPass(PassRegistry&);
void initializeLowerTypeTestsPass(PassRegistry&);
+void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &);
void initializeMIRCanonicalizerPass(PassRegistry &);
void initializeMIRNamerPass(PassRegistry &);
void initializeMIRPrintingPassPass(PassRegistry&);
@@ -284,7 +288,7 @@ void initializeMemoryDependenceWrapperPassPass(PassRegistry&);
void initializeMemorySSAPrinterLegacyPassPass(PassRegistry&);
void initializeMemorySSAWrapperPassPass(PassRegistry&);
void initializeMemorySanitizerLegacyPassPass(PassRegistry&);
-void initializeMergeFunctionsPass(PassRegistry&);
+void initializeMergeFunctionsLegacyPassPass(PassRegistry&);
void initializeMergeICmpsLegacyPassPass(PassRegistry &);
void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
void initializeMetaRenamerPass(PassRegistry&);
@@ -332,7 +336,6 @@ void initializePostRAMachineSinkingPass(PassRegistry&);
void initializePostRASchedulerPass(PassRegistry&);
void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry&);
void initializePredicateInfoPrinterLegacyPassPass(PassRegistry&);
-void initializePrintBasicBlockPassPass(PassRegistry&);
void initializePrintFunctionPassWrapperPass(PassRegistry&);
void initializePrintModulePassWrapperPass(PassRegistry&);
void initializeProcessImplicitDefsPass(PassRegistry&);
@@ -343,6 +346,7 @@ void initializeRABasicPass(PassRegistry&);
void initializeRAGreedyPass(PassRegistry&);
void initializeReachingDefAnalysisPass(PassRegistry&);
void initializeReassociateLegacyPassPass(PassRegistry&);
+void initializeRedundantDbgInstEliminationPass(PassRegistry&);
void initializeRegAllocFastPass(PassRegistry&);
void initializeRegBankSelectPass(PassRegistry&);
void initializeRegToMemPass(PassRegistry&);
@@ -406,6 +410,7 @@ void initializeTargetTransformInfoWrapperPassPass(PassRegistry&);
void initializeThreadSanitizerLegacyPassPass(PassRegistry&);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
void initializeTypeBasedAAWrapperPassPass(PassRegistry&);
+void initializeTypePromotionPass(PassRegistry&);
void initializeUnifyFunctionExitNodesPass(PassRegistry&);
void initializeUnpackMachineBundlesPass(PassRegistry&);
void initializeUnreachableBlockElimLegacyPassPass(PassRegistry&);
diff --git a/contrib/llvm-project/llvm/include/llvm/LTO/Config.h b/contrib/llvm-project/llvm/include/llvm/LTO/Config.h
index daa6585b1113..50147300f7f7 100644
--- a/contrib/llvm-project/llvm/include/llvm/LTO/Config.h
+++ b/contrib/llvm-project/llvm/include/llvm/LTO/Config.h
@@ -14,9 +14,12 @@
#ifndef LLVM_LTO_CONFIG_H
#define LLVM_LTO_CONFIG_H
+#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CodeGen.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <functional>
@@ -41,7 +44,7 @@ struct Config {
Optional<Reloc::Model> RelocModel = Reloc::PIC_;
Optional<CodeModel::Model> CodeModel = None;
CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;
- TargetMachine::CodeGenFileType CGFileType = TargetMachine::CGFT_ObjectFile;
+ CodeGenFileType CGFileType = CGFT_ObjectFile;
unsigned OptLevel = 2;
bool DisableVerify = false;
@@ -126,6 +129,9 @@ struct Config {
/// with llvm-lto2.
std::unique_ptr<raw_ostream> ResolutionFile;
+ /// Tunable parameters for passes in the default pipelines.
+ PipelineTuningOptions PTO;
+
/// The following callbacks deal with tasks, which normally represent the
/// entire optimization and code generation pipeline for what will become a
/// single native object file. Each task has a unique identifier between 0 and
@@ -183,8 +189,9 @@ struct Config {
///
/// It is called regardless of whether the backend is in-process, although it
/// is not called from individual backend processes.
- using CombinedIndexHookFn =
- std::function<bool(const ModuleSummaryIndex &Index)>;
+ using CombinedIndexHookFn = std::function<bool(
+ const ModuleSummaryIndex &Index,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols)>;
CombinedIndexHookFn CombinedIndexHook;
/// This is a convenience function that configures this Config object to write
diff --git a/contrib/llvm-project/llvm/include/llvm/LTO/LTO.h b/contrib/llvm-project/llvm/include/llvm/LTO/LTO.h
index 0a1e3e1d0e42..aa21f963d3a8 100644
--- a/contrib/llvm-project/llvm/include/llvm/LTO/LTO.h
+++ b/contrib/llvm-project/llvm/include/llvm/LTO/LTO.h
@@ -59,7 +59,7 @@ void thinLTOResolvePrevailingInIndex(
/// must apply the changes to the Module via thinLTOInternalizeModule.
void thinLTOInternalizeAndPromoteInIndex(
ModuleSummaryIndex &Index,
- function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+ function_ref<bool(StringRef, ValueInfo)> isExported,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing);
@@ -222,7 +222,7 @@ using NativeObjectCache =
/// The details of this type definition aren't important; clients can only
/// create a ThinBackend using one of the create*ThinBackend() functions below.
using ThinBackend = std::function<std::unique_ptr<ThinBackendProc>(
- Config &C, ModuleSummaryIndex &CombinedIndex,
+ const Config &C, ModuleSummaryIndex &CombinedIndex,
StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, NativeObjectCache Cache)>;
@@ -306,7 +306,8 @@ private:
Config Conf;
struct RegularLTOState {
- RegularLTOState(unsigned ParallelCodeGenParallelismLevel, Config &Conf);
+ RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
+ const Config &Conf);
struct CommonResolution {
uint64_t Size = 0;
MaybeAlign Align;
diff --git a/contrib/llvm-project/llvm/include/llvm/LTO/LTOBackend.h b/contrib/llvm-project/llvm/include/llvm/LTO/LTOBackend.h
index 4ff8a1993d49..de4fa308fde7 100644
--- a/contrib/llvm-project/llvm/include/llvm/LTO/LTOBackend.h
+++ b/contrib/llvm-project/llvm/include/llvm/LTO/LTOBackend.h
@@ -35,13 +35,13 @@ namespace lto {
/// Runs a regular LTO backend. The regular LTO backend can also act as the
/// regular LTO phase of ThinLTO, which may need to access the combined index.
-Error backend(Config &C, AddStreamFn AddStream,
+Error backend(const Config &C, AddStreamFn AddStream,
unsigned ParallelCodeGenParallelismLevel,
std::unique_ptr<Module> M, ModuleSummaryIndex &CombinedIndex);
/// Runs a ThinLTO backend.
-Error thinBackend(Config &C, unsigned Task, AddStreamFn AddStream, Module &M,
- const ModuleSummaryIndex &CombinedIndex,
+Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
+ Module &M, const ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, BitcodeModule> &ModuleMap);
diff --git a/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
index 8718df4b88e6..114ba85947a5 100644
--- a/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ b/contrib/llvm-project/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -35,11 +35,13 @@
#define LLVM_LTO_LTOCODEGENERATOR_H
#include "llvm-c/lto.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -87,8 +89,8 @@ struct LTOCodeGenerator {
void setCodePICModel(Optional<Reloc::Model> Model) { RelocModel = Model; }
/// Set the file type to be emitted (assembly or object code).
- /// The default is TargetMachine::CGFT_ObjectFile.
- void setFileType(TargetMachine::CodeGenFileType FT) { FileType = FT; }
+ /// The default is CGFT_ObjectFile.
+ void setFileType(CodeGenFileType FT) { FileType = FT; }
void setCpu(StringRef MCpu) { this->MCpu = MCpu; }
void setAttr(StringRef MAttr) { this->MAttr = MAttr; }
@@ -121,7 +123,7 @@ struct LTOCodeGenerator {
/// name is misleading). This function should be called before
/// LTOCodeGenerator::compilexxx(), and
/// LTOCodeGenerator::writeMergedModules().
- void setCodeGenDebugOptions(StringRef Opts);
+ void setCodeGenDebugOptions(ArrayRef<const char *> Opts);
/// Parse the options set in setCodeGenDebugOptions.
///
@@ -238,7 +240,7 @@ private:
bool ShouldInternalize = EnableLTOInternalization;
bool ShouldEmbedUselists = false;
bool ShouldRestoreGlobalsLinkage = false;
- TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile;
+ CodeGenFileType FileType = CGFT_ObjectFile;
std::unique_ptr<ToolOutputFile> DiagnosticOutputFile;
bool Freestanding = false;
std::unique_ptr<ToolOutputFile> StatsFile = nullptr;
diff --git a/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h b/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h
index ac88165845d3..aa64296f9428 100644
--- a/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h
+++ b/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h
@@ -159,6 +159,7 @@ namespace {
(void) llvm::createPostDomOnlyViewerPass();
(void) llvm::createPostDomViewerPass();
(void) llvm::createReassociatePass();
+ (void) llvm::createRedundantDbgInstEliminationPass();
(void) llvm::createRegionInfoPass();
(void) llvm::createRegionOnlyPrinterPass();
(void) llvm::createRegionOnlyViewerPass();
@@ -200,7 +201,6 @@ namespace {
llvm::raw_string_ostream os(buf);
(void) llvm::createPrintModulePass(os);
(void) llvm::createPrintFunctionPass(os);
- (void) llvm::createPrintBasicBlockPass(os);
(void) llvm::createModuleDebugInfoPrinterPass();
(void) llvm::createPartialInliningPass();
(void) llvm::createLintPass();
@@ -226,6 +226,7 @@ namespace {
(void) llvm::createScalarizeMaskedMemIntrinPass();
(void) llvm::createWarnMissedTransformationsPass();
(void) llvm::createHardwareLoopsPass();
+ (void)llvm::createInjectTLIMappingsLegacyPass();
(void)new llvm::IntervalPartition();
(void)new llvm::ScalarEvolutionWrapperPass();
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmBackend.h b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmBackend.h
index 1f3ad6c1e547..ed7d5c7f01f4 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmBackend.h
@@ -17,21 +17,18 @@
#include "llvm/MC/MCFragment.h"
#include "llvm/Support/Endian.h"
#include <cstdint>
-#include <memory>
namespace llvm {
class MCAsmLayout;
class MCAssembler;
class MCCFIInstruction;
-class MCCodePadder;
struct MCFixupKindInfo;
class MCFragment;
class MCInst;
class MCObjectStreamer;
class MCObjectTargetWriter;
class MCObjectWriter;
-struct MCCodePaddingContext;
class MCRelaxableFragment;
class MCSubtargetInfo;
class MCValue;
@@ -39,8 +36,6 @@ class raw_pwrite_stream;
/// Generic interface to target specific assembler backends.
class MCAsmBackend {
- std::unique_ptr<MCCodePadder> CodePadder;
-
protected: // Can only create subclasses.
MCAsmBackend(support::endianness Endian);
@@ -51,6 +46,16 @@ public:
const support::endianness Endian;
+ /// Return true if this target might automatically pad instructions and thus
+ /// need to emit padding enable/disable directives around sensitive code.
+ virtual bool allowAutoPadding() const { return false; }
+
+ /// Give the target a chance to manipulate state related to instruction
+ /// alignment (e.g. padding for optimization) before and after actually
+ /// emitting the instruction.
+ virtual void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) {}
+ virtual void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {}
+
/// lifetime management
virtual void reset() {}
@@ -184,40 +189,6 @@ public:
virtual bool isMicroMips(const MCSymbol *Sym) const {
return false;
}
-
- /// Handles all target related code padding when starting to write a new
- /// basic block to an object file.
- ///
- /// \param OS The streamer used for writing the padding data and function.
- /// \param Context the context of the padding, Embeds the basic block's
- /// parameters.
- void handleCodePaddingBasicBlockStart(MCObjectStreamer *OS,
- const MCCodePaddingContext &Context);
- /// Handles all target related code padding after writing a block to an object
- /// file.
- ///
- /// \param Context the context of the padding, Embeds the basic block's
- /// parameters.
- void handleCodePaddingBasicBlockEnd(const MCCodePaddingContext &Context);
- /// Handles all target related code padding before writing a new instruction
- /// to an object file.
- ///
- /// \param Inst the instruction.
- void handleCodePaddingInstructionBegin(const MCInst &Inst);
- /// Handles all target related code padding after writing an instruction to an
- /// object file.
- ///
- /// \param Inst the instruction.
- void handleCodePaddingInstructionEnd(const MCInst &Inst);
-
- /// Relaxes a fragment (changes the size of the padding) according to target
- /// requirements. The new size computation is done w.r.t a layout.
- ///
- /// \param PF The fragment to relax.
- /// \param Layout Code layout information.
- ///
- /// \returns true iff any relaxation occurred.
- bool relaxFragment(MCPaddingFragment *PF, MCAsmLayout &Layout);
};
} // end namespace llvm
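A minimal sketch of how a target opts into the new hooks (MyAsmBackend and its AlignBranches flag are hypothetical; the backend's mandatory overrides such as applyFixup and writeNopData are unchanged and omitted here):

bool MyAsmBackend::allowAutoPadding() const {
  // Only request padding directives when branch alignment was enabled.
  return AlignBranches;
}

void MyAsmBackend::alignBranchesBegin(MCObjectStreamer &OS,
                                      const MCInst &Inst) {
  // Typically inserts an MCBoundaryAlignFragment ahead of Inst when it is a
  // branch that could cross the configured boundary.
}

void MyAsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {
  // Closes the region opened in alignBranchesBegin once Inst is emitted.
}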
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfo.h b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfo.h
index 3261c483e0d8..5a6dff64caef 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfo.h
@@ -333,6 +333,10 @@ protected:
/// protected visibility. Defaults to MCSA_Protected
MCSymbolAttr ProtectedVisibilityAttr = MCSA_Protected;
+ // This attribute is used to indicate that symbols such as commons on AIX may
+ // a storage mapping class embedded in the name.
+ bool SymbolsHaveSMC = false;
+
//===--- Dwarf Emission Directives -----------------------------------===//
/// True if target supports emission of debugging information. Defaults to
@@ -454,6 +458,9 @@ public:
unsigned Encoding,
MCStreamer &Streamer) const;
+ /// Return true if C is an acceptable character inside a symbol name.
+ virtual bool isAcceptableChar(char C) const;
+
/// Return true if the identifier \p Name does not need quotes to be
/// syntactically correct.
virtual bool isValidUnquotedName(StringRef Name) const;
@@ -584,6 +591,8 @@ public:
return ProtectedVisibilityAttr;
}
+ bool getSymbolsHaveSMC() const { return SymbolsHaveSMC; }
+
bool doesSupportDebugInformation() const { return SupportsDebugInformation; }
bool doesSupportExceptionHandling() const {
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfoELF.h b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfoELF.h
index aa2e5873e2c6..408d4df76412 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfoELF.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfoELF.h
@@ -18,10 +18,6 @@ class MCAsmInfoELF : public MCAsmInfo {
MCSection *getNonexecutableStackSection(MCContext &Ctx) const final;
protected:
- /// Targets which have non-executable stacks by default can set this to false
- /// to disable the special section which requests a non-executable stack.
- bool UsesNonexecutableStackSection = true;
-
MCAsmInfoELF();
};
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfoXCOFF.h b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfoXCOFF.h
index 4a3bacc954e0..5483899d5875 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfoXCOFF.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCAsmInfoXCOFF.h
@@ -20,9 +20,9 @@ protected:
MCAsmInfoXCOFF();
public:
- // Return true only when the identifier Name does not need quotes to be
- // syntactically correct for XCOFF.
- bool isValidUnquotedName(StringRef Name) const override;
+ // Return true only when C is an acceptable character inside a
+ // MCSymbolXCOFF.
+ bool isAcceptableChar(char C) const override;
};
} // end namespace llvm
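The split above lets targets customize quoting per character instead of reimplementing the whole name check; presumably the generic isValidUnquotedName now reduces to a loop like this (a sketch only, the real body lives in MCAsmInfo.cpp):

bool MCAsmInfo::isValidUnquotedName(StringRef Name) const {
  if (Name.empty())
    return false;
  // Force quoting as soon as any single character is unacceptable.
  for (char C : Name)
    if (!isAcceptableChar(C))
      return false;
  return true;
}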
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCAssembler.h b/contrib/llvm-project/llvm/include/llvm/MC/MCAssembler.h
index 4543018901a4..8c76f30222e5 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCAssembler.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCAssembler.h
@@ -191,11 +191,8 @@ private:
bool layoutSectionOnce(MCAsmLayout &Layout, MCSection &Sec);
bool relaxInstruction(MCAsmLayout &Layout, MCRelaxableFragment &IF);
-
- bool relaxPaddingFragment(MCAsmLayout &Layout, MCPaddingFragment &PF);
-
bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF);
-
+ bool relaxBoundaryAlign(MCAsmLayout &Layout, MCBoundaryAlignFragment &BF);
bool relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF);
bool relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
MCDwarfCallFrameFragment &DF);
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCCodeEmitter.h b/contrib/llvm-project/llvm/include/llvm/MC/MCCodeEmitter.h
index 04b4367ada7b..2794acc0753f 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCCodeEmitter.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCCodeEmitter.h
@@ -30,6 +30,12 @@ public:
/// Lifetime management
virtual void reset() {}
+ /// Emit the prefixes of given instruction on the output stream.
+ ///
+ /// \param Inst a single low-level machine instruction.
+ /// \param OS output stream.
+ virtual void emitPrefix(const MCInst &Inst, raw_ostream &OS,
+ const MCSubtargetInfo &STI) const {}
/// EncodeInstruction - Encode the given \p Inst to bytes on the output
/// stream \p OS.
virtual void encodeInstruction(const MCInst &Inst, raw_ostream &OS,
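Sketch of a target emitter adopting the new hook (MyCodeEmitter is hypothetical); splitting prefix emission out lets callers such as branch-alignment code measure or re-emit prefixes independently of the opcode body:

class MyCodeEmitter : public llvm::MCCodeEmitter {
  void emitPrefix(const llvm::MCInst &Inst, llvm::raw_ostream &OS,
                  const llvm::MCSubtargetInfo &STI) const override {
    // Write only the prefix bytes of Inst (segment overrides, REX, ...).
  }
  void encodeInstruction(const llvm::MCInst &Inst, llvm::raw_ostream &OS,
                         llvm::SmallVectorImpl<llvm::MCFixup> &Fixups,
                         const llvm::MCSubtargetInfo &STI) const override {
    emitPrefix(Inst, OS, STI); // prefixes first, then the opcode body
  }
};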
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCCodePadder.h b/contrib/llvm-project/llvm/include/llvm/MC/MCCodePadder.h
deleted file mode 100644
index f7b1a2113a9a..000000000000
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCCodePadder.h
+++ /dev/null
@@ -1,241 +0,0 @@
-//===- llvm/MC/MCCodePadder.h - MC Code Padder ------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_MCCODEPADDER_H
-#define LLVM_MC_MCCODEPADDER_H
-
-#include "MCFragment.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-
-namespace llvm {
-
-class MCAsmLayout;
-class MCCodePaddingPolicy;
-class MCFragment;
-class MCInst;
-class MCObjectStreamer;
-class MCSection;
-
-typedef SmallVector<const MCPaddingFragment *, 8> MCPFRange;
-
-struct MCCodePaddingContext {
- bool IsPaddingActive;
- bool IsBasicBlockReachableViaFallthrough;
- bool IsBasicBlockReachableViaBranch;
-};
-
-/// Target-independent base class incharge of all code padding decisions for a
-/// target. During encoding it determines if and where MCPaddingFragments will
-/// be located, as later on, when layout information is available, it determines
-/// their sizes.
-class MCCodePadder {
- MCCodePadder(const MCCodePadder &) = delete;
- void operator=(const MCCodePadder &) = delete;
-
- /// Determines if the MCCodePaddingPolicies are active.
- bool ArePoliciesActive;
-
- /// All the supported MCCodePaddingPolicies.
- SmallPtrSet<MCCodePaddingPolicy *, 4> CodePaddingPolicies;
-
- /// A pointer to the fragment of the instruction whose padding is currently
- /// done for.
- MCPaddingFragment *CurrHandledInstFragment;
-
- /// A map holding the jurisdiction for each padding fragment. Key: padding
- /// fragment. Value: The fragment's jurisdiction. A jurisdiction is a vector
- /// of padding fragments whose conditions are being controlled by another
- /// fragment, the key fragment.
- DenseMap<MCPaddingFragment *, MCPFRange> FragmentToJurisdiction;
- MCPFRange &getJurisdiction(MCPaddingFragment *Fragment, MCAsmLayout &Layout);
-
- /// A map holding the maximal instruction window size relevant for a padding
- /// fragment.
- DenseMap<MCPaddingFragment *, uint64_t> FragmentToMaxWindowSize;
- uint64_t getMaxWindowSize(MCPaddingFragment *Fragment, MCAsmLayout &Layout);
-
-protected:
- /// The current streamer, used to stream code padding.
- MCObjectStreamer *OS;
-
- bool addPolicy(MCCodePaddingPolicy *Policy);
-
- virtual bool
- basicBlockRequiresInsertionPoint(const MCCodePaddingContext &Context) {
- return false;
- }
-
- virtual bool instructionRequiresInsertionPoint(const MCInst &Inst) {
- return false;
- }
-
- virtual bool usePoliciesForBasicBlock(const MCCodePaddingContext &Context) {
- return Context.IsPaddingActive;
- }
-
-public:
- MCCodePadder()
- : ArePoliciesActive(false), CurrHandledInstFragment(nullptr),
- OS(nullptr) {}
- virtual ~MCCodePadder();
-
- /// Handles all target related code padding when starting to write a new
- /// basic block to an object file.
- ///
- /// \param OS The streamer used for writing the padding data and function.
- /// \param Context the context of the padding, Embeds the basic block's
- /// parameters.
- void handleBasicBlockStart(MCObjectStreamer *OS,
- const MCCodePaddingContext &Context);
- /// Handles all target related code padding when done writing a block to an
- /// object file.
- ///
- /// \param Context the context of the padding, Embeds the basic block's
- /// parameters.
- void handleBasicBlockEnd(const MCCodePaddingContext &Context);
- /// Handles all target related code padding before writing a new instruction
- /// to an object file.
- ///
- /// \param Inst the instruction.
- void handleInstructionBegin(const MCInst &Inst);
- /// Handles all target related code padding after writing an instruction to an
- /// object file.
- ///
- /// \param Inst the instruction.
- void handleInstructionEnd(const MCInst &Inst);
-
- /// Relaxes a fragment (changes the size of the padding) according to target
- /// requirements. The new size computation is done w.r.t a layout.
- ///
- /// \param Fragment The fragment to relax.
- /// \param Layout Code layout information.
- ///
- /// \returns true iff any relaxation occurred.
- bool relaxFragment(MCPaddingFragment *Fragment, MCAsmLayout &Layout);
-};
-
-/// The base class for all padding policies, i.e. a rule or set of rules to pad
-/// the generated code.
-class MCCodePaddingPolicy {
- MCCodePaddingPolicy() = delete;
- MCCodePaddingPolicy(const MCCodePaddingPolicy &) = delete;
- void operator=(const MCCodePaddingPolicy &) = delete;
-
-protected:
- /// A mask holding the kind of this policy, i.e. only the i'th bit will be set
- /// where i is the kind number.
- const uint64_t KindMask;
- /// Instruction window size relevant to this policy.
- const uint64_t WindowSize;
- /// A boolean indicating which byte of the instruction determies its
- /// instruction window. If true - the last byte of the instructions, o.w. -
- /// the first byte of the instruction.
- const bool InstByteIsLastByte;
-
- MCCodePaddingPolicy(uint64_t Kind, uint64_t WindowSize,
- bool InstByteIsLastByte)
- : KindMask(UINT64_C(1) << Kind), WindowSize(WindowSize),
- InstByteIsLastByte(InstByteIsLastByte) {}
-
- /// Computes and returns the offset of the consecutive fragment of a given
- /// fragment.
- ///
- /// \param Fragment The fragment whose consecutive offset will be computed.
- /// \param Layout Code layout information.
- ///
- /// \returns the offset of the consecutive fragment of \p Fragment.
- static uint64_t getNextFragmentOffset(const MCFragment *Fragment,
- const MCAsmLayout &Layout);
- /// Returns the instruction byte of an instruction pointed by a given
- /// MCPaddingFragment. An instruction byte is the address of the byte of an
- /// instruction which determines its instruction window.
- ///
- /// \param Fragment The fragment pointing to the instruction.
- /// \param Layout Code layout information.
- ///
- /// \returns the instruction byte of an instruction pointed by \p Fragment.
- uint64_t getFragmentInstByte(const MCPaddingFragment *Fragment,
- MCAsmLayout &Layout) const;
- uint64_t computeWindowEndAddress(const MCPaddingFragment *Fragment,
- uint64_t Offset, MCAsmLayout &Layout) const;
-
- /// Computes and returns the penalty weight of a first instruction window in a
- /// range. This requires a special function since the first window does not
- /// contain all the padding fragments in that window. It only contains all the
- /// padding fragments starting from the relevant insertion point.
- ///
- /// \param Window The first window.
- /// \param Offset The offset of the parent section relative to the beginning
- /// of the file, mod the window size.
- /// \param Layout Code layout information.
- ///
- /// \returns the penalty weight of a first instruction window in a range, \p
- /// Window.
- double computeFirstWindowPenaltyWeight(const MCPFRange &Window,
- uint64_t Offset,
- MCAsmLayout &Layout) const;
- /// Computes and returns the penalty caused by an instruction window.
- ///
- /// \param Window The instruction window.
- /// \param Offset The offset of the parent section relative to the beginning
- /// of the file, mod the window size.
- /// \param Layout Code layout information.
- ///
- /// \returns the penalty caused by \p Window.
- virtual double computeWindowPenaltyWeight(const MCPFRange &Window,
- uint64_t Offset,
- MCAsmLayout &Layout) const = 0;
-
-public:
- virtual ~MCCodePaddingPolicy() {}
-
- /// Returns the kind mask of this policy - A mask holding the kind of this
- /// policy, i.e. only the i'th bit will be set where i is the kind number.
- uint64_t getKindMask() const { return KindMask; }
- /// Returns the instruction window size relevant to this policy.
- uint64_t getWindowSize() const { return WindowSize; }
- /// Returns true if the last byte of an instruction determines its instruction
- /// window, or false if the first of an instruction determines it.
- bool isInstByteLastByte() const { return InstByteIsLastByte; }
-
- /// Returns true iff this policy needs padding for a given basic block.
- ///
- /// \param Context the context of the padding, Embeds the basic block's
- /// parameters.
- ///
- /// \returns true iff this policy needs padding for the basic block.
- virtual bool
- basicBlockRequiresPaddingFragment(const MCCodePaddingContext &Context) const {
- return false;
- }
- /// Returns true iff this policy needs padding for a given instruction.
- ///
- /// \param Inst The given instruction.
- ///
- /// \returns true iff this policy needs padding for \p Inst.
- virtual bool instructionRequiresPaddingFragment(const MCInst &Inst) const {
- return false;
- }
- /// Computes and returns the penalty caused by a range of instruction windows.
- /// The weight is computed for each window separelty and then accumulated.
- ///
- /// \param Range The range.
- /// \param Offset The offset of the parent section relative to the beginning
- /// of the file, mod the window size.
- /// \param Layout Code layout information.
- ///
- /// \returns the penalty caused by \p Range.
- double computeRangePenaltyWeight(const MCPFRange &Range, uint64_t Offset,
- MCAsmLayout &Layout) const;
-};
-
-} // namespace llvm
-
-#endif // LLVM_MC_MCCODEPADDER_H
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/contrib/llvm-project/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
index 268f3ccad889..76c5215264bc 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
@@ -69,7 +69,6 @@ public:
/// \param Address - The address, in the memory space of region, of the first
/// byte of the instruction.
/// \param Bytes - A reference to the actual bytes of the instruction.
- /// \param VStream - The stream to print warnings and diagnostic messages on.
/// \param CStream - The stream to print comments and annotations on.
/// \return - MCDisassembler::Success if the instruction is valid,
/// MCDisassembler::SoftFail if the instruction was
@@ -77,7 +76,6 @@ public:
/// MCDisassembler::Fail if the instruction was invalid.
virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const = 0;
/// May parse any prelude that precedes instructions after the start of a
@@ -88,13 +86,11 @@ public:
/// \param Address - The address, in the memory space of region, of the first
/// byte of the symbol.
/// \param Bytes - A reference to the actual bytes at the symbol location.
- /// \param VStream - The stream to print warnings and diagnostic messages on.
/// \param CStream - The stream to print comments and annotations on.
/// \return - MCDisassembler::Success if the bytes are valid,
/// MCDisassembler::Fail if the bytes were invalid.
virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const;
private:
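Caller-side sketch of the narrowed signature (decodeOne is hypothetical): diagnostics no longer flow through a caller-supplied VStream, so decode loops pass only the comment stream.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

bool decodeOne(const MCDisassembler &DisAsm, ArrayRef<uint8_t> Bytes,
               uint64_t Addr, MCInst &Inst, uint64_t &Size) {
  // Before this change the call carried an extra raw_ostream for warnings.
  return DisAsm.getInstruction(Inst, Size, Bytes, Addr, nulls()) ==
         MCDisassembler::Success;
}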
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCDwarf.h b/contrib/llvm-project/llvm/include/llvm/MC/MCDwarf.h
index a33b4b31bb06..b8ee585bb017 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCDwarf.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCDwarf.h
@@ -54,7 +54,7 @@ struct MCDwarfFile {
std::string Name;
// The index into the list of directory names for this file name.
- unsigned DirIndex;
+ unsigned DirIndex = 0;
/// The MD5 checksum, if there is one. Non-owning pointer to data allocated
/// in MCContext.
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCELFStreamer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCELFStreamer.h
index 8838d53d75b5..85534ed6c085 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCELFStreamer.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCELFStreamer.h
@@ -41,7 +41,8 @@ public:
void InitSections(bool NoExecStack) override;
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override;
void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
- void EmitLabel(MCSymbol *Symbol, SMLoc Loc, MCFragment *F) override;
+ void EmitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment *F,
+ uint64_t Offset) override;
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
void EmitThumbFunc(MCSymbol *Func) override;
void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
@@ -101,7 +102,7 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context,
std::unique_ptr<MCAsmBackend> TAB,
std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> Emitter,
- bool RelaxAll, bool IsThumb);
+ bool RelaxAll, bool IsThumb, bool IsAndroid);
} // end namespace llvm
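Caller-side sketch for the widened factory (all values hypothetical): existing creators must now state explicitly whether they target Android, which presumably lets the streamer tailor platform-specific emission.

#include "llvm/MC/MCELFStreamer.h"
#include <memory>
using namespace llvm;

MCELFStreamer *makeARMStreamer(MCContext &Ctx,
                               std::unique_ptr<MCAsmBackend> TAB,
                               std::unique_ptr<MCObjectWriter> OW,
                               std::unique_ptr<MCCodeEmitter> Emitter) {
  return createARMELFStreamer(Ctx, std::move(TAB), std::move(OW),
                              std::move(Emitter), /*RelaxAll=*/false,
                              /*IsThumb=*/true, /*IsAndroid=*/false);
}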
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCFragment.h b/contrib/llvm-project/llvm/include/llvm/MC/MCFragment.h
index b0def566c46a..ed237eb1e884 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCFragment.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCFragment.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/ilist_node.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/SMLoc.h"
#include <cstdint>
@@ -41,7 +42,7 @@ public:
FT_Dwarf,
FT_DwarfFrame,
FT_LEB,
- FT_Padding,
+ FT_BoundaryAlign,
FT_SymbolId,
FT_CVInlineLines,
FT_CVDefRange,
@@ -49,39 +50,27 @@ public:
};
private:
- FragmentType Kind;
-
-protected:
- bool HasInstructions;
-
-private:
- /// LayoutOrder - The layout order of this fragment.
- unsigned LayoutOrder;
-
/// The data for the section this fragment is in.
MCSection *Parent;
- /// Atom - The atom this fragment is in, as represented by its defining
- /// symbol.
+ /// The atom this fragment is in, as represented by its defining symbol.
const MCSymbol *Atom;
- /// \name Assembler Backend Data
- /// @{
- //
- // FIXME: This could all be kept private to the assembler implementation.
-
- /// Offset - The offset of this fragment in its section. This is ~0 until
+ /// The offset of this fragment in its section. This is ~0 until
/// initialized.
uint64_t Offset;
- /// @}
+ /// The layout order of this fragment.
+ unsigned LayoutOrder;
+
+ FragmentType Kind;
protected:
+ bool HasInstructions;
+
MCFragment(FragmentType Kind, bool HasInstructions,
MCSection *Parent = nullptr);
- ~MCFragment();
-
public:
MCFragment() = delete;
MCFragment(const MCFragment &) = delete;
@@ -108,9 +97,6 @@ public:
/// this is false, but specific fragment types may set it to true.
bool hasInstructions() const { return HasInstructions; }
- /// Return true if given frgment has FT_Dummy type.
- bool isDummy() const { return Kind == FT_Dummy; }
-
void dump() const;
};
@@ -135,8 +121,8 @@ protected:
MCSection *Sec)
: MCFragment(FType, HasInstructions, Sec) {}
- /// STI - The MCSubtargetInfo in effect when the instruction was encoded.
- /// must be non-null for instructions.
+ /// The MCSubtargetInfo in effect when the instruction was encoded.
+ /// It must be non-null for instructions.
const MCSubtargetInfo *STI = nullptr;
public:
@@ -206,7 +192,7 @@ template<unsigned ContentsSize, unsigned FixupsSize>
class MCEncodedFragmentWithFixups :
public MCEncodedFragmentWithContents<ContentsSize> {
- /// Fixups - The list of fixups in this fragment.
+ /// The list of fixups in this fragment.
SmallVector<MCFixup, FixupsSize> Fixups;
protected:
@@ -271,7 +257,7 @@ public:
///
class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> {
- /// Inst - The instruction this is a fragment for.
+ /// The instruction this is a fragment for.
MCInst Inst;
public:
@@ -289,21 +275,21 @@ public:
};
class MCAlignFragment : public MCFragment {
- /// Alignment - The alignment to ensure, in bytes.
+ /// The alignment to ensure, in bytes.
unsigned Alignment;
- /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead
+ /// Flag to indicate that (optimal) NOPs should be emitted instead
/// of using the provided value. The exact interpretation of this flag is
/// target dependent.
bool EmitNops : 1;
- /// Value - Value to use for filling padding bytes.
+ /// Value to use for filling padding bytes.
int64_t Value;
- /// ValueSize - The size of the integer (in bytes) of \p Value.
+ /// The size of the integer (in bytes) of \p Value.
unsigned ValueSize;
- /// MaxBytesToEmit - The maximum number of bytes to emit; if the alignment
+ /// The maximum number of bytes to emit; if the alignment
/// cannot be satisfied in this width then this fragment is ignored.
unsigned MaxBytesToEmit;
@@ -313,9 +299,6 @@ public:
: MCFragment(FT_Align, false, Sec), Alignment(Alignment), EmitNops(false),
Value(Value), ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit) {}
- /// \name Accessors
- /// @{
-
unsigned getAlignment() const { return Alignment; }
int64_t getValue() const { return Value; }
@@ -327,109 +310,15 @@ public:
bool hasEmitNops() const { return EmitNops; }
void setEmitNops(bool Value) { EmitNops = Value; }
- /// @}
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Align;
}
};
-/// Fragment for adding required padding.
-/// This fragment is always inserted before an instruction, and holds that
-/// instruction as context information (as well as a mask of kinds) for
-/// determining the padding size.
-///
-class MCPaddingFragment : public MCFragment {
- /// A mask containing all the kinds relevant to this fragment. i.e. the i'th
- /// bit will be set iff kind i is relevant to this fragment.
- uint64_t PaddingPoliciesMask;
- /// A boolean indicating if this fragment will actually hold padding. If its
- /// value is false, then this fragment serves only as a placeholder,
- /// containing data to assist other insertion point in their decision making.
- bool IsInsertionPoint;
-
- uint64_t Size;
-
- struct MCInstInfo {
- bool IsInitialized;
- MCInst Inst;
- /// A boolean indicating whether the instruction pointed by this fragment is
- /// a fixed size instruction or a relaxable instruction held by a
- /// MCRelaxableFragment.
- bool IsImmutableSizedInst;
- union {
- /// If the instruction is a fixed size instruction, hold its size.
- size_t InstSize;
- /// Otherwise, hold a pointer to the MCRelaxableFragment holding it.
- MCRelaxableFragment *InstFragment;
- };
- };
- MCInstInfo InstInfo;
-
-public:
- static const uint64_t PFK_None = UINT64_C(0);
-
- enum MCPaddingFragmentKind {
- // values 0-7 are reserved for future target independet values.
-
- FirstTargetPerfNopFragmentKind = 8,
-
- /// Limit range of target MCPerfNopFragment kinds to fit in uint64_t
- MaxTargetPerfNopFragmentKind = 63
- };
-
- MCPaddingFragment(MCSection *Sec = nullptr)
- : MCFragment(FT_Padding, false, Sec), PaddingPoliciesMask(PFK_None),
- IsInsertionPoint(false), Size(UINT64_C(0)),
- InstInfo({false, MCInst(), false, {0}}) {}
-
- bool isInsertionPoint() const { return IsInsertionPoint; }
- void setAsInsertionPoint() { IsInsertionPoint = true; }
- uint64_t getPaddingPoliciesMask() const { return PaddingPoliciesMask; }
- void setPaddingPoliciesMask(uint64_t Value) { PaddingPoliciesMask = Value; }
- bool hasPaddingPolicy(uint64_t PolicyMask) const {
- assert(isPowerOf2_64(PolicyMask) &&
- "Policy mask must contain exactly one policy");
- return (getPaddingPoliciesMask() & PolicyMask) != PFK_None;
- }
- const MCInst &getInst() const {
- assert(isInstructionInitialized() && "Fragment has no instruction!");
- return InstInfo.Inst;
- }
- size_t getInstSize() const {
- assert(isInstructionInitialized() && "Fragment has no instruction!");
- if (InstInfo.IsImmutableSizedInst)
- return InstInfo.InstSize;
- assert(InstInfo.InstFragment != nullptr &&
- "Must have a valid InstFragment to retrieve InstSize from");
- return InstInfo.InstFragment->getContents().size();
- }
- void setInstAndInstSize(const MCInst &Inst, size_t InstSize) {
- InstInfo.IsInitialized = true;
- InstInfo.IsImmutableSizedInst = true;
- InstInfo.Inst = Inst;
- InstInfo.InstSize = InstSize;
- }
- void setInstAndInstFragment(const MCInst &Inst,
- MCRelaxableFragment *InstFragment) {
- InstInfo.IsInitialized = true;
- InstInfo.IsImmutableSizedInst = false;
- InstInfo.Inst = Inst;
- InstInfo.InstFragment = InstFragment;
- }
- uint64_t getSize() const { return Size; }
- void setSize(uint64_t Value) { Size = Value; }
- bool isInstructionInitialized() const { return InstInfo.IsInitialized; }
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Padding;
- }
-};
-
class MCFillFragment : public MCFragment {
+ uint8_t ValueSize;
/// Value to use for filling bytes.
uint64_t Value;
- uint8_t ValueSize;
/// The number of bytes to insert.
const MCExpr &NumValues;
@@ -439,7 +328,7 @@ class MCFillFragment : public MCFragment {
public:
MCFillFragment(uint64_t Value, uint8_t VSize, const MCExpr &NumValues,
SMLoc Loc, MCSection *Sec = nullptr)
- : MCFragment(FT_Fill, false, Sec), Value(Value), ValueSize(VSize),
+ : MCFragment(FT_Fill, false, Sec), ValueSize(VSize), Value(Value),
NumValues(NumValues), Loc(Loc) {}
uint64_t getValue() const { return Value; }
@@ -454,22 +343,20 @@ public:
};
class MCOrgFragment : public MCFragment {
- /// The offset this fragment should start at.
- const MCExpr *Offset;
-
/// Value to use for filling bytes.
int8_t Value;
+ /// The offset this fragment should start at.
+ const MCExpr *Offset;
+
/// Source location of the directive that this fragment was created for.
SMLoc Loc;
public:
MCOrgFragment(const MCExpr &Offset, int8_t Value, SMLoc Loc,
MCSection *Sec = nullptr)
- : MCFragment(FT_Org, false, Sec), Offset(&Offset), Value(Value), Loc(Loc) {}
-
- /// \name Accessors
- /// @{
+ : MCFragment(FT_Org, false, Sec), Value(Value), Offset(&Offset),
+ Loc(Loc) {}
const MCExpr &getOffset() const { return *Offset; }
@@ -477,31 +364,26 @@ public:
SMLoc getLoc() const { return Loc; }
- /// @}
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Org;
}
};
class MCLEBFragment : public MCFragment {
- /// Value - The value this fragment should contain.
- const MCExpr *Value;
-
- /// IsSigned - True if this is a sleb128, false if uleb128.
+ /// True if this is a sleb128, false if uleb128.
bool IsSigned;
+ /// The value this fragment should contain.
+ const MCExpr *Value;
+
SmallString<8> Contents;
public:
MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSection *Sec = nullptr)
- : MCFragment(FT_LEB, false, Sec), Value(&Value_), IsSigned(IsSigned_) {
+ : MCFragment(FT_LEB, false, Sec), IsSigned(IsSigned_), Value(&Value_) {
Contents.push_back(0);
}
- /// \name Accessors
- /// @{
-
const MCExpr &getValue() const { return *Value; }
bool isSigned() const { return IsSigned; }
@@ -517,11 +399,11 @@ public:
};
class MCDwarfLineAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
- /// LineDelta - the value of the difference between the two line numbers
+ /// The value of the difference between the two line numbers
/// between two .loc dwarf directives.
int64_t LineDelta;
- /// AddrDelta - The expression for the difference of the two symbols that
+ /// The expression for the difference of the two symbols that
/// make up the address delta between two .loc dwarf directives.
const MCExpr *AddrDelta;
@@ -531,22 +413,17 @@ public:
: MCEncodedFragmentWithFixups<8, 1>(FT_Dwarf, false, Sec),
LineDelta(LineDelta), AddrDelta(&AddrDelta) {}
- /// \name Accessors
- /// @{
-
int64_t getLineDelta() const { return LineDelta; }
const MCExpr &getAddrDelta() const { return *AddrDelta; }
- /// @}
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Dwarf;
}
};
class MCDwarfCallFrameFragment : public MCEncodedFragmentWithFixups<8, 1> {
- /// AddrDelta - The expression for the difference of the two symbols that
+ /// The expression for the difference of the two symbols that
/// make up the address delta between two .cfi_* dwarf directives.
const MCExpr *AddrDelta;
@@ -555,13 +432,8 @@ public:
: MCEncodedFragmentWithFixups<8, 1>(FT_DwarfFrame, false, Sec),
AddrDelta(&AddrDelta) {}
- /// \name Accessors
- /// @{
-
const MCExpr &getAddrDelta() const { return *AddrDelta; }
- /// @}
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_DwarfFrame;
}
@@ -575,14 +447,9 @@ public:
MCSymbolIdFragment(const MCSymbol *Sym, MCSection *Sec = nullptr)
: MCFragment(FT_SymbolId, false, Sec), Sym(Sym) {}
- /// \name Accessors
- /// @{
-
const MCSymbol *getSymbol() { return Sym; }
const MCSymbol *getSymbol() const { return Sym; }
- /// @}
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_SymbolId;
}
@@ -611,17 +478,12 @@ public:
StartFileId(StartFileId), StartLineNum(StartLineNum),
FnStartSym(FnStartSym), FnEndSym(FnEndSym) {}
- /// \name Accessors
- /// @{
-
const MCSymbol *getFnStartSym() const { return FnStartSym; }
const MCSymbol *getFnEndSym() const { return FnEndSym; }
SmallString<8> &getContents() { return Contents; }
const SmallString<8> &getContents() const { return Contents; }
- /// @}
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_CVInlineLines;
}
@@ -644,20 +506,53 @@ public:
Ranges(Ranges.begin(), Ranges.end()),
FixedSizePortion(FixedSizePortion) {}
- /// \name Accessors
- /// @{
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> getRanges() const {
return Ranges;
}
StringRef getFixedSizePortion() const { return FixedSizePortion; }
- /// @}
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_CVDefRange;
}
};
+/// Represents required padding such that a particular other set of fragments
+/// does not cross a particular power-of-two boundary. The other fragments must
+/// follow this one within the same section.
+class MCBoundaryAlignFragment : public MCFragment {
+ /// The alignment requirement of the branch to be aligned.
+ Align AlignBoundary;
+ /// Flag to indicate whether the branch is fused. Used in determining the
+ /// region of fragments being aligned.
+ bool Fused : 1;
+ /// Flag to indicate whether NOPs should be emitted.
+ bool EmitNops : 1;
+ /// The size of the fragment. The size is lazily set during relaxation, and
+ /// is not meaningful before that.
+ uint64_t Size = 0;
+
+public:
+ MCBoundaryAlignFragment(Align AlignBoundary, bool Fused = false,
+ bool EmitNops = false, MCSection *Sec = nullptr)
+ : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary),
+ Fused(Fused), EmitNops(EmitNops) {}
+
+ uint64_t getSize() const { return Size; }
+ void setSize(uint64_t Value) { Size = Value; }
+
+ Align getAlignment() const { return AlignBoundary; }
+
+ bool isFused() const { return Fused; }
+ void setFused(bool Value) { Fused = Value; }
+
+ bool canEmitNops() const { return EmitNops; }
+ void setEmitNops(bool Value) { EmitNops = Value; }
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_BoundaryAlign;
+ }
+};
} // end namespace llvm
#endif // LLVM_MC_MCFRAGMENT_H
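A sketch of how a backend's branch-alignment code might use the replacement fragment (insertBoundaryAlign is hypothetical; MCObjectStreamer::insert is the usual way fragments enter a section):

#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCObjectStreamer.h"
using namespace llvm;

void insertBoundaryAlign(MCObjectStreamer &OS) {
  // Ask that the following fragment(s) not cross a 32-byte boundary; Size
  // stays 0 here and is computed later during relaxation
  // (MCAssembler::relaxBoundaryAlign).
  OS.insert(new MCBoundaryAlignFragment(Align(32), /*Fused=*/false,
                                        /*EmitNops=*/true));
}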
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCInst.h b/contrib/llvm-project/llvm/include/llvm/MC/MCInst.h
index 8df8096bba94..360dbda58fcb 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCInst.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCInst.h
@@ -157,13 +157,14 @@ public:
/// instruction.
class MCInst {
unsigned Opcode = 0;
- SMLoc Loc;
- SmallVector<MCOperand, 8> Operands;
// These flags could be used to pass some info from one target subcomponent
// to another, for example, from disassembler to asm printer. The values of
// the flags have any sense on target level only (e.g. prefixes on x86).
unsigned Flags = 0;
+ SMLoc Loc;
+ SmallVector<MCOperand, 8> Operands;
+
public:
MCInst() = default;
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCInstPrinter.h b/contrib/llvm-project/llvm/include/llvm/MC/MCInstPrinter.h
index 4501ce3084c8..97290e73c28f 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCInstPrinter.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCInstPrinter.h
@@ -16,6 +16,7 @@ namespace llvm {
class MCAsmInfo;
class MCInst;
+class MCOperand;
class MCInstrInfo;
class MCRegisterInfo;
class MCSubtargetInfo;
@@ -34,6 +35,8 @@ enum Style {
} // end namespace HexStyle
+struct AliasMatchingData;
+
/// This is an instance of a target assembly language printer that
/// converts an MCInst to valid target assembly syntax.
class MCInstPrinter {
@@ -58,6 +61,10 @@ protected:
/// Utility function for printing annotations.
void printAnnotation(raw_ostream &OS, StringRef Annot);
+ /// Helper for matching MCInsts to alias patterns when printing instructions.
+ const char *matchAliasPatterns(const MCInst *MI, const MCSubtargetInfo *STI,
+ const AliasMatchingData &M);
+
public:
MCInstPrinter(const MCAsmInfo &mai, const MCInstrInfo &mii,
const MCRegisterInfo &mri) : MAI(mai), MII(mii), MRI(mri) {}
@@ -72,8 +79,8 @@ public:
void setCommentStream(raw_ostream &OS) { CommentStream = &OS; }
/// Print the specified MCInst to the specified raw_ostream.
- virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
- const MCSubtargetInfo &STI) = 0;
+ virtual void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &OS) = 0;
/// Return the name of the specified opcode enum (e.g. "MOV32ri") or
/// empty if we can't resolve it.
@@ -104,6 +111,48 @@ public:
format_object<uint64_t> formatHex(uint64_t Value) const;
};
+/// Map from opcode to pattern list by binary search.
+struct PatternsForOpcode {
+ uint32_t Opcode;
+ uint16_t PatternStart;
+ uint16_t NumPatterns;
+};
+
+/// Data for each alias pattern. Includes feature bits, string, number of
+/// operands, and a variadic list of conditions to check.
+struct AliasPattern {
+ uint32_t AsmStrOffset;
+ uint32_t AliasCondStart;
+ uint8_t NumOperands;
+ uint8_t NumConds;
+};
+
+struct AliasPatternCond {
+ enum CondKind : uint8_t {
+ K_Feature, // Match only if a feature is enabled.
+ K_NegFeature, // Match only if a feature is disabled.
+ K_Ignore, // Match any operand.
+ K_Reg, // Match a specific register.
+ K_TiedReg, // Match another already matched register.
+ K_Imm, // Match a specific immediate.
+ K_RegClass, // Match registers in a class.
+ K_Custom, // Call custom matcher by index.
+ };
+
+ CondKind Kind;
+ uint32_t Value;
+};
+
+/// Tablegenerated data structures needed to match alias patterns.
+struct AliasMatchingData {
+ ArrayRef<PatternsForOpcode> OpToPatterns;
+ ArrayRef<AliasPattern> Patterns;
+ ArrayRef<AliasPatternCond> PatternConds;
+ StringRef AsmStrings;
+ bool (*ValidateMCOperand)(const MCOperand &MCOp, const MCSubtargetInfo &STI,
+ unsigned PredicateIndex);
+};
+
} // end namespace llvm
#endif // LLVM_MC_MCINSTPRINTER_H
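Migration sketch for a printer subclass (MyInstPrinter is hypothetical): printInst gains the instruction's address and takes the stream last; a TableGen'd printer would typically try matchAliasPatterns with its generated AliasMatchingData before falling back to the full printer.

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

class MyInstPrinter : public MCInstPrinter {
public:
  using MCInstPrinter::MCInstPrinter;
  void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                 const MCSubtargetInfo &STI, raw_ostream &OS) override {
    // Address enables printing PC-relative operands as absolute targets.
    OS << "<opcode " << MI->getOpcode() << " at " << Address << ">";
  }
};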
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCInstrDesc.h b/contrib/llvm-project/llvm/include/llvm/MC/MCInstrDesc.h
index e75a27614a22..506f2c09304c 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCInstrDesc.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCInstrDesc.h
@@ -37,7 +37,12 @@ enum OperandConstraint {
/// These are flags set on operands, but should be considered
/// private, all access should go through the MCOperandInfo accessors.
/// See the accessors for a description of what these are.
-enum OperandFlags { LookupPtrRegClass = 0, Predicate, OptionalDef };
+enum OperandFlags {
+ LookupPtrRegClass = 0,
+ Predicate,
+ OptionalDef,
+ BranchTarget
+};
/// Operands are tagged with one of the values of this enum.
enum OperandType {
@@ -98,6 +103,9 @@ public:
/// Set if this operand is a optional def.
bool isOptionalDef() const { return Flags & (1 << MCOI::OptionalDef); }
+ /// Set if this operand is a branch target.
+ bool isBranchTarget() const { return Flags & (1 << MCOI::BranchTarget); }
+
bool isGenericType() const {
return OperandType >= MCOI::OPERAND_FIRST_GENERIC &&
OperandType <= MCOI::OPERAND_LAST_GENERIC;
@@ -168,6 +176,7 @@ enum Flag {
Add,
Trap,
VariadicOpsAreDefs,
+ Authenticated,
};
}
@@ -304,7 +313,7 @@ public:
/// block. The TargetInstrInfo::AnalyzeBranch method can be used to get more
/// information about this branch.
bool isConditionalBranch() const {
- return isBranch() & !isBarrier() & !isIndirectBranch();
+ return isBranch() && !isBarrier() && !isIndirectBranch();
}
/// Return true if this is a branch which always
@@ -312,7 +321,7 @@ public:
/// TargetInstrInfo::AnalyzeBranch method can be used to get more information
/// about this branch.
bool isUnconditionalBranch() const {
- return isBranch() & isBarrier() & !isIndirectBranch();
+ return isBranch() && isBarrier() && !isIndirectBranch();
}
/// Return true if this is a branch or an instruction which directly
@@ -408,6 +417,15 @@ public:
return Flags & (1ULL << MCID::VariadicOpsAreDefs);
}
+ /// Return true if this instruction authenticates a pointer (e.g. LDRAx/BRAx
+ /// from ARMv8.3, which perform loads/branches with authentication).
+ ///
+ /// An authenticated instruction may fail in an ABI-defined manner when
+ /// operating on an invalid signed pointer.
+ bool isAuthenticated() const {
+ return Flags & (1ULL << MCID::Authenticated);
+ }
+
//===--------------------------------------------------------------------===//
// Side Effect Analysis
//===--------------------------------------------------------------------===//
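Usage sketch of the corrected and new predicates (classify is hypothetical). Note the & to && change is behavior-preserving on bool operands; it restores short-circuiting and the intended logical reading.

#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;

const char *classify(const MCInstrDesc &Desc) {
  if (Desc.isConditionalBranch())   // branch, not a barrier, not indirect
    return "conditional branch";
  if (Desc.isUnconditionalBranch()) // branch and barrier, not indirect
    return "unconditional branch";
  if (Desc.isAuthenticated())       // e.g. ARMv8.3 LDRAA/BRAA; may fault on
    return "authenticated";         // an invalid pointer signature
  return "other";
}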
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h b/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h
index 278aebee99ac..853e5066f039 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h
@@ -28,7 +28,9 @@ class MachObjectWriter;
class MCMachObjectTargetWriter : public MCObjectTargetWriter {
const unsigned Is64Bit : 1;
const uint32_t CPUType;
- const uint32_t CPUSubtype;
+protected:
+ uint32_t CPUSubtype;
+public:
unsigned LocalDifference_RIT;
protected:
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCObjectFileInfo.h b/contrib/llvm-project/llvm/include/llvm/MC/MCObjectFileInfo.h
index abc87bf27748..2f7f5d64b466 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -27,20 +27,20 @@ class MCObjectFileInfo {
protected:
/// True if .comm supports alignment. This is a hack for as long as we
/// support 10.4 Tiger, whose assembler doesn't support alignment on comm.
- bool CommDirectiveSupportsAlignment;
+ bool CommDirectiveSupportsAlignment = false;
/// True if target object file supports a weak_definition of constant 0 for an
/// omitted EH frame.
- bool SupportsWeakOmittedEHFrame;
+ bool SupportsWeakOmittedEHFrame = false;
/// True if the target object file supports emitting a compact unwind section
/// without an associated EH frame section.
- bool SupportsCompactUnwindWithoutEHFrame;
+ bool SupportsCompactUnwindWithoutEHFrame = false;
/// OmitDwarfIfHaveCompactUnwind - True if the target object file
/// supports having some functions with compact unwind and other with
/// dwarf unwind.
- bool OmitDwarfIfHaveCompactUnwind;
+ bool OmitDwarfIfHaveCompactUnwind = false;
/// FDE CFI encoding. Controls the encoding of the begin label in the
/// .eh_frame section. Unlike the LSDA encoding, personality encoding, and
@@ -49,134 +49,136 @@ protected:
unsigned FDECFIEncoding = 0;
/// Compact unwind encoding indicating that we should emit only an EH frame.
- unsigned CompactUnwindDwarfEHFrameOnly;
+ unsigned CompactUnwindDwarfEHFrameOnly = 0;
/// Section directive for standard text.
- MCSection *TextSection;
+ MCSection *TextSection = nullptr;
/// Section directive for standard data.
- MCSection *DataSection;
+ MCSection *DataSection = nullptr;
/// Section that is default initialized to zero.
- MCSection *BSSSection;
+ MCSection *BSSSection = nullptr;
/// Section that is readonly and can contain arbitrary initialized data.
/// Targets are not required to have a readonly section. If they don't,
/// various bits of code will fall back to using the data section for
/// constants.
- MCSection *ReadOnlySection;
+ MCSection *ReadOnlySection = nullptr;
/// If exception handling is supported by the target, this is the section the
/// Language Specific Data Area information is emitted to.
- MCSection *LSDASection;
+ MCSection *LSDASection = nullptr;
/// If exception handling is supported by the target and the target can
/// support a compact representation of the CIE and FDE, this is the section
/// to emit them into.
- MCSection *CompactUnwindSection;
+ MCSection *CompactUnwindSection = nullptr;
// Dwarf sections for debug info. If a target supports debug info, these must
// be set.
- MCSection *DwarfAbbrevSection;
- MCSection *DwarfInfoSection;
- MCSection *DwarfLineSection;
- MCSection *DwarfLineStrSection;
- MCSection *DwarfFrameSection;
- MCSection *DwarfPubTypesSection;
- const MCSection *DwarfDebugInlineSection;
- MCSection *DwarfStrSection;
- MCSection *DwarfLocSection;
- MCSection *DwarfARangesSection;
- MCSection *DwarfRangesSection;
- MCSection *DwarfMacinfoSection;
+ MCSection *DwarfAbbrevSection = nullptr;
+ MCSection *DwarfInfoSection = nullptr;
+ MCSection *DwarfLineSection = nullptr;
+ MCSection *DwarfLineStrSection = nullptr;
+ MCSection *DwarfFrameSection = nullptr;
+ MCSection *DwarfPubTypesSection = nullptr;
+ const MCSection *DwarfDebugInlineSection = nullptr;
+ MCSection *DwarfStrSection = nullptr;
+ MCSection *DwarfLocSection = nullptr;
+ MCSection *DwarfARangesSection = nullptr;
+ MCSection *DwarfRangesSection = nullptr;
+ MCSection *DwarfMacinfoSection = nullptr;
// The pubnames section is no longer generated by default. The generation
// can be enabled by a compiler flag.
- MCSection *DwarfPubNamesSection;
+ MCSection *DwarfPubNamesSection = nullptr;
/// Accelerator table sections. DwarfDebugNamesSection is the DWARF v5
/// accelerator table, while DwarfAccelNamesSection, DwarfAccelObjCSection,
/// DwarfAccelNamespaceSection, DwarfAccelTypesSection are pre-DWARF v5
/// extensions.
- MCSection *DwarfDebugNamesSection;
- MCSection *DwarfAccelNamesSection;
- MCSection *DwarfAccelObjCSection;
- MCSection *DwarfAccelNamespaceSection;
- MCSection *DwarfAccelTypesSection;
+ MCSection *DwarfDebugNamesSection = nullptr;
+ MCSection *DwarfAccelNamesSection = nullptr;
+ MCSection *DwarfAccelObjCSection = nullptr;
+ MCSection *DwarfAccelNamespaceSection = nullptr;
+ MCSection *DwarfAccelTypesSection = nullptr;
// These are used for the Fission separate debug information files.
- MCSection *DwarfInfoDWOSection;
- MCSection *DwarfTypesDWOSection;
- MCSection *DwarfAbbrevDWOSection;
- MCSection *DwarfStrDWOSection;
- MCSection *DwarfLineDWOSection;
- MCSection *DwarfLocDWOSection;
- MCSection *DwarfStrOffDWOSection;
+ MCSection *DwarfInfoDWOSection = nullptr;
+ MCSection *DwarfTypesDWOSection = nullptr;
+ MCSection *DwarfAbbrevDWOSection = nullptr;
+ MCSection *DwarfStrDWOSection = nullptr;
+ MCSection *DwarfLineDWOSection = nullptr;
+ MCSection *DwarfLocDWOSection = nullptr;
+ MCSection *DwarfStrOffDWOSection = nullptr;
+ MCSection *DwarfMacinfoDWOSection = nullptr;
/// The DWARF v5 string offset and address table sections.
- MCSection *DwarfStrOffSection;
- MCSection *DwarfAddrSection;
+ MCSection *DwarfStrOffSection = nullptr;
+ MCSection *DwarfAddrSection = nullptr;
/// The DWARF v5 range list section.
- MCSection *DwarfRnglistsSection;
+ MCSection *DwarfRnglistsSection = nullptr;
/// The DWARF v5 locations list section.
- MCSection *DwarfLoclistsSection;
+ MCSection *DwarfLoclistsSection = nullptr;
- /// The DWARF v5 range list section for fission.
- MCSection *DwarfRnglistsDWOSection;
+ /// The DWARF v5 range and location list sections for fission.
+ MCSection *DwarfRnglistsDWOSection = nullptr;
+ MCSection *DwarfLoclistsDWOSection = nullptr;
// These are for Fission DWP files.
- MCSection *DwarfCUIndexSection;
- MCSection *DwarfTUIndexSection;
+ MCSection *DwarfCUIndexSection = nullptr;
+ MCSection *DwarfTUIndexSection = nullptr;
/// Section for newer gnu pubnames.
- MCSection *DwarfGnuPubNamesSection;
+ MCSection *DwarfGnuPubNamesSection = nullptr;
/// Section for newer gnu pubtypes.
- MCSection *DwarfGnuPubTypesSection;
+ MCSection *DwarfGnuPubTypesSection = nullptr;
// Section for Swift AST
- MCSection *DwarfSwiftASTSection;
+ MCSection *DwarfSwiftASTSection = nullptr;
- MCSection *COFFDebugSymbolsSection;
- MCSection *COFFDebugTypesSection;
- MCSection *COFFGlobalTypeHashesSection;
+ MCSection *COFFDebugSymbolsSection = nullptr;
+ MCSection *COFFDebugTypesSection = nullptr;
+ MCSection *COFFGlobalTypeHashesSection = nullptr;
/// Extra TLS Variable Data section.
///
/// If the target needs to put additional information for a TLS variable,
/// it'll go here.
- MCSection *TLSExtraDataSection;
+ MCSection *TLSExtraDataSection = nullptr;
/// Section directive for Thread Local data. ELF, MachO, COFF, and Wasm.
- MCSection *TLSDataSection; // Defaults to ".tdata".
+ MCSection *TLSDataSection = nullptr; // Defaults to ".tdata".
/// Section directive for Thread Local uninitialized data.
///
/// Null if this target doesn't support a BSS section. ELF and MachO only.
- MCSection *TLSBSSSection; // Defaults to ".tbss".
+ MCSection *TLSBSSSection = nullptr; // Defaults to ".tbss".
/// StackMap section.
- MCSection *StackMapSection;
+ MCSection *StackMapSection = nullptr;
/// FaultMap section.
- MCSection *FaultMapSection;
+ MCSection *FaultMapSection = nullptr;
/// Remarks section.
- MCSection *RemarksSection;
+ MCSection *RemarksSection = nullptr;
/// EH frame section.
///
/// It is initialized on demand so it can be overwritten (with uniquing).
- MCSection *EHFrameSection;
+ MCSection *EHFrameSection = nullptr;
/// Section containing metadata on function stack sizes.
- MCSection *StackSizesSection;
+ MCSection *StackSizesSection = nullptr;
mutable DenseMap<const MCSymbol *, unsigned> StackSizesUniquing;
// ELF specific sections.
- MCSection *DataRelROSection;
- MCSection *MergeableConst4Section;
- MCSection *MergeableConst8Section;
- MCSection *MergeableConst16Section;
- MCSection *MergeableConst32Section;
+ MCSection *DataRelROSection = nullptr;
+ MCSection *MergeableConst4Section = nullptr;
+ MCSection *MergeableConst8Section = nullptr;
+ MCSection *MergeableConst16Section = nullptr;
+ MCSection *MergeableConst32Section = nullptr;
// MachO specific sections.
@@ -184,33 +186,35 @@ protected:
///
/// Contains the source code name of the variable, visibility and a pointer to
/// the initial value (.tdata or .tbss).
- MCSection *TLSTLVSection; // Defaults to ".tlv".
+ MCSection *TLSTLVSection = nullptr; // Defaults to ".tlv".
/// Section for thread local data initialization functions.
- const MCSection *TLSThreadInitSection; // Defaults to ".thread_init_func".
-
- MCSection *CStringSection;
- MCSection *UStringSection;
- MCSection *TextCoalSection;
- MCSection *ConstTextCoalSection;
- MCSection *ConstDataSection;
- MCSection *DataCoalSection;
- MCSection *ConstDataCoalSection;
- MCSection *DataCommonSection;
- MCSection *DataBSSSection;
- MCSection *FourByteConstantSection;
- MCSection *EightByteConstantSection;
- MCSection *SixteenByteConstantSection;
- MCSection *LazySymbolPointerSection;
- MCSection *NonLazySymbolPointerSection;
- MCSection *ThreadLocalPointerSection;
+ // Defaults to ".thread_init_func".
+ const MCSection *TLSThreadInitSection = nullptr;
+
+ MCSection *CStringSection = nullptr;
+ MCSection *UStringSection = nullptr;
+ MCSection *TextCoalSection = nullptr;
+ MCSection *ConstTextCoalSection = nullptr;
+ MCSection *ConstDataSection = nullptr;
+ MCSection *DataCoalSection = nullptr;
+ MCSection *ConstDataCoalSection = nullptr;
+ MCSection *DataCommonSection = nullptr;
+ MCSection *DataBSSSection = nullptr;
+ MCSection *FourByteConstantSection = nullptr;
+ MCSection *EightByteConstantSection = nullptr;
+ MCSection *SixteenByteConstantSection = nullptr;
+ MCSection *LazySymbolPointerSection = nullptr;
+ MCSection *NonLazySymbolPointerSection = nullptr;
+ MCSection *ThreadLocalPointerSection = nullptr;
/// COFF specific sections.
- MCSection *DrectveSection;
- MCSection *PDataSection;
- MCSection *XDataSection;
- MCSection *SXDataSection;
- MCSection *GFIDsSection;
+ MCSection *DrectveSection = nullptr;
+ MCSection *PDataSection = nullptr;
+ MCSection *XDataSection = nullptr;
+ MCSection *SXDataSection = nullptr;
+ MCSection *GFIDsSection = nullptr;
+ MCSection *GLJMPSection = nullptr;
public:
void InitMCObjectFileInfo(const Triple &TT, bool PIC, MCContext &ctx,
@@ -297,6 +301,12 @@ public:
MCSection *getDwarfRnglistsDWOSection() const {
return DwarfRnglistsDWOSection;
}
+ MCSection *getDwarfLoclistsDWOSection() const {
+ return DwarfLoclistsDWOSection;
+ }
+ MCSection *getDwarfMacinfoDWOSection() const {
+ return DwarfMacinfoDWOSection;
+ }
MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; }
MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; }
MCSection *getDwarfSwiftASTSection() const { return DwarfSwiftASTSection; }
@@ -379,6 +389,7 @@ public:
MCSection *getXDataSection() const { return XDataSection; }
MCSection *getSXDataSection() const { return SXDataSection; }
MCSection *getGFIDsSection() const { return GFIDsSection; }
+ MCSection *getGLJMPSection() const { return GLJMPSection; }
MCSection *getEHFrameSection() {
return EHFrameSection;
@@ -391,8 +402,8 @@ public:
private:
Environment Env;
- bool PositionIndependent;
- MCContext *Ctx;
+ bool PositionIndependent = false;
+ MCContext *Ctx = nullptr;
Triple TT;
VersionTuple SDKVersion;
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCObjectStreamer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCObjectStreamer.h
index 8affca49490f..9e3f87565e26 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -38,6 +38,8 @@ class MCObjectStreamer : public MCStreamer {
bool EmitEHFrame;
bool EmitDebugFrame;
SmallVector<MCSymbol *, 2> PendingLabels;
+ SmallVector<MCSection*, 2> PendingLabelSections;
+ unsigned CurSubsectionIdx;
struct PendingMCFixup {
const MCSymbol *Sym;
MCFixup Fixup;
@@ -84,22 +86,27 @@ public:
/// Optionally a \p STI can be passed in so that a new fragment is created
/// if the Subtarget differs from the current fragment.
MCDataFragment *getOrCreateDataFragment(const MCSubtargetInfo* STI = nullptr);
- MCPaddingFragment *getOrCreatePaddingFragment();
protected:
bool changeSectionImpl(MCSection *Section, const MCExpr *Subsection);
- /// If any labels have been emitted but not assigned fragments, ensure that
- /// they get assigned, either to F if possible or to a new data fragment.
- /// Optionally, it is also possible to provide an offset \p FOffset, which
- /// will be used as a symbol offset within the fragment.
+ /// Assign a label to the current Section and Subsection even though a
+ /// fragment is not yet present. Use flushPendingLabels(F) to associate
+ /// a fragment with this label.
+ void addPendingLabel(MCSymbol* label);
+
+ /// If any labels have been emitted but not assigned fragments in the current
+ /// Section and Subsection, ensure that they get assigned, either to fragment
+ /// F if possible or to a new data fragment. Optionally, one can provide an
+ /// offset \p FOffset as a symbol offset within the fragment.
void flushPendingLabels(MCFragment *F, uint64_t FOffset = 0);
public:
void visitUsedSymbol(const MCSymbol &Sym) override;
- /// Create a dummy fragment to assign any pending labels.
- void flushPendingLabels() { flushPendingLabels(nullptr); }
+ /// Create a data fragment for any pending labels across all Sections
+ /// and Subsections.
+ void flushPendingLabels();
MCAssembler &getAssembler() { return *Assembler; }
MCAssembler *getAssemblerPtr() override;
@@ -107,7 +114,8 @@ public:
/// @{
void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
- virtual void EmitLabel(MCSymbol *Symbol, SMLoc Loc, MCFragment *F);
+ virtual void EmitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment *F,
+ uint64_t Offset);
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
void EmitValueImpl(const MCExpr *Value, unsigned Size,
SMLoc Loc = SMLoc()) override;
@@ -132,10 +140,6 @@ public:
unsigned MaxBytesToEmit = 0) override;
void emitValueToOffset(const MCExpr *Offset, unsigned char Value,
SMLoc Loc) override;
- void
- EmitCodePaddingBasicBlockStart(const MCCodePaddingContext &Context) override;
- void
- EmitCodePaddingBasicBlockEnd(const MCCodePaddingContext &Context) override;
void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
unsigned Column, unsigned Flags,
unsigned Isa, unsigned Discriminator,
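Sketch of the renamed label overload (placeLabel is hypothetical): the offset within the fragment is now explicit rather than implied by the fragment's current size.

#include "llvm/MC/MCObjectStreamer.h"
using namespace llvm;

void placeLabel(MCObjectStreamer &OS, MCSymbol *Sym, MCFragment *F,
                uint64_t Offset) {
  // Replaces the old EmitLabel(Symbol, Loc, F) overload.
  OS.EmitLabelAtPos(Sym, SMLoc(), F, Offset);
}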
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/AsmCond.h b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/AsmCond.h
index ea2155010081..44edd2b758ff 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/AsmCond.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/AsmCond.h
@@ -30,8 +30,6 @@ public:
ConditionalAssemblyType TheCond = NoCond;
bool CondMet = false;
bool Ignore = false;
-
- AsmCond() = default;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 2b6e2aa48b8f..abb95628c2a9 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -71,10 +71,10 @@ public:
/// variable/label? Only valid when parsing MS-style inline assembly.
virtual bool needAddressOf() const { return false; }
- /// isOffsetOf - Do we need to emit code to get the offset of the variable,
- /// rather then the value of the variable? Only valid when parsing MS-style
- /// inline assembly.
- virtual bool isOffsetOf() const { return false; }
+ /// isOffsetOfLocal - Do we need to emit code to get the offset of the local
+ /// variable, rather than its value? Only valid when parsing MS-style inline
+ /// assembly.
+ virtual bool isOffsetOfLocal() const { return false; }
/// getOffsetOfLoc - Get the location of the offset operator.
virtual SMLoc getOffsetOfLoc() const { return SMLoc(); }
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
index 849dbd57f1aa..6e4821cbc7b9 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -35,6 +35,7 @@ enum AsmRewriteKind {
AOK_Align, // Rewrite align as .align.
AOK_EVEN, // Rewrite even as .even.
AOK_Emit, // Rewrite _emit as .byte.
+ AOK_CallInput, // Rewrite in terms of ${N:P}.
AOK_Input, // Rewrite in terms of $N.
AOK_Output, // Rewrite in terms of $N.
AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr).
@@ -49,6 +50,7 @@ const char AsmRewritePrecedence [] = {
2, // AOK_EVEN
2, // AOK_Emit
3, // AOK_Input
+ 3, // AOK_CallInput
3, // AOK_Output
5, // AOK_SizeDirective
1, // AOK_Label
@@ -64,39 +66,27 @@ struct IntelExpr {
int64_t Imm;
StringRef BaseReg;
StringRef IndexReg;
+ StringRef OffsetName;
unsigned Scale;
- IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0),
- BaseReg(StringRef()), IndexReg(StringRef()),
- Scale(1) {}
- // Compund immediate expression
- IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) {
- Imm = imm;
- }
- // [Reg + ImmediateExpression]
- // We don't bother to emit an immediate expression evaluated to zero
- IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0,
- bool needBracs = true) :
- IntelExpr(imm, needBracs) {
- IndexReg = reg;
+ IntelExpr()
+ : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()),
+ OffsetName(StringRef()), Scale(1) {}
+ // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
+ IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
+ StringRef offsetName, int64_t imm, bool needBracs)
+ : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg),
+ OffsetName(offsetName), Scale(1) {
if (scale)
Scale = scale;
}
- // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
- IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0,
- int64_t imm = 0, bool needBracs = true) :
- IntelExpr(indexReg, imm, scale, needBracs) {
- BaseReg = baseReg;
- }
- bool hasBaseReg() const {
- return BaseReg.size();
- }
- bool hasIndexReg() const {
- return IndexReg.size();
- }
- bool hasRegs() const {
- return hasBaseReg() || hasIndexReg();
- }
+ bool hasBaseReg() const { return !BaseReg.empty(); }
+ bool hasIndexReg() const { return !IndexReg.empty(); }
+ bool hasRegs() const { return hasBaseReg() || hasIndexReg(); }
+ bool hasOffset() const { return !OffsetName.empty(); }
+  // Only emit the immediate unconditionally when the expression has
+  // no other components.
+ bool emitImm() const { return !(hasRegs() || hasOffset()); }
bool isValid() const {
return (Scale == 1) ||
(hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
@@ -107,13 +97,14 @@ struct AsmRewrite {
AsmRewriteKind Kind;
SMLoc Loc;
unsigned Len;
+ bool Done;
int64_t Val;
StringRef Label;
IntelExpr IntelExp;
public:
AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
- : Kind(kind), Loc(loc), Len(len), Val(val) {}
+ : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {}
AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
: AsmRewrite(kind, loc, len) { Label = label; }
AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
@@ -174,6 +165,7 @@ struct DiagnosticPredicate {
: DiagnosticPredicateTy::NearMatch) {}
DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
DiagnosticPredicate(const DiagnosticPredicate &) = default;
+ DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default;
operator bool() const { return Type == DiagnosticPredicateTy::Match; }
bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
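As a hedged usage sketch (operand values hypothetical), the consolidated IntelExpr constructor above takes every component of a parsed Intel memory operand in one call:

    // Hypothetical operand: [eax + ebx*4 + OFFSET table + 16]
    IntelExpr E(/*baseReg=*/"eax", /*indexReg=*/"ebx", /*scale=*/4,
                /*offsetName=*/"table", /*imm=*/16, /*needBracs=*/true);
    assert(E.isValid());  // a scale of 4 is only valid with an index register
    assert(!E.emitImm()); // regs/offset present, so the imm is not forced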
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCRegisterInfo.h b/contrib/llvm-project/llvm/include/llvm/MC/MCRegisterInfo.h
index c7dc56ea588e..9864d95d19e0 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -16,11 +16,13 @@
#define LLVM_MC_MCREGISTERINFO_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegister.h"
#include <cassert>
#include <cstdint>
+#include <iterator>
#include <utility>
namespace llvm {
@@ -177,6 +179,9 @@ private:
DenseMap<MCRegister, int> L2CVRegs; // LLVM to CV regs mapping
public:
+ // Forward declaration to become a friend class of DiffListIterator.
+ template <class SubT> class mc_difflist_iterator;
+
/// DiffListIterator - Base iterator class that can traverse the
/// differentially encoded register and regunit lists in DiffLists.
/// Don't use this class directly, use one of the specialized sub-classes
@@ -220,8 +225,113 @@ public:
if (!advance())
List = nullptr;
}
+
+ template <class SubT> friend class MCRegisterInfo::mc_difflist_iterator;
+ };
+
+ /// Forward iterator using DiffListIterator.
+ template <class SubT>
+ class mc_difflist_iterator
+ : public iterator_facade_base<mc_difflist_iterator<SubT>,
+ std::forward_iterator_tag, MCPhysReg> {
+ MCRegisterInfo::DiffListIterator Iter;
+ /// Current value as MCPhysReg, so we can return a reference to it.
+ MCPhysReg Val;
+
+ protected:
+ mc_difflist_iterator(MCRegisterInfo::DiffListIterator Iter) : Iter(Iter) {}
+
+ // Allow conversion between instantiations where valid.
+ mc_difflist_iterator(MCRegister Reg, const MCPhysReg *DiffList) {
+ Iter.init(Reg, DiffList);
+ Val = *Iter;
+ }
+
+ public:
+ // Allow default construction to build variables, but this doesn't build
+ // a useful iterator.
+ mc_difflist_iterator() = default;
+
+ /// Return an iterator past the last element.
+ static SubT end() {
+ SubT End;
+ End.Iter.List = nullptr;
+ return End;
+ }
+
+ bool operator==(const mc_difflist_iterator &Arg) const {
+ return Iter.List == Arg.Iter.List;
+ }
+
+ const MCPhysReg &operator*() const { return Val; }
+
+ using mc_difflist_iterator::iterator_facade_base::operator++;
+ void operator++() {
+ assert(Iter.List && "Cannot increment the end iterator!");
+ ++Iter;
+ Val = *Iter;
+ }
};
+ /// Forward iterator over all sub-registers.
+ /// TODO: Replace remaining uses of MCSubRegIterator.
+ class mc_subreg_iterator : public mc_difflist_iterator<mc_subreg_iterator> {
+ public:
+ mc_subreg_iterator(MCRegisterInfo::DiffListIterator Iter)
+ : mc_difflist_iterator(Iter) {}
+ mc_subreg_iterator() = default;
+ mc_subreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
+ : mc_difflist_iterator(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs) {}
+ };
+
+ /// Forward iterator over all super-registers.
+ /// TODO: Replace remaining uses of MCSuperRegIterator.
+ class mc_superreg_iterator
+ : public mc_difflist_iterator<mc_superreg_iterator> {
+ public:
+ mc_superreg_iterator(MCRegisterInfo::DiffListIterator Iter)
+ : mc_difflist_iterator(Iter) {}
+ mc_superreg_iterator() = default;
+ mc_superreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
+ : mc_difflist_iterator(Reg,
+ MCRI->DiffLists + MCRI->get(Reg).SuperRegs) {}
+ };
+
+ /// Return an iterator range over all sub-registers of \p Reg, excluding \p
+ /// Reg.
+ iterator_range<mc_subreg_iterator> subregs(MCRegister Reg) const {
+ return make_range(std::next(mc_subreg_iterator(Reg, this)),
+ mc_subreg_iterator::end());
+ }
+
+ /// Return an iterator range over all sub-registers of \p Reg, including \p
+ /// Reg.
+ iterator_range<mc_subreg_iterator> subregs_inclusive(MCRegister Reg) const {
+ return make_range({Reg, this}, mc_subreg_iterator::end());
+ }
+
+ /// Return an iterator range over all super-registers of \p Reg, excluding \p
+ /// Reg.
+ iterator_range<mc_superreg_iterator> superregs(MCRegister Reg) const {
+ return make_range(std::next(mc_superreg_iterator(Reg, this)),
+ mc_superreg_iterator::end());
+ }
+
+ /// Return an iterator range over all super-registers of \p Reg, including \p
+ /// Reg.
+ iterator_range<mc_superreg_iterator>
+ superregs_inclusive(MCRegister Reg) const {
+ return make_range({Reg, this}, mc_superreg_iterator::end());
+ }
+
+ /// Return an iterator range over all sub- and super-registers of \p Reg,
+ /// including \p Reg.
+ detail::concat_range<const MCPhysReg, iterator_range<mc_subreg_iterator>,
+ iterator_range<mc_superreg_iterator>>
+ sub_and_superregs_inclusive(MCRegister Reg) const {
+ return concat<const MCPhysReg>(subregs_inclusive(Reg), superregs(Reg));
+ }
+
// These iterators are allowed to sub-class DiffListIterator and access
// internal list pointers.
friend class MCSubRegIterator;
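A minimal sketch of the new range-based traversal, assuming an initialized MCRegisterInfo and a register drawn from its tables:

    void printAliases(const llvm::MCRegisterInfo &MCRI, llvm::MCRegister Reg,
                      llvm::raw_ostream &OS) {
      for (llvm::MCPhysReg Sub : MCRI.subregs(Reg)) // excludes Reg itself
        OS << MCRI.getName(Sub) << '\n';
      for (llvm::MCPhysReg R : MCRI.sub_and_superregs_inclusive(Reg))
        OS << MCRI.getName(R) << '\n';              // includes Reg once
    }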
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSection.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSection.h
index d057feda87d8..d80cc5b086b3 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCSection.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSection.h
@@ -78,10 +78,6 @@ private:
/// Whether this section has had instructions emitted into it.
bool HasInstructions : 1;
- /// Whether this section has had data emitted into it.
- /// Right now this is only used by the ARM backend.
- bool HasData : 1;
-
bool IsRegistered : 1;
MCDummyFragment DummyFragment;
@@ -92,6 +88,15 @@ private:
/// below that number.
SmallVector<std::pair<unsigned, MCFragment *>, 1> SubsectionFragmentMap;
+ /// State for tracking labels that don't yet have Fragments
+ struct PendingLabel {
+ MCSymbol* Sym;
+ unsigned Subsection;
+ PendingLabel(MCSymbol* Sym, unsigned Subsection = 0)
+ : Sym(Sym), Subsection(Subsection) {}
+ };
+ SmallVector<PendingLabel, 2> PendingLabels;
+
protected:
SectionVariant Variant;
SectionKind Kind;
@@ -141,9 +146,6 @@ public:
bool hasInstructions() const { return HasInstructions; }
void setHasInstructions(bool Value) { HasInstructions = Value; }
- bool hasData() const { return HasData; }
- void setHasData(bool Value) { HasData = Value; }
-
bool isRegistered() const { return IsRegistered; }
void setIsRegistered(bool Value) { IsRegistered = Value; }
@@ -166,12 +168,6 @@ public:
iterator end() { return Fragments.end(); }
const_iterator end() const { return Fragments.end(); }
- reverse_iterator rbegin() { return Fragments.rbegin(); }
- const_reverse_iterator rbegin() const { return Fragments.rbegin(); }
-
- reverse_iterator rend() { return Fragments.rend(); }
- const_reverse_iterator rend() const { return Fragments.rend(); }
-
MCSection::iterator getSubsectionInsertionPoint(unsigned Subsection);
void dump() const;
@@ -187,6 +183,18 @@ public:
/// Check whether this section is "virtual", that is has no actual object
/// file contents.
virtual bool isVirtualSection() const = 0;
+
+ /// Add a pending label for the requested subsection. This label will be
+  /// associated with a fragment in flushPendingLabels().
+ void addPendingLabel(MCSymbol* label, unsigned Subsection = 0);
+
+ /// Associate all pending labels in a subsection with a fragment.
+ void flushPendingLabels(MCFragment *F, uint64_t FOffset = 0,
+ unsigned Subsection = 0);
+
+ /// Associate all pending labels with empty data fragments. One fragment
+ /// will be created for each subsection as necessary.
+ void flushPendingLabels();
};
} // end namespace llvm
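Taken together, the three methods above give each section a pending-label life cycle; a rough sketch of the intended call order (Sec, Label, and F are hypothetical values):

    // 1. A label appears before its subsection has any fragment:
    Sec->addPendingLabel(Label, /*Subsection=*/0);
    // 2. Once a fragment F exists, pending labels attach to it at FOffset:
    Sec->flushPendingLabels(F, /*FOffset=*/0, /*Subsection=*/0);
    // 3. At stream end, leftover labels get fresh empty data fragments:
    Sec->flushPendingLabels();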
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSectionXCOFF.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSectionXCOFF.h
index ee302ed5ecec..611eb69c1493 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCSectionXCOFF.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSectionXCOFF.h
@@ -16,11 +16,10 @@
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
namespace llvm {
-class MCSymbol;
-
// This class represents an XCOFF `Control Section`, more commonly referred to
// as a csect. A csect represents the smallest possible unit of data/code which
// will be relocated as a single block. A csect can either be:
@@ -38,14 +37,18 @@ class MCSectionXCOFF final : public MCSection {
XCOFF::StorageMappingClass MappingClass;
XCOFF::SymbolType Type;
XCOFF::StorageClass StorageClass;
+ MCSymbolXCOFF *const QualName;
MCSectionXCOFF(StringRef Section, XCOFF::StorageMappingClass SMC,
XCOFF::SymbolType ST, XCOFF::StorageClass SC, SectionKind K,
- MCSymbol *Begin)
+ MCSymbolXCOFF *QualName, MCSymbol *Begin)
: MCSection(SV_XCOFF, K, Begin), Name(Section), MappingClass(SMC),
- Type(ST), StorageClass(SC) {
- assert((ST == XCOFF::XTY_SD || ST == XCOFF::XTY_CM) &&
+ Type(ST), StorageClass(SC), QualName(QualName) {
+ assert((ST == XCOFF::XTY_SD || ST == XCOFF::XTY_CM || ST == XCOFF::XTY_ER) &&
"Invalid or unhandled type for csect.");
+ assert(QualName != nullptr && "QualName is needed.");
+ QualName->setStorageClass(SC);
+ QualName->setContainingCsect(this);
}
public:
@@ -59,6 +62,7 @@ public:
XCOFF::StorageMappingClass getMappingClass() const { return MappingClass; }
XCOFF::StorageClass getStorageClass() const { return StorageClass; }
XCOFF::SymbolType getCSectType() const { return Type; }
+ MCSymbolXCOFF *getQualNameSymbol() const { return QualName; }
void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCStreamer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCStreamer.h
index 6b48580ae57c..ba1649d33d12 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCStreamer.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCStreamer.h
@@ -103,8 +103,9 @@ public:
// Allow a target to add behavior to the emitAssignment of MCStreamer.
virtual void emitAssignment(MCSymbol *Symbol, const MCExpr *Value);
- virtual void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
- const MCInst &Inst, const MCSubtargetInfo &STI);
+ virtual void prettyPrintAsm(MCInstPrinter &InstPrinter, uint64_t Address,
+ const MCInst &Inst, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
virtual void emitDwarfFileDirective(StringRef Directive);
@@ -222,6 +223,13 @@ class MCStreamer {
bool UseAssemblerInfoForParsing;
+ /// Is the assembler allowed to insert padding automatically? For
+ /// correctness reasons, we sometimes need to ensure instructions aren't
+  /// separated in unexpected ways. At the moment, this feature is only
+  /// usable from an integrated assembler, but assembly syntax is under
+ /// discussion for future inclusion.
+ bool AllowAutoPadding = false;
+
protected:
MCStreamer(MCContext &Ctx);
@@ -266,6 +274,9 @@ public:
return TargetStreamer.get();
}
+ void setAllowAutoPadding(bool v) { AllowAutoPadding = v; }
+ bool getAllowAutoPadding() const { return AllowAutoPadding; }
+
/// When emitting an object file, create and emit a real label. When emitting
/// textual assembly, this should do nothing to avoid polluting our output.
virtual MCSymbol *EmitCFILabel();
@@ -546,11 +557,13 @@ public:
/// Emits an lcomm directive with XCOFF csect information.
///
- /// \param Symbol - The symbol we are emiting.
+ /// \param LabelSym - Label on the block of storage.
/// \param Size - The size of the block of storage.
- /// \param ByteAlignment - The alignment of the symbol in bytes. Must be a power
- /// of 2.
- virtual void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ /// \param CsectSym - Csect name for the block of storage.
+ /// \param ByteAlignment - The alignment of the symbol in bytes. Must be a
+ /// power of 2.
+ virtual void EmitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size,
+ MCSymbol *CsectSym,
unsigned ByteAlignment);
/// Emit an ELF .size directive.
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbol.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbol.h
index 189484deac7e..84263bf94035 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbol.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbol.h
@@ -178,14 +178,6 @@ private:
llvm_unreachable("Constructor throws?");
}
- MCSection *getSectionPtr() const {
- if (MCFragment *F = getFragment()) {
- assert(F != AbsolutePseudoFragment);
- return F->getParent();
- }
- return nullptr;
- }
-
/// Get a reference to the name field. Requires that we have a name
const StringMapEntry<bool> *&getNameEntryPtr() {
assert(FragmentAndHasName.getInt() && "Name is required");
@@ -267,7 +259,7 @@ public:
/// Get the section associated with a defined, non-absolute symbol.
MCSection &getSection() const {
assert(isInSection() && "Invalid accessor!");
- return *getSectionPtr();
+ return *getFragment()->getParent();
}
/// Mark the symbol as defined in the fragment \p F.
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolWasm.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolWasm.h
index 95beebe3f75a..ba2068a46071 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolWasm.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolWasm.h
@@ -21,6 +21,7 @@ class MCSymbolWasm : public MCSymbol {
mutable bool IsUsedInGOT = false;
Optional<std::string> ImportModule;
Optional<std::string> ImportName;
+ Optional<std::string> ExportName;
wasm::WasmSignature *Signature = nullptr;
Optional<wasm::WasmGlobalType> GlobalType;
Optional<wasm::WasmEventType> EventType;
@@ -78,6 +79,7 @@ public:
}
void setImportModule(StringRef Name) { ImportModule = Name; }
+ bool hasImportName() const { return ImportName.hasValue(); }
const StringRef getImportName() const {
if (ImportName.hasValue()) {
return ImportName.getValue();
@@ -86,6 +88,10 @@ public:
}
void setImportName(StringRef Name) { ImportName = Name; }
+ bool hasExportName() const { return ExportName.hasValue(); }
+ const StringRef getExportName() const { return ExportName.getValue(); }
+ void setExportName(StringRef Name) { ExportName = Name; }
+
void setUsedInGOT() const { IsUsedInGOT = true; }
bool isUsedInGOT() const { return IsUsedInGOT; }
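The export-name accessors mirror the Optional-backed import-name pattern; a small hypothetical use:

    if (WasmSym->hasExportName())        // guard: getExportName() asserts on None
      OS << WasmSym->getExportName() << '\n';
    WasmSym->setExportName("my_export"); // hypothetical name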
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolXCOFF.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolXCOFF.h
index 98ecd2466926..07dfb5d29977 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolXCOFF.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSymbolXCOFF.h
@@ -9,6 +9,7 @@
#define LLVM_MC_MCSYMBOLXCOFF_H
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCSymbol.h"
@@ -48,6 +49,19 @@ public:
return ContainingCsect;
}
+ bool hasContainingCsect() const { return ContainingCsect != nullptr; }
+
+ StringRef getUnqualifiedName() const {
+ const StringRef name = getName();
+ if (name.back() == ']') {
+ StringRef lhs, rhs;
+ std::tie(lhs, rhs) = name.rsplit('[');
+ assert(!rhs.empty() && "Invalid SMC format in XCOFF symbol.");
+ return lhs;
+ }
+ return name;
+ }
+
private:
Optional<XCOFF::StorageClass> StorageClass;
MCSectionXCOFF *ContainingCsect = nullptr;
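For illustration (symbol names hypothetical), the helper above strips a bracketed storage-mapping-class suffix and leaves plain names untouched:

    // Name "foo[RO]": getUnqualifiedName() returns "foo".
    // Name "bar":     getUnqualifiedName() returns "bar".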
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h b/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h
index f184620ff047..51a5fc9aa26a 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h
@@ -46,7 +46,6 @@ public:
bool MCSaveTempLabels : 1;
bool MCUseDwarfDirectory : 1;
bool MCIncrementalLinkerCompatible : 1;
- bool MCPIECopyRelocations : 1;
bool ShowMCEncoding : 1;
bool ShowMCInst : 1;
bool AsmVerbose : 1;
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc b/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc
index 9f1177f470b9..93e21b626eac 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc
@@ -28,8 +28,6 @@ static cl::opt<bool> IncrementalLinkerCompatible(
"When used with filetype=obj, "
"emit an object file which can be used with an incremental linker"));
-static cl::opt<bool> PIECopyRelocations("pie-copy-relocations", cl::desc("PIE Copy Relocations"));
-
static cl::opt<int> DwarfVersion("dwarf-version", cl::desc("Dwarf version"),
cl::init(0));
@@ -55,7 +53,6 @@ static MCTargetOptions InitMCTargetOptionsFromFlags() {
MCTargetOptions Options;
Options.MCRelaxAll = RelaxAll;
Options.MCIncrementalLinkerCompatible = IncrementalLinkerCompatible;
- Options.MCPIECopyRelocations = PIECopyRelocations;
Options.DwarfVersion = DwarfVersion;
Options.ShowMCInst = ShowMCInst;
Options.ABIName = ABIName;
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCXCOFFStreamer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCXCOFFStreamer.h
index b13b0031d18e..f6f8e56977d3 100644
--- a/contrib/llvm-project/llvm/include/llvm/MC/MCXCOFFStreamer.h
+++ b/contrib/llvm-project/llvm/include/llvm/MC/MCXCOFFStreamer.h
@@ -26,7 +26,8 @@ public:
uint64_t Size = 0, unsigned ByteAlignment = 0,
SMLoc Loc = SMLoc()) override;
void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override;
- void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ void EmitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size,
+ MCSymbol *CsectSym,
unsigned ByteAlign) override;
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/ELF.h b/contrib/llvm-project/llvm/include/llvm/Object/ELF.h
index 28b00c8413de..42c5b67ac3fa 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/ELF.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/ELF.h
@@ -402,12 +402,17 @@ ELFFile<ELFT>::getSectionContentsAsArray(const Elf_Shdr *Sec) const {
" has an invalid sh_size (" + Twine(Size) +
") which is not a multiple of its sh_entsize (" +
Twine(Sec->sh_entsize) + ")");
- if ((std::numeric_limits<uintX_t>::max() - Offset < Size) ||
- Offset + Size > Buf.size())
+ if (std::numeric_limits<uintX_t>::max() - Offset < Size)
return createError("section " + getSecIndexForError(this, Sec) +
" has a sh_offset (0x" + Twine::utohexstr(Offset) +
- ") + sh_size (0x" + Twine(Size) +
+ ") + sh_size (0x" + Twine::utohexstr(Size) +
") that cannot be represented");
+ if (Offset + Size > Buf.size())
+ return createError("section " + getSecIndexForError(this, Sec) +
+ " has a sh_offset (0x" + Twine::utohexstr(Offset) +
+ ") + sh_size (0x" + Twine::utohexstr(Size) +
+ ") that is greater than the file size (0x" +
+ Twine::utohexstr(Buf.size()) + ")");
if (Offset % alignof(T))
// TODO: this error is untested.
@@ -641,11 +646,12 @@ ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section,
SymTable.sh_type) +
" section (expected SHT_SYMTAB/SHT_DYNSYM)");
- if (V.size() != (SymTable.sh_size / sizeof(Elf_Sym)))
- return createError("SHT_SYMTAB_SHNDX section has sh_size (" +
- Twine(SymTable.sh_size) +
- ") which is not equal to the number of symbols (" +
- Twine(V.size()) + ")");
+ uint64_t Syms = SymTable.sh_size / sizeof(Elf_Sym);
+ if (V.size() != Syms)
+ return createError("SHT_SYMTAB_SHNDX has " + Twine(V.size()) +
+ " entries, but the symbol table associated has " +
+ Twine(Syms));
+
return V;
}
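The rewritten check above separates "sh_offset + sh_size overflows" from "runs past the file"; the underlying idiom, as a self-contained sketch:

    #include <cstdint>
    #include <limits>

    // True when [Offset, Offset + Size) fits in a BufSize-byte buffer,
    // without ever computing a wrapped Offset + Size.
    bool fitsInBuffer(uint64_t Offset, uint64_t Size, uint64_t BufSize) {
      if (std::numeric_limits<uint64_t>::max() - Offset < Size)
        return false;                  // Offset + Size would overflow
      return Offset + Size <= BufSize; // safe: wrap ruled out above
    }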
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h
index 424289a9ccaa..8a68e49477fd 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h
@@ -723,6 +723,8 @@ template <class ELFT>
Expected<ArrayRef<uint8_t>>
ELFObjectFile<ELFT>::getSectionContents(DataRefImpl Sec) const {
const Elf_Shdr *EShdr = getSection(Sec);
+ if (EShdr->sh_type == ELF::SHT_NOBITS)
+ return makeArrayRef((const uint8_t *)base(), 0);
if (std::error_code EC =
checkOffset(getMemoryBufferRef(),
(uintptr_t)base() + EShdr->sh_offset, EShdr->sh_size))
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/MachO.h b/contrib/llvm-project/llvm/include/llvm/Object/MachO.h
index 76be8049a7d4..c3ecdd93563f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/MachO.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/MachO.h
@@ -567,7 +567,7 @@ public:
static StringRef guessLibraryShortName(StringRef Name, bool &isFramework,
StringRef &Suffix);
- static Triple::ArchType getArch(uint32_t CPUType);
+ static Triple::ArchType getArch(uint32_t CPUType, uint32_t CPUSubType);
static Triple getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
const char **McpuDefault = nullptr,
const char **ArchFlag = nullptr);
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/ObjectFile.h b/contrib/llvm-project/llvm/include/llvm/Object/ObjectFile.h
index adc9dbc189af..2f1493457605 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/ObjectFile.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/ObjectFile.h
@@ -155,6 +155,8 @@ inline bool operator==(const SectionedAddress &LHS,
std::tie(RHS.SectionIndex, RHS.Address);
}
+raw_ostream &operator<<(raw_ostream &OS, const SectionedAddress &Addr);
+
/// This is a value type class that represents a single symbol in the list of
/// symbols in the object file.
class SymbolRef : public BasicSymbolRef {
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/Wasm.h b/contrib/llvm-project/llvm/include/llvm/Object/Wasm.h
index e130ea32ed21..8af94c4963b6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/Wasm.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/Wasm.h
@@ -280,6 +280,7 @@ private:
uint32_t StartFunction = -1;
bool HasLinkingSection = false;
bool HasDylinkSection = false;
+ bool SeenCodeSection = false;
wasm::WasmLinkingData LinkingData;
uint32_t NumImportedGlobals = 0;
uint32_t NumImportedFunctions = 0;
diff --git a/contrib/llvm-project/llvm/include/llvm/Object/XCOFFObjectFile.h b/contrib/llvm-project/llvm/include/llvm/Object/XCOFFObjectFile.h
index 84073ce5f6cf..fcdbf7a8095c 100644
--- a/contrib/llvm-project/llvm/include/llvm/Object/XCOFFObjectFile.h
+++ b/contrib/llvm-project/llvm/include/llvm/Object/XCOFFObjectFile.h
@@ -47,7 +47,26 @@ struct XCOFFFileHeader64 {
support::ubig32_t NumberOfSymTableEntries;
};
-struct XCOFFSectionHeader32 {
+template <typename T> struct XCOFFSectionHeader {
+ // Least significant 3 bits are reserved.
+ static constexpr unsigned SectionFlagsReservedMask = 0x7;
+
+  // The low order 16 bits of section flags denote the section type.
+ static constexpr unsigned SectionFlagsTypeMask = 0xffffu;
+
+public:
+ StringRef getName() const;
+ uint16_t getSectionType() const;
+ bool isReservedSectionType() const;
+};
+
+// Explicit extern template declarations.
+struct XCOFFSectionHeader32;
+struct XCOFFSectionHeader64;
+extern template struct XCOFFSectionHeader<XCOFFSectionHeader32>;
+extern template struct XCOFFSectionHeader<XCOFFSectionHeader64>;
+
+struct XCOFFSectionHeader32 : XCOFFSectionHeader<XCOFFSectionHeader32> {
char Name[XCOFF::NameSize];
support::ubig32_t PhysicalAddress;
support::ubig32_t VirtualAddress;
@@ -58,11 +77,9 @@ struct XCOFFSectionHeader32 {
support::ubig16_t NumberOfRelocations;
support::ubig16_t NumberOfLineNumbers;
support::big32_t Flags;
-
- StringRef getName() const;
};
-struct XCOFFSectionHeader64 {
+struct XCOFFSectionHeader64 : XCOFFSectionHeader<XCOFFSectionHeader64> {
char Name[XCOFF::NameSize];
support::ubig64_t PhysicalAddress;
support::ubig64_t VirtualAddress;
@@ -74,8 +91,6 @@ struct XCOFFSectionHeader64 {
support::ubig32_t NumberOfLineNumbers;
support::big32_t Flags;
char Padding[4];
-
- StringRef getName() const;
};
struct XCOFFSymbolEntry {
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFYAML.h
index 525fd9a89242..26dabfcf27fe 100644
--- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFYAML.h
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/DWARFYAML.h
@@ -161,8 +161,6 @@ struct Data {
} // end namespace DWARFYAML
} // end namespace llvm
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex64)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex8)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::AttributeAbbrev)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::Abbrev)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::ARangeDescriptor)
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ELFYAML.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ELFYAML.h
index 0898a0e7d532..f87135e6a1b5 100644
--- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ELFYAML.h
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/ELFYAML.h
@@ -64,6 +64,8 @@ LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_ASE)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_FLAGS1)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_ISA)
+LLVM_YAML_STRONG_TYPEDEF(StringRef, YAMLFlowString)
+
// For now, hardcode 64 bits everywhere that 32 or 64 would be needed
// since 64-bit can hold 32-bit values too.
struct FileHeader {
@@ -124,24 +126,43 @@ struct StackSizeEntry {
llvm::yaml::Hex64 Size;
};
-struct Section {
- enum class SectionKind {
+struct NoteEntry {
+ StringRef Name;
+ yaml::BinaryRef Desc;
+ llvm::yaml::Hex32 Type;
+};
+
+struct Chunk {
+ enum class ChunkKind {
Dynamic,
Group,
RawContent,
Relocation,
+ Relr,
NoBits,
+ Note,
Hash,
+ GnuHash,
Verdef,
Verneed,
StackSizes,
SymtabShndxSection,
Symver,
MipsABIFlags,
- Addrsig
+ Addrsig,
+ Fill,
+ LinkerOptions,
+ DependentLibraries,
};
- SectionKind Kind;
+
+ ChunkKind Kind;
StringRef Name;
+
+ Chunk(ChunkKind K) : Kind(K) {}
+ virtual ~Chunk();
+};
+
+struct Section : public Chunk {
ELF_SHT Type;
Optional<ELF_SHF> Flags;
llvm::yaml::Hex64 Address;
@@ -153,9 +174,10 @@ struct Section {
// When they are, this flag is used to signal about that.
bool IsImplicit;
- Section(SectionKind Kind, bool IsImplicit = false)
- : Kind(Kind), IsImplicit(IsImplicit) {}
- virtual ~Section();
+ Section(ChunkKind Kind, bool IsImplicit = false)
+ : Chunk(Kind), IsImplicit(IsImplicit) {}
+
+ static bool classof(const Chunk *S) { return S->Kind != ChunkKind::Fill; }
// The following members are used to override section fields which is
// useful for creating invalid objects.
@@ -171,6 +193,26 @@ struct Section {
// This can be used to override the sh_size field. It does not affect the
// content written.
Optional<llvm::yaml::Hex64> ShSize;
+
+ // This can be used to override the sh_flags field.
+ Optional<llvm::yaml::Hex64> ShFlags;
+};
+
+// Fill is a block of data which is placed outside of sections. It is
+// not present in the section header table, but it might affect the output file
+// size and program headers produced.
+struct Fill : Chunk {
+ Optional<yaml::BinaryRef> Pattern;
+ llvm::yaml::Hex64 Size;
+
+ // We have to remember the offset of the fill, because it does not have
+ // a corresponding section header, unlike a section. We might need this
+ // information when writing the output.
+ uint64_t ShOffset;
+
+ Fill() : Chunk(ChunkKind::Fill) {}
+
+ static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Fill; }
};
struct StackSizesSection : Section {
@@ -178,10 +220,10 @@ struct StackSizesSection : Section {
Optional<llvm::yaml::Hex64> Size;
Optional<std::vector<StackSizeEntry>> Entries;
- StackSizesSection() : Section(SectionKind::StackSizes) {}
+ StackSizesSection() : Section(ChunkKind::StackSizes) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::StackSizes;
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::StackSizes;
}
static bool nameMatches(StringRef Name) {
@@ -193,11 +235,9 @@ struct DynamicSection : Section {
std::vector<DynamicEntry> Entries;
Optional<yaml::BinaryRef> Content;
- DynamicSection() : Section(SectionKind::Dynamic) {}
+ DynamicSection() : Section(ChunkKind::Dynamic) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::Dynamic;
- }
+ static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Dynamic; }
};
struct RawContentSection : Section {
@@ -205,21 +245,29 @@ struct RawContentSection : Section {
Optional<llvm::yaml::Hex64> Size;
Optional<llvm::yaml::Hex64> Info;
- RawContentSection() : Section(SectionKind::RawContent) {}
+ RawContentSection() : Section(ChunkKind::RawContent) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::RawContent;
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::RawContent;
}
};
struct NoBitsSection : Section {
llvm::yaml::Hex64 Size;
- NoBitsSection() : Section(SectionKind::NoBits) {}
+ NoBitsSection() : Section(ChunkKind::NoBits) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::NoBits;
- }
+ static bool classof(const Chunk *S) { return S->Kind == ChunkKind::NoBits; }
+};
+
+struct NoteSection : Section {
+ Optional<yaml::BinaryRef> Content;
+ Optional<llvm::yaml::Hex64> Size;
+ Optional<std::vector<ELFYAML::NoteEntry>> Notes;
+
+ NoteSection() : Section(ChunkKind::Note) {}
+
+ static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Note; }
};
struct HashSection : Section {
@@ -228,9 +276,42 @@ struct HashSection : Section {
Optional<std::vector<uint32_t>> Bucket;
Optional<std::vector<uint32_t>> Chain;
- HashSection() : Section(SectionKind::Hash) {}
+ HashSection() : Section(ChunkKind::Hash) {}
+
+ static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Hash; }
+};
+
+struct GnuHashHeader {
+ // The number of hash buckets.
+ // Not used when dumping the object, but can be used to override
+  // the real number of buckets when emitting an object from a YAML document.
+ Optional<llvm::yaml::Hex32> NBuckets;
- static bool classof(const Section *S) { return S->Kind == SectionKind::Hash; }
+ // Index of the first symbol in the dynamic symbol table
+ // included in the hash table.
+ llvm::yaml::Hex32 SymNdx;
+
+ // The number of words in the Bloom filter.
+ // Not used when dumping the object, but can be used to override the real
+  // number of words in the Bloom filter when emitting an object from a YAML
+ // document.
+ Optional<llvm::yaml::Hex32> MaskWords;
+
+ // A shift constant used by the Bloom filter.
+ llvm::yaml::Hex32 Shift2;
+};
+
+struct GnuHashSection : Section {
+ Optional<yaml::BinaryRef> Content;
+
+ Optional<GnuHashHeader> Header;
+ Optional<std::vector<llvm::yaml::Hex64>> BloomFilter;
+ Optional<std::vector<llvm::yaml::Hex32>> HashBuckets;
+ Optional<std::vector<llvm::yaml::Hex32>> HashValues;
+
+ GnuHashSection() : Section(ChunkKind::GnuHash) {}
+
+ static bool classof(const Chunk *S) { return S->Kind == ChunkKind::GnuHash; }
};
struct VernauxEntry {
@@ -247,13 +328,14 @@ struct VerneedEntry {
};
struct VerneedSection : Section {
- std::vector<VerneedEntry> VerneedV;
+ Optional<yaml::BinaryRef> Content;
+ Optional<std::vector<VerneedEntry>> VerneedV;
llvm::yaml::Hex64 Info;
- VerneedSection() : Section(SectionKind::Verneed) {}
+ VerneedSection() : Section(ChunkKind::Verneed) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::Verneed;
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::Verneed;
}
};
@@ -271,20 +353,44 @@ struct AddrsigSection : Section {
Optional<llvm::yaml::Hex64> Size;
Optional<std::vector<AddrsigSymbol>> Symbols;
- AddrsigSection() : Section(SectionKind::Addrsig) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::Addrsig;
+ AddrsigSection() : Section(ChunkKind::Addrsig) {}
+
+ static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Addrsig; }
+};
+
+struct LinkerOption {
+ StringRef Key;
+ StringRef Value;
+};
+
+struct LinkerOptionsSection : Section {
+ Optional<std::vector<LinkerOption>> Options;
+ Optional<yaml::BinaryRef> Content;
+
+ LinkerOptionsSection() : Section(ChunkKind::LinkerOptions) {}
+
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::LinkerOptions;
+ }
+};
+
+struct DependentLibrariesSection : Section {
+ Optional<std::vector<YAMLFlowString>> Libs;
+ Optional<yaml::BinaryRef> Content;
+
+ DependentLibrariesSection() : Section(ChunkKind::DependentLibraries) {}
+
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::DependentLibraries;
}
};
struct SymverSection : Section {
std::vector<uint16_t> Entries;
- SymverSection() : Section(SectionKind::Symver) {}
+ SymverSection() : Section(ChunkKind::Symver) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::Symver;
- }
+ static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Symver; }
};
struct VerdefEntry {
@@ -296,27 +402,25 @@ struct VerdefEntry {
};
struct VerdefSection : Section {
- std::vector<VerdefEntry> Entries;
+ Optional<std::vector<VerdefEntry>> Entries;
+ Optional<yaml::BinaryRef> Content;
+
llvm::yaml::Hex64 Info;
- VerdefSection() : Section(SectionKind::Verdef) {}
+ VerdefSection() : Section(ChunkKind::Verdef) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::Verdef;
- }
+ static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Verdef; }
};
struct Group : Section {
// Members of a group contain a flag and a list of section indices
// that are part of the group.
std::vector<SectionOrType> Members;
- StringRef Signature; /* Info */
+ Optional<StringRef> Signature; /* Info */
- Group() : Section(SectionKind::Group) {}
+ Group() : Section(ChunkKind::Group) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::Group;
- }
+ static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Group; }
};
struct Relocation {
@@ -330,20 +434,31 @@ struct RelocationSection : Section {
std::vector<Relocation> Relocations;
StringRef RelocatableSec; /* Info */
- RelocationSection() : Section(SectionKind::Relocation) {}
+ RelocationSection() : Section(ChunkKind::Relocation) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::Relocation;
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::Relocation;
+ }
+};
+
+struct RelrSection : Section {
+ Optional<std::vector<llvm::yaml::Hex64>> Entries;
+ Optional<yaml::BinaryRef> Content;
+
+ RelrSection() : Section(ChunkKind::Relr) {}
+
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::Relr;
}
};
struct SymtabShndxSection : Section {
std::vector<uint32_t> Entries;
- SymtabShndxSection() : Section(SectionKind::SymtabShndxSection) {}
+ SymtabShndxSection() : Section(ChunkKind::SymtabShndxSection) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::SymtabShndxSection;
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::SymtabShndxSection;
}
};
@@ -361,23 +476,35 @@ struct MipsABIFlags : Section {
MIPS_AFL_FLAGS1 Flags1;
llvm::yaml::Hex32 Flags2;
- MipsABIFlags() : Section(SectionKind::MipsABIFlags) {}
+ MipsABIFlags() : Section(ChunkKind::MipsABIFlags) {}
- static bool classof(const Section *S) {
- return S->Kind == SectionKind::MipsABIFlags;
+ static bool classof(const Chunk *S) {
+ return S->Kind == ChunkKind::MipsABIFlags;
}
};
struct Object {
FileHeader Header;
std::vector<ProgramHeader> ProgramHeaders;
- std::vector<std::unique_ptr<Section>> Sections;
+
+ // An object might contain output section descriptions as well as
+ // custom data that does not belong to any section.
+ std::vector<std::unique_ptr<Chunk>> Chunks;
+
// Although in reality the symbols reside in a section, it is a lot
// cleaner and nicer if we read them from the YAML as a separate
// top-level key, which automatically ensures that invariants like there
// being a single SHT_SYMTAB section are upheld.
Optional<std::vector<Symbol>> Symbols;
- std::vector<Symbol> DynamicSymbols;
+ Optional<std::vector<Symbol>> DynamicSymbols;
+
+ std::vector<Section *> getSections() {
+ std::vector<Section *> Ret;
+ for (const std::unique_ptr<Chunk> &Sec : Chunks)
+ if (auto S = dyn_cast<ELFYAML::Section>(Sec.get()))
+ Ret.push_back(S);
+ return Ret;
+ }
};
} // end namespace ELFYAML
@@ -386,8 +513,10 @@ struct Object {
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::AddrsigSymbol)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::StackSizeEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::DynamicEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::LinkerOption)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::NoteEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::ProgramHeader)
-LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr<llvm::ELFYAML::Section>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr<llvm::ELFYAML::Chunk>)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::Symbol)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VerdefEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VernauxEntry)
@@ -524,10 +653,18 @@ template <> struct MappingTraits<ELFYAML::StackSizeEntry> {
static void mapping(IO &IO, ELFYAML::StackSizeEntry &Rel);
};
+template <> struct MappingTraits<ELFYAML::GnuHashHeader> {
+ static void mapping(IO &IO, ELFYAML::GnuHashHeader &Rel);
+};
+
template <> struct MappingTraits<ELFYAML::DynamicEntry> {
static void mapping(IO &IO, ELFYAML::DynamicEntry &Rel);
};
+template <> struct MappingTraits<ELFYAML::NoteEntry> {
+ static void mapping(IO &IO, ELFYAML::NoteEntry &N);
+};
+
template <> struct MappingTraits<ELFYAML::VerdefEntry> {
static void mapping(IO &IO, ELFYAML::VerdefEntry &E);
};
@@ -544,14 +681,17 @@ template <> struct MappingTraits<ELFYAML::AddrsigSymbol> {
static void mapping(IO &IO, ELFYAML::AddrsigSymbol &Sym);
};
+template <> struct MappingTraits<ELFYAML::LinkerOption> {
+ static void mapping(IO &IO, ELFYAML::LinkerOption &Sym);
+};
+
template <> struct MappingTraits<ELFYAML::Relocation> {
static void mapping(IO &IO, ELFYAML::Relocation &Rel);
};
-template <>
-struct MappingTraits<std::unique_ptr<ELFYAML::Section>> {
- static void mapping(IO &IO, std::unique_ptr<ELFYAML::Section> &Section);
- static StringRef validate(IO &io, std::unique_ptr<ELFYAML::Section> &Section);
+template <> struct MappingTraits<std::unique_ptr<ELFYAML::Chunk>> {
+ static void mapping(IO &IO, std::unique_ptr<ELFYAML::Chunk> &C);
+ static StringRef validate(IO &io, std::unique_ptr<ELFYAML::Chunk> &C);
};
template <>
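Since Fill and Section now share the Chunk base, consumers dispatch with LLVM-style RTTI through the classof() hooks above; a hedged sketch (Obj is a hypothetical ELFYAML::Object):

    for (const std::unique_ptr<llvm::ELFYAML::Chunk> &C : Obj.Chunks) {
      if (auto *F = llvm::dyn_cast<llvm::ELFYAML::Fill>(C.get())) {
        // Fill: has Size/Pattern/ShOffset, but no section header entry.
      } else if (auto *S = llvm::dyn_cast<llvm::ELFYAML::Section>(C.get())) {
        // Section: Type, Flags, Address, and the overrides are available.
      }
    }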
diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/YAML.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/YAML.h
index 37014109a615..3bf6527a7e2d 100644
--- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/YAML.h
+++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/YAML.h
@@ -85,7 +85,8 @@ public:
/// Write the contents (regardless of whether it is binary or a
/// hex string) as binary to the given raw_ostream.
- void writeAsBinary(raw_ostream &OS) const;
+  /// \p N can be used to specify the maximum number of bytes to write.
+ void writeAsBinary(raw_ostream &OS, uint64_t N = UINT64_MAX) const;
/// Write the contents (regardless of whether it is binary or a
/// hex string) as hex to the given raw_ostream.
diff --git a/contrib/llvm-project/llvm/include/llvm/Pass.h b/contrib/llvm-project/llvm/include/llvm/Pass.h
index 1d53ae32cf37..49419844e7ad 100644
--- a/contrib/llvm-project/llvm/include/llvm/Pass.h
+++ b/contrib/llvm-project/llvm/include/llvm/Pass.h
@@ -57,13 +57,11 @@ enum PassManagerType {
PMT_FunctionPassManager, ///< FPPassManager
PMT_LoopPassManager, ///< LPPassManager
PMT_RegionPassManager, ///< RGPassManager
- PMT_BasicBlockPassManager, ///< BBPassManager
PMT_Last
};
// Different types of passes.
enum PassKind {
- PT_BasicBlock,
PT_Region,
PT_Loop,
PT_Function,
@@ -305,56 +303,6 @@ protected:
bool skipFunction(const Function &F) const;
};
-//===----------------------------------------------------------------------===//
-/// Deprecated - do not create new passes as BasicBlockPasses. Use FunctionPass
-/// with a loop over the BasicBlocks instead.
-//
-/// BasicBlockPass class - This class is used to implement most local
-/// optimizations. Optimizations should subclass this class if they
-/// meet the following constraints:
-/// 1. Optimizations are local, operating on either a basic block or
-/// instruction at a time.
-/// 2. Optimizations do not modify the CFG of the contained function, or any
-/// other basic block in the function.
-/// 3. Optimizations conform to all of the constraints of FunctionPasses.
-///
-class BasicBlockPass : public Pass {
-public:
- explicit BasicBlockPass(char &pid) : Pass(PT_BasicBlock, pid) {}
-
- /// createPrinterPass - Get a basic block printer pass.
- Pass *createPrinterPass(raw_ostream &OS,
- const std::string &Banner) const override;
-
- using llvm::Pass::doInitialization;
- using llvm::Pass::doFinalization;
-
- /// doInitialization - Virtual method overridden by BasicBlockPass subclasses
- /// to do any necessary per-function initialization.
- virtual bool doInitialization(Function &);
-
- /// runOnBasicBlock - Virtual method overriden by subclasses to do the
- /// per-basicblock processing of the pass.
- virtual bool runOnBasicBlock(BasicBlock &BB) = 0;
-
- /// doFinalization - Virtual method overriden by BasicBlockPass subclasses to
- /// do any post processing needed after all passes have run.
- virtual bool doFinalization(Function &);
-
- void preparePassManager(PMStack &PMS) override;
-
- void assignPassManager(PMStack &PMS, PassManagerType T) override;
-
- /// Return what kind of Pass Manager can manage this pass.
- PassManagerType getPotentialPassManagerType() const override;
-
-protected:
- /// Optional passes call this function to check whether the pass should be
- /// skipped. This is the case when Attribute::OptimizeNone is set or when
- /// optimization bisect is over the limit.
- bool skipBasicBlock(const BasicBlock &BB) const;
-};
-
/// If the user specifies the -time-passes argument on an LLVM tool command line
/// then the value of this boolean will be true, otherwise false.
/// This is the storage for the -time-passes option.
@@ -364,7 +312,6 @@ extern bool TimePassesIsEnabled;
// Include support files that contain important APIs commonly used by Passes,
// but that we want to separate out to make it easier to read the header files.
-#include "llvm/InitializePasses.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/PassSupport.h"
diff --git a/contrib/llvm-project/llvm/include/llvm/Passes/PassBuilder.h b/contrib/llvm-project/llvm/include/llvm/Passes/PassBuilder.h
index f73e4b42dd4b..e7db8fd421fe 100644
--- a/contrib/llvm-project/llvm/include/llvm/Passes/PassBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/Passes/PassBuilder.h
@@ -151,10 +151,6 @@ public:
/// Optimize quickly without destroying debuggability.
///
- /// FIXME: The current and historical behavior of this level does *not*
- /// agree with this goal, but we would like to move toward this goal in the
- /// future.
- ///
/// This level is tuned to produce a result from the optimizer as quickly
/// as possible and to avoid destroying debuggability. This tends to result
/// in a very good development mode where the compiled code will be
@@ -164,9 +160,9 @@ public:
/// debugging of the resulting binary.
///
/// As an example, complex loop transformations such as versioning,
- /// vectorization, or fusion might not make sense here due to the degree to
- /// which the executed code would differ from the source code, and the
- /// potential compile time cost.
+ /// vectorization, or fusion don't make sense here due to the degree to
+ /// which the executed code differs from the source code, and the compile time
+ /// cost.
O1,
/// Optimize for fast execution as much as possible without triggering
@@ -635,6 +631,13 @@ public:
std::string ProfileFile,
std::string ProfileRemappingFile);
+
+ /// Returns PIC. External libraries can use this to register pass
+ /// instrumentation callbacks.
+ PassInstrumentationCallbacks *getPassInstrumentationCallbacks() const {
+ return PIC;
+ }
+
private:
static Optional<std::vector<PipelineElement>>
parsePipelineText(StringRef Text);
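A hedged sketch of the intended use: a tool constructs the PassBuilder with its own callbacks object and can later retrieve it through the new getter (the constructor shape is assumed from this era's PassBuilder; treat as illustrative):

    llvm::PassInstrumentationCallbacks PIC;
    llvm::PassBuilder PB(/*TM=*/nullptr, llvm::PipelineTuningOptions(),
                         /*PGOOpt=*/llvm::None, &PIC);
    if (auto *Callbacks = PB.getPassInstrumentationCallbacks()) {
      // External libraries register their instrumentation on Callbacks here.
    }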
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
index 0dd0c7ec8065..f272e8c03903 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -311,7 +311,7 @@ struct FunctionRecord {
/// Regions in the function along with their counts.
std::vector<CountedRegion> CountedRegions;
/// The number of times this function was executed.
- uint64_t ExecutionCount;
+ uint64_t ExecutionCount = 0;
FunctionRecord(StringRef Name, ArrayRef<StringRef> Filenames)
: Name(Name), Filenames(Filenames.begin(), Filenames.end()) {}
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h
index c26f76949992..1f8872947c64 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProf.h
@@ -885,7 +885,7 @@ uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind,
std::unique_ptr<InstrProfValueData[]>
InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site,
uint64_t *TotalC) const {
- uint64_t Dummy;
+ uint64_t Dummy = 0;
uint64_t &TotalCount = (TotalC == nullptr ? Dummy : *TotalC);
uint32_t N = getNumValueDataForSite(ValueKind, Site);
if (N == 0) {
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc
index 749781b9ac2d..99f41d8fef07 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -130,7 +130,9 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
+INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
+INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters)
INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
@@ -628,7 +630,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
(uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
/* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 4
+#define INSTR_PROF_RAW_VERSION 5
/* Indexed profile format version (start from 1). */
#define INSTR_PROF_INDEX_VERSION 5
 /* Coverage mapping format version (start from 0). */
@@ -742,7 +744,7 @@ typedef struct InstrProfValueData {
#endif /* INSTR_PROF_DATA_INC */
#ifndef INSTR_ORDER_FILE_INC
-// The maximal # of functions: 128*1024 (the buffer size will be 128*4 KB).
+/* The maximal # of functions: 128*1024 (the buffer size will be 128*4 KB). */
#define INSTR_ORDER_FILE_BUFFER_SIZE 131072
#define INSTR_ORDER_FILE_BUFFER_BITS 17
#define INSTR_ORDER_FILE_BUFFER_MASK 0x1ffff
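Because the INSTR_PROF_RAW_HEADER entries expand in order, raw-format version 5 effectively inserts two padding fields around the counter region; a hypothetical struct view covering only the fields visible in this hunk:

    struct RawHeaderPrefix {               // field order per the macros above
      uint64_t Magic, Version, DataSize;   // Version is now 5
      uint64_t PaddingBytesBeforeCounters; // new in raw version 5
      uint64_t CountersSize;
      uint64_t PaddingBytesAfterCounters;  // new in raw version 5
      uint64_t NamesSize, CountersDelta, NamesDelta;
      // ...later header fields are not shown in this hunk...
    };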
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h
index 55418d9d0f9c..f8be89c569b7 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h
@@ -387,7 +387,10 @@ public:
if (FS != iter->second.end())
return &FS->second;
// If we cannot find exact match of the callee name, return the FS with
- // the max total count.
+ // the max total count. Only do this when CalleeName is not provided,
+ // i.e., only for indirect calls.
+ if (!CalleeName.empty())
+ return nullptr;
uint64_t MaxTotalSamples = 0;
const FunctionSamples *R = nullptr;
for (const auto &NameFS : iter->second)
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfReader.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfReader.h
index 5a5d4cfde224..72b178edc260 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -358,6 +358,15 @@ public:
return getSamplesFor(CanonName);
}
+ /// Return the samples collected for function \p F, create empty
+ /// FunctionSamples if it doesn't exist.
+ FunctionSamples *getOrCreateSamplesFor(const Function &F) {
+ std::string FGUID;
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ CanonName = getRepInFormat(CanonName, getFormat(), FGUID);
+ return &Profiles[CanonName];
+ }
+
/// Return the samples collected for function \p F.
virtual FunctionSamples *getSamplesFor(StringRef Fname) {
if (Remapper) {
diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfWriter.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfWriter.h
index cc951594c9e2..5814f69fdcab 100644
--- a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -80,7 +80,7 @@ protected:
void computeSummary(const StringMap<FunctionSamples> &ProfileMap);
/// Profile format.
- SampleProfileFormat Format;
+ SampleProfileFormat Format = SPF_None;
};
/// Sample-based profile writer (text format).
@@ -227,7 +227,7 @@ private:
// Save the start of SecLBRProfile so we can compute the offset to the
// start of SecLBRProfile for each Function's Profile and will keep it
// in FuncOffsetTable.
- uint64_t SecLBRProfileStart;
+ uint64_t SecLBRProfileStart = 0;
// FuncOffsetTable maps function name to its profile offset in SecLBRProfile
// section. It is used to load function profile on demand.
MapVector<StringRef, uint64_t> FuncOffsetTable;
diff --git a/contrib/llvm-project/llvm/include/llvm/Remarks/Remark.h b/contrib/llvm-project/llvm/include/llvm/Remarks/Remark.h
index 1243311fb8c5..6211db4a8e96 100644
--- a/contrib/llvm-project/llvm/include/llvm/Remarks/Remark.h
+++ b/contrib/llvm-project/llvm/include/llvm/Remarks/Remark.h
@@ -30,8 +30,8 @@ constexpr uint64_t CurrentRemarkVersion = 0;
struct RemarkLocation {
/// Absolute path of the source file corresponding to this remark.
StringRef SourceFilePath;
- unsigned SourceLine;
- unsigned SourceColumn;
+ unsigned SourceLine = 0;
+ unsigned SourceColumn = 0;
};
// Create wrappers for C Binding types (see CBindingWrapping.h).
@@ -110,6 +110,21 @@ private:
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Remark, LLVMRemarkEntryRef)
/// Comparison operators for Remark objects and dependent objects.
+
+template <typename T>
+bool operator<(const Optional<T> &LHS, const Optional<T> &RHS) {
+  // Sorting based on optionals should result in all `None` entries appearing
+ // before the valid entries. For example, remarks with no debug location will
+ // appear first.
+ if (!LHS && !RHS)
+ return false;
+ if (!LHS && RHS)
+ return true;
+ if (LHS && !RHS)
+ return false;
+ return *LHS < *RHS;
+}
+
inline bool operator==(const RemarkLocation &LHS, const RemarkLocation &RHS) {
return LHS.SourceFilePath == RHS.SourceFilePath &&
LHS.SourceLine == RHS.SourceLine &&
@@ -120,6 +135,11 @@ inline bool operator!=(const RemarkLocation &LHS, const RemarkLocation &RHS) {
return !(LHS == RHS);
}
+inline bool operator<(const RemarkLocation &LHS, const RemarkLocation &RHS) {
+ return std::make_tuple(LHS.SourceFilePath, LHS.SourceLine, LHS.SourceColumn) <
+ std::make_tuple(RHS.SourceFilePath, RHS.SourceLine, RHS.SourceColumn);
+}
+
inline bool operator==(const Argument &LHS, const Argument &RHS) {
return LHS.Key == RHS.Key && LHS.Val == RHS.Val && LHS.Loc == RHS.Loc;
}
@@ -128,6 +148,11 @@ inline bool operator!=(const Argument &LHS, const Argument &RHS) {
return !(LHS == RHS);
}
+inline bool operator<(const Argument &LHS, const Argument &RHS) {
+ return std::make_tuple(LHS.Key, LHS.Val, LHS.Loc) <
+ std::make_tuple(RHS.Key, RHS.Val, RHS.Loc);
+}
+
inline bool operator==(const Remark &LHS, const Remark &RHS) {
return LHS.RemarkType == RHS.RemarkType && LHS.PassName == RHS.PassName &&
LHS.RemarkName == RHS.RemarkName &&
@@ -139,6 +164,13 @@ inline bool operator!=(const Remark &LHS, const Remark &RHS) {
return !(LHS == RHS);
}
+inline bool operator<(const Remark &LHS, const Remark &RHS) {
+ return std::make_tuple(LHS.RemarkType, LHS.PassName, LHS.RemarkName,
+ LHS.FunctionName, LHS.Loc, LHS.Hotness, LHS.Args) <
+ std::make_tuple(RHS.RemarkType, RHS.PassName, RHS.RemarkName,
+ RHS.FunctionName, RHS.Loc, RHS.Hotness, RHS.Args);
+}
+
} // end namespace remarks
} // end namespace llvm
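A minimal sketch of the ordering these operators define (illustrative values, not part of the patch): RemarkLocation compares as a (path, line, column) tuple, and None-valued optionals sort first.

    #include "llvm/Remarks/Remark.h"
    #include <cassert>

    static void remarkOrderingSketch() {
      using namespace llvm::remarks;
      RemarkLocation A{"a.c", 10, 1};
      RemarkLocation B{"a.c", 10, 2};
      assert(A < B); // same path and line, so columns decide

      llvm::Optional<RemarkLocation> NoLoc;       // remark without debug info
      llvm::Optional<RemarkLocation> WithLoc = A;
      assert(NoLoc < WithLoc); // None entries appear first when sorting
    }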
diff --git a/contrib/llvm-project/llvm/include/llvm/Remarks/RemarkFormat.h b/contrib/llvm-project/llvm/include/llvm/Remarks/RemarkFormat.h
index 6dd32b226099..a432c5adf59e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Remarks/RemarkFormat.h
+++ b/contrib/llvm-project/llvm/include/llvm/Remarks/RemarkFormat.h
@@ -27,6 +27,9 @@ enum class Format { Unknown, YAML, YAMLStrTab, Bitstream };
/// Parse and validate a string for the remark format.
Expected<Format> parseFormat(StringRef FormatStr);
+/// Parse and validate a magic number, returning the remark format it denotes.
+Expected<Format> magicToFormat(StringRef Magic);
+
} // end namespace remarks
} // end namespace llvm
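A short sketch of Expected-based handling for the new entry point; the Magic parameter is assumed to hold the leading bytes of a remark file:

    #include "llvm/Remarks/RemarkFormat.h"

    static llvm::Expected<llvm::remarks::Format>
    deduceFormat(llvm::StringRef Magic) {
      llvm::Expected<llvm::remarks::Format> MaybeFormat =
          llvm::remarks::magicToFormat(Magic);
      if (!MaybeFormat)
        return MaybeFormat.takeError(); // unknown or truncated magic
      return *MaybeFormat;
    }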
diff --git a/contrib/llvm-project/llvm/include/llvm/Remarks/RemarkLinker.h b/contrib/llvm-project/llvm/include/llvm/Remarks/RemarkLinker.h
new file mode 100644
index 000000000000..c82c73d8c94f
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Remarks/RemarkLinker.h
@@ -0,0 +1,100 @@
+//===-- llvm/Remarks/RemarkLinker.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an interface to link together multiple remark files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_LINKER_H
+#define LLVM_REMARKS_REMARK_LINKER_H
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Support/Error.h"
+#include <memory>
+#include <set>
+
+namespace llvm {
+namespace remarks {
+
+struct RemarkLinker {
+private:
+ /// Compare through the pointers.
+ struct RemarkPtrCompare {
+ bool operator()(const std::unique_ptr<Remark> &LHS,
+ const std::unique_ptr<Remark> &RHS) const {
+ assert(LHS && RHS && "Invalid pointers to compare.");
+ return *LHS < *RHS;
+    }
+ };
+
+ /// The main string table for the remarks.
+ /// Note: all remarks should use the strings from this string table to avoid
+ /// dangling references.
+ StringTable StrTab;
+
+ /// A set holding unique remarks.
+ /// FIXME: std::set is probably not the most appropriate data structure here.
+ /// Due to the limitation of having a move-only key, there isn't another
+ /// obvious choice for now.
+ std::set<std::unique_ptr<Remark>, RemarkPtrCompare> Remarks;
+
+ /// A path to append before the external file path found in remark metadata.
+ Optional<std::string> PrependPath;
+
+ /// Keep this remark. If it's already in the set, discard it.
+ Remark &keep(std::unique_ptr<Remark> Remark);
+
+public:
+ /// Set a path to prepend to the external file path.
+ void setExternalFilePrependPath(StringRef PrependPath);
+
+ /// Link the remarks found in \p Buffer.
+ /// If \p RemarkFormat is not provided, try to deduce it from the metadata in
+ /// \p Buffer.
+ /// \p Buffer can be either a standalone remark container or just
+ /// metadata. This takes care of uniquing and merging the remarks.
+ Error link(StringRef Buffer, Optional<Format> RemarkFormat = None);
+
+ /// Link the remarks found in \p Obj by looking for the right section and
+ /// calling the method above.
+ Error link(const object::ObjectFile &Obj,
+ Optional<Format> RemarkFormat = None);
+
+ /// Serialize the linked remarks to the stream \p OS, using the format \p
+  /// RemarksFormat.
+ /// This clears internal state such as the string table.
+ /// Note: this implies that the serialization mode is standalone.
+ Error serialize(raw_ostream &OS, Format RemarksFormat) const;
+
+ /// Check whether there are any remarks linked.
+ bool empty() const { return Remarks.empty(); }
+
+ /// Return a collection of the linked unique remarks to iterate on.
+ /// Ex:
+  /// for (const Remark &R : RL.remarks()) { [...] }
+  using iterator = pointee_iterator<
+      std::set<std::unique_ptr<Remark>, RemarkPtrCompare>::iterator>;
+
+ iterator_range<iterator> remarks() const {
+ return {Remarks.begin(), Remarks.end()};
+ }
+};
+
+/// Returns a buffer with the contents of the remarks section depending on the
+/// format of the file. If the section doesn't exist, this returns an empty
+/// optional.
+Expected<Optional<StringRef>>
+getRemarksSectionContents(const object::ObjectFile &Obj);
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_LINKER_H */
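A sketch of the intended usage, assuming BufA and BufB hold remark data and OS is a destination stream:

    #include "llvm/Remarks/RemarkLinker.h"

    static llvm::Error mergeRemarks(llvm::StringRef BufA, llvm::StringRef BufB,
                                    llvm::raw_ostream &OS) {
      llvm::remarks::RemarkLinker RL;
      if (llvm::Error E = RL.link(BufA)) // format deduced from the buffer
        return E;
      if (llvm::Error E = RL.link(BufB, llvm::remarks::Format::YAML))
        return E;
      // Uniqued, sorted remarks are written out; internal state is cleared.
      return RL.serialize(OS, llvm::remarks::Format::YAML);
    }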
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def
index 15737265dfc3..6b25ef2ca435 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -120,10 +120,24 @@ AARCH64_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
AArch64::AEK_SSBS))
AARCH64_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_NONE))
-AARCH64_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
-AARCH64_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
+AARCH64_CPU_NAME("apple-a7", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_NONE))
+AARCH64_CPU_NAME("apple-a8", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_NONE))
+AARCH64_CPU_NAME("apple-a9", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_NONE))
+AARCH64_CPU_NAME("apple-a10", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_CRC | AArch64::AEK_RDM))
+AARCH64_CPU_NAME("apple-a11", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_NONE))
+AARCH64_CPU_NAME("apple-a12", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16))
+AARCH64_CPU_NAME("apple-a13", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16 | AArch64::AEK_FP16FML))
+AARCH64_CPU_NAME("apple-s4", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16))
+AARCH64_CPU_NAME("apple-s5", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16))
AARCH64_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
AARCH64_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.h b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.h
index 94f341c83260..fbe08945a038 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.h
@@ -123,6 +123,15 @@ void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
bool isX18ReservedByDefault(const Triple &TT);
+struct ParsedBranchProtection {
+ StringRef Scope;
+ StringRef Key;
+ bool BranchTargetEnforcement;
+};
+
+bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
+ StringRef &Err);
+
} // namespace AArch64
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AMDGPUMetadata.h b/contrib/llvm-project/llvm/include/llvm/Support/AMDGPUMetadata.h
index f7f1ec40dde9..eeef4e699c3e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/AMDGPUMetadata.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/AMDGPUMetadata.h
@@ -75,6 +75,7 @@ enum class ValueKind : uint8_t {
HiddenDefaultQueue = 12,
HiddenCompletionAction = 13,
HiddenMultiGridSyncArg = 14,
+ HiddenHostcallBuffer = 15,
Unknown = 0xff
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.def b/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.def
index 3e77e20762c1..7f03d9a1320a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/contrib/llvm-project/llvm/include/llvm/Support/ARMTargetParser.def
@@ -277,8 +277,6 @@ ARM_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
-ARM_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
-ARM_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Allocator.h b/contrib/llvm-project/llvm/include/llvm/Support/Allocator.h
index 106b90c35bf5..670335ffecbc 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Allocator.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Allocator.h
@@ -269,7 +269,7 @@ public:
inline LLVM_ATTRIBUTE_RETURNS_NONNULL LLVM_ATTRIBUTE_RETURNS_NOALIAS void *
Allocate(size_t Size, size_t Alignment) {
- assert(Alignment > 0 && "0-byte alignnment is not allowed. Use 1 instead.");
+ assert(Alignment > 0 && "0-byte alignment is not allowed. Use 1 instead.");
return Allocate(Size, Align(Alignment));
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Automaton.h b/contrib/llvm-project/llvm/include/llvm/Support/Automaton.h
index 7c13a698e492..c2b921311a8c 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Automaton.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Automaton.h
@@ -117,6 +117,10 @@ public:
reset();
}
+ ArrayRef<NfaStatePair> getTransitionInfo() const {
+ return TransitionInfo;
+ }
+
void reset() {
Paths.clear();
Heads.clear();
@@ -198,7 +202,13 @@ public:
M->emplace(std::make_pair(I.FromDfaState, I.Action),
std::make_pair(I.ToDfaState, I.InfoIdx));
}
- Automaton(const Automaton &) = default;
+ Automaton(const Automaton &Other)
+ : M(Other.M), State(Other.State), Transcribe(Other.Transcribe) {
+ // Transcriber is not thread-safe, so create a new instance on copy.
+ if (Other.Transcriber)
+ Transcriber = std::make_shared<internal::NfaTranscriber>(
+ Other.Transcriber->getTransitionInfo());
+ }
/// Reset the automaton to its initial state.
void reset() {
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamArray.h b/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamArray.h
index 67ba2e4189be..1634983d26ce 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamArray.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamArray.h
@@ -133,9 +133,9 @@ public:
Extractor &getExtractor() { return E; }
BinaryStreamRef getUnderlyingStream() const { return Stream; }
- void setUnderlyingStream(BinaryStreamRef S, uint32_t Skew = 0) {
- Stream = S;
- this->Skew = Skew;
+ void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) {
+ Stream = NewStream;
+ Skew = NewSkew;
}
void drop_front() { Skew += begin()->length(); }
@@ -143,7 +143,7 @@ public:
private:
BinaryStreamRef Stream;
Extractor E;
- uint32_t Skew;
+ uint32_t Skew = 0;
};
template <typename ValueType, typename Extractor>
@@ -274,6 +274,7 @@ public:
return !(*this == Other);
}
+ FixedStreamArray(const FixedStreamArray &) = default;
FixedStreamArray &operator=(const FixedStreamArray &) = default;
const T &operator[](uint32_t Index) const {
@@ -323,6 +324,8 @@ public:
FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
: Array(Array), Index(Index) {}
+ FixedStreamArrayIterator<T>(const FixedStreamArrayIterator<T> &Other)
+ : Array(Other.Array), Index(Other.Index) {}
FixedStreamArrayIterator<T> &
operator=(const FixedStreamArrayIterator<T> &Other) {
Array = Other.Array;
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamReader.h b/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamReader.h
index 9e16ce227ff8..b7d61c02667b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamReader.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamReader.h
@@ -148,14 +148,14 @@ public:
/// returns an appropriate error code.
Error readStreamRef(BinaryStreamRef &Ref, uint32_t Length);
- /// Read \p Length bytes from the underlying stream into \p Stream. This is
+ /// Read \p Length bytes from the underlying stream into \p Ref. This is
/// equivalent to calling getUnderlyingStream().slice(Offset, Length).
/// Updates the stream's offset to point after the newly read object. Never
/// causes a copy.
///
/// \returns a success error code if the data was successfully read, otherwise
/// returns an appropriate error code.
- Error readSubstream(BinarySubstreamRef &Stream, uint32_t Size);
+ Error readSubstream(BinarySubstreamRef &Ref, uint32_t Length);
/// Get a pointer to an object of type T from the underlying stream, as if by
/// memcpy, and store the result into \p Dest. It is up to the caller to
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamRef.h b/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamRef.h
index 7427b8da5b43..5375d6a3a761 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamRef.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/BinaryStreamRef.h
@@ -198,7 +198,7 @@ public:
};
struct BinarySubstreamRef {
- uint32_t Offset; // Offset in the parent stream
+ uint32_t Offset = 0; // Offset in the parent stream
BinaryStreamRef StreamData; // Stream Data
BinarySubstreamRef slice(uint32_t Off, uint32_t Size) const {
@@ -211,8 +211,8 @@ struct BinarySubstreamRef {
BinarySubstreamRef keep_front(uint32_t N) const { return slice(0, N); }
std::pair<BinarySubstreamRef, BinarySubstreamRef>
- split(uint32_t Offset) const {
- return std::make_pair(keep_front(Offset), drop_front(Offset));
+ split(uint32_t Off) const {
+ return std::make_pair(keep_front(Off), drop_front(Off));
}
uint32_t size() const { return StreamData.getLength(); }
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/CodeGen.h b/contrib/llvm-project/llvm/include/llvm/Support/CodeGen.h
index a3f423e558cf..e2aa2b68b9d2 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/CodeGen.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/CodeGen.h
@@ -57,6 +57,15 @@ namespace llvm {
};
}
+  /// This enum is meant to be passed into addPassesToEmitFile to indicate
+  /// what type of file to emit, and is returned by it to indicate what type
+  /// of file could actually be made.
+ enum CodeGenFileType {
+ CGFT_AssemblyFile,
+ CGFT_ObjectFile,
+ CGFT_Null // Do not emit any output.
+ };
+
// Specify effect of frame pointer elimination optimization.
namespace FramePointer {
enum FP {All, NonLeaf, None};
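A sketch of selecting an output kind with the relocated enum; TM, PM and OS stand in for a configured TargetMachine, legacy PassManager and output stream, as in llc-style drivers:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Target/TargetMachine.h"

    static void emitObject(llvm::TargetMachine &TM,
                           llvm::legacy::PassManager &PM,
                           llvm::raw_pwrite_stream &OS) {
      // addPassesToEmitFile returns true on failure.
      if (TM.addPassesToEmitFile(PM, OS, /*DwoOut=*/nullptr,
                                 llvm::CGFT_ObjectFile))
        llvm::report_fatal_error("target does not support object emission");
    }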
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/CommandLine.h b/contrib/llvm-project/llvm/include/llvm/Support/CommandLine.h
index 63784463e171..05374e34aa7d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/CommandLine.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/CommandLine.h
@@ -20,6 +20,8 @@
#define LLVM_SUPPORT_COMMANDLINE_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -29,6 +31,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <climits>
@@ -468,6 +471,43 @@ struct sub {
template <class Opt> void apply(Opt &O) const { O.addSubCommand(Sub); }
};
+// Specify a callback function to be called when an option is seen.
+// Can be used to set other options automatically.
+template <typename R, typename Ty> struct cb {
+ std::function<R(Ty)> CB;
+
+ cb(std::function<R(Ty)> CB) : CB(CB) {}
+
+ template <typename Opt> void apply(Opt &O) const { O.setCallback(CB); }
+};
+
+namespace detail {
+template <typename F>
+struct callback_traits : public callback_traits<decltype(&F::operator())> {};
+
+template <typename R, typename C, typename... Args>
+struct callback_traits<R (C::*)(Args...) const> {
+ using result_type = R;
+ using arg_type = typename std::tuple_element<0, std::tuple<Args...>>::type;
+  static_assert(sizeof...(Args) == 1,
+                "callback function must have one and only one parameter");
+ static_assert(std::is_same<result_type, void>::value,
+ "callback return type must be void");
+ static_assert(
+ std::is_lvalue_reference<arg_type>::value &&
+ std::is_const<typename std::remove_reference<arg_type>::type>::value,
+ "callback arg_type must be a const lvalue reference");
+};
+} // namespace detail
+
+template <typename F>
+cb<typename detail::callback_traits<F>::result_type,
+ typename detail::callback_traits<F>::arg_type>
+callback(F CB) {
+ using result_type = typename detail::callback_traits<F>::result_type;
+ using arg_type = typename detail::callback_traits<F>::arg_type;
+ return cb<result_type, arg_type>(CB);
+}
+
//===----------------------------------------------------------------------===//
// OptionValue class
@@ -952,6 +992,50 @@ public:
extern template class basic_parser<int>;
//--------------------------------------------------
+// parser<long>
+//
+template <> class parser<long> final : public basic_parser<long> {
+public:
+ parser(Option &O) : basic_parser(O) {}
+
+ // parse - Return true on error.
+ bool parse(Option &O, StringRef ArgName, StringRef Arg, long &Val);
+
+ // getValueName - Overload in subclass to provide a better default value.
+ StringRef getValueName() const override { return "long"; }
+
+ void printOptionDiff(const Option &O, long V, OptVal Default,
+ size_t GlobalWidth) const;
+
+ // An out-of-line virtual method to provide a 'home' for this class.
+ void anchor() override;
+};
+
+extern template class basic_parser<long>;
+
+//--------------------------------------------------
+// parser<long long>
+//
+template <> class parser<long long> : public basic_parser<long long> {
+public:
+ parser(Option &O) : basic_parser(O) {}
+
+ // parse - Return true on error.
+ bool parse(Option &O, StringRef ArgName, StringRef Arg, long long &Val);
+
+ // getValueName - Overload in subclass to provide a better default value.
+  StringRef getValueName() const override { return "long long"; }
+
+ void printOptionDiff(const Option &O, long long V, OptVal Default,
+ size_t GlobalWidth) const;
+
+ // An out-of-line virtual method to provide a 'home' for this class.
+ void anchor() override;
+};
+
+extern template class basic_parser<long long>;
+
+//--------------------------------------------------
// parser<unsigned>
//
template <> class parser<unsigned> : public basic_parser<unsigned> {
@@ -1341,6 +1425,7 @@ class opt : public Option,
return true; // Parse error!
this->setValue(Val);
this->setPosition(pos);
+ Callback(Val);
return false;
}
@@ -1399,6 +1484,7 @@ public:
template <class T> DataType &operator=(const T &Val) {
this->setValue(Val);
+ Callback(Val);
return this->getValue();
}
@@ -1408,6 +1494,14 @@ public:
apply(this, Ms...);
done();
}
+
+ void setCallback(
+ std::function<void(const typename ParserClass::parser_data_type &)> CB) {
+ Callback = CB;
+ }
+
+ std::function<void(const typename ParserClass::parser_data_type &)> Callback =
+ [](const typename ParserClass::parser_data_type &) {};
};
extern template class opt<unsigned>;
@@ -1547,6 +1641,7 @@ class list : public Option, public list_storage<DataType, StorageClass> {
list_storage<DataType, StorageClass>::addValue(Val);
setPosition(pos);
Positions.push_back(pos);
+ Callback(Val);
return false;
}
@@ -1593,6 +1688,14 @@ public:
apply(this, Ms...);
done();
}
+
+ void setCallback(
+ std::function<void(const typename ParserClass::parser_data_type &)> CB) {
+ Callback = CB;
+ }
+
+ std::function<void(const typename ParserClass::parser_data_type &)> Callback =
+ [](const typename ParserClass::parser_data_type &) {};
};
// multi_val - Modifier to set the number of additional values.
@@ -1693,6 +1796,7 @@ class bits : public Option, public bits_storage<DataType, Storage> {
this->addValue(Val);
setPosition(pos);
Positions.push_back(pos);
+ Callback(Val);
return false;
}
@@ -1734,6 +1838,14 @@ public:
apply(this, Ms...);
done();
}
+
+ void setCallback(
+ std::function<void(const typename ParserClass::parser_data_type &)> CB) {
+ Callback = CB;
+ }
+
+ std::function<void(const typename ParserClass::parser_data_type &)> Callback =
+ [](const typename ParserClass::parser_data_type &) {};
};
//===----------------------------------------------------------------------===//
@@ -1831,7 +1943,7 @@ void PrintHelpMessage(bool Hidden = false, bool Categorized = false);
//
/// Use this to get a StringMap to all registered named options
-/// (e.g. -help). Note \p Map Should be an empty StringMap.
+/// (e.g. -help).
///
/// \return A reference to the StringMap used by the cl APIs to parse options.
///
@@ -1964,10 +2076,16 @@ bool readConfigFile(StringRef CfgFileName, StringSaver &Saver,
/// with nullptrs in the Argv vector.
/// \param [in] RelativeNames true if names of nested response files must be
/// resolved relative to including file.
+/// \param [in] FS File system used for all file access when running the tool.
+/// \param [in] CurrentDir Path used to resolve relative response files. If
+/// set to None, the process's current working directory is used instead.
/// \return true if all @files were expanded successfully or there were none.
-bool ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
- SmallVectorImpl<const char *> &Argv,
- bool MarkEOLs = false, bool RelativeNames = false);
+bool ExpandResponseFiles(
+ StringSaver &Saver, TokenizerCallback Tokenizer,
+ SmallVectorImpl<const char *> &Argv, bool MarkEOLs = false,
+ bool RelativeNames = false,
+ llvm::vfs::FileSystem &FS = *llvm::vfs::getRealFileSystem(),
+ llvm::Optional<llvm::StringRef> CurrentDir = llvm::None);
/// Mark all options not part of this category as cl::ReallyHidden.
///
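A sketch of the new cl::callback modifier; the option name and message are illustrative. Per callback_traits above, the lambda must take exactly one const lvalue-reference parameter and return void:

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool> DebugAll(
        "debug-all", llvm::cl::desc("Enable every debug facility"),
        llvm::cl::callback([](const bool &V) {
          if (V) // runs as soon as the option is parsed
            llvm::errs() << "-debug-all: enabling all debug output\n";
        }));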
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Compiler.h b/contrib/llvm-project/llvm/include/llvm/Support/Compiler.h
index cb7e57d4cd21..6f6f65cad6f5 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Compiler.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Compiler.h
@@ -117,11 +117,17 @@
/// not accessible from outside it. Can also be used to mark variables and
/// functions, making them private to any shared library they are linked into.
/// On PE/COFF targets, library visibility is the default, so this isn't needed.
+///
+/// LLVM_EXTERNAL_VISIBILITY - classes, functions, and variables marked with
+/// this attribute will be made public and visible outside of any shared library
+/// they are linked into.
#if (__has_attribute(visibility) || LLVM_GNUC_PREREQ(4, 0, 0)) && \
!defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(_WIN32)
#define LLVM_LIBRARY_VISIBILITY __attribute__ ((visibility("hidden")))
+#define LLVM_EXTERNAL_VISIBILITY __attribute__ ((visibility("default")))
#else
#define LLVM_LIBRARY_VISIBILITY
+#define LLVM_EXTERNAL_VISIBILITY
#endif
#if defined(__GNUC__)
@@ -406,10 +412,12 @@
#if __has_feature(memory_sanitizer)
# define LLVM_MEMORY_SANITIZER_BUILD 1
# include <sanitizer/msan_interface.h>
+# define LLVM_NO_SANITIZE_MEMORY_ATTRIBUTE __attribute__((no_sanitize_memory))
#else
# define LLVM_MEMORY_SANITIZER_BUILD 0
# define __msan_allocated_memory(p, size)
# define __msan_unpoison(p, size)
+# define LLVM_NO_SANITIZE_MEMORY_ATTRIBUTE
#endif
/// \macro LLVM_ADDRESS_SANITIZER_BUILD
@@ -504,19 +512,15 @@ void AnnotateIgnoreWritesEnd(const char *file, int line);
/// extern globals, and static globals.
///
/// This is essentially an extremely restricted analog to C++11's thread_local
-/// support, and uses that when available. However, it falls back on
-/// platform-specific or vendor-provided extensions when necessary. These
-/// extensions don't support many of the C++11 thread_local's features. You
-/// should only use this for PODs that you can statically initialize to
-/// some constant value. In almost all circumstances this is most appropriate
-/// for use with a pointer, integer, or small aggregation of pointers and
-/// integers.
+/// support. It uses thread_local if available, falling back on gcc __thread
+/// if not. __thread doesn't support many of the C++11 thread_local's
+/// features. You should only use this for PODs that you can statically
+/// initialize to some constant value. In almost all circumstances this is most
+/// appropriate for use with a pointer, integer, or small aggregation of
+/// pointers and integers.
#if LLVM_ENABLE_THREADS
-#if __has_feature(cxx_thread_local)
+#if __has_feature(cxx_thread_local) || defined(_MSC_VER)
#define LLVM_THREAD_LOCAL thread_local
-#elif defined(_MSC_VER)
-// MSVC supports this with a __declspec.
-#define LLVM_THREAD_LOCAL __declspec(thread)
#else
// Clang, GCC, and other compatible compilers used __thread prior to C++11 and
// we only need the restricted functionality that it provides.
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/CrashRecoveryContext.h b/contrib/llvm-project/llvm/include/llvm/Support/CrashRecoveryContext.h
index feb449e2899c..9522c4742244 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/CrashRecoveryContext.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/CrashRecoveryContext.h
@@ -100,6 +100,14 @@ public:
/// Explicitly trigger a crash recovery in the current process, and
/// return failure from RunSafely(). This function does not return.
void HandleCrash();
+
+ /// In case of a crash, this is the crash identifier.
+ int RetCode = 0;
+
+  /// Selects whether handling of failures should be done in the same way as
+  /// for regular crashes. When this is active, a crash will print the call
+  /// stack, clean up any temporary files, and create a coredump/minidump.
+ bool DumpStackAndCleanupOnFailure = false;
};
/// Abstract base class of cleanup handlers.
@@ -111,12 +119,12 @@ public:
/// a crash recovery context.
class CrashRecoveryContextCleanup {
protected:
- CrashRecoveryContext *context;
+ CrashRecoveryContext *context = nullptr;
CrashRecoveryContextCleanup(CrashRecoveryContext *context)
- : context(context), cleanupFired(false) {}
+ : context(context) {}
public:
- bool cleanupFired;
+ bool cleanupFired = false;
virtual ~CrashRecoveryContextCleanup();
virtual void recoverResources() = 0;
@@ -127,7 +135,7 @@ public:
private:
friend class CrashRecoveryContext;
- CrashRecoveryContextCleanup *prev, *next;
+ CrashRecoveryContextCleanup *prev = nullptr, *next = nullptr;
};
/// Base class of cleanup handler that controls recovery of resources of the
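A sketch of the new failure-handling fields; Work is an assumed callable to protect:

    #include "llvm/Support/CrashRecoveryContext.h"

    static int runGuarded(llvm::function_ref<void()> Work) {
      llvm::CrashRecoveryContext CRC;
      CRC.DumpStackAndCleanupOnFailure = true; // behave like a real crash
      if (!CRC.RunSafely(Work))
        return CRC.RetCode; // crash identifier
      return 0;
    }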
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/DataExtractor.h b/contrib/llvm-project/llvm/include/llvm/Support/DataExtractor.h
index f590a1e104fb..0be478811b22 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/DataExtractor.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/DataExtractor.h
@@ -534,14 +534,14 @@ public:
/// error state of the cursor. The only way both eof and error states can be
/// true is if one attempts a read while the cursor is at the very end of the
/// data buffer.
- bool eof(const Cursor &C) const { return Data.size() == C.Offset; }
+ bool eof(const Cursor &C) const { return size() == C.Offset; }
/// Test the validity of \a offset.
///
/// @return
/// \b true if \a offset is a valid offset into the data in this
/// object, \b false otherwise.
- bool isValidOffset(uint64_t offset) const { return Data.size() > offset; }
+ bool isValidOffset(uint64_t offset) const { return size() > offset; }
/// Test the availability of \a length bytes of data from \a offset.
///
@@ -563,6 +563,9 @@ public:
return isValidOffsetForDataOfSize(offset, AddressSize);
}
+ /// Return the number of bytes in the underlying buffer.
+ size_t size() const { return Data.size(); }
+
protected:
// Make it possible for subclasses to access these fields without making them
// public.
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Error.h b/contrib/llvm-project/llvm/include/llvm/Support/Error.h
index 350877a219bf..44676338808b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Error.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Error.h
@@ -155,10 +155,10 @@ private:
/// they're moved-assigned or constructed from Success values that have already
/// been checked. This enforces checking through all levels of the call stack.
class LLVM_NODISCARD Error {
- // Both ErrorList and FileError need to be able to yank ErrorInfoBase
- // pointers out of this class to add to the error list.
+ // ErrorList needs to be able to yank ErrorInfoBase pointers out of Errors
+ // to add to the error list. It can't rely on handleErrors for this, since
+ // handleErrors does not support ErrorList handlers.
friend class ErrorList;
- friend class FileError;
// handleErrors needs to be able to set the Checked flag.
template <typename... HandlerTs>
@@ -1232,6 +1232,8 @@ public:
Err->log(OS);
}
+ StringRef getFileName() { return FileName; }
+
Error takeError() { return Error(std::move(Err)); }
std::error_code convertToErrorCode() const override;
@@ -1251,8 +1253,14 @@ private:
}
static Error build(const Twine &F, Optional<size_t> Line, Error E) {
+ std::unique_ptr<ErrorInfoBase> Payload;
+ handleAllErrors(std::move(E),
+ [&](std::unique_ptr<ErrorInfoBase> EIB) -> Error {
+ Payload = std::move(EIB);
+ return Error::success();
+ });
return Error(
- std::unique_ptr<FileError>(new FileError(F, Line, E.takePayload())));
+ std::unique_ptr<FileError>(new FileError(F, Line, std::move(Payload))));
}
std::string FileName;
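A sketch of surfacing the new getFileName() accessor; the file name and error code are illustrative:

    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    static void reportFileError(std::error_code EC) {
      llvm::Error E =
          llvm::createFileError("profile.data", llvm::errorCodeToError(EC));
      llvm::handleAllErrors(std::move(E), [](llvm::FileError &FE) {
        llvm::errs() << "while reading " << FE.getFileName() << "\n";
      });
    }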
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/FileCheck.h b/contrib/llvm-project/llvm/include/llvm/Support/FileCheck.h
index 2547449246a8..429e36cfcbb5 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/FileCheck.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/FileCheck.h
@@ -71,6 +71,7 @@ class FileCheckType {
public:
FileCheckType(FileCheckKind Kind = CheckNone) : Kind(Kind), Count(1) {}
FileCheckType(const FileCheckType &) = default;
+ FileCheckType &operator=(const FileCheckType &) = default;
operator FileCheckKind() const { return Kind; }
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/FileCollector.h b/contrib/llvm-project/llvm/include/llvm/Support/FileCollector.h
index 19429bd3e9b4..079fe3efab9d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/FileCollector.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/FileCollector.h
@@ -46,7 +46,11 @@ public:
private:
void addFileImpl(StringRef SrcPath);
- bool markAsSeen(StringRef Path) { return Seen.insert(Path).second; }
+ bool markAsSeen(StringRef Path) {
+ if (Path.empty())
+ return false;
+ return Seen.insert(Path).second;
+ }
bool getRealPath(StringRef SrcPath, SmallVectorImpl<char> &Result);
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/FileOutputBuffer.h b/contrib/llvm-project/llvm/include/llvm/Support/FileOutputBuffer.h
index 999f551ebf2d..bdc1425d4361 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/FileOutputBuffer.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/FileOutputBuffer.h
@@ -32,6 +32,10 @@ public:
enum {
/// set the 'x' bit on the resulting file
F_executable = 1,
+
+ /// Don't use mmap and instead write an in-memory buffer to a file when this
+ /// buffer is closed.
+ F_no_mmap = 2,
};
/// Factory method to create an OutputBuffer object which manages a read/write
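A sketch of requesting an in-memory buffer that is only written to disk on commit(); the file name and size are illustrative:

    #include "llvm/Support/FileOutputBuffer.h"
    #include <cstring>

    static llvm::Error writeWithoutMmap() {
      auto BufOrErr = llvm::FileOutputBuffer::create(
          "out.bin", /*Size=*/4096, llvm::FileOutputBuffer::F_no_mmap);
      if (!BufOrErr)
        return BufOrErr.takeError();
      memset((*BufOrErr)->getBufferStart(), 0, (*BufOrErr)->getBufferSize());
      return (*BufOrErr)->commit();
    }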
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/FormatVariadic.h b/contrib/llvm-project/llvm/include/llvm/Support/FormatVariadic.h
index 5bbda9dd626e..86a9d30cc138 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/FormatVariadic.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/FormatVariadic.h
@@ -57,7 +57,7 @@ struct ReplacementItem {
size_t Index = 0;
size_t Align = 0;
AlignStyle Where = AlignStyle::Right;
- char Pad;
+ char Pad = 0;
StringRef Options;
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTree.h
index 9169379f746d..2545a075062a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTree.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/GenericDomTree.h
@@ -778,13 +778,13 @@ protected:
NodeRef NewBBSucc = *GraphT::child_begin(NewBB);
std::vector<NodeRef> PredBlocks;
- for (const auto &Pred : children<Inverse<N>>(NewBB))
+ for (auto Pred : children<Inverse<N>>(NewBB))
PredBlocks.push_back(Pred);
assert(!PredBlocks.empty() && "No predblocks?");
bool NewBBDominatesNewBBSucc = true;
- for (const auto &Pred : children<Inverse<N>>(NewBBSucc)) {
+ for (auto Pred : children<Inverse<N>>(NewBBSucc)) {
if (Pred != NewBB && !dominates(NewBBSucc, Pred) &&
isReachableFromEntry(Pred)) {
NewBBDominatesNewBBSucc = false;
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/InitLLVM.h b/contrib/llvm-project/llvm/include/llvm/Support/InitLLVM.h
index 8069859a3e0b..3be8d6b6d2e0 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/InitLLVM.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/InitLLVM.h
@@ -17,7 +17,8 @@
// the following one-time initializations:
//
// 1. Setting up a signal handler so that pretty stack trace is printed out
-// if a process crashes.
+// if a process crashes. A signal handler that exits when a write to a pipe
+// fails may optionally be installed; this is on by default.
//
// 2. Set up the global new-handler which is called when a memory allocation
// attempt fails.
@@ -32,9 +33,11 @@
namespace llvm {
class InitLLVM {
public:
- InitLLVM(int &Argc, const char **&Argv);
- InitLLVM(int &Argc, char **&Argv)
- : InitLLVM(Argc, const_cast<const char **&>(Argv)) {}
+ InitLLVM(int &Argc, const char **&Argv,
+ bool InstallPipeSignalExitHandler = true);
+ InitLLVM(int &Argc, char **&Argv, bool InstallPipeSignalExitHandler = true)
+ : InitLLVM(Argc, const_cast<const char **&>(Argv),
+ InstallPipeSignalExitHandler) {}
~InitLLVM();
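A sketch of a tool entry point opting out of the new pipe-signal exit handler:

    #include "llvm/Support/InitLLVM.h"

    int main(int argc, char **argv) {
      llvm::InitLLVM X(argc, argv, /*InstallPipeSignalExitHandler=*/false);
      // ... tool body ...
      return 0;
    }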
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/JSON.h b/contrib/llvm-project/llvm/include/llvm/Support/JSON.h
index 0ca41097dddd..2c63468c401a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/JSON.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/JSON.h
@@ -122,6 +122,8 @@ public:
std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
}
+ bool erase(StringRef K);
+ void erase(iterator I) { M.erase(I); }
iterator find(StringRef K) { return M.find_as(K); }
const_iterator find(StringRef K) const { return M.find_as(K); }
@@ -555,6 +557,9 @@ inline Object::Object(std::initializer_list<KV> Properties) {
inline std::pair<Object::iterator, bool> Object::insert(KV E) {
return try_emplace(std::move(E.K), std::move(E.V));
}
+inline bool Object::erase(StringRef K) {
+ return M.erase(ObjectKey(K));
+}
// Standard deserializers are provided for primitive types.
// See comments on Value.
@@ -709,7 +714,7 @@ public:
/// J.attribute("timestamp", int64_t(E.Time));
/// J.attributeArray("participants", [&] {
/// for (const Participant &P : E.Participants)
-/// J.string(P.toString());
+/// J.value(P.toString());
/// });
/// });
/// });
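A sketch of the two new erase overloads; keys are illustrative:

    #include "llvm/Support/JSON.h"

    static void pruneObject() {
      llvm::json::Object O{{"keep", 1}, {"drop", 2}};
      bool Removed = O.erase("drop"); // by key; true if an entry existed
      (void)Removed;
      auto It = O.find("keep");
      if (It != O.end())
        O.erase(It); // by iterator
    }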
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/KnownBits.h b/contrib/llvm-project/llvm/include/llvm/Support/KnownBits.h
index 07fd94e29a1f..ff25b6fc572c 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/KnownBits.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/KnownBits.h
@@ -97,6 +97,9 @@ public:
/// Returns true if this value is known to be non-negative.
bool isNonNegative() const { return Zero.isSignBitSet(); }
+ /// Returns true if this value is known to be positive.
+  bool isStrictlyPositive() const {
+    return Zero.isSignBitSet() && !One.isNullValue();
+  }
+
/// Make this value negative.
void makeNegative() {
One.setSignBit();
@@ -107,6 +110,18 @@ public:
Zero.setSignBit();
}
+ /// Return the minimal value possible given these KnownBits.
+ APInt getMinValue() const {
+ // Assume that all bits that aren't known-ones are zeros.
+ return One;
+ }
+
+ /// Return the maximal value possible given these KnownBits.
+ APInt getMaxValue() const {
+ // Assume that all bits that aren't known-zeros are ones.
+ return ~Zero;
+ }
+
/// Truncate the underlying known Zero and One bits. This is equivalent
/// to truncating the value we're tracking.
KnownBits trunc(unsigned BitWidth) const {
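A sketch of the value range implied by an 8-bit KnownBits where bit 4 is a known one and bit 0 a known zero:

    #include "llvm/Support/KnownBits.h"
    #include <cassert>

    static void knownBitsRange() {
      llvm::KnownBits Known(8);
      Known.One = llvm::APInt(8, 0x10);    // bit 4 known one
      Known.Zero = llvm::APInt(8, 0x01);   // bit 0 known zero
      assert(Known.getMinValue() == 0x10); // unknown bits taken as zero
      assert(Known.getMaxValue() == 0xFE); // unknown bits taken as one
    }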
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/LineIterator.h b/contrib/llvm-project/llvm/include/llvm/Support/LineIterator.h
index c9f10ca975ae..2a1e47bfe5b7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/LineIterator.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/LineIterator.h
@@ -30,16 +30,16 @@ class MemoryBuffer;
/// Note that this iterator requires the buffer to be nul terminated.
class line_iterator
: public std::iterator<std::forward_iterator_tag, StringRef> {
- const MemoryBuffer *Buffer;
- char CommentMarker;
- bool SkipBlanks;
+ const MemoryBuffer *Buffer = nullptr;
+ char CommentMarker = '\0';
+ bool SkipBlanks = true;
- unsigned LineNumber;
+ unsigned LineNumber = 1;
StringRef CurrentLine;
public:
/// Default construct an "end" iterator.
- line_iterator() : Buffer(nullptr) {}
+ line_iterator() = default;
/// Construct a new iterator around some memory buffer.
explicit line_iterator(const MemoryBuffer &Buffer, bool SkipBlanks = true,
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/LockFileManager.h b/contrib/llvm-project/llvm/include/llvm/Support/LockFileManager.h
index 57e4fbd84cd9..2efeca3b6200 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/LockFileManager.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/LockFileManager.h
@@ -77,7 +77,9 @@ public:
operator LockFileState() const { return getState(); }
/// For a shared lock, wait until the owner releases the lock.
- WaitForUnlockResult waitForUnlock();
+ /// Total timeout for the file to appear is ~1.5 minutes.
+ /// \param MaxSeconds the maximum wait time per iteration in seconds.
+ WaitForUnlockResult waitForUnlock(const unsigned MaxSeconds = 40);
/// Remove the lock file. This may delete a different lock file than
/// the one previously read if there is a race.
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/LowLevelTypeImpl.h b/contrib/llvm-project/llvm/include/llvm/Support/LowLevelTypeImpl.h
index 0e02b6e7d750..6ef7c298bc28 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/LowLevelTypeImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/LowLevelTypeImpl.h
@@ -137,6 +137,8 @@ public:
: LLT::scalar(NewEltSize);
}
+ bool isByteSized() const { return (getSizeInBits() & 7) == 0; }
+
unsigned getScalarSizeInBits() const {
assert(RawData != 0 && "Invalid Type");
if (!IsVector) {
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h b/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h
index 7f9f0b85c55e..26b45a602763 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/MachineValueType.h
@@ -671,7 +671,12 @@ namespace llvm {
return { getVectorNumElements(), isScalableVector() };
}
- unsigned getSizeInBits() const {
+ /// Returns the size of the specified MVT in bits.
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getSizeInBits() const {
switch (SimpleTy) {
default:
llvm_unreachable("getSizeInBits called on extended MVT.");
@@ -691,25 +696,25 @@ namespace llvm {
case Metadata:
llvm_unreachable("Value type is metadata.");
case i1:
- case v1i1:
- case nxv1i1: return 1;
- case v2i1:
- case nxv2i1: return 2;
- case v4i1:
- case nxv4i1: return 4;
+ case v1i1: return TypeSize::Fixed(1);
+ case nxv1i1: return TypeSize::Scalable(1);
+ case v2i1: return TypeSize::Fixed(2);
+ case nxv2i1: return TypeSize::Scalable(2);
+ case v4i1: return TypeSize::Fixed(4);
+ case nxv4i1: return TypeSize::Scalable(4);
case i8 :
case v1i8:
- case v8i1:
+ case v8i1: return TypeSize::Fixed(8);
case nxv1i8:
- case nxv8i1: return 8;
+ case nxv8i1: return TypeSize::Scalable(8);
case i16 :
case f16:
case v16i1:
case v2i8:
- case v1i16:
+ case v1i16: return TypeSize::Fixed(16);
case nxv16i1:
case nxv2i8:
- case nxv1i16: return 16;
+ case nxv1i16: return TypeSize::Scalable(16);
case f32 :
case i32 :
case v32i1:
@@ -717,15 +722,15 @@ namespace llvm {
case v2i16:
case v2f16:
case v1f32:
- case v1i32:
+ case v1i32: return TypeSize::Fixed(32);
case nxv32i1:
case nxv4i8:
case nxv2i16:
case nxv1i32:
case nxv2f16:
- case nxv1f32: return 32;
+ case nxv1f32: return TypeSize::Scalable(32);
case v3i16:
- case v3f16: return 48;
+ case v3f16: return TypeSize::Fixed(48);
case x86mmx:
case f64 :
case i64 :
@@ -736,17 +741,17 @@ namespace llvm {
case v1i64:
case v4f16:
case v2f32:
- case v1f64:
+ case v1f64: return TypeSize::Fixed(64);
case nxv8i8:
case nxv4i16:
case nxv2i32:
case nxv1i64:
case nxv4f16:
case nxv2f32:
- case nxv1f64: return 64;
- case f80 : return 80;
+ case nxv1f64: return TypeSize::Scalable(64);
+ case f80 : return TypeSize::Fixed(80);
case v3i32:
- case v3f32: return 96;
+ case v3f32: return TypeSize::Fixed(96);
case f128:
case ppcf128:
case i128:
@@ -758,16 +763,16 @@ namespace llvm {
case v1i128:
case v8f16:
case v4f32:
- case v2f64:
+ case v2f64: return TypeSize::Fixed(128);
case nxv16i8:
case nxv8i16:
case nxv4i32:
case nxv2i64:
case nxv8f16:
case nxv4f32:
- case nxv2f64: return 128;
+ case nxv2f64: return TypeSize::Scalable(128);
case v5i32:
- case v5f32: return 160;
+ case v5f32: return TypeSize::Fixed(160);
case v256i1:
case v32i8:
case v16i16:
@@ -775,13 +780,13 @@ namespace llvm {
case v4i64:
case v16f16:
case v8f32:
- case v4f64:
+ case v4f64: return TypeSize::Fixed(256);
case nxv32i8:
case nxv16i16:
case nxv8i32:
case nxv4i64:
case nxv8f32:
- case nxv4f64: return 256;
+ case nxv4f64: return TypeSize::Scalable(256);
case v512i1:
case v64i8:
case v32i16:
@@ -789,56 +794,71 @@ namespace llvm {
case v8i64:
case v32f16:
case v16f32:
- case v8f64:
+ case v8f64: return TypeSize::Fixed(512);
case nxv32i16:
case nxv16i32:
case nxv8i64:
case nxv16f32:
- case nxv8f64: return 512;
+ case nxv8f64: return TypeSize::Scalable(512);
case v1024i1:
case v128i8:
case v64i16:
case v32i32:
case v16i64:
- case v32f32:
+ case v32f32: return TypeSize::Fixed(1024);
case nxv32i32:
- case nxv16i64: return 1024;
+ case nxv16i64: return TypeSize::Scalable(1024);
case v256i8:
case v128i16:
case v64i32:
case v32i64:
- case v64f32:
- case nxv32i64: return 2048;
+ case v64f32: return TypeSize::Fixed(2048);
+ case nxv32i64: return TypeSize::Scalable(2048);
case v128i32:
- case v128f32: return 4096;
+ case v128f32: return TypeSize::Fixed(4096);
case v256i32:
- case v256f32: return 8192;
+ case v256f32: return TypeSize::Fixed(8192);
case v512i32:
- case v512f32: return 16384;
+ case v512f32: return TypeSize::Fixed(16384);
case v1024i32:
- case v1024f32: return 32768;
+ case v1024f32: return TypeSize::Fixed(32768);
case v2048i32:
- case v2048f32: return 65536;
- case exnref: return 0; // opaque type
+ case v2048f32: return TypeSize::Fixed(65536);
+ case exnref: return TypeSize::Fixed(0); // opaque type
}
}
- unsigned getScalarSizeInBits() const {
+ TypeSize getScalarSizeInBits() const {
return getScalarType().getSizeInBits();
}
/// Return the number of bytes overwritten by a store of the specified value
/// type.
- unsigned getStoreSize() const {
- return (getSizeInBits() + 7) / 8;
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getStoreSize() const {
+ TypeSize BaseSize = getSizeInBits();
+ return {(BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable()};
}
/// Return the number of bits overwritten by a store of the specified value
/// type.
- unsigned getStoreSizeInBits() const {
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getStoreSizeInBits() const {
return getStoreSize() * 8;
}
+ /// Returns true if the number of bits for the type is a multiple of an
+ /// 8-bit byte.
+ bool isByteSized() const {
+ return getSizeInBits().isByteSized();
+ }
+
/// Return true if this has more bits than VT.
bool bitsGT(MVT VT) const {
return getSizeInBits() > VT.getSizeInBits();
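A sketch of size queries on fixed versus scalable vector types after the TypeSize change:

    #include "llvm/Support/MachineValueType.h"
    #include <cassert>

    static void mvtSizes() {
      llvm::MVT Fixed = llvm::MVT::v4i32;
      llvm::MVT Scalable = llvm::MVT::nxv4i32;
      assert(Fixed.getSizeInBits() == 128); // fixed sizes convert to uint64_t
      llvm::TypeSize TS = Scalable.getSizeInBits();
      assert(TS.isScalable() && TS.getKnownMinSize() == 128);
      assert(Scalable.getStoreSize().getKnownMinSize() == 16); // in bytes
    }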
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/MathExtras.h b/contrib/llvm-project/llvm/include/llvm/Support/MathExtras.h
index 004a6f5f6eb8..37b9669cbeed 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/MathExtras.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/MathExtras.h
@@ -732,6 +732,11 @@ inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
return alignTo(Numerator, Denominator) / Denominator;
}
+/// Returns the integer nearest to (Numerator / Denominator).
+inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
+ return (Numerator + (Denominator / 2)) / Denominator;
+}
+
/// Returns the largest uint64_t less than or equal to \p Value and is
/// \p Skew mod \p Align. \p Align must be non-zero
inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
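A sketch of the rounding behavior; with the +(Denominator / 2) bias, ties round up:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    static void divideNearestSketch() {
      assert(llvm::divideNearest(7, 3) == 2); // 2.33 -> 2
      assert(llvm::divideNearest(8, 3) == 3); // 2.67 -> 3
      assert(llvm::divideNearest(9, 2) == 5); // 4.5 rounds up
    }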
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Memory.h b/contrib/llvm-project/llvm/include/llvm/Support/Memory.h
index 6f22dd7080cd..c0454223b2fd 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Memory.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Memory.h
@@ -57,6 +57,17 @@ namespace sys {
MF_WRITE = 0x2000000,
MF_EXEC = 0x4000000,
MF_RWE_MASK = 0x7000000,
+
+ /// The \p MF_HUGE_HINT flag is used to indicate that the request for
+ /// a memory block should be satisfied with large pages if possible.
+  /// This is only a hint, and small pages will be used as a fallback.
+ ///
+ /// The presence or absence of this flag in the returned memory block
+ /// is (at least currently) *not* a reliable indicator that the memory
+ /// block will use or will not use large pages. On some systems a request
+ /// without this flag can be backed by large pages without this flag being
+  /// set, and on some other systems a request with this flag can fall back
+ /// to small pages without this flag being cleared.
MF_HUGE_HINT = 0x0000001
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Options.h b/contrib/llvm-project/llvm/include/llvm/Support/Options.h
deleted file mode 100644
index d02ef85a75bf..000000000000
--- a/contrib/llvm-project/llvm/include/llvm/Support/Options.h
+++ /dev/null
@@ -1,119 +0,0 @@
-//===- llvm/Support/Options.h - Debug options support -----------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file declares helper objects for defining debug options that can be
-/// configured via the command line. The new API currently builds on the cl::opt
-/// API, but does not require the use of static globals.
-///
-/// With this API options are registered during initialization. For passes, this
-/// happens during pass initialization. Passes with options will call a static
-/// registerOptions method during initialization that registers options with the
-/// OptionRegistry. An example implementation of registerOptions is:
-///
-/// static void registerOptions() {
-/// OptionRegistry::registerOption<bool, Scalarizer,
-/// &Scalarizer::ScalarizeLoadStore>(
-/// "scalarize-load-store",
-/// "Allow the scalarizer pass to scalarize loads and store", false);
-/// }
-///
-/// When reading data for options the interface is via the LLVMContext. Option
-/// data for passes should be read from the context during doInitialization. An
-/// example of reading the above option would be:
-///
-/// ScalarizeLoadStore =
-/// M.getContext().getOption<bool,
-/// Scalarizer,
-/// &Scalarizer::ScalarizeLoadStore>();
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_OPTIONS_H
-#define LLVM_SUPPORT_OPTIONS_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/CommandLine.h"
-
-namespace llvm {
-
-namespace detail {
-
-// Options are keyed of the unique address of a static character synthesized
-// based on template arguments.
-template <typename ValT, typename Base, ValT(Base::*Mem)> class OptionKey {
-public:
- static char ID;
-};
-
-template <typename ValT, typename Base, ValT(Base::*Mem)>
-char OptionKey<ValT, Base, Mem>::ID = 0;
-
-} // namespace detail
-
-/// Singleton class used to register debug options.
-///
-/// The OptionRegistry is responsible for managing lifetimes of the options and
-/// provides interfaces for option registration and reading values from options.
-/// This object is a singleton, only one instance should ever exist so that all
-/// options are registered in the same place.
-class OptionRegistry {
-private:
- DenseMap<void *, cl::Option *> Options;
-
- /// Adds a cl::Option to the registry.
- ///
- /// \param Key unique key for option
- /// \param O option to map to \p Key
- ///
- /// Allocated cl::Options are owned by the OptionRegistry and are deallocated
- /// on destruction or removal
- void addOption(void *Key, cl::Option *O);
-
-public:
- ~OptionRegistry();
- OptionRegistry() {}
-
- /// Returns a reference to the singleton instance.
- static OptionRegistry &instance();
-
- /// Registers an option with the OptionRegistry singleton.
- ///
- /// \tparam ValT type of the option's data
- /// \tparam Base class used to key the option
- /// \tparam Mem member of \p Base used for keying the option
- ///
- /// Options are keyed off the template parameters to generate unique static
- /// characters. The template parameters are (1) the type of the data the
- /// option stores (\p ValT), the class that will read the option (\p Base),
- /// and the member that the class will store the data into (\p Mem).
- template <typename ValT, typename Base, ValT(Base::*Mem)>
- static void registerOption(StringRef ArgStr, StringRef Desc,
- const ValT &InitValue) {
- cl::opt<ValT> *Option = new cl::opt<ValT>(ArgStr, cl::desc(Desc),
- cl::Hidden, cl::init(InitValue));
- instance().addOption(&detail::OptionKey<ValT, Base, Mem>::ID, Option);
- }
-
- /// Returns the value of the option.
- ///
- /// \tparam ValT type of the option's data
- /// \tparam Base class used to key the option
- /// \tparam Mem member of \p Base used for keying the option
- ///
- /// Reads option values based on the key generated by the template parameters.
- /// Keying for get() is the same as keying for registerOption.
- template <typename ValT, typename Base, ValT(Base::*Mem)> ValT get() const {
- auto It = Options.find(&detail::OptionKey<ValT, Base, Mem>::ID);
- assert(It != Options.end() && "Option not in OptionRegistry");
- return *(cl::opt<ValT> *)It->second;
- }
-};
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Path.h b/contrib/llvm-project/llvm/include/llvm/Support/Path.h
index 5c0bee58f188..97955f882d51 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Path.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Path.h
@@ -52,10 +52,10 @@ enum class Style { windows, posix, native };
class const_iterator
: public iterator_facade_base<const_iterator, std::input_iterator_tag,
const StringRef> {
- StringRef Path; ///< The entire path.
- StringRef Component; ///< The current component. Not necessarily in Path.
- size_t Position; ///< The iterators current position within Path.
- Style S; ///< The path style to use.
+ StringRef Path; ///< The entire path.
+ StringRef Component; ///< The current component. Not necessarily in Path.
+  size_t Position = 0;     ///< The iterator's current position within Path.
+ Style S = Style::native; ///< The path style to use.
// An end iterator has Position = Path.size() + 1.
friend const_iterator begin(StringRef path, Style style);
@@ -78,10 +78,10 @@ public:
class reverse_iterator
: public iterator_facade_base<reverse_iterator, std::input_iterator_tag,
const StringRef> {
- StringRef Path; ///< The entire path.
- StringRef Component; ///< The current component. Not necessarily in Path.
- size_t Position; ///< The iterators current position within Path.
- Style S; ///< The path style to use.
+ StringRef Path; ///< The entire path.
+ StringRef Component; ///< The current component. Not necessarily in Path.
+  size_t Position = 0;     ///< The iterator's current position within Path.
+ Style S = Style::native; ///< The path style to use.
friend reverse_iterator rbegin(StringRef path, Style style);
friend reverse_iterator rend(StringRef path);
@@ -121,6 +121,8 @@ reverse_iterator rend(StringRef path);
/// Remove the last component from \a path unless it is the root dir.
///
+/// Similar to the POSIX "dirname" utility.
+///
/// @code
/// directory/filename.cpp => directory/
/// directory/ => directory
@@ -150,18 +152,33 @@ void replace_extension(SmallVectorImpl<char> &path, const Twine &extension,
///
/// @code
/// /foo, /old, /new => /foo
+/// /old, /old, /new => /new
+/// /old, /old/, /new, false => /old
+/// /old, /old/, /new, true => /new
/// /old/foo, /old, /new => /new/foo
+/// /old/foo, /old/, /new => /new/foo
+/// /old/foo, /old/, /new/ => /new/foo
+/// /oldfoo, /old, /new => /oldfoo
/// /foo, <empty>, /new => /new/foo
-/// /old/foo, /old, <empty> => /foo
+/// /foo, <empty>, new => new/foo
+/// /old/foo, /old, <empty>, false => /foo
+/// /old/foo, /old, <empty>, true => foo
/// @endcode
///
/// @param Path If \a Path starts with \a OldPrefix modify to instead
/// start with \a NewPrefix.
-/// @param OldPrefix The path prefix to strip from \a Path.
+/// @param OldPrefix The path prefix to strip from \a Path. Any trailing
+/// path separator is ignored if strict is true.
/// @param NewPrefix The path prefix to replace \a OldPrefix with.
-void replace_path_prefix(SmallVectorImpl<char> &Path,
+/// @param style The path separator style
+/// @param strict If strict is true, a directory separator following
+/// \a OldPrefix will also be stripped. Otherwise, directory
+/// separators will only be matched and stripped when present
+/// in \a OldPrefix.
+/// @result true if \a Path begins with OldPrefix
+bool replace_path_prefix(SmallVectorImpl<char> &Path,
const StringRef &OldPrefix, const StringRef &NewPrefix,
- Style style = Style::native);
+ Style style = Style::native, bool strict = false);
/// Append to path.
///
@@ -295,7 +312,7 @@ StringRef parent_path(StringRef path, Style style = Style::native);
///
/// @param path Input path.
/// @result The filename part of \a path. This is defined as the last component
-/// of \a path.
+/// of \a path. Similar to the POSIX "basename" utility.
StringRef filename(StringRef path, Style style = Style::native);
/// Get stem.
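A sketch of the new bool result and strict mode, following the table above:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"
    #include <cassert>

    static void prefixSketch() {
      llvm::SmallString<64> P("/old/foo");
      bool Matched = llvm::sys::path::replace_path_prefix(P, "/old", "/new");
      assert(Matched && P.str() == "/new/foo");

      llvm::SmallString<64> Q("/old");
      // Strict mode lets "/old" match OldPrefix "/old/" (trailing separator
      // ignored), as in the "/old, /old/, /new, true => /new" row above.
      llvm::sys::path::replace_path_prefix(Q, "/old/", "/new",
                                           llvm::sys::path::Style::posix,
                                           /*strict=*/true);
      assert(Q.str() == "/new");
    }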
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Signals.h b/contrib/llvm-project/llvm/include/llvm/Support/Signals.h
index a4f1fad22dd5..e0a18e72f2a7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Signals.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Signals.h
@@ -85,16 +85,36 @@ namespace sys {
/// thread on some platforms.
void SetInfoSignalFunction(void (*Handler)());
- /// Registers a function to be called when a "pipe" signal is delivered to
- /// the process.
+ /// Registers a function to be called in a "one-shot" manner when a pipe
+ /// signal is delivered to the process (i.e., on a failed write to a pipe).
+ /// After the pipe signal is handled once, the handler is unregistered.
///
- /// The "pipe" signal typically indicates a failed write to a pipe (SIGPIPE).
- /// The default installed handler calls `exit(EX_IOERR)`, causing the process
- /// to immediately exit with an IO error exit code.
+ /// The LLVM signal handling code will not install any handler for the pipe
+ /// signal unless one is provided with this API (see \ref
+ /// DefaultOneShotPipeSignalHandler). This handler must be provided before
+ /// any other LLVM signal handlers are installed: the \ref InitLLVM
+ /// constructor has a flag that can simplify this setup.
///
- /// This function is only applicable on POSIX systems.
- void SetPipeSignalFunction(void (*Handler)());
-
+ /// Note that the handler is not allowed to call any non-reentrant
+ /// functions. A null handler pointer disables the currently installed
+ /// function. Note also that the handler may be executed on a
+ /// different thread on some platforms.
+ ///
+ /// This is a no-op on Windows.
+ void SetOneShotPipeSignalFunction(void (*Handler)());
+
+ /// On Unix systems, this function exits with an "IO error" exit code.
+ /// This is a no-op on Windows.
+ void DefaultOneShotPipeSignalHandler();
+
+ /// This function does the following:
+ /// - clean up any temporary files registered with RemoveFileOnSignal()
+ /// - dump the callstack from the exception context
+ /// - call any relevant interrupt/signal handlers
+ /// - create a core/mini dump of the exception context whenever possible
+ /// Context is a system-specific failure context: it is the signal type on
+ /// Unix; the ExceptionContext on Windows.
+ void CleanupOnSignal(uintptr_t Context);
} // End sys namespace
} // End llvm namespace
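
[Editor's sketch, not part of this patch: wiring up the new one-shot SIGPIPE
handling early in a tool's main(), as the comments above require.]

    #include "llvm/Support/Signals.h"

    int main() {
      // Install the default handler (exits with an IO error code on Unix,
      // no-op on Windows) before any other LLVM signal handlers.
      llvm::sys::SetOneShotPipeSignalFunction(
          llvm::sys::DefaultOneShotPipeSignalHandler);
      return 0;
    }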
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/SourceMgr.h b/contrib/llvm-project/llvm/include/llvm/Support/SourceMgr.h
index aa6026c23d07..1b005519e5d4 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/SourceMgr.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/SourceMgr.h
@@ -61,10 +61,10 @@ private:
/// into relatively small files (often smaller than 2^8 or 2^16 bytes),
/// we select the offset vector element type dynamically based on the
/// size of Buffer.
- using VariableSizeOffsets = PointerUnion4<std::vector<uint8_t> *,
- std::vector<uint16_t> *,
- std::vector<uint32_t> *,
- std::vector<uint64_t> *>;
+ using VariableSizeOffsets = PointerUnion<std::vector<uint8_t> *,
+ std::vector<uint16_t> *,
+ std::vector<uint32_t> *,
+ std::vector<uint64_t> *>;
/// Vector of offsets into Buffer at which there are line-endings
/// (lazily populated). Once populated, the '\n' that marks the end of
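
[Editor's sketch, not part of this patch: the fixed-arity PointerUnion4 is
gone because PointerUnion is now variadic, so any number of pointer types can
be listed directly. `demo` is a placeholder name.]

    #include "llvm/ADT/PointerUnion.h"

    void demo(int *IP) {
      llvm::PointerUnion<int *, float *, long *> PU = IP;
      bool IsInt = PU.is<int *>(); // true: PU currently holds an int*
      (void)IsInt;
    }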
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/SpecialCaseList.h b/contrib/llvm-project/llvm/include/llvm/Support/SpecialCaseList.h
index b7400266f4df..5b5b7f6124d6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/SpecialCaseList.h
@@ -55,6 +55,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/TrigramIndex.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include <string>
#include <vector>
@@ -68,7 +69,8 @@ public:
/// Parses the special case list entries from files. On failure, returns
/// 0 and writes an error message to string.
static std::unique_ptr<SpecialCaseList>
- create(const std::vector<std::string> &Paths, std::string &Error);
+ create(const std::vector<std::string> &Paths, llvm::vfs::FileSystem &FS,
+ std::string &Error);
/// Parses the special case list from a memory buffer. On failure, returns
/// 0 and writes an error message to string.
static std::unique_ptr<SpecialCaseList> create(const MemoryBuffer *MB,
@@ -76,7 +78,7 @@ public:
/// Parses the special case list entries from files. On failure, reports a
/// fatal error.
static std::unique_ptr<SpecialCaseList>
- createOrDie(const std::vector<std::string> &Paths);
+ createOrDie(const std::vector<std::string> &Paths, llvm::vfs::FileSystem &FS);
~SpecialCaseList();
@@ -103,7 +105,7 @@ protected:
// Implementations of the create*() functions that can also be used by derived
// classes.
bool createInternal(const std::vector<std::string> &Paths,
- std::string &Error);
+ vfs::FileSystem &VFS, std::string &Error);
bool createInternal(const MemoryBuffer *MB, std::string &Error);
SpecialCaseList() = default;
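
[Editor's sketch, not part of this patch: callers now supply a
vfs::FileSystem; passing the real file system preserves the old behavior.
"ignorelist.txt" is a hypothetical path.]

    #include "llvm/Support/SpecialCaseList.h"
    #include "llvm/Support/VirtualFileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    void demo() {
      std::string Error;
      std::vector<std::string> Paths = {"ignorelist.txt"};
      auto SCL = llvm::SpecialCaseList::create(
          Paths, *llvm::vfs::getRealFileSystem(), Error);
      if (!SCL)
        llvm::errs() << "parse failed: " << Error << "\n";
    }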
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/TargetOpcodes.def b/contrib/llvm-project/llvm/include/llvm/Support/TargetOpcodes.def
index 11731ac35415..e004550059d4 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/contrib/llvm-project/llvm/include/llvm/Support/TargetOpcodes.def
@@ -285,6 +285,9 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC_TRUNC)
/// INTRINSIC round intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUND)
+/// INTRINSIC readcyclecounter
+HANDLE_TARGET_OPCODE(G_READCYCLECOUNTER)
+
/// Generic load (including anyext load)
HANDLE_TARGET_OPCODE(G_LOAD)
@@ -524,7 +527,7 @@ HANDLE_TARGET_OPCODE(G_FMINIMUM)
HANDLE_TARGET_OPCODE(G_FMAXIMUM)
/// Generic pointer offset
-HANDLE_TARGET_OPCODE(G_GEP)
+HANDLE_TARGET_OPCODE(G_PTR_ADD)
/// Clear the specified number of low bits in a pointer. This rounds the value
/// *down* to the given alignment.
@@ -611,12 +614,16 @@ HANDLE_TARGET_OPCODE(G_JUMP_TABLE)
/// Generic dynamic stack allocation.
HANDLE_TARGET_OPCODE(G_DYN_STACKALLOC)
-// TODO: Add more generic opcodes as we move along.
+/// read_register intrinsic
+HANDLE_TARGET_OPCODE(G_READ_REGISTER)
+
+/// write_register intrinsic
+HANDLE_TARGET_OPCODE(G_WRITE_REGISTER)
/// Marker for the end of the generic opcode.
/// This is used to check if an opcode is in the range of the
/// generic opcodes.
-HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_DYN_STACKALLOC)
+HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_WRITE_REGISTER)
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific post-isel opcode values start here.
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/TargetRegistry.h b/contrib/llvm-project/llvm/include/llvm/Support/TargetRegistry.h
index f4bc26b858c8..d91eabae8235 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/TargetRegistry.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/TargetRegistry.h
@@ -128,7 +128,8 @@ public:
using ArchMatchFnTy = bool (*)(Triple::ArchType Arch);
using MCAsmInfoCtorFnTy = MCAsmInfo *(*)(const MCRegisterInfo &MRI,
- const Triple &TT);
+ const Triple &TT,
+ const MCTargetOptions &Options);
using MCInstrInfoCtorFnTy = MCInstrInfo *(*)();
using MCInstrAnalysisCtorFnTy = MCInstrAnalysis *(*)(const MCInstrInfo *Info);
using MCRegInfoCtorFnTy = MCRegisterInfo *(*)(const Triple &TT);
@@ -335,11 +336,11 @@ public:
/// feature set; it should always be provided. Generally this should be
/// either the target triple from the module, or the target triple of the
/// host if that does not exist.
- MCAsmInfo *createMCAsmInfo(const MCRegisterInfo &MRI,
- StringRef TheTriple) const {
+ MCAsmInfo *createMCAsmInfo(const MCRegisterInfo &MRI, StringRef TheTriple,
+ const MCTargetOptions &Options) const {
if (!MCAsmInfoCtorFn)
return nullptr;
- return MCAsmInfoCtorFn(MRI, Triple(TheTriple));
+ return MCAsmInfoCtorFn(MRI, Triple(TheTriple), Options);
}
/// createMCInstrInfo - Create a MCInstrInfo implementation.
@@ -473,7 +474,7 @@ public:
const MCSubtargetInfo &STI, bool RelaxAll,
bool IncrementalLinkerCompatible,
bool DWARFMustBeAtTheEnd) const {
- MCStreamer *S;
+ MCStreamer *S = nullptr;
switch (T.getObjectFormat()) {
case Triple::UnknownObjectFormat:
llvm_unreachable("Unknown object format");
@@ -948,9 +949,9 @@ template <class MCAsmInfoImpl> struct RegisterMCAsmInfo {
}
private:
- static MCAsmInfo *Allocator(const MCRegisterInfo & /*MRI*/,
- const Triple &TT) {
- return new MCAsmInfoImpl(TT);
+ static MCAsmInfo *Allocator(const MCRegisterInfo & /*MRI*/, const Triple &TT,
+ const MCTargetOptions &Options) {
+ return new MCAsmInfoImpl(TT, Options);
}
};
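
[Editor's sketch, not part of this patch: createMCAsmInfo callers must now
thread MCTargetOptions through. The Target and Triple are assumed to come
from an earlier TargetRegistry::lookupTarget call with targets initialized.]

    #include "llvm/ADT/Triple.h"
    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/MC/MCTargetOptions.h"
    #include "llvm/Support/TargetRegistry.h"
    #include <memory>

    void demo(const llvm::Target &T, const llvm::Triple &TT) {
      std::unique_ptr<llvm::MCRegisterInfo> MRI(T.createMCRegInfo(TT.str()));
      llvm::MCTargetOptions Options; // new: forwarded to the MCAsmInfo ctor
      std::unique_ptr<llvm::MCAsmInfo> MAI(
          T.createMCAsmInfo(*MRI, TT.str(), Options));
    }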
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Threading.h b/contrib/llvm-project/llvm/include/llvm/Support/Threading.h
index 46d413dc487b..bacab8fa23b6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Threading.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Threading.h
@@ -14,6 +14,7 @@
#ifndef LLVM_SUPPORT_THREADING_H
#define LLVM_SUPPORT_THREADING_H
+#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
#include "llvm/Support/Compiler.h"
@@ -52,9 +53,8 @@ class Twine;
/// false otherwise.
bool llvm_is_multithreaded();
-/// llvm_execute_on_thread - Execute the given \p UserFn on a separate
-/// thread, passing it the provided \p UserData and waits for thread
-/// completion.
+/// Execute the given \p UserFn on a separate thread, passing it the provided
+/// \p UserData, and wait for thread completion.
///
/// This function does not guarantee that the code will actually be executed
/// on a separate thread or honoring the requested stack size, but tries to do
@@ -62,10 +62,26 @@ bool llvm_is_multithreaded();
///
/// \param UserFn - The callback to execute.
/// \param UserData - An argument to pass to the callback function.
-/// \param RequestedStackSize - If non-zero, a requested size (in bytes) for
-/// the thread stack.
-void llvm_execute_on_thread(void (*UserFn)(void *), void *UserData,
- unsigned RequestedStackSize = 0);
+/// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
+/// (or None for default)
+void llvm_execute_on_thread(
+ void (*UserFn)(void *), void *UserData,
+ llvm::Optional<unsigned> StackSizeInBytes = llvm::None);
+
+/// Schedule the given \p Func for execution on a separate thread, then return
+/// to the caller immediately. Roughly equivalent to
+/// `std::thread(Func).detach()`, except it allows requesting a specific stack
+/// size, if supported for the platform.
+///
+/// This function will report a fatal error if it can't execute the code
+/// on a separate thread.
+///
+/// \param Func - The callback to execute.
+/// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
+/// (or None for default)
+void llvm_execute_on_thread_async(
+ llvm::unique_function<void()> Func,
+ llvm::Optional<unsigned> StackSizeInBytes = llvm::None);
#if LLVM_THREADING_USE_STD_CALL_ONCE
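
[Editor's sketch, not part of this patch: the async variant detaches, and the
stack size is now an Optional rather than a 0-means-default unsigned.]

    #include "llvm/Support/Threading.h"

    void demo() {
      llvm::llvm_execute_on_thread_async(
          []() { /* background work */ },
          /*StackSizeInBytes=*/8u << 20); // request 8 MiB where supported
    }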
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/TimeProfiler.h b/contrib/llvm-project/llvm/include/llvm/Support/TimeProfiler.h
index 8cc430d0bc72..678f8c136811 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/TimeProfiler.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/TimeProfiler.h
@@ -19,7 +19,8 @@ extern TimeTraceProfiler *TimeTraceProfilerInstance;
/// Initialize the time trace profiler.
/// This sets up the global \p TimeTraceProfilerInstance
/// variable to be the profiler instance.
-void timeTraceProfilerInitialize(unsigned TimeTraceGranularity);
+void timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
+ StringRef ProcName);
/// Cleanup the time trace profiler, if it was initialized.
void timeTraceProfilerCleanup();
@@ -57,6 +58,10 @@ struct TimeTraceScope {
TimeTraceScope(TimeTraceScope &&) = delete;
TimeTraceScope &operator=(TimeTraceScope &&) = delete;
+ TimeTraceScope(StringRef Name) {
+ if (TimeTraceProfilerInstance != nullptr)
+ timeTraceProfilerBegin(Name, StringRef(""));
+ }
TimeTraceScope(StringRef Name, StringRef Detail) {
if (TimeTraceProfilerInstance != nullptr)
timeTraceProfilerBegin(Name, Detail);
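
[Editor's sketch, not part of this patch: the new overloads in use. "mytool"
and the scope name are arbitrary example strings.]

    #include "llvm/Support/TimeProfiler.h"

    void demo() {
      llvm::timeTraceProfilerInitialize(/*TimeTraceGranularity=*/500,
                                        /*ProcName=*/"mytool");
      {
        llvm::TimeTraceScope Scope("demo"); // Detail argument now optional
      }
      llvm::timeTraceProfilerCleanup();
    }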
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Timer.h b/contrib/llvm-project/llvm/include/llvm/Support/Timer.h
index 76c9bc7b6863..a298ecd90404 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Timer.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Timer.h
@@ -78,18 +78,18 @@ class Timer {
TimeRecord StartTime; ///< The time startTimer() was last called.
std::string Name; ///< The name of this time variable.
std::string Description; ///< Description of this time variable.
- bool Running; ///< Is the timer currently running?
- bool Triggered; ///< Has the timer ever been triggered?
+ bool Running = false; ///< Is the timer currently running?
+ bool Triggered = false; ///< Has the timer ever been triggered?
TimerGroup *TG = nullptr; ///< The TimerGroup this Timer is in.
- Timer **Prev; ///< Pointer to \p Next of previous timer in group.
- Timer *Next; ///< Next timer in the group.
+ Timer **Prev = nullptr; ///< Pointer to \p Next of previous timer in group.
+ Timer *Next = nullptr; ///< Next timer in the group.
public:
- explicit Timer(StringRef Name, StringRef Description) {
- init(Name, Description);
+ explicit Timer(StringRef TimerName, StringRef TimerDescription) {
+ init(TimerName, TimerDescription);
}
- Timer(StringRef Name, StringRef Description, TimerGroup &tg) {
- init(Name, Description, tg);
+ Timer(StringRef TimerName, StringRef TimerDescription, TimerGroup &tg) {
+ init(TimerName, TimerDescription, tg);
}
Timer(const Timer &RHS) {
assert(!RHS.TG && "Can only copy uninitialized timers");
@@ -102,8 +102,8 @@ public:
/// Create an uninitialized timer, client must use 'init'.
explicit Timer() {}
- void init(StringRef Name, StringRef Description);
- void init(StringRef Name, StringRef Description, TimerGroup &tg);
+ void init(StringRef TimerName, StringRef TimerDescription);
+ void init(StringRef TimerName, StringRef TimerDescription, TimerGroup &tg);
const std::string &getName() const { return Name; }
const std::string &getDescription() const { return Description; }
@@ -174,6 +174,7 @@ class TimerGroup {
std::string Description;
PrintRecord(const PrintRecord &Other) = default;
+ PrintRecord &operator=(const PrintRecord &Other) = default;
PrintRecord(const TimeRecord &Time, const std::string &Name,
const std::string &Description)
: Time(Time), Name(Name), Description(Description) {}
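
[Editor's sketch, not part of this patch: usage is unchanged by the rename;
the constructor parameters were renamed only to stop shadowing the
Name/Description members.]

    #include "llvm/Support/Timer.h"

    void demo() {
      llvm::Timer T("demo", "Demonstration timer");
      T.startTimer();
      // ... timed work ...
      T.stopTimer();
    }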
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/TypeSize.h b/contrib/llvm-project/llvm/include/llvm/Support/TypeSize.h
index 711679cdcacb..7ea651f0f22c 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/TypeSize.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/TypeSize.h
@@ -138,6 +138,11 @@ public:
return IsScalable;
}
+ // Returns true if the number of bits is a multiple of an 8-bit byte.
+ bool isByteSized() const {
+ return (MinSize & 7) == 0;
+ }
+
// Casts to a uint64_t if this is a fixed-width size.
//
// NOTE: This interface is obsolete and will be removed in a future version
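
[Editor's sketch, not part of this patch: the new query in use, assuming the
usual TypeSize::Fixed factory.]

    #include "llvm/Support/TypeSize.h"

    void demo() {
      llvm::TypeSize TS = llvm::TypeSize::Fixed(24);
      bool B = TS.isByteSized(); // true: 24 bits is a whole number of bytes
      (void)B;
    }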
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/VersionTuple.h b/contrib/llvm-project/llvm/include/llvm/Support/VersionTuple.h
index 14736d6b28f0..f3eeea2f7b44 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/VersionTuple.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/VersionTuple.h
@@ -87,6 +87,13 @@ public:
return Build;
}
+ /// Return a version tuple that contains only the first 3 version components.
+ VersionTuple withoutBuild() const {
+ if (HasBuild)
+ return VersionTuple(Major, Minor, Subminor);
+ return *this;
+ }
+
/// Determine if two version numbers are equivalent. If not
/// provided, minor and subminor version numbers are considered to be zero.
friend bool operator==(const VersionTuple &X, const VersionTuple &Y) {
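
[Editor's sketch, not part of this patch: dropping the build component.]

    #include "llvm/Support/VersionTuple.h"

    void demo() {
      llvm::VersionTuple V(10, 0, 1, 4);            // major.minor.subminor.build
      llvm::VersionTuple Release = V.withoutBuild(); // 10.0.1
      (void)Release;
    }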
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h b/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h
index c844d9d194f0..e45e6e756786 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -126,7 +126,7 @@ public:
/// Only information available on most platforms is included.
class directory_entry {
std::string Path;
- llvm::sys::fs::file_type Type;
+ llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::type_unknown;
public:
directory_entry() = default;
@@ -293,7 +293,7 @@ public:
/// \param Path A path that is modified to be an absolute path.
/// \returns success if \a path has been made absolute, otherwise a
/// platform-specific error_code.
- std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const;
+ virtual std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const;
};
/// Gets an \p vfs::FileSystem for the 'real' file system, as seen by
@@ -532,7 +532,7 @@ class RedirectingFileSystemParser;
/// \endverbatim
///
/// All configuration options are optional.
-/// 'case-sensitive': <boolean, default=true>
+/// 'case-sensitive': <boolean, default=(true for Posix, false for Windows)>
/// 'use-external-names': <boolean, default=true>
/// 'overlay-relative': <boolean, default=false>
/// 'fallthrough': <boolean, default=true>
@@ -651,6 +651,17 @@ private:
return ExternalFSValidWD && IsFallthrough;
}
+ // In a RedirectingFileSystem, keys can be specified in Posix or Windows
+ // style (or even a mixture of both), so this comparison helper allows
+ // slashes (representing a root) to match backslashes (and vice versa). Note
+// that, other than the root, path components should not contain slashes or
+ // backslashes.
+ bool pathComponentMatches(llvm::StringRef lhs, llvm::StringRef rhs) const {
+ if ((CaseSensitive ? lhs.equals(rhs) : lhs.equals_lower(rhs)))
+ return true;
+ return (lhs == "/" && rhs == "\\") || (lhs == "\\" && rhs == "/");
+ }
+
/// The root(s) of the virtual file system.
std::vector<std::unique_ptr<Entry>> Roots;
@@ -674,7 +685,12 @@ private:
/// Whether to perform case-sensitive comparisons.
///
/// Currently, case-insensitive matching only works correctly with ASCII.
- bool CaseSensitive = true;
+ bool CaseSensitive =
+#ifdef _WIN32
+ false;
+#else
+ true;
+#endif
/// IsRelativeOverlay marks whether a ExternalContentsPrefixDir path must
/// be prefixed in every 'external-contents' when reading from YAML files.
@@ -733,6 +749,8 @@ public:
std::error_code isLocal(const Twine &Path, bool &Result) override;
+ std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const override;
+
directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
void setExternalContentsPrefixDir(StringRef PrefixDir);
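
[Editor's sketch, not part of this patch: because makeAbsolute is now
virtual, calling it through the base class lets a RedirectingFileSystem
resolve against its own working directory; the real FS keeps the old
behavior.]

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/VirtualFileSystem.h"

    void demo() {
      llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS =
          llvm::vfs::getRealFileSystem();
      llvm::SmallString<128> P("relative/file.txt");
      if (std::error_code EC = FS->makeAbsolute(P))
        return; // platform-specific error
    }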
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/YAMLTraits.h b/contrib/llvm-project/llvm/include/llvm/Support/YAMLTraits.h
index a3bfa7dc4678..8642069ad540 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/YAMLTraits.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/YAMLTraits.h
@@ -1531,7 +1531,7 @@ private:
document_iterator DocIterator;
std::vector<bool> BitValuesUsed;
HNode *CurrentNode = nullptr;
- bool ScalarMatchFound;
+ bool ScalarMatchFound = false;
};
///
@@ -2035,4 +2035,9 @@ template <typename T> struct StdMapStringCustomMappingTraitsImpl {
} \
}
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex64)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex32)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex16)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex8)
+
#endif // LLVM_SUPPORT_YAMLTRAITS_H
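
[Editor's sketch, not part of this patch: with the new declarations, vectors
of the Hex scalar types serialize as YAML flow sequences.]

    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>

    void demo() {
      std::vector<llvm::yaml::Hex32> Words = {0xDEADBEEF, 0x0};
      llvm::yaml::Output YOut(llvm::outs());
      YOut << Words; // emits a flow sequence like [ 0xDEADBEEF, 0x0 ]
    }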
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/raw_ostream.h b/contrib/llvm-project/llvm/include/llvm/Support/raw_ostream.h
index 0debc5da7a68..c8770c337588 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/raw_ostream.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/raw_ostream.h
@@ -21,6 +21,7 @@
#include <cstring>
#include <string>
#include <system_error>
+#include <type_traits>
namespace llvm {
@@ -64,7 +65,7 @@ private:
/// this buffer.
char *OutBufStart, *OutBufEnd, *OutBufCur;
- enum BufferKind {
+ enum class BufferKind {
Unbuffered = 0,
InternalBuffer,
ExternalBuffer
@@ -97,7 +98,8 @@ public:
static const Colors RESET = Colors::RESET;
explicit raw_ostream(bool unbuffered = false)
- : BufferMode(unbuffered ? Unbuffered : InternalBuffer) {
+ : BufferMode(unbuffered ? BufferKind::Unbuffered
+ : BufferKind::InternalBuffer) {
// Start out ready to flush.
OutBufStart = OutBufEnd = OutBufCur = nullptr;
}
@@ -121,13 +123,13 @@ public:
/// Set the stream to be buffered, using the specified buffer size.
void SetBufferSize(size_t Size) {
flush();
- SetBufferAndMode(new char[Size], Size, InternalBuffer);
+ SetBufferAndMode(new char[Size], Size, BufferKind::InternalBuffer);
}
size_t GetBufferSize() const {
// If we're supposed to be buffered but haven't actually gotten around
// to allocating the buffer yet, return the value that would be used.
- if (BufferMode != Unbuffered && OutBufStart == nullptr)
+ if (BufferMode != BufferKind::Unbuffered && OutBufStart == nullptr)
return preferred_buffer_size();
// Otherwise just return the size of the allocated buffer.
@@ -139,7 +141,7 @@ public:
/// when the stream is being set to unbuffered.
void SetUnbuffered() {
flush();
- SetBufferAndMode(nullptr, 0, Unbuffered);
+ SetBufferAndMode(nullptr, 0, BufferKind::Unbuffered);
}
size_t GetNumBytesInBuffer() const {
@@ -325,7 +327,7 @@ protected:
/// use only by subclasses which can arrange for the output to go directly
/// into the desired output buffer, instead of being copied on each flush.
void SetBuffer(char *BufferStart, size_t Size) {
- SetBufferAndMode(BufferStart, Size, ExternalBuffer);
+ SetBufferAndMode(BufferStart, Size, BufferKind::ExternalBuffer);
}
/// Return an efficient buffer size for the underlying output mechanism.
@@ -353,6 +355,17 @@ private:
virtual void anchor();
};
+/// Call the appropriate insertion operator, given an rvalue reference to a
+/// raw_ostream object and return a stream of the same type as the argument.
+template <typename OStream, typename T>
+typename std::enable_if<!std::is_reference<OStream>::value &&
+ std::is_base_of<raw_ostream, OStream>::value,
+ OStream &&>::type
+operator<<(OStream &&OS, const T &Value) {
+ OS << Value;
+ return std::move(OS);
+}
+
/// An abstract base class for streams implementations that also support a
/// pwrite operation. This is useful for code that can mostly stream out data,
/// but needs to patch in a header that needs to know the output size.
@@ -384,7 +397,7 @@ public:
class raw_fd_ostream : public raw_pwrite_stream {
int FD;
bool ShouldClose;
- bool SupportsSeeking;
+ bool SupportsSeeking = false;
bool ColorEnabled = true;
#ifdef _WIN32
@@ -395,7 +408,7 @@ class raw_fd_ostream : public raw_pwrite_stream {
std::error_code EC;
- uint64_t pos;
+ uint64_t pos = 0;
/// See raw_ostream::write_impl.
void write_impl(const char *Ptr, size_t Size) override;
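
[Editor's sketch, not part of this patch: the new rvalue-reference operator<<
allows chaining on a temporary stream.]

    #include "llvm/Support/raw_ostream.h"
    #include <string>

    void demo() {
      std::string Msg;
      llvm::raw_string_ostream(Msg) << "answer = " << 42;
      // Msg holds "answer = 42" once the temporary flushes on destruction.
    }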
diff --git a/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h b/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h
index 73ed342a6101..a553ec99aaa4 100644
--- a/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h
+++ b/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h
@@ -751,7 +751,7 @@ public:
///
class UnOpInit : public OpInit, public FoldingSetNode {
public:
- enum UnaryOp : uint8_t { CAST, HEAD, TAIL, SIZE, EMPTY };
+ enum UnaryOp : uint8_t { CAST, HEAD, TAIL, SIZE, EMPTY, GETOP };
private:
Init *LHS;
@@ -802,7 +802,7 @@ class BinOpInit : public OpInit, public FoldingSetNode {
public:
enum BinaryOp : uint8_t { ADD, MUL, AND, OR, SHL, SRA, SRL, LISTCONCAT,
LISTSPLAT, STRCONCAT, CONCAT, EQ, NE, LE, LT, GE,
- GT };
+ GT, SETOP };
private:
Init *LHS, *RHS;
@@ -1663,6 +1663,12 @@ public:
/// the value is not the right type.
Record *getValueAsDef(StringRef FieldName) const;
+ /// This method looks up the specified field and returns its value as a
+ /// Record, returning null if the field exists but is "uninitialized"
+ /// (i.e. set to `?`), and throwing an exception if the field does not
+ /// exist or if its value is not the right type.
+ Record *getValueAsOptionalDef(StringRef FieldName) const;
+
/// This method looks up the specified field and returns its
/// value as a bit, throwing an exception if the field does not exist or if
/// the value is not the right type.
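
[Editor's sketch, not part of this patch: a TableGen backend can now
distinguish an uninitialized def-valued field from an error. "SchedModel" is
a hypothetical field name.]

    #include "llvm/TableGen/Record.h"

    void demo(const llvm::Record &Rec) {
      if (const llvm::Record *R = Rec.getValueAsOptionalDef("SchedModel")) {
        (void)R; // the field was set to a concrete def
      }
      // A null result means the field was left as '?'.
    }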
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/GenericOpcodes.td b/contrib/llvm-project/llvm/include/llvm/Target/GenericOpcodes.td
index 4b49dfd4dd18..0ee9c3916c10 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/GenericOpcodes.td
@@ -330,7 +330,7 @@ def G_SELECT : GenericInstruction {
}
// Generic pointer offset.
-def G_GEP : GenericInstruction {
+def G_PTR_ADD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
@@ -670,7 +670,7 @@ def G_FEXP2 : GenericInstruction {
let hasSideEffects = 0;
}
-// Floating point base-2 logarithm of a value.
+// Floating point base-e logarithm of a value.
def G_FLOG : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
@@ -758,6 +758,12 @@ def G_INTRINSIC_ROUND : GenericInstruction {
let hasSideEffects = 0;
}
+def G_READCYCLECOUNTER : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins);
+ let hasSideEffects = 1;
+}
+
//------------------------------------------------------------------------------
// Memory ops
//------------------------------------------------------------------------------
@@ -1006,6 +1012,26 @@ def G_BRJT : GenericInstruction {
let isTerminator = 1;
}
+def G_READ_REGISTER : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$register);
+ let hasSideEffects = 1;
+
+ // Assume convergent. It's probably not worth the effort of somehow
+ // modeling convergent and nonconvergent register accesses.
+ let isConvergent = 1;
+}
+
+def G_WRITE_REGISTER : GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins unknown:$register, type0:$value);
+ let hasSideEffects = 1;
+
+ // Assume convergent. It's probably not worth the effort of somehow
+ // modeling convergent and nonconvergent register accesses.
+ let isConvergent = 1;
+}
+
//------------------------------------------------------------------------------
// Vector ops
//------------------------------------------------------------------------------
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td
index dcac399fd693..35f7a41e1cbf 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -66,11 +66,21 @@ class GIDefKindWithArgs;
/// is incorrect.
def root : GIDefKind;
+/// Declares data that is passed from the match stage to the apply stage.
+class GIDefMatchData<string type> : GIDefKind {
+ /// A C++ type name indicating the storage type.
+ string Type = type;
+}
+
+def extending_load_matchdata : GIDefMatchData<"PreferredTuple">;
+def indexed_load_store_matchdata : GIDefMatchData<"IndexedLoadStoreMatchInfo">;
+
/// The operator at the root of a GICombineRule.Match dag.
def match;
/// All arguments of the match operator must be either:
/// * A subclass of GIMatchKind
/// * A subclass of GIMatchKindWithArgs
+/// * A subclass of Instruction
/// * A MIR code block (deprecated)
/// The GIMatchKind and GIMatchKindWithArgs cases are described in more detail
/// in their definitions below.
@@ -79,6 +89,10 @@ def match;
class GIMatchKind;
class GIMatchKindWithArgs;
+/// In lieu of proper macro support, trivial one-off opcode checks can be
+/// performed with this.
+def wip_match_opcode : GIMatchKindWithArgs;
+
/// The operator at the root of a GICombineRule.Apply dag.
def apply;
/// All arguments of the apply operator must be subclasses of GIApplyKind, or
@@ -89,15 +103,38 @@ class GIApplyKindWithArgs;
def copy_prop : GICombineRule<
(defs root:$d),
- (match [{ return Helper.matchCombineCopy(${d}); }]),
- (apply [{ Helper.applyCombineCopy(${d}); }])>;
+ (match (COPY $d, $s):$mi,
+ [{ return Helper.matchCombineCopy(*${mi}); }]),
+ (apply [{ Helper.applyCombineCopy(*${mi}); }])>;
def trivial_combines : GICombineGroup<[copy_prop]>;
+def extending_loads : GICombineRule<
+ (defs root:$root, extending_load_matchdata:$matchinfo),
+ (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD):$root,
+ [{ return Helper.matchCombineExtendingLoads(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyCombineExtendingLoads(*${root}, ${matchinfo}); }])>;
+def combines_for_extload: GICombineGroup<[extending_loads]>;
+
+def combine_indexed_load_store : GICombineRule<
+ (defs root:$root, indexed_load_store_matchdata:$matchinfo),
+ (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root,
+ [{ return Helper.matchCombineIndexedLoadStore(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyCombineIndexedLoadStore(*${root}, ${matchinfo}); }])>;
+
// FIXME: Is there a reason this wasn't in tryCombine? I've left it out of
// all_combines because it wasn't there.
def elide_br_by_inverting_cond : GICombineRule<
- (defs root:$d),
- (match [{ return Helper.matchElideBrByInvertingCond(${d}); }]),
- (apply [{ Helper.applyElideBrByInvertingCond(${d}); }])>;
+ (defs root:$root),
+ (match (wip_match_opcode G_BR):$root,
+ [{ return Helper.matchElideBrByInvertingCond(*${root}); }]),
+ (apply [{ Helper.applyElideBrByInvertingCond(*${root}); }])>;
+
+def ptr_add_immed_matchdata : GIDefMatchData<"PtrAddChain">;
+def ptr_add_immed_chain : GICombineRule<
+ (defs root:$d, ptr_add_immed_matchdata:$matchinfo),
+ (match (wip_match_opcode G_PTR_ADD):$d,
+ [{ return Helper.matchPtrAddImmedChain(*${d}, ${matchinfo}); }]),
+ (apply [{ Helper.applyPtrAddImmedChain(*${d}, ${matchinfo}); }])>;
-def all_combines : GICombineGroup<[trivial_combines]>;
+def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
+ combines_for_extload, combine_indexed_load_store]>;
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index b846d2252b8d..2129588d4aed 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -100,6 +100,7 @@ def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
def : GINodeEquiv<G_CTPOP, ctpop>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>;
+def : GINodeEquiv<G_BUILD_VECTOR, build_vector>;
def : GINodeEquiv<G_FCEIL, fceil>;
def : GINodeEquiv<G_FCOS, fcos>;
def : GINodeEquiv<G_FSIN, fsin>;
@@ -108,6 +109,8 @@ def : GINodeEquiv<G_FSQRT, fsqrt>;
def : GINodeEquiv<G_FFLOOR, ffloor>;
def : GINodeEquiv<G_FRINT, frint>;
def : GINodeEquiv<G_FNEARBYINT, fnearbyint>;
+def : GINodeEquiv<G_INTRINSIC_TRUNC, ftrunc>;
+def : GINodeEquiv<G_INTRINSIC_ROUND, fround>;
def : GINodeEquiv<G_FCOPYSIGN, fcopysign>;
def : GINodeEquiv<G_SMIN, smin>;
def : GINodeEquiv<G_SMAX, smax>;
@@ -117,6 +120,7 @@ def : GINodeEquiv<G_FMINNUM, fminnum>;
def : GINodeEquiv<G_FMAXNUM, fmaxnum>;
def : GINodeEquiv<G_FMINNUM_IEEE, fminnum_ieee>;
def : GINodeEquiv<G_FMAXNUM_IEEE, fmaxnum_ieee>;
+def : GINodeEquiv<G_READCYCLECOUNTER, readcyclecounter>;
// Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some
// complications that tablegen must take care of. For example, Predicates such
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Target.td b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Target.td
index 538ca65e1162..8e868c4c207b 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Target.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Target.td
@@ -55,6 +55,12 @@ class GIComplexOperandMatcher<LLT type, string matcherfn> {
class GICustomOperandRenderer<string rendererfn> {
// The function renders the operand(s) of the matched instruction to
// the specified instruction. It should be of the form:
- // void render(MachineInstrBuilder &MIB, const MachineInstr &MI)
+ // void render(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ // int OpIdx = -1)
+ //
+ // If OpIdx is specified (i.e. not invalid/negative), this
+ // references the source operand MI.getOperand(OpIdx). Otherwise,
+ // this is the value defined by MI. This is to support the case
+ // where there is no corresponding instruction to match.
string RendererFn = rendererfn;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/Target.td b/contrib/llvm-project/llvm/include/llvm/Target/Target.td
index dd8679661b9a..b122b51bb169 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/Target.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/Target.td
@@ -530,6 +530,7 @@ class Instruction : InstructionEncoding {
bit hasCtrlDep = 0; // Does this instruction r/w ctrl-flow chains?
bit isNotDuplicable = 0; // Is it unsafe to duplicate this instruction?
bit isConvergent = 0; // Is this instruction convergent?
+ bit isAuthenticated = 0; // Does this instruction authenticate a pointer?
bit isAsCheapAsAMove = 0; // As cheap (or cheaper) than a move instruction.
bit hasExtraSrcRegAllocReq = 0; // Sources have special regalloc requirement?
bit hasExtraDefRegAllocReq = 0; // Defs have special regalloc requirement?
@@ -1210,7 +1211,7 @@ def PATCHABLE_TAIL_CALL : StandardPseudoInstruction {
}
def PATCHABLE_EVENT_CALL : StandardPseudoInstruction {
let OutOperandList = (outs);
- let InOperandList = (ins ptr_rc:$event, i8imm:$size);
+ let InOperandList = (ins ptr_rc:$event, unknown:$size);
let AsmString = "# XRay Custom Event Log.";
let usesCustomInserter = 1;
let isCall = 1;
@@ -1220,7 +1221,7 @@ def PATCHABLE_EVENT_CALL : StandardPseudoInstruction {
}
def PATCHABLE_TYPED_EVENT_CALL : StandardPseudoInstruction {
let OutOperandList = (outs);
- let InOperandList = (ins i16imm:$type, ptr_rc:$event, i32imm:$size);
+ let InOperandList = (ins unknown:$type, ptr_rc:$event, unknown:$size);
let AsmString = "# XRay Typed Event Log.";
let usesCustomInserter = 1;
let isCall = 1;
@@ -1292,7 +1293,7 @@ class AsmParser {
// ReportMultipleNearMisses -
// When 0, the assembly matcher reports an error for one encoding or operand
// that did not match the parsed instruction.
- // When 1, the assmebly matcher returns a list of encodings that were close
+ // When 1, the assembly matcher returns a list of encodings that were close
// to matching the parsed instruction, so to allow more detailed error
// messages.
bit ReportMultipleNearMisses = 0;
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td
index 7b1973cc3828..d5f3931c3d5d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td
@@ -51,6 +51,11 @@ class CCIfSwiftSelf<CCAction A> : CCIf<"ArgFlags.isSwiftSelf()", A> {
class CCIfSwiftError<CCAction A> : CCIf<"ArgFlags.isSwiftError()", A> {
}
+/// CCIfCFGuardTarget - If the current argument has cfguardtarget parameter
+/// attribute, apply Action A.
+class CCIfCFGuardTarget<CCAction A> : CCIf<"ArgFlags.isCFGuardTarget()", A> {
+}
+
/// CCIfConsecutiveRegs - If the current argument has InConsecutiveRegs
/// parameter attribute, apply Action A.
class CCIfConsecutiveRegs<CCAction A> : CCIf<"ArgFlags.isInConsecutiveRegs()", A> {
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetMachine.h b/contrib/llvm-project/llvm/include/llvm/Target/TargetMachine.h
index 285c0ec0fb90..176ae39b17a7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetMachine.h
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetMachine.h
@@ -271,15 +271,6 @@ public:
/// PassManagerBuilder::addExtension.
virtual void adjustPassManager(PassManagerBuilder &) {}
- /// These enums are meant to be passed into addPassesToEmitFile to indicate
- /// what type of file to emit, and returned by it to indicate what type of
- /// file could actually be made.
- enum CodeGenFileType {
- CGFT_AssemblyFile,
- CGFT_ObjectFile,
- CGFT_Null // Do not emit any output.
- };
-
/// Add passes to the specified pass manager to get the specified file
/// emitted. Typically this will involve several steps of code generation.
/// This method should return true if emission of this file type is not
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetOptions.h b/contrib/llvm-project/llvm/include/llvm/Target/TargetOptions.h
index 8cc2a6010879..84c6ee2a6387 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetOptions.h
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetOptions.h
@@ -107,7 +107,7 @@ namespace llvm {
public:
TargetOptions()
: PrintMachineCode(false), UnsafeFPMath(false), NoInfsFPMath(false),
- NoNaNsFPMath(false), NoTrappingFPMath(false),
+ NoNaNsFPMath(false), NoTrappingFPMath(true),
NoSignedZerosFPMath(false),
HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
GuaranteedTailCallOpt(false), StackSymbolOrdering(true),
@@ -115,11 +115,11 @@ namespace llvm {
DisableIntegratedAS(false), RelaxELFRelocations(false),
FunctionSections(false), DataSections(false),
UniqueSectionNames(true), TrapUnreachable(false),
- NoTrapAfterNoreturn(false), EmulatedTLS(false),
+ NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false),
ExplicitEmulatedTLS(false), EnableIPRA(false),
EmitStackSizeSection(false), EnableMachineOutliner(false),
SupportsDefaultOutlining(false), EmitAddrsig(false),
- EnableDebugEntryValues(false) {}
+ EnableDebugEntryValues(false), ForceDwarfFrameSection(false) {}
/// PrintMachineCode - This flag is enabled when the -print-machineinstrs
/// option is specified on the command line, and should enable debugging
@@ -231,6 +231,9 @@ namespace llvm {
/// noreturn calls, even if TrapUnreachable is true.
unsigned NoTrapAfterNoreturn : 1;
+ /// Bit size of immediate TLS offsets (0 == use the default).
+ unsigned TLSSize : 8;
+
/// EmulatedTLS - This flag enables emulated TLS model, using emutls
/// function in the runtime library..
unsigned EmulatedTLS : 1;
@@ -256,6 +259,9 @@ namespace llvm {
/// Emit debug info about parameter's entry values.
unsigned EnableDebugEntryValues : 1;
+ /// Emit DWARF debug frame section.
+ unsigned ForceDwarfFrameSection : 1;
+
/// FloatABIType - This setting is set by -float-abi=xxx option is specfied
/// on the command line. This setting may either be Default, Soft, or Hard.
/// Default selects the target's default behavior. Soft selects the ABI for
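
[Editor's sketch, not part of this patch: the two new TargetOptions knobs in
use.]

    #include "llvm/Target/TargetOptions.h"

    void demo() {
      llvm::TargetOptions Opts;
      Opts.TLSSize = 16;                  // 16-bit immediate TLS offsets
      Opts.ForceDwarfFrameSection = true; // always emit the DWARF frame section
    }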
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td
index 441f3d7d118d..1700c6c4640d 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -124,7 +124,7 @@ def SDTIntSatNoShOp : SDTypeProfile<1, 2, [ // ssat with no shift
def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // mulhi, mullo, sdivrem, udivrem
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,SDTCisInt<0>
]>;
-def SDTIntScaledBinOp : SDTypeProfile<1, 3, [ // smulfix, umulfix
+def SDTIntScaledBinOp : SDTypeProfile<1, 3, [ // smulfix, sdivfix, etc
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;
@@ -224,13 +224,13 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store
SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3>
]>;
-def SDTMaskedStore: SDTypeProfile<0, 3, [ // masked store
- SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>
+def SDTMaskedStore: SDTypeProfile<0, 4, [ // masked store
+ SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisSameNumEltsAs<0, 3>
]>;
-def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load
- SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>,
- SDTCisSameNumEltsAs<0, 2>
+def SDTMaskedLoad: SDTypeProfile<1, 4, [ // masked load
+ SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisSameAs<0, 4>,
+ SDTCisSameNumEltsAs<0, 3>
]>;
def SDTVecShuffle : SDTypeProfile<1, 2, [
@@ -400,6 +400,8 @@ def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>
def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>;
def umulfix : SDNode<"ISD::UMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
def umulfixsat : SDNode<"ISD::UMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>;
+def sdivfix : SDNode<"ISD::SDIVFIX" , SDTIntScaledBinOp>;
+def udivfix : SDNode<"ISD::UDIVFIX" , SDTIntScaledBinOp>;
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
@@ -530,6 +532,12 @@ def strict_fminnum : SDNode<"ISD::STRICT_FMINNUM",
def strict_fmaxnum : SDNode<"ISD::STRICT_FMAXNUM",
SDTFPBinOp, [SDNPHasChain,
SDNPCommutative, SDNPAssociative]>;
+def strict_fminimum : SDNode<"ISD::STRICT_FMINIMUM",
+ SDTFPBinOp, [SDNPHasChain,
+ SDNPCommutative, SDNPAssociative]>;
+def strict_fmaximum : SDNode<"ISD::STRICT_FMAXIMUM",
+ SDTFPBinOp, [SDNPHasChain,
+ SDNPCommutative, SDNPAssociative]>;
def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND",
SDTFPRoundOp, [SDNPHasChain]>;
def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND",
@@ -538,6 +546,10 @@ def strict_fp_to_sint : SDNode<"ISD::STRICT_FP_TO_SINT",
SDTFPToIntOp, [SDNPHasChain]>;
def strict_fp_to_uint : SDNode<"ISD::STRICT_FP_TO_UINT",
SDTFPToIntOp, [SDNPHasChain]>;
+def strict_sint_to_fp : SDNode<"ISD::STRICT_SINT_TO_FP",
+ SDTIntToFPOp, [SDNPHasChain]>;
+def strict_uint_to_fp : SDNode<"ISD::STRICT_UINT_TO_FP",
+ SDTIntToFPOp, [SDNPHasChain]>;
def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
def select : SDNode<"ISD::SELECT" , SDTSelect>;
@@ -1380,6 +1392,12 @@ def any_fmaxnum : PatFrags<(ops node:$lhs, node:$rhs),
def any_fminnum : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fminnum node:$lhs, node:$rhs),
(fminnum node:$lhs, node:$rhs)]>;
+def any_fmaximum : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fmaximum node:$lhs, node:$rhs),
+ (fmaximum node:$lhs, node:$rhs)]>;
+def any_fminimum : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fminimum node:$lhs, node:$rhs),
+ (fminimum node:$lhs, node:$rhs)]>;
def any_fpround : PatFrags<(ops node:$src),
[(strict_fpround node:$src),
(fpround node:$src)]>;
@@ -1398,6 +1416,12 @@ def any_fp_to_sint : PatFrags<(ops node:$src),
def any_fp_to_uint : PatFrags<(ops node:$src),
[(strict_fp_to_uint node:$src),
(fp_to_uint node:$src)]>;
+def any_sint_to_fp : PatFrags<(ops node:$src),
+ [(strict_sint_to_fp node:$src),
+ (sint_to_fp node:$src)]>;
+def any_uint_to_fp : PatFrags<(ops node:$src),
+ [(strict_uint_to_fp node:$src),
+ (uint_to_fp node:$src)]>;
multiclass binary_atomic_op_ord<SDNode atomic_op> {
def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/CFGuard.h b/contrib/llvm-project/llvm/include/llvm/Transforms/CFGuard.h
new file mode 100644
index 000000000000..86fcbc3c13e8
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/CFGuard.h
@@ -0,0 +1,26 @@
+//===-- CFGuard.h - CFGuard Transformations ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+// Windows Control Flow Guard passes (/guard:cf).
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_CFGUARD_H
+#define LLVM_TRANSFORMS_CFGUARD_H
+
+namespace llvm {
+
+class FunctionPass;
+
+/// Insert Control Flow Guard checks on indirect function calls.
+FunctionPass *createCFGuardCheckPass();
+
+/// Insert Control Flow Guard dispatches on indirect function calls.
+FunctionPass *createCFGuardDispatchPass();
+
+} // namespace llvm
+
+#endif
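
[Editor's sketch, not part of this patch: scheduling the new check pass with
the legacy pass manager.]

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/CFGuard.h"

    void demo() {
      llvm::legacy::PassManager PM;
      PM.add(llvm::createCFGuardCheckPass());
      // PM.run(M) would then instrument indirect calls in module M.
    }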
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines.h
index 9df3ec0f3ef4..ef05f549fbc1 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Coroutines.h
@@ -20,17 +20,17 @@ class PassManagerBuilder;
void addCoroutinePassesToExtensionPoints(PassManagerBuilder &Builder);
/// Lower coroutine intrinsics that are not needed by later passes.
-Pass *createCoroEarlyPass();
+Pass *createCoroEarlyLegacyPass();
/// Split up coroutines into multiple functions driving their state machines.
-Pass *createCoroSplitPass();
+Pass *createCoroSplitLegacyPass();
/// Analyze coroutines use sites, devirtualize resume/destroy calls and elide
/// heap allocation for coroutine frame where possible.
-Pass *createCoroElidePass();
+Pass *createCoroElideLegacyPass();
/// Lower all remaining coroutine intrinsics.
-Pass *createCoroCleanupPass();
+Pass *createCoroCleanupLegacyPass();
}
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h
index 3dbe0fcd76ea..f7430a83e8d7 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -72,7 +72,8 @@
// - Define a class (transitively) inheriting from AbstractAttribute and one
// (which could be the same) that (transitively) inherits from AbstractState.
// For the latter, consider the already available BooleanState and
-// IntegerState if they fit your needs, e.g., you require only a bit-encoding.
+// {Inc,Dec,Bit}IntegerState if they fit your needs, e.g., if you require only
+// number tracking or a bit-encoding.
// - Implement all pure methods. Also use overloading if the attribute is not
// conforming with the "default" behavior: A (set of) LLVM-IR attribute(s) for
// an argument, call site argument, function return value, or function. See
@@ -104,6 +105,7 @@
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -114,7 +116,7 @@ struct AAIsDead;
class Function;
-/// Simple enum class that forces the status to be spelled out explicitly.
+/// Simple enum classes that force properties to be spelled out explicitly.
///
///{
enum class ChangeStatus {
@@ -124,6 +126,11 @@ enum class ChangeStatus {
ChangeStatus operator|(ChangeStatus l, ChangeStatus r);
ChangeStatus operator&(ChangeStatus l, ChangeStatus r);
+
+enum class DepClassTy {
+ REQUIRED,
+ OPTIONAL,
+};
///}
/// Helper to describe and deal with positions in the LLVM-IR.
@@ -251,22 +258,14 @@ struct IRPosition {
/// sufficient to determine where arguments will be manifested. This is, so
/// far, only the case for call site arguments as the value is not sufficient
/// to pinpoint them. Instead, we can use the call site as an anchor.
- ///
- ///{
- Value &getAnchorValue() {
+ Value &getAnchorValue() const {
assert(KindOrArgNo != IRP_INVALID &&
"Invalid position does not have an anchor value!");
return *AnchorVal;
}
- const Value &getAnchorValue() const {
- return const_cast<IRPosition *>(this)->getAnchorValue();
- }
- ///}
/// Return the associated function, if any.
- ///
- ///{
- Function *getAssociatedFunction() {
+ Function *getAssociatedFunction() const {
if (auto *CB = dyn_cast<CallBase>(AnchorVal))
return CB->getCalledFunction();
assert(KindOrArgNo != IRP_INVALID &&
@@ -280,32 +279,12 @@ struct IRPosition {
return cast<Instruction>(V).getFunction();
return nullptr;
}
- const Function *getAssociatedFunction() const {
- return const_cast<IRPosition *>(this)->getAssociatedFunction();
- }
- ///}
/// Return the associated argument, if any.
- ///
- ///{
- Argument *getAssociatedArgument() {
- if (auto *Arg = dyn_cast<Argument>(&getAnchorValue()))
- return Arg;
- int ArgNo = getArgNo();
- if (ArgNo < 0)
- return nullptr;
- Function *AssociatedFn = getAssociatedFunction();
- if (!AssociatedFn || AssociatedFn->arg_size() <= unsigned(ArgNo))
- return nullptr;
- return AssociatedFn->arg_begin() + ArgNo;
- }
- const Argument *getAssociatedArgument() const {
- return const_cast<IRPosition *>(this)->getAssociatedArgument();
- }
- ///}
+ Argument *getAssociatedArgument() const;
/// Return true if the position refers to a function interface, that is the
- /// function scope, the function return, or an argumnt.
+ /// function scope, the function return, or an argument.
bool isFnInterfaceKind() const {
switch (getPositionKind()) {
case IRPosition::IRP_FUNCTION:
@@ -318,9 +297,7 @@ struct IRPosition {
}
/// Return the Function surrounding the anchor value.
- ///
- ///{
- Function *getAnchorScope() {
+ Function *getAnchorScope() const {
Value &V = getAnchorValue();
if (isa<Function>(V))
return &cast<Function>(V);
@@ -330,15 +307,9 @@ struct IRPosition {
return cast<Instruction>(V).getFunction();
return nullptr;
}
- const Function *getAnchorScope() const {
- return const_cast<IRPosition *>(this)->getAnchorScope();
- }
- ///}
/// Return the context instruction, if any.
- ///
- ///{
- Instruction *getCtxI() {
+ Instruction *getCtxI() const {
Value &V = getAnchorValue();
if (auto *I = dyn_cast<Instruction>(&V))
return I;
@@ -350,15 +321,9 @@ struct IRPosition {
return &(F->getEntryBlock().front());
return nullptr;
}
- const Instruction *getCtxI() const {
- return const_cast<IRPosition *>(this)->getCtxI();
- }
- ///}
/// Return the value this abstract attribute is associated with.
- ///
- ///{
- Value &getAssociatedValue() {
+ Value &getAssociatedValue() const {
assert(KindOrArgNo != IRP_INVALID &&
"Invalid position does not have an associated value!");
if (getArgNo() < 0 || isa<Argument>(AnchorVal))
@@ -366,10 +331,6 @@ struct IRPosition {
assert(isa<CallBase>(AnchorVal) && "Expected a call base!");
return *cast<CallBase>(AnchorVal)->getArgOperand(getArgNo());
}
- const Value &getAssociatedValue() const {
- return const_cast<IRPosition *>(this)->getAssociatedValue();
- }
- ///}
/// Return the argument number of the associated value if it is an argument or
/// call site argument, otherwise a negative value.
@@ -428,8 +389,12 @@ struct IRPosition {
/// single attribute of any kind in \p AKs, there are "subsuming" positions
/// that could have an attribute as well. This method returns all attributes
/// found in \p Attrs.
+ /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
+ /// e.g., the function position if this is an
+ /// argument position, should be ignored.
void getAttrs(ArrayRef<Attribute::AttrKind> AKs,
- SmallVectorImpl<Attribute> &Attrs) const;
+ SmallVectorImpl<Attribute> &Attrs,
+ bool IgnoreSubsumingPositions = false) const;
/// Return the attribute of kind \p AK existing in the IR at this position.
Attribute getAttr(Attribute::AttrKind AK) const {
@@ -448,7 +413,7 @@ struct IRPosition {
}
/// Remove the attribute of kind \p AKs existing in the IR at this position.
- void removeAttrs(ArrayRef<Attribute::AttrKind> AKs) {
+ void removeAttrs(ArrayRef<Attribute::AttrKind> AKs) const {
if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT)
return;
@@ -501,6 +466,7 @@ private:
/// Verify internal invariants.
void verify();
+protected:
/// The value this position is anchored at.
Value *AnchorVal;
@@ -545,7 +511,7 @@ template <> struct DenseMapInfo<IRPosition> {
/// - the argument of the callee (IRP_ARGUMENT), if known
/// - the callee (IRP_FUNCTION), if known
/// - the position the call site argument is associated with if it is not
-/// anchored to the call site, e.g., if it is an arugment then the argument
+/// anchored to the call site, e.g., if it is an argument then the argument
/// (IRP_ARGUMENT)
class SubsumingPositionIterator {
SmallVector<IRPosition, 4> IRPositions;
@@ -642,6 +608,12 @@ struct InformationCache {
return AG.getAnalysis<AAManager>(F);
}
+ /// Return the analysis result from a pass \p AP for function \p F.
+ template <typename AP>
+ typename AP::Result *getAnalysisResultForFunction(const Function &F) {
+ return AG.getAnalysis<AP>(F);
+ }
+
/// Return SCC size on call graph for function \p F.
unsigned getSccSize(const Function &F) {
if (!SccSizeOpt.hasValue())
@@ -723,7 +695,11 @@ struct Attributor {
: InfoCache(InfoCache), DepRecomputeInterval(DepRecomputeInterval),
Whitelist(Whitelist) {}
- ~Attributor() { DeleteContainerPointers(AllAbstractAttributes); }
+ ~Attributor() {
+ DeleteContainerPointers(AllAbstractAttributes);
+ for (auto &It : ArgumentReplacementMap)
+ DeleteContainerPointers(It.second);
+ }
/// Run the analyses until a fixpoint is reached or enforced (timeout).
///
@@ -755,8 +731,10 @@ struct Attributor {
/// the `Attributor::recordDependence` method.
template <typename AAType>
const AAType &getAAFor(const AbstractAttribute &QueryingAA,
- const IRPosition &IRP, bool TrackDependence = true) {
- return getOrCreateAAFor<AAType>(IRP, &QueryingAA, TrackDependence);
+ const IRPosition &IRP, bool TrackDependence = true,
+ DepClassTy DepClass = DepClassTy::REQUIRED) {
+ return getOrCreateAAFor<AAType>(IRP, &QueryingAA, TrackDependence,
+ DepClass);
}
/// Explicitly record a dependence from \p FromAA to \p ToAA, that is if
@@ -766,10 +744,12 @@ struct Attributor {
/// with the TrackDependence flag passed to the method set to false. This can
/// be beneficial to avoid false dependences but it requires the users of
/// `getAAFor` to explicitly record true dependences through this method.
+ /// The \p DepClass flag indicates if the dependence is strictly necessary.
+ /// That means for required dependences, if \p FromAA changes to an invalid
+ /// state, \p ToAA can be moved to a pessimistic fixpoint because it required
+ /// information from \p FromAA but none are available anymore.
void recordDependence(const AbstractAttribute &FromAA,
- const AbstractAttribute &ToAA) {
- QueryMap[&FromAA].insert(const_cast<AbstractAttribute *>(&ToAA));
- }
+ const AbstractAttribute &ToAA, DepClassTy DepClass);
/// Introduce a new abstract attribute into the fixpoint analysis.
///
@@ -784,7 +764,7 @@ struct Attributor {
"'AbstractAttribute'!");
// Put the attribute in the lookup map structure and the container we use to
// keep track of all attributes.
- IRPosition &IRP = AA.getIRPosition();
+ const IRPosition &IRP = AA.getIRPosition();
auto &KindToAbstractAttributeMap = AAMap[IRP];
assert(!KindToAbstractAttributeMap.count(&AAType::ID) &&
"Attribute already in map!");
@@ -825,10 +805,80 @@ struct Attributor {
identifyDefaultAbstractAttributes(const_cast<Function &>(F));
}
- /// Record that \p I is deleted after information was manifested.
+ /// Record that \p U is to be replaced with \p NV after information was
+ /// manifested. This also triggers deletion of trivially dead instructions.
+ bool changeUseAfterManifest(Use &U, Value &NV) {
+ Value *&V = ToBeChangedUses[&U];
+ if (V && (V->stripPointerCasts() == NV.stripPointerCasts() ||
+ isa_and_nonnull<UndefValue>(V)))
+ return false;
+ assert((!V || V == &NV || isa<UndefValue>(NV)) &&
+ "Use was registered twice for replacement with different values!");
+ V = &NV;
+ return true;
+ }
+
+ /// Helper function to replace all uses of \p V with \p NV. Return true if
+ /// there is any change.
+ bool changeValueAfterManifest(Value &V, Value &NV) {
+ bool Changed = false;
+ for (auto &U : V.uses())
+ Changed |= changeUseAfterManifest(U, NV);
+
+ return Changed;
+ }
+
+ /// Get pointer operand of memory accessing instruction. If \p I is
+ /// not a memory accessing instruction, return nullptr. If \p AllowVolatile
+ /// is set to false and the instruction is volatile, return nullptr.
+ static const Value *getPointerOperand(const Instruction *I,
+ bool AllowVolatile) {
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
+ if (!AllowVolatile && LI->isVolatile())
+ return nullptr;
+ return LI->getPointerOperand();
+ }
+
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ if (!AllowVolatile && SI->isVolatile())
+ return nullptr;
+ return SI->getPointerOperand();
+ }
+
+ if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(I)) {
+ if (!AllowVolatile && CXI->isVolatile())
+ return nullptr;
+ return CXI->getPointerOperand();
+ }
+
+ if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) {
+ if (!AllowVolatile && RMWI->isVolatile())
+ return nullptr;
+ return RMWI->getPointerOperand();
+ }
+
+ return nullptr;
+ }
+
+ /// Record that \p I is to be replaced with `unreachable` after information
+ /// was manifested.
+ void changeToUnreachableAfterManifest(Instruction *I) {
+ ToBeChangedToUnreachableInsts.insert(I);
+ }
+
+ /// Record that \p II has at least one dead successor block. This information
+ /// is used, e.g., to replace \p II with a call, after information was
+ /// manifested.
+ void registerInvokeWithDeadSuccessor(InvokeInst &II) {
+ InvokeWithDeadSuccessor.push_back(&II);
+ }
+
+ /// Record that \p I is deleted after information was manifested. This also
+ /// triggers deletion of trivially dead instructions.
void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); }
- /// Record that \p BB is deleted after information was manifested.
+ /// Record that \p BB is deleted after information was manifested. This also
+ /// triggers deletion of trivially dead instructions.
void deleteAfterManifest(BasicBlock &BB) { ToBeDeletedBlocks.insert(&BB); }
/// Record that \p F is deleted after information was manifested.
@@ -839,6 +889,104 @@ struct Attributor {
/// If \p LivenessAA is not provided it is queried.
bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA);
+ /// Check \p Pred on all (transitive) uses of \p V.
+ ///
+ /// This method will evaluate \p Pred on all (transitive) uses of the
+ /// associated value and return true if \p Pred holds every time.
+ bool checkForAllUses(const function_ref<bool(const Use &, bool &)> &Pred,
+ const AbstractAttribute &QueryingAA, const Value &V);
+
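A hedged sketch of a use visitor for the interface above, written from inside an abstract attribute. The boolean out-parameter (`Follow` here) is assumed to request descending into the user's own uses, and the no-store predicate is purely illustrative:
```
auto UsePred = [](const Use &U, bool &Follow) {
  Follow = true;                       // also visit the user's uses
  return !isa<StoreInst>(U.getUser()); // hypothetical predicate: no stores
};
bool AllUsesOK = A.checkForAllUses(UsePred, /* QueryingAA */ *this, V);
```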
+ /// Helper struct used in the communication between an abstract attribute (AA)
+ /// that wants to change the signature of a function and the Attributor which
+ /// applies the changes. The struct is partially initialized with the
+ /// information from the AA (see the constructor). All other members are
+ /// provided by the Attributor prior to invoking any callbacks.
+ struct ArgumentReplacementInfo {
+ /// Callee repair callback type
+ ///
+ /// The function repair callback is invoked once to rewire the replacement
+ /// arguments in the body of the new function. The argument replacement info
+    /// is passed, as built from the registerFunctionSignatureRewrite call, as
+    /// well as the replacement function and an iterator to the first
+ /// replacement argument.
+ using CalleeRepairCBTy = std::function<void(
+ const ArgumentReplacementInfo &, Function &, Function::arg_iterator)>;
+
+ /// Abstract call site (ACS) repair callback type
+ ///
+ /// The abstract call site repair callback is invoked once on every abstract
+ /// call site of the replaced function (\see ReplacedFn). The callback needs
+ /// to provide the operands for the call to the new replacement function.
+    /// The number and types of the operands appended to the provided vector
+    /// (second argument) are defined by the replacement type vector (\see
+    /// ReplacementTypes). The first argument
+ /// is the ArgumentReplacementInfo object registered with the Attributor
+ /// through the registerFunctionSignatureRewrite call.
+ using ACSRepairCBTy =
+ std::function<void(const ArgumentReplacementInfo &, AbstractCallSite,
+ SmallVectorImpl<Value *> &)>;
+
+ /// Simple getters, see the corresponding members for details.
+ ///{
+
+ Attributor &getAttributor() const { return A; }
+ const Function &getReplacedFn() const { return ReplacedFn; }
+ const Argument &getReplacedArg() const { return ReplacedArg; }
+ unsigned getNumReplacementArgs() const { return ReplacementTypes.size(); }
+ const SmallVectorImpl<Type *> &getReplacementTypes() const {
+ return ReplacementTypes;
+ }
+
+ ///}
+
+ private:
+ /// Constructor that takes the argument to be replaced, the types of
+ /// the replacement arguments, as well as callbacks to repair the call sites
+ /// and new function after the replacement happened.
+ ArgumentReplacementInfo(Attributor &A, Argument &Arg,
+ ArrayRef<Type *> ReplacementTypes,
+ CalleeRepairCBTy &&CalleeRepairCB,
+ ACSRepairCBTy &&ACSRepairCB)
+ : A(A), ReplacedFn(*Arg.getParent()), ReplacedArg(Arg),
+ ReplacementTypes(ReplacementTypes.begin(), ReplacementTypes.end()),
+ CalleeRepairCB(std::move(CalleeRepairCB)),
+ ACSRepairCB(std::move(ACSRepairCB)) {}
+
+ /// Reference to the attributor to allow access from the callbacks.
+ Attributor &A;
+
+ /// The "old" function replaced by ReplacementFn.
+ const Function &ReplacedFn;
+
+ /// The "old" argument replaced by new ones defined via ReplacementTypes.
+ const Argument &ReplacedArg;
+
+ /// The types of the arguments replacing ReplacedArg.
+ const SmallVector<Type *, 8> ReplacementTypes;
+
+ /// Callee repair callback, see CalleeRepairCBTy.
+ const CalleeRepairCBTy CalleeRepairCB;
+
+ /// Abstract call site (ACS) repair callback, see ACSRepairCBTy.
+ const ACSRepairCBTy ACSRepairCB;
+
+ /// Allow access to the private members from the Attributor.
+ friend struct Attributor;
+ };
+
+ /// Register a rewrite for a function signature.
+ ///
+ /// The argument \p Arg is replaced with new ones defined by the number,
+ /// order, and types in \p ReplacementTypes. The rewiring at the call sites is
+ /// done through \p ACSRepairCB and at the callee site through
+ /// \p CalleeRepairCB.
+ ///
+ /// \returns True, if the replacement was registered, false otherwise.
+ bool registerFunctionSignatureRewrite(
+ Argument &Arg, ArrayRef<Type *> ReplacementTypes,
+ ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
+ ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB);
+
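A hedged sketch of one way this interface can be used: removing a presumed-dead argument by registering an empty replacement type list. The names `A` and `Arg` and the dead-argument scenario are assumptions for illustration; neither callback has work to do when the argument is simply dropped:
```
bool Registered = A.registerFunctionSignatureRewrite(
    Arg, /* ReplacementTypes */ {},
    [](const Attributor::ArgumentReplacementInfo &, Function &,
       Function::arg_iterator) { /* nothing to rewire in the new body */ },
    [](const Attributor::ArgumentReplacementInfo &, AbstractCallSite,
       SmallVectorImpl<Value *> &) { /* no operands for the dropped arg */ });
```
If `Registered` is true, the actual rewrite is deferred until the Attributor applies all requested signature changes.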
/// Check \p Pred on all function call sites.
///
/// This method will evaluate \p Pred on call sites and return
@@ -913,7 +1061,8 @@ private:
template <typename AAType>
const AAType &getOrCreateAAFor(const IRPosition &IRP,
const AbstractAttribute *QueryingAA = nullptr,
- bool TrackDependence = false) {
+ bool TrackDependence = false,
+ DepClassTy DepClass = DepClassTy::OPTIONAL) {
if (const AAType *AAPtr =
lookupAAFor<AAType>(IRP, QueryingAA, TrackDependence))
return *AAPtr;
@@ -941,7 +1090,8 @@ private:
AA.update(*this);
if (TrackDependence && AA.getState().isValidState())
- QueryMap[&AA].insert(const_cast<AbstractAttribute *>(QueryingAA));
+ recordDependence(AA, const_cast<AbstractAttribute &>(*QueryingAA),
+ DepClass);
return AA;
}
@@ -949,7 +1099,8 @@ private:
template <typename AAType>
const AAType *lookupAAFor(const IRPosition &IRP,
const AbstractAttribute *QueryingAA = nullptr,
- bool TrackDependence = false) {
+ bool TrackDependence = false,
+ DepClassTy DepClass = DepClassTy::OPTIONAL) {
static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
"Cannot query an attribute with a type not derived from "
"'AbstractAttribute'!");
@@ -963,12 +1114,18 @@ private:
KindToAbstractAttributeMap.lookup(&AAType::ID))) {
// Do not register a dependence on an attribute with an invalid state.
if (TrackDependence && AA->getState().isValidState())
- QueryMap[AA].insert(const_cast<AbstractAttribute *>(QueryingAA));
+ recordDependence(*AA, const_cast<AbstractAttribute &>(*QueryingAA),
+ DepClass);
return AA;
}
return nullptr;
}
+ /// Apply all requested function signature rewrites
+  /// (\see registerFunctionSignatureRewrite) and return CHANGED if the module
+ /// was altered.
+ ChangeStatus rewriteFunctionSignatures();
+
/// The set of all abstract attributes.
///{
using AAVector = SmallVector<AbstractAttribute *, 64>;
@@ -987,14 +1144,29 @@ private:
/// A map from abstract attributes to the ones that queried them through calls
/// to the getAAFor<...>(...) method.
///{
- using QueryMapTy =
- MapVector<const AbstractAttribute *, SetVector<AbstractAttribute *>>;
+ struct QueryMapValueTy {
+ /// Set of abstract attributes which were used but not necessarily required
+ /// for a potential optimistic state.
+ SetVector<AbstractAttribute *> OptionalAAs;
+
+ /// Set of abstract attributes which were used and which were necessarily
+ /// required for any potential optimistic state.
+ SetVector<AbstractAttribute *> RequiredAAs;
+ };
+ using QueryMapTy = MapVector<const AbstractAttribute *, QueryMapValueTy>;
QueryMapTy QueryMap;
///}
+ /// Map to remember all requested signature changes (= argument replacements).
+ DenseMap<Function *, SmallVector<ArgumentReplacementInfo *, 8>>
+ ArgumentReplacementMap;
+
/// The information cache that holds pre-processed (LLVM-IR) information.
InformationCache &InfoCache;
+  /// Set if the attribute currently being updated queried an attribute that is
+  /// not at a fixpoint yet.
+ bool QueriedNonFixAA;
+
/// Number of iterations until the dependences between abstract attributes are
/// recomputed.
const unsigned DepRecomputeInterval;
@@ -1005,6 +1177,16 @@ private:
/// A set to remember the functions we already assume to be live and visited.
DenseSet<const Function *> VisitedFunctions;
+  /// Uses we replace with a new value after manifest is done. We will then
+  /// remove trivially dead instructions as well.
+ DenseMap<Use *, Value *> ToBeChangedUses;
+
+ /// Instructions we replace with `unreachable` insts after manifest is done.
+ SmallDenseSet<WeakVH, 16> ToBeChangedToUnreachableInsts;
+
+ /// Invoke instructions with at least a single dead successor block.
+ SmallVector<WeakVH, 16> InvokeWithDeadSuccessor;
+
/// Functions, blocks, and instructions we delete after manifest is done.
///
///{
@@ -1027,9 +1209,10 @@ private:
///
/// All methods need to be implemented by the subclass. For the common use case,
/// a single boolean state or a bit-encoded state, the BooleanState and
-/// IntegerState classes are already provided. An abstract attribute can inherit
-/// from them to get the abstract state interface and additional methods to
-/// directly modify the state based if needed. See the class comments for help.
+/// {Inc,Dec,Bit}IntegerState classes are already provided. An abstract
+/// attribute can inherit from them to get the abstract state interface and
+/// additional methods to directly modify the state if needed. See the
+/// class comments for help.
struct AbstractState {
virtual ~AbstractState() {}
@@ -1068,15 +1251,15 @@ struct AbstractState {
/// force/indicate a fixpoint. If an optimistic one is indicated, the known
/// state will catch up with the assumed one, for a pessimistic fixpoint it is
/// the other way around.
-struct IntegerState : public AbstractState {
- /// Underlying integer type, we assume 32 bits to be enough.
- using base_t = uint32_t;
+template <typename base_ty, base_ty BestState, base_ty WorstState>
+struct IntegerStateBase : public AbstractState {
+ using base_t = base_ty;
- /// Initialize the (best) state.
- IntegerState(base_t BestState = ~0) : Assumed(BestState) {}
+ /// Return the best possible representable state.
+ static constexpr base_t getBestState() { return BestState; }
/// Return the worst possible representable state.
- static constexpr base_t getWorstState() { return 0; }
+ static constexpr base_t getWorstState() { return WorstState; }
/// See AbstractState::isValidState()
/// NOTE: For now we simply pretend that the worst possible state is invalid.
@@ -1103,117 +1286,338 @@ struct IntegerState : public AbstractState {
/// Return the assumed state encoding.
base_t getAssumed() const { return Assumed; }
+ /// Equality for IntegerStateBase.
+ bool
+ operator==(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
+ return this->getAssumed() == R.getAssumed() &&
+ this->getKnown() == R.getKnown();
+ }
+
+ /// Inequality for IntegerStateBase.
+ bool
+ operator!=(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
+ return !(*this == R);
+ }
+
+ /// "Clamp" this state with \p R. The result is subtype dependent but it is
+ /// intended that only information assumed in both states will be assumed in
+ /// this one afterwards.
+ void operator^=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
+ handleNewAssumedValue(R.getAssumed());
+ }
+
+ void operator|=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
+ joinOR(R.getAssumed(), R.getKnown());
+ }
+
+ void operator&=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
+ joinAND(R.getAssumed(), R.getKnown());
+ }
+
+protected:
+ /// Handle a new assumed value \p Value. Subtype dependent.
+ virtual void handleNewAssumedValue(base_t Value) = 0;
+
+ /// Handle a new known value \p Value. Subtype dependent.
+ virtual void handleNewKnownValue(base_t Value) = 0;
+
+  /// Join with the new values \p AssumedValue and \p KnownValue using an
+  /// OR combination. Subtype dependent.
+ virtual void joinOR(base_t AssumedValue, base_t KnownValue) = 0;
+
+  /// Join with the new values \p AssumedValue and \p KnownValue using an
+  /// AND combination. Subtype dependent.
+ virtual void joinAND(base_t AssumedValue, base_t KnownValue) = 0;
+
+ /// The known state encoding in an integer of type base_t.
+ base_t Known = getWorstState();
+
+ /// The assumed state encoding in an integer of type base_t.
+ base_t Assumed = getBestState();
+};
+
+/// Specialization of the integer state for a bit-wise encoding.
+template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0),
+ base_ty WorstState = 0>
+struct BitIntegerState
+ : public IntegerStateBase<base_ty, BestState, WorstState> {
+ using base_t = base_ty;
+
/// Return true if the bits set in \p BitsEncoding are "known bits".
bool isKnown(base_t BitsEncoding) const {
- return (Known & BitsEncoding) == BitsEncoding;
+ return (this->Known & BitsEncoding) == BitsEncoding;
}
/// Return true if the bits set in \p BitsEncoding are "assumed bits".
bool isAssumed(base_t BitsEncoding) const {
- return (Assumed & BitsEncoding) == BitsEncoding;
+ return (this->Assumed & BitsEncoding) == BitsEncoding;
}
/// Add the bits in \p BitsEncoding to the "known bits".
- IntegerState &addKnownBits(base_t Bits) {
+ BitIntegerState &addKnownBits(base_t Bits) {
// Make sure we never miss any "known bits".
- Assumed |= Bits;
- Known |= Bits;
+ this->Assumed |= Bits;
+ this->Known |= Bits;
return *this;
}
/// Remove the bits in \p BitsEncoding from the "assumed bits" if not known.
- IntegerState &removeAssumedBits(base_t BitsEncoding) {
- // Make sure we never loose any "known bits".
- Assumed = (Assumed & ~BitsEncoding) | Known;
- return *this;
+ BitIntegerState &removeAssumedBits(base_t BitsEncoding) {
+ return intersectAssumedBits(~BitsEncoding);
}
/// Remove the bits in \p BitsEncoding from the "known bits".
- IntegerState &removeKnownBits(base_t BitsEncoding) {
- Known = (Known & ~BitsEncoding);
+ BitIntegerState &removeKnownBits(base_t BitsEncoding) {
+ this->Known = (this->Known & ~BitsEncoding);
return *this;
}
/// Keep only "assumed bits" also set in \p BitsEncoding but all known ones.
- IntegerState &intersectAssumedBits(base_t BitsEncoding) {
+ BitIntegerState &intersectAssumedBits(base_t BitsEncoding) {
     // Make sure we never lose any "known bits".
- Assumed = (Assumed & BitsEncoding) | Known;
+ this->Assumed = (this->Assumed & BitsEncoding) | this->Known;
return *this;
}
+private:
+ void handleNewAssumedValue(base_t Value) override {
+ intersectAssumedBits(Value);
+ }
+ void handleNewKnownValue(base_t Value) override { addKnownBits(Value); }
+ void joinOR(base_t AssumedValue, base_t KnownValue) override {
+ this->Known |= KnownValue;
+ this->Assumed |= AssumedValue;
+ }
+ void joinAND(base_t AssumedValue, base_t KnownValue) override {
+ this->Known &= KnownValue;
+ this->Assumed &= AssumedValue;
+ }
+};
+
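A hedged sketch of the bit-wise encoding; the two property bits are hypothetical and only the methods declared above are used:
```
// Two hypothetical property bits; BestState = 3 means both bits set.
enum : uint16_t { NOT_CAPTURED_IN_MEM = 1 << 0, NOT_CAPTURED_IN_RET = 1 << 1 };
BitIntegerState<uint16_t, /* BestState */ 3, /* WorstState */ 0> S;
S.addKnownBits(NOT_CAPTURED_IN_MEM);      // known bits are also assumed
S.removeAssumedBits(NOT_CAPTURED_IN_RET); // give up one optimistic bit
assert(S.isKnown(NOT_CAPTURED_IN_MEM) && !S.isAssumed(NOT_CAPTURED_IN_RET));
```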
+/// Specialization of the integer state for an increasing value, hence ~0u is
+/// the best state and 0 the worst.
+template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0),
+ base_ty WorstState = 0>
+struct IncIntegerState
+ : public IntegerStateBase<base_ty, BestState, WorstState> {
+ using base_t = base_ty;
+
/// Take minimum of assumed and \p Value.
- IntegerState &takeAssumedMinimum(base_t Value) {
+ IncIntegerState &takeAssumedMinimum(base_t Value) {
// Make sure we never loose "known value".
- Assumed = std::max(std::min(Assumed, Value), Known);
+ this->Assumed = std::max(std::min(this->Assumed, Value), this->Known);
return *this;
}
/// Take maximum of known and \p Value.
- IntegerState &takeKnownMaximum(base_t Value) {
+ IncIntegerState &takeKnownMaximum(base_t Value) {
// Make sure we never loose "known value".
- Assumed = std::max(Value, Assumed);
- Known = std::max(Value, Known);
+ this->Assumed = std::max(Value, this->Assumed);
+ this->Known = std::max(Value, this->Known);
return *this;
}
- /// Equality for IntegerState.
- bool operator==(const IntegerState &R) const {
- return this->getAssumed() == R.getAssumed() &&
- this->getKnown() == R.getKnown();
+private:
+ void handleNewAssumedValue(base_t Value) override {
+ takeAssumedMinimum(Value);
+ }
+ void handleNewKnownValue(base_t Value) override { takeKnownMaximum(Value); }
+ void joinOR(base_t AssumedValue, base_t KnownValue) override {
+ this->Known = std::max(this->Known, KnownValue);
+ this->Assumed = std::max(this->Assumed, AssumedValue);
+ }
+ void joinAND(base_t AssumedValue, base_t KnownValue) override {
+ this->Known = std::min(this->Known, KnownValue);
+ this->Assumed = std::min(this->Assumed, AssumedValue);
}
+};
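A hedged sketch of the increasing state's clamping behavior, traced against the two public methods above:
```
IncIntegerState<uint32_t> S; // Assumed = ~0u (best), Known = 0 (worst)
S.takeKnownMaximum(8);       // Known becomes 8; Assumed stays at ~0u
S.takeAssumedMinimum(4);     // clamped: max(min(~0u, 4), Known) == 8
assert(S.getKnown() == 8 && S.getAssumed() == 8);
```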
- /// Inequality for IntegerState.
- bool operator!=(const IntegerState &R) const { return !(*this == R); }
+/// Specialization of the integer state for a decreasing value, hence 0 is the
+/// best state and ~0u the worst.
+template <typename base_ty = uint32_t>
+struct DecIntegerState : public IntegerStateBase<base_ty, 0, ~base_ty(0)> {
+ using base_t = base_ty;
- /// "Clamp" this state with \p R. The result is the minimum of the assumed
- /// information but not less than what was known before.
- ///
- /// TODO: Consider replacing the operator with a call or using it only when
- /// we can also take the maximum of the known information, thus when
- /// \p R is not dependent on additional assumed state.
- IntegerState operator^=(const IntegerState &R) {
- takeAssumedMinimum(R.Assumed);
+ /// Take maximum of assumed and \p Value.
+ DecIntegerState &takeAssumedMaximum(base_t Value) {
+    // Make sure we never lose the "known value".
+ this->Assumed = std::min(std::max(this->Assumed, Value), this->Known);
return *this;
}
- /// "Clamp" this state with \p R. The result is the maximum of the known
- /// information but not more than what was assumed before.
- IntegerState operator+=(const IntegerState &R) {
- takeKnownMaximum(R.Known);
+ /// Take minimum of known and \p Value.
+ DecIntegerState &takeKnownMinimum(base_t Value) {
+    // Make sure we never lose the "known value".
+ this->Assumed = std::min(Value, this->Assumed);
+ this->Known = std::min(Value, this->Known);
return *this;
}
- /// Make this the minimum, known and assumed, of this state and \p R.
- IntegerState operator&=(const IntegerState &R) {
- Known = std::min(Known, R.Known);
- Assumed = std::min(Assumed, R.Assumed);
- return *this;
+private:
+ void handleNewAssumedValue(base_t Value) override {
+ takeAssumedMaximum(Value);
+ }
+ void handleNewKnownValue(base_t Value) override { takeKnownMinimum(Value); }
+ void joinOR(base_t AssumedValue, base_t KnownValue) override {
+ this->Assumed = std::min(this->Assumed, KnownValue);
+ this->Assumed = std::min(this->Assumed, AssumedValue);
+ }
+ void joinAND(base_t AssumedValue, base_t KnownValue) override {
+ this->Assumed = std::max(this->Assumed, KnownValue);
+ this->Assumed = std::max(this->Assumed, AssumedValue);
}
+};
- /// Make this the maximum, known and assumed, of this state and \p R.
- IntegerState operator|=(const IntegerState &R) {
- Known = std::max(Known, R.Known);
- Assumed = std::max(Assumed, R.Assumed);
- return *this;
+/// Simple wrapper for a single bit (boolean) state.
+struct BooleanState : public IntegerStateBase<bool, 1, 0> {
+ using base_t = IntegerStateBase::base_t;
+
+ /// Set the assumed value to \p Value but never below the known one.
+ void setAssumed(bool Value) { Assumed &= (Known | Value); }
+
+  /// Set the known and assumed value to \p Value.
+ void setKnown(bool Value) {
+ Known |= Value;
+ Assumed |= Value;
}
-private:
- /// The known state encoding in an integer of type base_t.
- base_t Known = getWorstState();
+ /// Return true if the state is assumed to hold.
+ bool isAssumed() const { return getAssumed(); }
- /// The assumed state encoding in an integer of type base_t.
- base_t Assumed;
-};
+ /// Return true if the state is known to hold.
+ bool isKnown() const { return getKnown(); }
-/// Simple wrapper for a single bit (boolean) state.
-struct BooleanState : public IntegerState {
- BooleanState() : IntegerState(1){};
+private:
+ void handleNewAssumedValue(base_t Value) override {
+ if (!Value)
+ Assumed = Known;
+ }
+ void handleNewKnownValue(base_t Value) override {
+ if (Value)
+ Known = (Assumed = Value);
+ }
+ void joinOR(base_t AssumedValue, base_t KnownValue) override {
+ Known |= KnownValue;
+ Assumed |= AssumedValue;
+ }
+ void joinAND(base_t AssumedValue, base_t KnownValue) override {
+ Known &= KnownValue;
+ Assumed &= AssumedValue;
+ }
};
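And the boolean wrapper in action, a minimal sketch using only the setters and getters defined above:
```
BooleanState B;      // Assumed = true (best), Known = false (worst)
B.setAssumed(false); // retract the optimistic assumption
assert(!B.isAssumed() && !B.isKnown());
B.setKnown(true);    // new knowledge raises the assumption again
assert(B.isAssumed() && B.isKnown());
```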
+/// State for an integer range.
+struct IntegerRangeState : public AbstractState {
+
+ /// Bitwidth of the associated value.
+ uint32_t BitWidth;
+
+ /// State representing assumed range, initially set to empty.
+ ConstantRange Assumed;
+
+ /// State representing known range, initially set to [-inf, inf].
+ ConstantRange Known;
+
+ IntegerRangeState(uint32_t BitWidth)
+ : BitWidth(BitWidth), Assumed(ConstantRange::getEmpty(BitWidth)),
+ Known(ConstantRange::getFull(BitWidth)) {}
+
+ /// Return the worst possible representable state.
+ static ConstantRange getWorstState(uint32_t BitWidth) {
+ return ConstantRange::getFull(BitWidth);
+ }
+
+ /// Return the best possible representable state.
+ static ConstantRange getBestState(uint32_t BitWidth) {
+ return ConstantRange::getEmpty(BitWidth);
+ }
+
+  /// Return the bit width of the associated value.
+ uint32_t getBitWidth() const { return BitWidth; }
+
+ /// See AbstractState::isValidState()
+ bool isValidState() const override {
+ return BitWidth > 0 && !Assumed.isFullSet();
+ }
+
+ /// See AbstractState::isAtFixpoint()
+ bool isAtFixpoint() const override { return Assumed == Known; }
+
+ /// See AbstractState::indicateOptimisticFixpoint(...)
+ ChangeStatus indicateOptimisticFixpoint() override {
+ Known = Assumed;
+ return ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractState::indicatePessimisticFixpoint(...)
+ ChangeStatus indicatePessimisticFixpoint() override {
+ Assumed = Known;
+ return ChangeStatus::CHANGED;
+ }
+
+  /// Return the known state encoding.
+ ConstantRange getKnown() const { return Known; }
+
+ /// Return the assumed state encoding.
+ ConstantRange getAssumed() const { return Assumed; }
+
+ /// Unite assumed range with the passed state.
+ void unionAssumed(const ConstantRange &R) {
+    // Don't lose the known range.
+ Assumed = Assumed.unionWith(R).intersectWith(Known);
+ }
+
+ /// See IntegerRangeState::unionAssumed(..).
+ void unionAssumed(const IntegerRangeState &R) {
+ unionAssumed(R.getAssumed());
+ }
+
+ /// Unite known range with the passed state.
+ void unionKnown(const ConstantRange &R) {
+    // Don't lose the known range.
+ Known = Known.unionWith(R);
+ Assumed = Assumed.unionWith(Known);
+ }
+
+ /// See IntegerRangeState::unionKnown(..).
+ void unionKnown(const IntegerRangeState &R) { unionKnown(R.getKnown()); }
+
+ /// Intersect known range with the passed state.
+ void intersectKnown(const ConstantRange &R) {
+ Assumed = Assumed.intersectWith(R);
+ Known = Known.intersectWith(R);
+ }
+
+ /// See IntegerRangeState::intersectKnown(..).
+ void intersectKnown(const IntegerRangeState &R) {
+ intersectKnown(R.getKnown());
+ }
+
+ /// Equality for IntegerRangeState.
+ bool operator==(const IntegerRangeState &R) const {
+ return getAssumed() == R.getAssumed() && getKnown() == R.getKnown();
+ }
+
+ /// "Clamp" this state with \p R. The result is subtype dependent but it is
+ /// intended that only information assumed in both states will be assumed in
+ /// this one afterwards.
+ IntegerRangeState operator^=(const IntegerRangeState &R) {
+    // NOTE: The `^=` operator may suggest an intersection, but for ranges the
+    // clamp has to take the `union` of the assumed information.
+ unionAssumed(R);
+ return *this;
+ }
+
+ IntegerRangeState operator&=(const IntegerRangeState &R) {
+    // NOTE: The `&=` operator may suggest an intersection, but for ranges we
+    // need to take the `union` here as well.
+ unionKnown(R);
+ unionAssumed(R);
+ return *this;
+ }
+};
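A hedged sketch of the range state: it starts from the optimistic empty set and widens by union. `ConstantRange` and `APInt` are the usual LLVM types; the chosen range is illustrative:
```
IntegerRangeState RS(/* BitWidth */ 32);
RS.unionAssumed(ConstantRange(APInt(32, 0), APInt(32, 10))); // assume [0, 10)
assert(RS.isValidState() && !RS.isAtFixpoint());
RS.indicateOptimisticFixpoint(); // known catches up with assumed
assert(RS.getKnown() == RS.getAssumed());
```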
/// Helper struct necessary as the modular build fails if the virtual method
/// IRAttribute::manifest is defined in the Attributor.cpp.
struct IRAttributeManifest {
- static ChangeStatus manifestAttrs(Attributor &A, IRPosition &IRP,
+ static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP,
const ArrayRef<Attribute> &DeducedAttrs);
};
@@ -1238,12 +1642,12 @@ struct IRAttribute : public IRPosition, public Base {
/// See AbstractAttribute::initialize(...).
virtual void initialize(Attributor &A) override {
- if (hasAttr(getAttrKind())) {
+ const IRPosition &IRP = this->getIRPosition();
+ if (isa<UndefValue>(IRP.getAssociatedValue()) || hasAttr(getAttrKind())) {
this->getState().indicateOptimisticFixpoint();
return;
}
- const IRPosition &IRP = this->getIRPosition();
bool IsFnInterface = IRP.isFnInterfaceKind();
const Function *FnScope = IRP.getAnchorScope();
// TODO: Not all attributes require an exact definition. Find a way to
@@ -1259,6 +1663,8 @@ struct IRAttribute : public IRPosition, public Base {
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
+ if (isa<UndefValue>(getIRPosition().getAssociatedValue()))
+ return ChangeStatus::UNCHANGED;
SmallVector<Attribute, 4> DeducedAttrs;
getDeducedAttributes(getAnchorValue().getContext(), DeducedAttrs);
return IRAttributeManifest::manifestAttrs(A, getIRPosition(), DeducedAttrs);
@@ -1274,11 +1680,7 @@ struct IRAttribute : public IRPosition, public Base {
}
/// Return an IR position, see struct IRPosition.
- ///
- ///{
- IRPosition &getIRPosition() override { return *this; }
const IRPosition &getIRPosition() const override { return *this; }
- ///}
};
/// Base struct for all "concrete attribute" deductions.
@@ -1383,9 +1785,6 @@ protected:
/// add statistics for them.
virtual void trackStatistics() const = 0;
- /// Return an IR position, see struct IRPosition.
- virtual IRPosition &getIRPosition() = 0;
-
/// The actual update/transfer function which has to be implemented by the
/// derived classes.
///
@@ -1404,7 +1803,11 @@ raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S);
raw_ostream &operator<<(raw_ostream &OS, IRPosition::Kind);
raw_ostream &operator<<(raw_ostream &OS, const IRPosition &);
raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State);
-raw_ostream &operator<<(raw_ostream &OS, const IntegerState &S);
+template <typename base_ty, base_ty BestState, base_ty WorstState>
+raw_ostream &
+operator<<(raw_ostream &OS,
+ const IntegerStateBase<base_ty, BestState, WorstState> &State);
+raw_ostream &operator<<(raw_ostream &OS, const IntegerRangeState &State);
///}
struct AttributorPass : public PassInfoMixin<AttributorPass> {
@@ -1550,6 +1953,66 @@ struct AAWillReturn
static const char ID;
};
+/// An abstract attribute for undefined behavior.
+struct AAUndefinedBehavior
+ : public StateWrapper<BooleanState, AbstractAttribute>,
+ public IRPosition {
+ AAUndefinedBehavior(const IRPosition &IRP) : IRPosition(IRP) {}
+
+ /// Return true if "undefined behavior" is assumed.
+ bool isAssumedToCauseUB() const { return getAssumed(); }
+
+ /// Return true if "undefined behavior" is assumed for a specific instruction.
+ virtual bool isAssumedToCauseUB(Instruction *I) const = 0;
+
+ /// Return true if "undefined behavior" is known.
+ bool isKnownToCauseUB() const { return getKnown(); }
+
+ /// Return true if "undefined behavior" is known for a specific instruction.
+ virtual bool isKnownToCauseUB(Instruction *I) const = 0;
+
+ /// Return an IR position, see struct IRPosition.
+ const IRPosition &getIRPosition() const override { return *this; }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAUndefinedBehavior &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+/// An abstract interface to determine reachability of point A to B.
+struct AAReachability : public StateWrapper<BooleanState, AbstractAttribute>,
+ public IRPosition {
+ AAReachability(const IRPosition &IRP) : IRPosition(IRP) {}
+
+  /// Returns true if the 'From' instruction is assumed to reach the 'To'
+  /// instruction. Users should provide the two instructions they are
+  /// interested in, and the class determines (and caches) reachability.
+ bool isAssumedReachable(const Instruction *From,
+ const Instruction *To) const {
+ return true;
+ }
+
+  /// Returns true if the 'From' instruction is known to reach the 'To'
+  /// instruction. Users should provide the two instructions they are
+  /// interested in, and the class determines (and caches) reachability.
+ bool isKnownReachable(const Instruction *From, const Instruction *To) const {
+ return true;
+ }
+
+ /// Return an IR position, see struct IRPosition.
+ const IRPosition &getIRPosition() const override { return *this; }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAReachability &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
/// An abstract interface for all noalias attributes.
struct AANoAlias
: public IRAttribute<Attribute::NoAlias,
@@ -1612,6 +2075,9 @@ struct AAIsDead : public StateWrapper<BooleanState, AbstractAttribute>,
public IRPosition {
AAIsDead(const IRPosition &IRP) : IRPosition(IRP) {}
+ /// Returns true if the underlying value is assumed dead.
+ virtual bool isAssumedDead() const = 0;
+
/// Returns true if \p BB is assumed dead.
virtual bool isAssumedDead(const BasicBlock *BB) const = 0;
@@ -1639,11 +2105,7 @@ struct AAIsDead : public StateWrapper<BooleanState, AbstractAttribute>,
}
/// Return an IR position, see struct IRPosition.
- ///
- ///{
- IRPosition &getIRPosition() override { return *this; }
const IRPosition &getIRPosition() const override { return *this; }
- ///}
/// Create an abstract attribute view for the position \p IRP.
static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A);
@@ -1656,7 +2118,45 @@ struct AAIsDead : public StateWrapper<BooleanState, AbstractAttribute>,
struct DerefState : AbstractState {
   /// State representing the dereferenceable bytes.
- IntegerState DerefBytesState;
+ IncIntegerState<> DerefBytesState;
+
+  /// Map representing accessed memory offsets and sizes.
+  /// A key is the offset and the value is the size of the access.
+  /// For a load/store instruction such as
+  ///   p[offset] = v;
+  /// (offset, sizeof(v)) will be inserted into this map.
+  /// std::map is used because we want to iterate the keys in ascending order.
+ std::map<int64_t, uint64_t> AccessedBytesMap;
+
+  /// Helper function to calculate the dereferenceable bytes from the current
+  /// known bytes and the accessed bytes.
+  ///
+  /// ```
+  /// void f(int *A) {
+  ///   *A = 0;
+  ///   *(A+2) = 2;
+  ///   *(A+1) = 1;
+  ///   *(A+10) = 10;
+  /// }
+  /// ```
+  /// In that case, AccessedBytesMap is `{0:4, 4:4, 8:4, 40:4}`.
+  /// AccessedBytesMap is a std::map, so it is iterated in ascending order of
+  /// its keys (the offsets). KnownBytes is therefore updated like this:
+ ///
+  /// | Access  | KnownBytes |
+  /// | (0, 4)  | 0 -> 4     |
+  /// | (4, 4)  | 4 -> 8     |
+  /// | (8, 4)  | 8 -> 12    |
+  /// | (40, 4) | 12 (break) |
+ void computeKnownDerefBytesFromAccessedMap() {
+ int64_t KnownBytes = DerefBytesState.getKnown();
+ for (auto &Access : AccessedBytesMap) {
+ if (KnownBytes < Access.first)
+ break;
+ KnownBytes = std::max(KnownBytes, Access.first + (int64_t)Access.second);
+ }
+
+ DerefBytesState.takeKnownMaximum(KnownBytes);
+ }
   /// State representing whether the value is globally dereferenceable.
BooleanState GlobalState;
@@ -1687,6 +2187,9 @@ struct DerefState : AbstractState {
/// Update known dereferenceable bytes.
void takeKnownDerefBytesMaximum(uint64_t Bytes) {
DerefBytesState.takeKnownMaximum(Bytes);
+
+ // Known bytes might increase.
+ computeKnownDerefBytesFromAccessedMap();
}
/// Update assumed dereferenceable bytes.
@@ -1694,37 +2197,38 @@ struct DerefState : AbstractState {
DerefBytesState.takeAssumedMinimum(Bytes);
}
+ /// Add accessed bytes to the map.
+ void addAccessedBytes(int64_t Offset, uint64_t Size) {
+ AccessedBytesMap[Offset] = std::max(AccessedBytesMap[Offset], Size);
+
+ // Known bytes might increase.
+ computeKnownDerefBytesFromAccessedMap();
+ }
+
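A hedged sketch matching the worked example above: contiguous accesses from offset 0 grow the known dereferenceable bytes, while a gap stops the accumulation:
```
DerefState DS;
DS.addAccessedBytes(/* Offset */ 0, /* Size */ 4);
DS.addAccessedBytes(/* Offset */ 4, /* Size */ 4);
DS.addAccessedBytes(/* Offset */ 40, /* Size */ 4); // gap at byte 8
assert(DS.DerefBytesState.getKnown() == 8);         // 8, not 44
```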
/// Equality for DerefState.
bool operator==(const DerefState &R) {
return this->DerefBytesState == R.DerefBytesState &&
this->GlobalState == R.GlobalState;
}
- /// Inequality for IntegerState.
+ /// Inequality for DerefState.
bool operator!=(const DerefState &R) { return !(*this == R); }
- /// See IntegerState::operator^=
+ /// See IntegerStateBase::operator^=
DerefState operator^=(const DerefState &R) {
DerefBytesState ^= R.DerefBytesState;
GlobalState ^= R.GlobalState;
return *this;
}
- /// See IntegerState::operator+=
- DerefState operator+=(const DerefState &R) {
- DerefBytesState += R.DerefBytesState;
- GlobalState += R.GlobalState;
- return *this;
- }
-
- /// See IntegerState::operator&=
+ /// See IntegerStateBase::operator&=
DerefState operator&=(const DerefState &R) {
DerefBytesState &= R.DerefBytesState;
GlobalState &= R.GlobalState;
return *this;
}
- /// See IntegerState::operator|=
+ /// See IntegerStateBase::operator|=
DerefState operator|=(const DerefState &R) {
DerefBytesState |= R.DerefBytesState;
GlobalState |= R.GlobalState;
@@ -1777,16 +2281,18 @@ struct AADereferenceable
static const char ID;
};
+using AAAlignmentStateType =
+ IncIntegerState<uint32_t, /* maximal alignment */ 1U << 29, 0>;
/// An abstract interface for all align attributes.
-struct AAAlign
- : public IRAttribute<Attribute::Alignment,
- StateWrapper<IntegerState, AbstractAttribute>> {
+struct AAAlign : public IRAttribute<
+ Attribute::Alignment,
+ StateWrapper<AAAlignmentStateType, AbstractAttribute>> {
AAAlign(const IRPosition &IRP) : IRAttribute(IRP) {}
/// Return assumed alignment.
unsigned getAssumedAlign() const { return getAssumed(); }
- /// Return known alignemnt.
+ /// Return known alignment.
unsigned getKnownAlign() const { return getKnown(); }
/// Create an abstract attribute view for the position \p IRP.
@@ -1798,8 +2304,9 @@ struct AAAlign
/// An abstract interface for all nocapture attributes.
struct AANoCapture
- : public IRAttribute<Attribute::NoCapture,
- StateWrapper<IntegerState, AbstractAttribute>> {
+ : public IRAttribute<
+ Attribute::NoCapture,
+ StateWrapper<BitIntegerState<uint16_t, 7, 0>, AbstractAttribute>> {
AANoCapture(const IRPosition &IRP) : IRAttribute(IRP) {}
/// State encoding bits. A set bit in the state means the property holds.
@@ -1852,11 +2359,7 @@ struct AAValueSimplify : public StateWrapper<BooleanState, AbstractAttribute>,
AAValueSimplify(const IRPosition &IRP) : IRPosition(IRP) {}
/// Return an IR position, see struct IRPosition.
- ///
- ///{
- IRPosition &getIRPosition() { return *this; }
const IRPosition &getIRPosition() const { return *this; }
- ///}
/// Return an assumed simplified value if a single candidate is found. If
/// there cannot be one, return original value. If it is not clear yet, return
@@ -1882,11 +2385,7 @@ struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute>,
bool isKnownHeapToStack() const { return getKnown(); }
/// Return an IR position, see struct IRPosition.
- ///
- ///{
- IRPosition &getIRPosition() { return *this; }
const IRPosition &getIRPosition() const { return *this; }
- ///}
/// Create an abstract attribute view for the position \p IRP.
static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A);
@@ -1897,8 +2396,9 @@ struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute>,
/// An abstract interface for all memory related attributes.
struct AAMemoryBehavior
- : public IRAttribute<Attribute::ReadNone,
- StateWrapper<IntegerState, AbstractAttribute>> {
+ : public IRAttribute<
+ Attribute::ReadNone,
+ StateWrapper<BitIntegerState<uint8_t, 3>, AbstractAttribute>> {
AAMemoryBehavior(const IRPosition &IRP) : IRAttribute(IRP) {}
/// State encoding bits. A set bit in the state means the property holds.
@@ -1943,6 +2443,55 @@ struct AAMemoryBehavior
static const char ID;
};
+/// An abstract interface for range value analysis.
+struct AAValueConstantRange : public IntegerRangeState,
+ public AbstractAttribute,
+ public IRPosition {
+ AAValueConstantRange(const IRPosition &IRP)
+ : IntegerRangeState(
+ IRP.getAssociatedValue().getType()->getIntegerBitWidth()),
+ IRPosition(IRP) {}
+
+ /// Return an IR position, see struct IRPosition.
+ const IRPosition &getIRPosition() const override { return *this; }
+
+ /// See AbstractAttribute::getState(...).
+ IntegerRangeState &getState() override { return *this; }
+ const AbstractState &getState() const override { return *this; }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAValueConstantRange &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+  /// Return an assumed range for the associated value at a program point
+  /// \p CtxI. If \p CtxI is nullptr, simply return an assumed range.
+ virtual ConstantRange
+ getAssumedConstantRange(Attributor &A,
+ const Instruction *CtxI = nullptr) const = 0;
+
+  /// Return a known range for the associated value at a program point
+  /// \p CtxI. If \p CtxI is nullptr, simply return a known range.
+ virtual ConstantRange
+ getKnownConstantRange(Attributor &A,
+ const Instruction *CtxI = nullptr) const = 0;
+
+  /// Return an assumed constant for the associated value at a program point
+  /// \p CtxI.
+ Optional<ConstantInt *>
+ getAssumedConstantInt(Attributor &A, const Instruction *CtxI = nullptr) const {
+ ConstantRange RangeV = getAssumedConstantRange(A, CtxI);
+ if (auto *C = RangeV.getSingleElement())
+ return cast<ConstantInt>(
+ ConstantInt::get(getAssociatedValue().getType(), *C));
+ if (RangeV.isEmptySet())
+ return llvm::None;
+ return nullptr;
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
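A hedged sketch of how a querying attribute might consume this interface. `A`, `V`, and `*this` are assumed in scope, and `getAAFor`/`IRPosition::value` are the lookup helpers the surrounding framework provides:
```
const auto &RangeAA =
    A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(V));
Optional<ConstantInt *> C = RangeAA.getAssumedConstantInt(A);
if (C.hasValue() && *C) {
  // The assumed range collapsed to the single constant *C; V could now
  // be simplified to that constant.
}
```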
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index bbf270c400af..b4dde7b199ff 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -98,7 +98,7 @@ public:
using ImportMapTy = StringMap<FunctionsToImportTy>;
/// The set contains an entry for every global value the module exports.
- using ExportSetTy = std::unordered_set<GlobalValue::GUID>;
+ using ExportSetTy = DenseSet<ValueInfo>;
/// A function of this type is used to load modules referenced by the index.
using ModuleLoaderTy =
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/MergeFunctions.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/MergeFunctions.h
new file mode 100644
index 000000000000..822f0fd99188
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/MergeFunctions.h
@@ -0,0 +1,32 @@
+//===- MergeFunctions.h - Merge Identical Functions -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for equivalent functions that are mergeable and folds them,
+// redirecting callers of the discarded duplicates to a single retained copy in
+// order to reduce code size.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_MERGEFUNCTIONS_H
+#define LLVM_TRANSFORMS_IPO_MERGEFUNCTIONS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+/// Merge identical functions.
+class MergeFunctionsPass : public PassInfoMixin<MergeFunctionsPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_MERGEFUNCTIONS_H
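A hedged sketch of scheduling the new pass in a new-PM pipeline; pipeline setup and analysis registration are omitted, and the wrapper function name is an assumption:
```
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"

void addMergeFunctions(llvm::ModulePassManager &MPM) {
  MPM.addPass(llvm::MergeFunctionsPass()); // fold equivalent functions
}
```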
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index 22435e4ed1e5..8af2af7f352f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -251,7 +251,7 @@ void runWholeProgramDevirtOnIndex(
/// devirt target names for any locals that were exported.
void updateIndexWPDForExports(
ModuleSummaryIndex &Summary,
- function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+ function_ref<bool(StringRef, ValueInfo)> isExported,
std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap);
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
index 8894d96e591f..d7a6662510d3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -24,13 +24,14 @@ namespace llvm {
class InstCombinePass : public PassInfoMixin<InstCombinePass> {
InstCombineWorklist Worklist;
- bool ExpensiveCombines;
+ const bool ExpensiveCombines;
+ const unsigned MaxIterations;
public:
static StringRef name() { return "InstCombinePass"; }
- explicit InstCombinePass(bool ExpensiveCombines = true)
- : ExpensiveCombines(ExpensiveCombines) {}
+ explicit InstCombinePass(bool ExpensiveCombines = true);
+ explicit InstCombinePass(bool ExpensiveCombines, unsigned MaxIterations);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
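A hedged sketch using the new two-argument constructor declared above to cap the fixpoint iteration count; the wrapper function name is an assumption:
```
#include "llvm/Transforms/InstCombine/InstCombine.h"

void addBoundedInstCombine(llvm::FunctionPassManager &FPM) {
  // Stop after two iterations instead of running to a fixpoint.
  FPM.addPass(llvm::InstCombinePass(/* ExpensiveCombines */ true,
                                    /* MaxIterations */ 2));
}
```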
@@ -42,14 +43,14 @@ public:
class InstructionCombiningPass : public FunctionPass {
InstCombineWorklist Worklist;
const bool ExpensiveCombines;
+ const unsigned MaxIterations;
public:
static char ID; // Pass identification, replacement for typeid
- InstructionCombiningPass(bool ExpensiveCombines = true)
- : FunctionPass(ID), ExpensiveCombines(ExpensiveCombines) {
- initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
- }
+ explicit InstructionCombiningPass(bool ExpensiveCombines = true);
+ explicit InstructionCombiningPass(bool ExpensiveCombines,
+ unsigned MaxIterations);
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
@@ -68,6 +69,8 @@ public:
// %Z = add int 2, %X
//
FunctionPass *createInstructionCombiningPass(bool ExpensiveCombines = true);
+FunctionPass *createInstructionCombiningPass(bool ExpensiveCombines,
+ unsigned MaxIterations);
}
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
index 6c33bdbafbd2..3ed0a820db10 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
@@ -24,8 +24,8 @@ namespace llvm {
/// InstCombineWorklist - This is the worklist management logic for
/// InstCombine.
class InstCombineWorklist {
- SmallVector<Instruction*, 256> Worklist;
- DenseMap<Instruction*, unsigned> WorklistMap;
+ SmallVector<Instruction *, 256> Worklist;
+ DenseMap<Instruction *, unsigned> WorklistMap;
public:
InstCombineWorklist() = default;
@@ -38,6 +38,9 @@ public:
/// Add - Add the specified instruction to the worklist if it isn't already
/// in it.
void Add(Instruction *I) {
+ assert(I);
+ assert(I->getParent() && "Instruction not inserted yet?");
+
if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
LLVM_DEBUG(dbgs() << "IC: ADD: " << *I << '\n');
Worklist.push_back(I);
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h
index f06230b6f366..1f2842836303 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h
@@ -18,7 +18,6 @@
namespace llvm {
-class BasicBlockPass;
class Function;
class FunctionPass;
class ModulePass;
@@ -50,13 +49,19 @@ FunctionPass *createSCCPPass();
//===----------------------------------------------------------------------===//
//
// DeadInstElimination - This pass quickly removes trivially dead instructions
-// without modifying the CFG of the function. It is a BasicBlockPass, so it
-// runs efficiently when queued next to other BasicBlockPass's.
+// without modifying the CFG of the function. It is a FunctionPass.
//
Pass *createDeadInstEliminationPass();
//===----------------------------------------------------------------------===//
//
+// RedundantDbgInstElimination - This pass removes redundant dbg intrinsics
+// without modifying the CFG of the function. It is a FunctionPass.
+//
+Pass *createRedundantDbgInstEliminationPass();
+
+//===----------------------------------------------------------------------===//
+//
// DeadCodeElimination - This pass is more powerful than DeadInstElimination,
// because it is worklist driven and can revisit instructions when the
// instructions using them become dead, to eliminate chains of dead
@@ -363,6 +368,12 @@ Pass *createLowerGuardIntrinsicPass();
//===----------------------------------------------------------------------===//
//
+// LowerMatrixIntrinsics - Lower matrix intrinsics to vector operations.
+//
+Pass *createLowerMatrixIntrinsicsPass();
+
+//===----------------------------------------------------------------------===//
+//
// LowerWidenableCondition - Lower widenable condition to i1 true.
//
Pass *createLowerWidenableConditionPass();
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
index 39039b093241..26d4a2476a86 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
@@ -14,7 +14,7 @@
// cost. If the constant can be folded into the instruction (the cost is
// TCC_Free) or the cost is just a simple operation (TCC_BASIC), then we don't
// consider it expensive and leave it alone. This is the default behavior and
-// the default implementation of getIntImmCost will always return TCC_Free.
+// the default implementation of getIntImmCostInst will always return TCC_Free.
//
// If the cost is more than TCC_BASIC, then the integer constant can't be folded
// into the instruction and it might be beneficial to hoist the constant.
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/GVN.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/GVN.h
index 8a64768af6b5..5a3d30de16a3 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -94,7 +94,7 @@ public:
// value number to the index of Expression in Expressions. We use it
// instead of a DenseMap because filling such mapping is faster than
// filling a DenseMap and the compile time is a little better.
- uint32_t nextExprNumber;
+ uint32_t nextExprNumber = 0;
std::vector<Expression> Expressions;
std::vector<uint32_t> ExprIdx;
@@ -107,9 +107,9 @@ public:
DenseMap<std::pair<uint32_t, const BasicBlock *>, uint32_t>;
PhiTranslateMap PhiTranslateTable;
- AliasAnalysis *AA;
- MemoryDependenceResults *MD;
- DominatorTree *DT;
+ AliasAnalysis *AA = nullptr;
+ MemoryDependenceResults *MD = nullptr;
+ DominatorTree *DT = nullptr;
uint32_t nextValueNumber = 1;
@@ -130,6 +130,7 @@ public:
ValueTable(const ValueTable &Arg);
ValueTable(ValueTable &&Arg);
~ValueTable();
+ ValueTable &operator=(const ValueTable &Arg);
uint32_t lookupOrAdd(Value *V);
uint32_t lookup(Value *V, bool Verify = true) const;
@@ -154,14 +155,14 @@ private:
friend class gvn::GVNLegacyPass;
friend struct DenseMapInfo<Expression>;
- MemoryDependenceResults *MD;
- DominatorTree *DT;
- const TargetLibraryInfo *TLI;
- AssumptionCache *AC;
+ MemoryDependenceResults *MD = nullptr;
+ DominatorTree *DT = nullptr;
+ const TargetLibraryInfo *TLI = nullptr;
+ AssumptionCache *AC = nullptr;
SetVector<BasicBlock *> DeadBlocks;
- OptimizationRemarkEmitter *ORE;
- ImplicitControlFlowTracking *ICF;
- LoopInfo *LI;
+ OptimizationRemarkEmitter *ORE = nullptr;
+ ImplicitControlFlowTracking *ICF = nullptr;
+ LoopInfo *LI = nullptr;
ValueTable VN;
@@ -293,8 +294,8 @@ private:
};
/// Create a legacy GVN pass. This also allows parameterizing whether or not
-/// loads are eliminated by the pass.
-FunctionPass *createGVNPass(bool NoLoads = false);
+/// MemDep is enabled.
+FunctionPass *createGVNPass(bool NoMemDepAnalysis = false);
/// A simple and fast domtree-based GVN pass to hoist common expressions
/// from sibling branches.
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index 0464d40c45e6..a8beb8ca6e05 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -109,7 +109,17 @@ public:
void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
- bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
+ bool MaybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB);
+ void UpdateSSA(BasicBlock *BB, BasicBlock *NewBB,
+ DenseMap<Instruction *, Value *> &ValueMapping);
+ DenseMap<Instruction *, Value *> CloneInstructions(BasicBlock::iterator BI,
+ BasicBlock::iterator BE,
+ BasicBlock *NewBB,
+ BasicBlock *PredBB);
+ bool TryThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs,
+ BasicBlock *SuccBB);
+ void ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs);
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LICM.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LICM.h
index f0ea928abd49..a8f1c5348617 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LICM.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LICM.h
@@ -34,6 +34,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
index 7920269b0fb2..233963528595 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
@@ -15,9 +15,7 @@
namespace llvm {
-class Loop;
-struct LoopStandardAnalysisResults;
-class LPMUpdater;
+class Function;
/// A simple loop rotation transformation.
class LoopUnrollAndJamPass : public PassInfoMixin<LoopUnrollAndJamPass> {
@@ -25,8 +23,7 @@ class LoopUnrollAndJamPass : public PassInfoMixin<LoopUnrollAndJamPass> {
public:
explicit LoopUnrollAndJamPass(int OptLevel = 2) : OptLevel(OptLevel) {}
- PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR, LPMUpdater &U);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
index afeb1f1da029..7b049bdc8ad1 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h
new file mode 100644
index 000000000000..2f75cd5017aa
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h
@@ -0,0 +1,24 @@
+//===- LowerMatrixIntrinsics.h - Lower matrix intrinsics. -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers matrix intrinsics down to vector operations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERMATRIXINTRINSICSPASS_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERMATRIXINTRINSICSPASS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+struct LowerMatrixIntrinsicsPass : PassInfoMixin<LowerMatrixIntrinsicsPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+
+#endif
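A hedged sketch of running the lowering with the new pass manager; the wrapper function name is an assumption:
```
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"

void addMatrixLowering(llvm::FunctionPassManager &FPM) {
  FPM.addPass(llvm::LowerMatrixIntrinsicsPass()); // matrix ops -> vector ops
}
```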
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h
index 6e03453babf1..bb31646ce462 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h
@@ -119,6 +119,13 @@ ModulePass *createStripNonLineTableDebugInfoPass();
// number of conditional branches in the hot paths based on profiles.
//
FunctionPass *createControlHeightReductionLegacyPass();
+
+//===----------------------------------------------------------------------===//
+//
+// InjectTLIMappingsLegacy - populates the VFABI attribute with the
+// scalar-to-vector mappings from the TargetLibraryInfo.
+//
+FunctionPass *createInjectTLIMappingsLegacyPass();
}
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 698e57fd0394..dec8447c9f52 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -94,6 +94,10 @@ bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU = nullptr,
MemoryDependenceResults *MemDep = nullptr,
bool PredecessorWithTwoSuccessors = false);
+/// Try to remove redundant dbg.value instructions from given basic block.
+/// Returns true if at least one instruction was removed.
+bool RemoveRedundantDbgInstrs(BasicBlock *BB);
+
/// Replace all uses of an instruction (specified by BI) with a value, then
/// remove and delete the original instruction.
void ReplaceInstWithValue(BasicBlock::InstListType &BIL,
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
new file mode 100644
index 000000000000..32eb7cc2ab04
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
@@ -0,0 +1,56 @@
+//===- Transform/Utils/CodeMoverUtils.h - CodeMover Utils -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions determines whether it is safe to move basic blocks
+// and instructions contained within a function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H
+#define LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H
+
+namespace llvm {
+
+class BasicBlock;
+class DependenceInfo;
+class DominatorTree;
+class Instruction;
+class PostDominatorTree;
+
+/// Return true if \p I0 and \p I1 are control flow equivalent.
+/// Two instructions are control flow equivalent if when one executes,
+/// the other is guaranteed to execute. This is determined using dominators
+/// and post-dominators: if A dominates B and B post-dominates A then A and B
+/// are control-flow equivalent.
+bool isControlFlowEquivalent(const Instruction &I0, const Instruction &I1,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT);
+
+/// Return true if \p BB0 and \p BB1 are control flow equivalent.
+/// Two basic blocks are control flow equivalent if when one executes, the other
+/// is guaranteed to execute. This is determined using dominators and
+/// post-dominators: if A dominates B and B post-dominates A then A and B are
+/// control-flow equivalent.
+bool isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT);
+
+/// Return true if \p I can be safely moved before \p InsertPoint.
+bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
+ const DominatorTree &DT, const PostDominatorTree &PDT,
+ DependenceInfo &DI);
+
+/// Move instructions from \p FromBB bottom up to the beginning of \p ToBB
+/// when proven safe.
+void moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB,
+ const DominatorTree &DT, const PostDominatorTree &PDT,
+ DependenceInfo &DI);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H
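A hedged sketch of the intended usage; `I` and `InsertPoint` are assumed to be `Instruction`s in the same function, and `DT`, `PDT`, and `DI` the usual analysis results for it:
```
// Hoist I before InsertPoint only when the dependence- and CFG-based
// checks declared above prove the move safe.
if (isSafeToMoveBefore(I, InsertPoint, DT, PDT, DI))
  I.moveBefore(&InsertPoint);
```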
diff --git a/contrib/llvm-project/llvm/tools/opt/Debugify.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Debugify.h
index 266f577951ae..0b5ec738750d 100644
--- a/contrib/llvm-project/llvm/tools/opt/Debugify.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Debugify.h
@@ -10,13 +10,12 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_OPT_DEBUGIFY_H
-#define LLVM_TOOLS_OPT_DEBUGIFY_H
+#ifndef LLVM_TRANSFORM_UTILS_DEBUGIFY_H
+#define LLVM_TRANSFORM_UTILS_DEBUGIFY_H
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Support/raw_ostream.h"
llvm::ModulePass *createDebugifyModulePass();
llvm::FunctionPass *createDebugifyFunctionPass();
@@ -53,9 +52,6 @@ struct DebugifyStatistics {
/// Map pass names to a per-pass DebugifyStatistics instance.
using DebugifyStatsMap = llvm::MapVector<llvm::StringRef, DebugifyStatistics>;
-/// Export per-pass debugify statistics to the file specified by \p Path.
-void exportDebugifyStats(llvm::StringRef Path, const DebugifyStatsMap &Map);
-
llvm::ModulePass *
createCheckDebugifyModulePass(bool Strip = false,
llvm::StringRef NameOfWrappedPass = "",
@@ -71,4 +67,4 @@ struct NewPMCheckDebugifyPass
llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &AM);
};
-#endif // LLVM_TOOLS_OPT_DEBUGIFY_H
+#endif // LLVM_TRANSFORM_UTILS_DEBUGIFY_H
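
For context, a typical harness around these entry points under the legacy pass manager; PassUnderTest and the "my-pass" label are placeholders, not part of this change:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/Debugify.h"
using namespace llvm;

// Sketch: attach synthetic debug info, run the pass being tested, then
// check (and strip) what survived.
static void runDebugifyHarness(Module &M, Pass *PassUnderTest) {
  legacy::PassManager PM;
  PM.add(createDebugifyModulePass());
  PM.add(PassUnderTest); // hypothetical pass under test
  PM.add(createCheckDebugifyModulePass(/*Strip=*/true, "my-pass"));
  PM.run(M);
}
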
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
index 9c2a9ea531ea..2c6c3adc8dad 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -49,7 +49,7 @@ class FunctionImportGlobalProcessing {
DenseMap<const Comdat *, Comdat *> RenamedComdats;
/// Check if we should promote the given local value to global scope.
- bool shouldPromoteLocalToGlobal(const GlobalValue *SGV);
+ bool shouldPromoteLocalToGlobal(const GlobalValue *SGV, ValueInfo VI);
#ifndef NDEBUG
/// Check if the given value is a local that can't be renamed (promoted).
@@ -67,11 +67,9 @@ class FunctionImportGlobalProcessing {
/// import SGV as a definition, otherwise import as a declaration.
bool doImportAsDefinition(const GlobalValue *SGV);
- /// Get the name for SGV that should be used in the linked destination
- /// module. Specifically, this handles the case where we need to rename
- /// a local that is being promoted to global scope, which it will always
- /// do when \p DoPromote is true (or when importing a local).
- std::string getName(const GlobalValue *SGV, bool DoPromote);
+ /// Get the name for a local SGV that should be promoted and renamed to global
+ /// scope in the linked destination module.
+ std::string getPromotedName(const GlobalValue *SGV);
/// Process globals so that they can be used in ThinLTO. This includes
 /// promoting local variables so that they can be referenced externally by
@@ -107,9 +105,6 @@ public:
}
bool run();
-
- static bool doImportAsDefinition(const GlobalValue *SGV,
- SetVector<GlobalValue *> *GlobalsToImport);
};
/// Perform in-place global value handling on the given Module for
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/GuardUtils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/GuardUtils.h
index 3b365c56a5c0..7ab5d9ef4f23 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/GuardUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/GuardUtils.h
@@ -14,15 +14,30 @@
namespace llvm {
+class BranchInst;
class CallInst;
class Function;
+class Value;
 /// Splits control flow at the point of \p Guard, replacing it with an
 /// explicit branch on the guard's first argument. The taken branch then goes to
/// the block that contains \p Guard's successors, and the non-taken branch
/// goes to a newly-created deopt block that contains a sole call of the
-/// deoptimize function \p DeoptIntrinsic.
-void makeGuardControlFlowExplicit(Function *DeoptIntrinsic, CallInst *Guard);
+/// deoptimize function \p DeoptIntrinsic. If 'UseWC' is set, preserve the
+/// widenable nature of the guard by lowering to an equivalent form. If not set,
+/// lower to a form without widenable semantics.
+void makeGuardControlFlowExplicit(Function *DeoptIntrinsic, CallInst *Guard,
+ bool UseWC);
+
+/// Given a branch we know is widenable (defined per Analysis/GuardUtils.h),
+/// widen it such that condition 'NewCond' is also known to hold on the taken
+/// path. Branch remains widenable after transform.
+void widenWidenableBranch(BranchInst *WidenableBR, Value *NewCond);
+
+/// Given a branch we know is widenable (defined per Analysis/GuardUtils.h),
+/// *set* its condition such that (only) 'Cond' is known to hold on the taken
+/// path and that the branch remains widenable after transform.
+void setWidenableBranchCond(BranchInst *WidenableBR, Value *Cond);
} // llvm
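
An IR-level illustration of widenWidenableBranch; the value and label names are invented, and the lowering shown is only a rough picture of the widenable-condition pattern:

// Assumes WidenableBR was matched as widenable (Analysis/GuardUtils.h) and
// NewCond is an i1 available at the branch.
//
// Before: br i1 %explicit.chk, label %guarded, label %deopt ; widenable
// After : roughly %wide.chk = and i1 %explicit.chk, %newcond
//         br i1 %wide.chk, label %guarded, label %deopt     ; still widenable
widenWidenableBranch(WidenableBR, NewCond);
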
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
new file mode 100644
index 000000000000..84e4fee51c26
--- /dev/null
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
@@ -0,0 +1,37 @@
+//===- InjectTLIMappings.h - TLI to VFABI attribute injection ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Populates the VFABI attribute with the scalar-to-vector mappings
+// from the TargetLibraryInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_UTILS_INJECTTLIMAPPINGS_H
+#define LLVM_TRANSFORMS_UTILS_INJECTTLIMAPPINGS_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+
+namespace llvm {
+class InjectTLIMappings : public PassInfoMixin<InjectTLIMappings> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+// Legacy pass
+class InjectTLIMappingsLegacy : public FunctionPass {
+public:
+ static char ID;
+ InjectTLIMappingsLegacy() : FunctionPass(ID) {
+ initializeInjectTLIMappingsLegacyPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
+
+} // End namespace llvm
+#endif // LLVM_TRANSFORMS_UTILS_INJECTTLIMAPPINGS_H
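
A minimal scheduling sketch for the new-PM flavor of the pass; the surrounding FunctionPassManager setup and analysis registration are assumed:

#include "llvm/Transforms/Utils/InjectTLIMappings.h"
using namespace llvm;

// Sketch: queue the pass so calls gain VFABI variant attributes before
// the vectorizers run.
static void schedule(FunctionPassManager &FPM) {
  FPM.addPass(InjectTLIMappings());
}
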
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Local.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Local.h
index 9fcb2f64d79b..d1dc0b3e46b9 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/Local.h
@@ -354,6 +354,10 @@ AllocaInst *findAllocaForValue(Value *V,
/// Returns true if any debug users were updated.
bool salvageDebugInfo(Instruction &I);
+/// Salvage all debug users of the instruction \p I or mark it as undef if it
+/// cannot be salvaged.
+void salvageDebugInfoOrMarkUndef(Instruction &I);
+
/// Implementation of salvageDebugInfo, applying only to instructions in
/// \p Insns, rather than all debug users of \p I.
bool salvageDebugInfoForDbgValues(Instruction &I,
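
A sketch of the intended call pattern for the salvageDebugInfoOrMarkUndef helper added above, at an assumed deletion site where I is an Instruction*:

// Salvage (or undef) debug users first so no dbg.value ends up pointing at
// a freed instruction.
if (isInstructionTriviallyDead(I)) {
  salvageDebugInfoOrMarkUndef(*I);
  I->eraseFromParent();
}
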
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index d32f08717e9b..9ed96809ed99 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -73,7 +73,8 @@ bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
///
/// Returns true if any modifications are made.
bool formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
- DominatorTree &DT, LoopInfo &LI);
+ DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution *SE);
/// Put loop into LCSSA form.
///
@@ -132,7 +133,7 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
/// ORE. It returns changed status.
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, Loop *, AliasSetTracker *,
- MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
/// This function deletes dead loops. The caller of this function needs to
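
A caller-side sketch for the new ScalarEvolution parameter; Worklist, DT, LI, and SE are assumed to be in scope, and passing a null SE appears to remain legal given the pointer type:

// Passing &SE lets the utility invalidate SCEV values for instructions whose
// uses now cross the loop boundary.
bool Changed = formLCSSAForInstructions(Worklist, DT, LI, &SE);
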
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
index c69af5588741..c2da86406e71 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -13,6 +13,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
#define LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include <utility> // for std::pair
@@ -108,6 +109,13 @@ void filterDeadComdatFunctions(
/// unique identifier for this module, so we return the empty string.
std::string getUniqueModuleId(Module *M);
+class CallInst;
+namespace VFABI {
+/// Overwrite the Vector Function ABI variants attribute with the names
+/// provided in \p VariantMappings.
+void setVectorVariantNames(CallInst *CI,
+ const SmallVector<std::string, 8> &VariantMappings);
+} // End VFABI namespace
} // End llvm namespace
#endif // LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
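
An illustrative call to the new VFABI helper; the mangled variant names below follow the VFABI naming scheme but are invented for this sketch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;

// Attach two (made-up) vector-function variants to a call site.
static void tagCall(CallInst *CI) {
  SmallVector<std::string, 8> VariantMappings = {
      "_ZGV_LLVM_N2v_sin(__svml_sin2)",
      "_ZGV_LLVM_N4v_sin(__svml_sin4)"};
  VFABI::setVectorVariantNames(CI, VariantMappings);
}
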
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 88c2ef787ad8..610668adcfa5 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -175,6 +175,7 @@ private:
Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B);
Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeMemCCpy(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemPCpy(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B);
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SizeOpts.h
index 1a052c694e6d..ba0f86c45263 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SizeOpts.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SizeOpts.h
@@ -13,6 +13,20 @@
#ifndef LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
#define LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+extern cl::opt<bool> EnablePGSO;
+extern cl::opt<bool> PGSOLargeWorkingSetSizeOnly;
+extern cl::opt<bool> PGSOIRPassOrTestOnly;
+extern cl::opt<bool> PGSOColdCodeOnly;
+extern cl::opt<bool> ForcePGSO;
+extern cl::opt<int> PgsoCutoffInstrProf;
+extern cl::opt<int> PgsoCutoffSampleProf;
+
namespace llvm {
class BasicBlock;
@@ -20,14 +34,73 @@ class BlockFrequencyInfo;
class Function;
class ProfileSummaryInfo;
-/// Returns true if function \p F is suggested to be size-optimized base on the
+enum class PGSOQueryType {
+ IRPass, // A query call from an IR-level transform pass.
+ Test, // A query call from a unit test.
+ Other, // Others.
+};
+
+template<typename AdapterT, typename FuncT, typename BFIT>
+bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
+ BFIT *BFI, PGSOQueryType QueryType) {
+ assert(F);
+ if (!PSI || !BFI || !PSI->hasProfileSummary())
+ return false;
+ if (ForcePGSO)
+ return true;
+ if (!EnablePGSO)
+ return false;
+ // Temporarily enable size optimizations only for the IR pass or test query
+ // sites for gradual commit/rollout. This is to be removed later.
+ if (PGSOIRPassOrTestOnly && !(QueryType == PGSOQueryType::IRPass ||
+ QueryType == PGSOQueryType::Test))
+ return false;
+ if (PGSOColdCodeOnly ||
+ (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) {
+ // Even if the working set size isn't large, size-optimize cold code.
+ return AdapterT::isFunctionColdInCallGraph(F, PSI, *BFI);
+ }
+ return !AdapterT::isFunctionHotInCallGraphNthPercentile(
+ PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf,
+ F, PSI, *BFI);
+}
+
+template<typename AdapterT, typename BlockT, typename BFIT>
+bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
+ BFIT *BFI, PGSOQueryType QueryType) {
+ assert(BB);
+ if (!PSI || !BFI || !PSI->hasProfileSummary())
+ return false;
+ if (ForcePGSO)
+ return true;
+ if (!EnablePGSO)
+ return false;
+ // Temporarily enable size optimizations only for the IR pass or test query
+ // sites for gradual commit/rollout. This is to be removed later.
+ if (PGSOIRPassOrTestOnly && !(QueryType == PGSOQueryType::IRPass ||
+ QueryType == PGSOQueryType::Test))
+ return false;
+ if (PGSOColdCodeOnly ||
+ (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) {
+ // Even if the working set size isn't large, size-optimize cold code.
+ return AdapterT::isColdBlock(BB, PSI, BFI);
+ }
+ return !AdapterT::isHotBlockNthPercentile(
+ PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf,
+ BB, PSI, BFI);
+}
+
+/// Returns true if function \p F is suggested to be size-optimized based on the
/// profile.
-bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI);
-/// Returns true if basic block \p BB is suggested to be size-optimized base
-/// on the profile.
-bool shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI);
+bool shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI,
+ PGSOQueryType QueryType = PGSOQueryType::Other);
+
+/// Returns true if basic block \p BB is suggested to be size-optimized based on
+/// the profile.
+bool shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI,
+ PGSOQueryType QueryType = PGSOQueryType::Other);
} // end namespace llvm
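
A sketch of a query site inside an assumed IR transform; F, PSI, and BFI are taken to be in scope:

// Consult the profile-guided size heuristics before choosing between a
// compact and a fast expansion.
if (shouldOptimizeForSize(&F, PSI, BFI, PGSOQueryType::IRPass)) {
  // emit the smaller code path
}
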
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
index f68534ecd2eb..b8a4fe72ea25 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -29,9 +29,7 @@ struct UnifyFunctionExitNodes : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- UnifyFunctionExitNodes() : FunctionPass(ID) {
- initializeUnifyFunctionExitNodesPass(*PassRegistry::getPassRegistry());
- }
+ UnifyFunctionExitNodes();
// We can preserve non-critical-edgeness when we unify function exit nodes
void getAnalysisUsage(AnalysisUsage &AU) const override;
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize.h
index 88a0e49d0fae..bca78d073003 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize.h
@@ -16,7 +16,6 @@
namespace llvm {
class BasicBlock;
-class BasicBlockPass;
class Pass;
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index d824e2903ef3..ac6efc7c695f 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -58,6 +58,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
#include <functional>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index 32ccc8a46380..237781dfe22e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -24,6 +24,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
namespace llvm {
@@ -138,7 +139,7 @@ private:
bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
bool vectorizeStoreChain(ArrayRef<Value *> Chain, slpvectorizer::BoUpSLP &R,
- unsigned VecRegSize);
+ unsigned Idx);
bool vectorizeStores(ArrayRef<StoreInst *> Stores, slpvectorizer::BoUpSLP &R);
diff --git a/contrib/llvm-project/llvm/include/llvm/XRay/FDRRecords.h b/contrib/llvm-project/llvm/include/llvm/XRay/FDRRecords.h
index e3e16f71e2fe..d53e6aa812f2 100644
--- a/contrib/llvm-project/llvm/include/llvm/XRay/FDRRecords.h
+++ b/contrib/llvm-project/llvm/include/llvm/XRay/FDRRecords.h
@@ -289,7 +289,7 @@ public:
};
class CallArgRecord : public MetadataRecord {
- uint64_t Arg;
+ uint64_t Arg = 0;
friend class RecordInitializer;
public:
@@ -371,8 +371,8 @@ public:
class FunctionRecord : public Record {
RecordTypes Kind;
- int32_t FuncId;
- uint32_t Delta;
+ int32_t FuncId = 0;
+ uint32_t Delta = 0;
friend class RecordInitializer;
static constexpr unsigned kFunctionRecordSize = 8;
diff --git a/contrib/llvm-project/llvm/include/llvm/module.modulemap b/contrib/llvm-project/llvm/include/llvm/module.modulemap
index ecb3b37004fd..05de40698e35 100644
--- a/contrib/llvm-project/llvm/include/llvm/module.modulemap
+++ b/contrib/llvm-project/llvm/include/llvm/module.modulemap
@@ -152,6 +152,13 @@ module LLVM_DebugInfo_CodeView {
textual header "DebugInfo/CodeView/CodeViewSymbols.def"
}
+module LLVM_DWARFLinker {
+ requires cplusplus
+
+ umbrella "DWARFLinker"
+ module * { export * }
+}
+
module LLVM_ExecutionEngine {
requires cplusplus
@@ -169,12 +176,31 @@ module LLVM_ExecutionEngine {
// FIXME: These exclude directives were added as a workaround for
// <rdar://problem/29247092> and should be removed once it is fixed.
- exclude header "ExecutionEngine/Orc/RawByteChannel.h"
- exclude header "ExecutionEngine/Orc/RPCUtils.h"
exclude header "ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
exclude header "ExecutionEngine/Orc/OrcRemoteTargetClient.h"
exclude header "ExecutionEngine/Orc/OrcRemoteTargetServer.h"
exclude header "ExecutionEngine/Orc/RemoteObjectLayer.h"
+
+ // Exclude headers from LLVM_OrcSupport.
+ exclude header "ExecutionEngine/Orc/OrcError.h"
+ exclude header "ExecutionEngine/Orc/RPC/RPCUtils.h"
+ exclude header "ExecutionEngine/Orc/RPC/RPCSerialization.h"
+ exclude header "ExecutionEngine/Orc/RPC/RawByteChannel.h"
+
+}
+
+// Orc utilities that depend only on Support (not ExecutionEngine or
+// IR). This is a workaround for ExecutionEngine's broken layering, and will
+// be removed in the future.
+module LLVM_OrcSupport {
+ requires cplusplus
+
+ header "ExecutionEngine/Orc/OrcError.h"
+ header "ExecutionEngine/Orc/RPC/RPCUtils.h"
+ header "ExecutionEngine/Orc/RPC/RPCSerialization.h"
+ header "ExecutionEngine/Orc/RPC/RawByteChannel.h"
+
+ export *
}
module LLVM_Pass {
@@ -250,6 +276,7 @@ module LLVM_IR {
module * { export * }
// These are intended for (repeated) textual inclusion.
+ textual header "IR/ConstrainedOps.def"
textual header "IR/DebugInfoFlags.def"
textual header "IR/Instruction.def"
textual header "IR/Metadata.def"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
index 55dd9a4cda08..1c7678a602d8 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -44,6 +44,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
@@ -734,6 +735,15 @@ namespace {
} // end anonymous namespace
+ExternalAAWrapperPass::ExternalAAWrapperPass() : ImmutablePass(ID) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+ExternalAAWrapperPass::ExternalAAWrapperPass(CallbackT CB)
+ : ImmutablePass(ID), CB(std::move(CB)) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
char ExternalAAWrapperPass::ID = 0;
INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis",
@@ -836,6 +846,7 @@ void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addUsedIfAvailable<SCEVAAWrapperPass>();
AU.addUsedIfAvailable<CFLAndersAAWrapperPass>();
AU.addUsedIfAvailable<CFLSteensAAWrapperPass>();
+ AU.addUsedIfAvailable<ExternalAAWrapperPass>();
}
AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
@@ -861,6 +872,9 @@ AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
AAR.addAAResult(WrapperPass->getResult());
if (auto *WrapperPass = P.getAnalysisIfAvailable<CFLSteensAAWrapperPass>())
AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<ExternalAAWrapperPass>())
+ if (WrapperPass->CB)
+ WrapperPass->CB(P, F, AAR);
return AAR;
}
@@ -904,4 +918,5 @@ void llvm::getAAResultsAnalysisUsage(AnalysisUsage &AU) {
AU.addUsedIfAvailable<GlobalsAAWrapperPass>();
AU.addUsedIfAvailable<CFLAndersAAWrapperPass>();
AU.addUsedIfAvailable<CFLSteensAAWrapperPass>();
+ AU.addUsedIfAvailable<ExternalAAWrapperPass>();
}
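
A hedged sketch of wiring an out-of-tree alias analysis through the new callback constructor; getMyAAResult is hypothetical:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/LegacyPassManager.h"
using namespace llvm;

// The callback runs when legacy-PM AA results are assembled per function.
static void addExternalAA(legacy::PassManager &PM) {
  PM.add(new ExternalAAWrapperPass(
      [](Pass &P, Function &F, AAResults &AAR) {
        // AAR.addAAResult(getMyAAResult(P, F)); // assumed custom result
      }));
}
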
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index e83703867e09..2e44bbd3a8ca 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
index 79fbcd464c1b..5cc5ab597ef9 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
index 129944743c5e..f4d4a5ac8f88 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index f3c30c258c19..e852d663c6b4 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -25,9 +25,9 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/PhiValues.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/PhiValues.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
@@ -49,6 +49,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -1481,7 +1482,8 @@ AliasResult BasicAAResult::aliasGEP(
// give up if we can't determine conditions that hold for every cycle:
const Value *V = DecompGEP1.VarIndices[i].V;
- KnownBits Known = computeKnownBits(V, DL, 0, &AC, nullptr, DT);
+ KnownBits Known =
+ computeKnownBits(V, DL, 0, &AC, dyn_cast<Instruction>(GEP1), DT);
bool SignKnownZero = Known.isNonNegative();
bool SignKnownOne = Known.isNegative();
@@ -2049,7 +2051,7 @@ BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) {
}
BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) {
- initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
}
char BasicAAWrapperPass::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index de183bbde173..544bd7757ae4 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index a06ee096d54c..ffba65b5ed5e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -31,9 +32,11 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -61,6 +64,12 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
+BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass()
+ : FunctionPass(ID) {
+ initializeBranchProbabilityInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
char BranchProbabilityInfoWrapperPass::ID = 0;
// Weights are for internal use only. They are used by heuristics to help to
@@ -138,69 +147,83 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
/// instruction. This is essentially never taken.
static const uint32_t IH_NONTAKEN_WEIGHT = 1;
-/// Add \p BB to PostDominatedByUnreachable set if applicable.
-void
-BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
- const Instruction *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0) {
- if (isa<UnreachableInst>(TI) ||
- // If this block is terminated by a call to
- // @llvm.experimental.deoptimize then treat it like an unreachable since
- // the @llvm.experimental.deoptimize call is expected to practically
- // never execute.
- BB->getTerminatingDeoptimizeCall())
- PostDominatedByUnreachable.insert(BB);
- return;
- }
+static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT,
+ SmallVectorImpl<const BasicBlock *> &WorkList,
+ SmallPtrSetImpl<const BasicBlock *> &TargetSet) {
+ SmallVector<BasicBlock *, 8> Descendants;
+ SmallPtrSet<const BasicBlock *, 16> NewItems;
+
+ PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants);
+ for (auto *BB : Descendants)
+ if (TargetSet.insert(BB).second)
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (!TargetSet.count(*PI))
+ NewItems.insert(*PI);
+ WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end());
+}
- // If the terminator is an InvokeInst, check only the normal destination block
- // as the unwind edge of InvokeInst is also very unlikely taken.
- if (auto *II = dyn_cast<InvokeInst>(TI)) {
- if (PostDominatedByUnreachable.count(II->getNormalDest()))
- PostDominatedByUnreachable.insert(BB);
- return;
+/// Compute a set of basic blocks that are post-dominated by unreachables.
+void BranchProbabilityInfo::computePostDominatedByUnreachable(
+ const Function &F, PostDominatorTree *PDT) {
+ SmallVector<const BasicBlock *, 8> WorkList;
+ for (auto &BB : F) {
+ const Instruction *TI = BB.getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ if (isa<UnreachableInst>(TI) ||
+ // If this block is terminated by a call to
+ // @llvm.experimental.deoptimize then treat it like an unreachable
+ // since the @llvm.experimental.deoptimize call is expected to
+ // practically never execute.
+ BB.getTerminatingDeoptimizeCall())
+ UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable);
+ }
}
- for (auto *I : successors(BB))
- // If any of successor is not post dominated then BB is also not.
- if (!PostDominatedByUnreachable.count(I))
- return;
-
- PostDominatedByUnreachable.insert(BB);
+ while (!WorkList.empty()) {
+ const BasicBlock *BB = WorkList.pop_back_val();
+ if (PostDominatedByUnreachable.count(BB))
+ continue;
+ // If the terminator is an InvokeInst, check only the normal destination
+ // block as the unwind edge of InvokeInst is also very unlikely taken.
+ if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ if (PostDominatedByUnreachable.count(II->getNormalDest()))
+ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
+ }
+ // If all the successors are post-dominated by unreachable, BB is too.
+ else if (!successors(BB).empty() &&
+ llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
+ return PostDominatedByUnreachable.count(Succ);
+ }))
+ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
+ }
}
-/// Add \p BB to PostDominatedByColdCall set if applicable.
-void
-BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
- assert(!PostDominatedByColdCall.count(BB));
- const Instruction *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0)
- return;
+/// Compute the set of basic blocks that are post-dominated by cold calls.
+void BranchProbabilityInfo::computePostDominatedByColdCall(
+ const Function &F, PostDominatorTree *PDT) {
+ SmallVector<const BasicBlock *, 8> WorkList;
+ for (auto &BB : F)
+ for (auto &I : BB)
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->hasFnAttr(Attribute::Cold))
+ UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall);
- // If all of successor are post dominated then BB is also done.
- if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) {
- return PostDominatedByColdCall.count(SuccBB);
- })) {
- PostDominatedByColdCall.insert(BB);
- return;
- }
+ while (!WorkList.empty()) {
+ const BasicBlock *BB = WorkList.pop_back_val();
- // If the terminator is an InvokeInst, check only the normal destination
- // block as the unwind edge of InvokeInst is also very unlikely taken.
- if (auto *II = dyn_cast<InvokeInst>(TI))
- if (PostDominatedByColdCall.count(II->getNormalDest())) {
- PostDominatedByColdCall.insert(BB);
- return;
+ // If the terminator is an InvokeInst, check only the normal destination
+ // block as the unwind edge of InvokeInst is also very unlikely taken.
+ if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ if (PostDominatedByColdCall.count(II->getNormalDest()))
+ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
}
-
- // Otherwise, if the block itself contains a cold function, add it to the
- // set of blocks post-dominated by a cold call.
- for (auto &I : *BB)
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (CI->hasFnAttr(Attribute::Cold)) {
- PostDominatedByColdCall.insert(BB);
- return;
- }
+ // If all of the successors are post-dominated, then BB is also done.
+ else if (!successors(BB).empty() &&
+ llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
+ return PostDominatedByColdCall.count(Succ);
+ }))
+ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
+ }
}
/// Calculate edge weights for successors that lead to unreachable blocks.
@@ -975,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
LLVM_DEBUG(dbgs() << "\n");
}
+ std::unique_ptr<PostDominatorTree> PDT =
+ std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
+ computePostDominatedByUnreachable(F, PDT.get());
+ computePostDominatedByColdCall(F, PDT.get());
+
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
for (auto BB : post_order(&F.getEntryBlock())) {
LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
<< "\n");
- updatePostDominatedByUnreachable(BB);
- updatePostDominatedByColdCall(BB);
// If there are fewer than two successors, there is no probability to set.
if (BB->getTerminator()->getNumSuccessors() < 2)
continue;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
index 4f4103fefa25..88e7d3bdede1 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp
@@ -18,7 +18,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CFGPrinter.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
index fd90bd1521d6..eb5c96e6eeca 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -69,6 +69,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
index b87aa4065392..85a8c3d2a00b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -46,6 +46,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp
index 70aeb1a688ee..8e8a50178518 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp
@@ -10,10 +10,11 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp
index d24cbd104bf6..7246b73bfd4b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/CallPrinter.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
index 8dbcf7034fda..b32924e6497a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
@@ -37,6 +37,8 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -764,8 +766,8 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
Type *ResultTy, Optional<unsigned> InRangeIndex,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- Type *IntPtrTy = DL.getIntPtrType(ResultTy);
- Type *IntPtrScalarTy = IntPtrTy->getScalarType();
+ Type *IntIdxTy = DL.getIndexType(ResultTy);
+ Type *IntIdxScalarTy = IntIdxTy->getScalarType();
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
@@ -773,11 +775,11 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
if ((i == 1 ||
!isa<StructType>(GetElementPtrInst::getIndexedType(
SrcElemTy, Ops.slice(1, i - 1)))) &&
- Ops[i]->getType()->getScalarType() != IntPtrScalarTy) {
+ Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
Any = true;
Type *NewType = Ops[i]->getType()->isVectorTy()
- ? IntPtrTy
- : IntPtrTy->getScalarType();
+ ? IntIdxTy
+ : IntIdxScalarTy;
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
true,
NewType,
@@ -837,7 +839,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
if (!Ptr->getType()->isPointerTy())
return nullptr;
- Type *IntPtrTy = DL.getIntPtrType(Ptr->getType());
+ Type *IntIdxTy = DL.getIndexType(Ptr->getType());
// If this is a constant expr gep that is effectively computing an
// "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
@@ -848,7 +850,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// "inttoptr (sub (ptrtoint Ptr), V)"
if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
- assert((!CE || CE->getType() == IntPtrTy) &&
+ assert((!CE || CE->getType() == IntIdxTy) &&
"CastGEPIndices didn't canonicalize index types!");
if (CE && CE->getOpcode() == Instruction::Sub &&
CE->getOperand(0)->isNullValue()) {
@@ -863,7 +865,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
return nullptr;
}
- unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy);
+ unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);
APInt Offset =
APInt(BitWidth,
DL.getIndexedOffsetInType(
@@ -943,7 +945,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// The element size is 0. This may be [0 x Ty]*, so just use a zero
// index for this level and proceed to the next level to see if it can
// accommodate the offset.
- NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0));
+ NewIdxs.push_back(ConstantInt::get(IntIdxTy, 0));
} else {
// The element size is non-zero divide the offset by the element
// size (rounding down), to compute the index at this level.
@@ -952,7 +954,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
if (Overflow)
break;
Offset -= NewIdx * ElemSize;
- NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx));
+ NewIdxs.push_back(ConstantInt::get(IntIdxTy, NewIdx));
}
} else {
auto *STy = cast<StructType>(Ty);
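
The IntPtrTy-to-IntIdxTy switch above matters on targets whose GEP index width differs from the pointer width; a small illustration with an invented DataLayout string:

// With "p:64:64:64:32", pointers are 64 bits wide but GEP indices are
// 32 bits, so the two queries return different types.
Type *IntPtr = DL.getIntPtrType(PtrTy); // i64 under this layout
Type *IntIdx = DL.getIndexType(PtrTy);  // i32 under this layout
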
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp
index bf0cdbfd0c8b..953da964c435 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp
index b5c3c761ad98..90ce13e6f650 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp
@@ -9,10 +9,17 @@
// The implementation for the data dependence graph.
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DDG.h"
+#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool>
+ CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Create pi-block nodes."));
+
#define DEBUG_TYPE "ddg"
template class llvm::DGEdge<DDGNode, DDGEdge>;
@@ -29,9 +36,16 @@ bool DDGNode::collectInstructions(
InstructionListType &IList) const {
assert(IList.empty() && "Expected the IList to be empty on entry.");
if (isa<SimpleDDGNode>(this)) {
- for (auto *I : cast<const SimpleDDGNode>(this)->getInstructions())
+ for (Instruction *I : cast<const SimpleDDGNode>(this)->getInstructions())
if (Pred(I))
IList.push_back(I);
+ } else if (isa<PiBlockDDGNode>(this)) {
+ for (const DDGNode *PN : cast<const PiBlockDDGNode>(this)->getNodes()) {
+ assert(!isa<PiBlockDDGNode>(PN) && "Nested PiBlocks are not supported.");
+ SmallVector<Instruction *, 8> TmpIList;
+ PN->collectInstructions(Pred, TmpIList);
+ IList.insert(IList.end(), TmpIList.begin(), TmpIList.end());
+ }
} else
llvm_unreachable("unimplemented type of node");
return !IList.empty();
@@ -46,11 +60,14 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode::NodeKind K) {
case DDGNode::NodeKind::MultiInstruction:
Out = "multi-instruction";
break;
+ case DDGNode::NodeKind::PiBlock:
+ Out = "pi-block";
+ break;
case DDGNode::NodeKind::Root:
Out = "root";
break;
case DDGNode::NodeKind::Unknown:
- Out = "??";
+ Out = "?? (error)";
break;
}
OS << Out;
@@ -61,8 +78,15 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) {
OS << "Node Address:" << &N << ":" << N.getKind() << "\n";
if (isa<SimpleDDGNode>(N)) {
OS << " Instructions:\n";
- for (auto *I : cast<const SimpleDDGNode>(N).getInstructions())
+ for (const Instruction *I : cast<const SimpleDDGNode>(N).getInstructions())
OS.indent(2) << *I << "\n";
+ } else if (isa<PiBlockDDGNode>(&N)) {
+ OS << "--- start of nodes in pi-block ---\n";
+ auto &Nodes = cast<const PiBlockDDGNode>(&N)->getNodes();
+ unsigned Count = 0;
+ for (const DDGNode *N : Nodes)
+ OS << *N << (++Count == Nodes.size() ? "" : "\n");
+ OS << "--- end of nodes in pi-block ---\n";
} else if (!isa<RootDDGNode>(N))
llvm_unreachable("unimplemented type of node");
@@ -99,6 +123,29 @@ SimpleDDGNode::SimpleDDGNode(SimpleDDGNode &&N)
SimpleDDGNode::~SimpleDDGNode() { InstList.clear(); }
//===--------------------------------------------------------------------===//
+// PiBlockDDGNode implementation
+//===--------------------------------------------------------------------===//
+
+PiBlockDDGNode::PiBlockDDGNode(const PiNodeList &List)
+ : DDGNode(NodeKind::PiBlock), NodeList(List) {
+ assert(!NodeList.empty() && "pi-block node constructed with an empty list.");
+}
+
+PiBlockDDGNode::PiBlockDDGNode(const PiBlockDDGNode &N)
+ : DDGNode(N), NodeList(N.NodeList) {
+ assert(getKind() == NodeKind::PiBlock && !NodeList.empty() &&
+ "constructing from invalid pi-block node.");
+}
+
+PiBlockDDGNode::PiBlockDDGNode(PiBlockDDGNode &&N)
+ : DDGNode(std::move(N)), NodeList(std::move(N.NodeList)) {
+ assert(getKind() == NodeKind::PiBlock && !NodeList.empty() &&
+ "constructing from invalid pi-block node.");
+}
+
+PiBlockDDGNode::~PiBlockDDGNode() { NodeList.clear(); }
+
+//===--------------------------------------------------------------------===//
// DDGEdge implementation
//===--------------------------------------------------------------------===//
@@ -115,7 +162,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K) {
Out = "rooted";
break;
case DDGEdge::EdgeKind::Unknown:
- Out = "??";
+ Out = "?? (error)";
break;
}
OS << Out;
@@ -134,19 +181,28 @@ using BasicBlockListType = SmallVector<BasicBlock *, 8>;
DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D)
: DependenceGraphInfo(F.getName().str(), D) {
+ // Put the basic blocks in program order for correct dependence
+ // directions.
BasicBlockListType BBList;
- for (auto &BB : F.getBasicBlockList())
- BBList.push_back(&BB);
+ for (auto &SCC : make_range(scc_begin(&F), scc_end(&F)))
+ for (BasicBlock *BB : SCC)
+ BBList.push_back(BB);
+ std::reverse(BBList.begin(), BBList.end());
DDGBuilder(*this, D, BBList).populate();
}
-DataDependenceGraph::DataDependenceGraph(const Loop &L, DependenceInfo &D)
+DataDependenceGraph::DataDependenceGraph(Loop &L, LoopInfo &LI,
+ DependenceInfo &D)
: DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." +
L.getHeader()->getName())
.str(),
D) {
+ // Put the basic blocks in program order for correct dependence
+ // directions.
+ LoopBlocksDFS DFS(&L);
+ DFS.perform(&LI);
BasicBlockListType BBList;
- for (BasicBlock *BB : L.blocks())
+ for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
BBList.push_back(BB);
DDGBuilder(*this, D, BBList).populate();
}
@@ -164,23 +220,47 @@ bool DataDependenceGraph::addNode(DDGNode &N) {
return false;
// In general, if the root node is already created and linked, it is not safe
- // to add new nodes since they may be unreachable by the root.
- // TODO: Allow adding Pi-block nodes after root is created. Pi-blocks are an
- // exception because they represent components that are already reachable by
- // root.
- assert(!Root && "Root node is already added. No more nodes can be added.");
+ // to add new nodes since they may be unreachable by the root. However,
+ // pi-block nodes need to be added after the root node is linked, and they are
+ // always reachable by the root, because they represent components that are
+ // already reachable by root.
+ auto *Pi = dyn_cast<PiBlockDDGNode>(&N);
+ assert((!Root || Pi) &&
+ "Root node is already added. No more nodes can be added.");
+
if (isa<RootDDGNode>(N))
Root = &N;
+ if (Pi)
+ for (DDGNode *NI : Pi->getNodes())
+ PiBlockMap.insert(std::make_pair(NI, Pi));
+
return true;
}
+const PiBlockDDGNode *DataDependenceGraph::getPiBlock(const NodeType &N) const {
+ if (PiBlockMap.find(&N) == PiBlockMap.end())
+ return nullptr;
+ auto *Pi = PiBlockMap.find(&N)->second;
+ assert(PiBlockMap.find(Pi) == PiBlockMap.end() &&
+ "Nested pi-blocks detected.");
+ return Pi;
+}
+
raw_ostream &llvm::operator<<(raw_ostream &OS, const DataDependenceGraph &G) {
- for (auto *Node : G)
- OS << *Node << "\n";
+ for (DDGNode *Node : G)
+ // Avoid printing nodes that are part of a pi-block twice. They will get
+ // printed when the pi-block is printed.
+ if (!G.getPiBlock(*Node))
+ OS << *Node << "\n";
+ OS << "\n";
return OS;
}
+bool DDGBuilder::shouldCreatePiBlocks() const {
+ return CreatePiBlocks;
+}
+
//===--------------------------------------------------------------------===//
// DDG Analysis Passes
//===--------------------------------------------------------------------===//
@@ -190,7 +270,7 @@ DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR) {
Function *F = L.getHeader()->getParent();
DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
- return std::make_unique<DataDependenceGraph>(L, DI);
+ return std::make_unique<DataDependenceGraph>(L, AR.LI, DI);
}
AnalysisKey DDGAnalysis::Key;
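
A construction sketch mirroring DDGAnalysis::run above; AR is the standard LoopStandardAnalysisResults bundle supplied by the loop pass manager:

// Blocks are taken in RPO internally so dependence directions line up with
// program order.
Function *F = L.getHeader()->getParent();
DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
DataDependenceGraph G(L, AR.LI, DI);
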
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp
index c1043e446beb..60cd1b5317d6 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
index 01b8ff10d355..aaee8c21f289 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
@@ -40,6 +40,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp
index 0038c9fb9ce4..9b38053c196b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -61,6 +61,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -141,6 +142,11 @@ INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
char DependenceAnalysisWrapperPass::ID = 0;
+DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass()
+ : FunctionPass(ID) {
+ initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
return new DependenceAnalysisWrapperPass();
}
@@ -164,25 +170,25 @@ void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<LoopInfoWrapperPass>();
}
-
// Used to test the dependence analyzer.
-// Looks through the function, noting loads and stores.
+// Looks through the function, noting instructions that may access memory.
// Calls depends() on every possible pair and prints out the result.
// Ignores all other instructions.
static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) {
auto *F = DA->getFunction();
for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
++SrcI) {
- if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) {
+ if (SrcI->mayReadOrWriteMemory()) {
for (inst_iterator DstI = SrcI, DstE = inst_end(F);
DstI != DstE; ++DstI) {
- if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) {
- OS << "da analyze - ";
+ if (DstI->mayReadOrWriteMemory()) {
+ OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
+ OS << " da analyze - ";
if (auto D = DA->depends(&*SrcI, &*DstI, true)) {
D->dump(OS);
for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
if (D->isSplitable(Level)) {
- OS << "da analyze - split level = " << Level;
+ OS << " da analyze - split level = " << Level;
OS << ", iteration = " << *DA->getSplitIteration(*D, Level);
OS << "!\n";
}
@@ -876,14 +882,13 @@ void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
}
}
-
// Examine the scev and return true iff it's linear.
// Collect any loops mentioned in the set of "Loops".
-bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
- SmallBitVector &Loops) {
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src);
+bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
+ SmallBitVector &Loops, bool IsSrc) {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
if (!AddRec)
- return isLoopInvariant(Src, LoopNest);
+ return isLoopInvariant(Expr, LoopNest);
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*SE);
const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
@@ -896,33 +901,25 @@ bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
}
if (!isLoopInvariant(Step, LoopNest))
return false;
- Loops.set(mapSrcLoop(AddRec->getLoop()));
- return checkSrcSubscript(Start, LoopNest, Loops);
+ if (IsSrc)
+ Loops.set(mapSrcLoop(AddRec->getLoop()));
+ else
+ Loops.set(mapDstLoop(AddRec->getLoop()));
+ return checkSubscript(Start, LoopNest, Loops, IsSrc);
}
-
+// Examine the scev and return true iff it's linear.
+// Collect any loops mentioned in the set of "Loops".
+bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
+ SmallBitVector &Loops) {
+ return checkSubscript(Src, LoopNest, Loops, true);
+}
// Examine the scev and return true iff it's linear.
// Collect any loops mentioned in the set of "Loops".
bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
SmallBitVector &Loops) {
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst);
- if (!AddRec)
- return isLoopInvariant(Dst, LoopNest);
- const SCEV *Start = AddRec->getStart();
- const SCEV *Step = AddRec->getStepRecurrence(*SE);
- const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
- if (!isa<SCEVCouldNotCompute>(UB)) {
- if (SE->getTypeSizeInBits(Start->getType()) <
- SE->getTypeSizeInBits(UB->getType())) {
- if (!AddRec->getNoWrapFlags())
- return false;
- }
- }
- if (!isLoopInvariant(Step, LoopNest))
- return false;
- Loops.set(mapDstLoop(AddRec->getLoop()));
- return checkDstSubscript(Start, LoopNest, Loops);
+ return checkSubscript(Dst, LoopNest, Loops, false);
}
@@ -3407,8 +3404,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
if (Src == Dst)
PossiblyLoopIndependent = false;
- if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
- (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
+ if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
// if either instruction doesn't reference memory, there's no dependence
return nullptr;
@@ -3780,8 +3776,6 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
return std::make_unique<FullDependence>(std::move(Result));
}
-
-
//===----------------------------------------------------------------------===//
// getSplitIteration -
// Rather than spend rarely-used space recording the splitting iteration
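
A query sketch in the style of dumpExampleDependence above; Src and Dst are assumed to be instructions that may access memory:

// A null result means no dependence; a confused one means "unknown, be
// conservative".
if (auto D = DA->depends(Src, Dst, /*PossiblyLoopIndependent=*/true)) {
  if (D->isConfused())
    ; // treat as an unknown dependence
  else if (D->isLoopIndependent())
    ; // same-iteration dependence
}
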
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp b/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp
index ed1d8351b2f0..e8a1a2fff919 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp
@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DependenceGraphBuilder.h"
+#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DDG.h"
@@ -22,6 +23,7 @@ STATISTIC(TotalGraphs, "Number of dependence graphs created.");
STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
+STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created.");
STATISTIC(TotalConfusedEdges,
"Number of confused memory dependencies between two nodes.");
STATISTIC(TotalEdgeReversals,
@@ -35,6 +37,15 @@ using InstructionListType = SmallVector<Instruction *, 2>;
//===--------------------------------------------------------------------===//
template <class G>
+void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() {
+ // The BBList is expected to be in program order.
+ size_t NextOrdinal = 1;
+ for (auto *BB : BBList)
+ for (auto &I : *BB)
+ InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++));
+}
+
+template <class G>
void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
++TotalGraphs;
assert(IMap.empty() && "Expected empty instruction map at start");
@@ -42,6 +53,7 @@ void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
for (Instruction &I : *BB) {
auto &NewNode = createFineGrainedNode(I);
IMap.insert(std::make_pair(&I, &NewNode));
+ NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I)));
++TotalFineGrainedNodes;
}
}
@@ -74,6 +86,144 @@ void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
}
}
+template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() {
+ if (!shouldCreatePiBlocks())
+ return;
+
+ LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
+
+ // The overall algorithm is as follows:
+ // 1. Identify SCCs and for each SCC create a pi-block node containing all
+ // the nodes in that SCC.
+ // 2. Identify incoming edges incident to the nodes inside of the SCC and
+ // reconnect them to the pi-block node.
+ // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
+ // outside of it and reconnect them so that the edges are coming out of the
+ // SCC node instead.
+
+ // Adding nodes as we iterate through the SCCs causes the SCC
+ // iterators to get invalidated. To prevent this invalidation, we first
+ // collect a list of nodes that are part of an SCC, and then iterate over
+ // those lists to create the pi-block nodes. Each element of the list is a
+ // list of nodes in an SCC. Note: trivial SCCs containing a single node are
+ // ignored.
+ SmallVector<NodeListType, 4> ListOfSCCs;
+ for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) {
+ if (SCC.size() > 1)
+ ListOfSCCs.emplace_back(SCC.begin(), SCC.end());
+ }
+
+ for (NodeListType &NL : ListOfSCCs) {
+ LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size()
+ << " nodes in it.\n");
+
+ // The SCC iterator may put the nodes in an order that's different from the
+ // program order. To preserve the original program order, we sort the list of
+ // nodes based on ordinal numbers computed earlier.
+ llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) {
+ return getOrdinal(*LHS) < getOrdinal(*RHS);
+ });
+
+ NodeType &PiNode = createPiBlock(NL);
+ ++TotalPiBlockNodes;
+
+ // Build a set to speed up the lookup for edges whose targets
+ // are inside the SCC.
+ SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end());
+
+ // We have the set of nodes in the SCC. We go through the set of nodes
+ // that are outside of the SCC and look for edges that cross the two sets.
+ for (NodeType *N : Graph) {
+
+ // Skip the SCC node and all the nodes inside of it.
+ if (*N == PiNode || NodesInSCC.count(N))
+ continue;
+
+ for (NodeType *SCCNode : NL) {
+
+ enum Direction {
+ Incoming, // Incoming edges to the SCC
+ Outgoing, // Edges going out of the SCC
+ DirectionCount // To make the enum usable as an array index.
+ };
+
+ // Use these flags to help us avoid creating redundant edges. If there
+ // is more than one edge from an outside node to inside nodes, we only
+ // keep one edge from that node to the pi-block node. Similarly, if
+ // there is more than one edge from inside nodes to an outside node,
+ // we only keep one edge from the pi-block node to the outside node.
+ // There is a flag defined for each direction (incoming vs outgoing) and
+ // for each type of edge supported, using a two-dimensional boolean
+ // array.
+ using EdgeKind = typename EdgeType::EdgeKind;
+ EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{
+ false, false};
+
+ auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
+ const EdgeKind K) {
+ switch (K) {
+ case EdgeKind::RegisterDefUse:
+ createDefUseEdge(Src, Dst);
+ break;
+ case EdgeKind::MemoryDependence:
+ createMemoryEdge(Src, Dst);
+ break;
+ case EdgeKind::Rooted:
+ createRootedEdge(Src, Dst);
+ break;
+ default:
+ llvm_unreachable("Unsupported type of edge.");
+ }
+ };
+
+ auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
+ const Direction Dir) {
+ if (!Src->hasEdgeTo(*Dst))
+ return;
+ LLVM_DEBUG(dbgs()
+ << "reconnecting("
+ << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
+ << ":\nSrc:" << *Src << "\nDst:" << *Dst
+ << "\nNew:" << *New << "\n");
+ assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
+ "Invalid direction.");
+
+ SmallVector<EdgeType *, 10> EL;
+ Src->findEdgesTo(*Dst, EL);
+ for (EdgeType *OldEdge : EL) {
+ EdgeKind Kind = OldEdge->getKind();
+ if (!EdgeAlreadyCreated[Dir][Kind]) {
+ if (Dir == Direction::Incoming) {
+ createEdgeOfKind(*Src, *New, Kind);
+ LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
+ } else if (Dir == Direction::Outgoing) {
+ createEdgeOfKind(*New, *Dst, Kind);
+ LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
+ }
+ EdgeAlreadyCreated[Dir][Kind] = true;
+ }
+ Src->removeEdge(*OldEdge);
+ destroyEdge(*OldEdge);
+ LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
+ }
+ };
+
+ // Process incoming edges incident to the pi-block node.
+ reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
+
+ // Process edges that are coming out of the pi-block node.
+ reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing);
+ }
+ }
+ }
+
+ // Ordinal maps are no longer needed.
+ InstOrdinalMap.clear();
+ NodeOrdinalMap.clear();
+
+ LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
+}
+
template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
for (NodeType *N : Graph) {
InstructionListType SrcIList;
@@ -224,5 +374,34 @@ void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
}
}
+template <class G>
+void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {
+
+ // If we don't create pi-blocks, then we may not have a DAG.
+ if (!shouldCreatePiBlocks())
+ return;
+
+ SmallVector<NodeType *, 64> NodesInPO;
+ using NodeKind = typename NodeType::NodeKind;
+ for (NodeType *N : post_order(&Graph)) {
+ if (N->getKind() == NodeKind::PiBlock) {
+ // Put members of the pi-block right after the pi-block itself, for
+ // convenience.
+ const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
+ NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(),
+ PiBlockMembers.end());
+ }
+ NodesInPO.push_back(N);
+ }
+
+ size_t OldSize = Graph.Nodes.size();
+ Graph.Nodes.clear();
+ for (NodeType *N : reverse(NodesInPO))
+ Graph.Nodes.push_back(N);
+ if (Graph.Nodes.size() != OldSize)
+ assert(false &&
+ "Expected the number of nodes to stay the same after the sort");
+}
+
template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
template class llvm::DependenceGraphInfo<DDGNode>;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp
index d9f43dd746ef..024a0fb49950 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/DomPrinter.h"
#include "llvm/Analysis/DOTGraphTraitsPass.h"
#include "llvm/Analysis/PostDominators.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp b/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp
index 49215889cfd6..b374334ea371 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp
@@ -233,7 +233,7 @@ void DomTreeUpdater::applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates) {
return;
if (Strategy == UpdateStrategy::Lazy) {
- for (const auto U : Updates)
+ for (const auto &U : Updates)
if (!isSelfDominance(U))
PendUpdates.push_back(U);
@@ -253,7 +253,7 @@ void DomTreeUpdater::applyUpdatesPermissive(
SmallSet<std::pair<BasicBlock *, BasicBlock *>, 8> Seen;
SmallVector<DominatorTree::UpdateType, 8> DeduplicatedUpdates;
- for (const auto U : Updates) {
+ for (const auto &U : Updates) {
auto Edge = std::make_pair(U.getFrom(), U.getTo());
// Because it is illegal to submit updates that have already been applied
// and updates to an edge need to be strictly ordered,
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp
index f9a554acb7ea..14e6965f1259 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -12,6 +12,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
index efdf9706ba3c..4361e0dc9bbd 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -286,7 +287,7 @@ GlobalsAAResult::getFunctionInfo(const Function *F) {
void GlobalsAAResult::AnalyzeGlobals(Module &M) {
SmallPtrSet<Function *, 32> TrackedFunctions;
for (Function &F : M)
- if (F.hasLocalLinkage())
+ if (F.hasLocalLinkage()) {
if (!AnalyzeUsesOfPointer(&F)) {
// Remember that we are tracking this global.
NonAddressTakenGlobals.insert(&F);
@@ -294,7 +295,9 @@ void GlobalsAAResult::AnalyzeGlobals(Module &M) {
Handles.emplace_front(*this, &F);
Handles.front().I = Handles.begin();
++NumNonAddrTakenFunctions;
- }
+ } else
+ UnknownFunctionsWithLocalLinkage = true;
+ }
SmallPtrSet<Function *, 16> Readers, Writers;
for (GlobalVariable &GV : M.globals())
@@ -526,9 +529,12 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
FI.setMayReadAnyGlobal();
} else {
FI.addModRefInfo(ModRefInfo::ModRef);
- // Can't say anything useful unless it's an intrinsic - they don't
- // read or write global variables of the kind considered here.
- KnowNothing = !F->isIntrinsic();
+ if (!F->onlyAccessesArgMemory())
+ FI.setMayReadAnyGlobal();
+ if (!F->isIntrinsic()) {
+ KnowNothing = true;
+ break;
+ }
}
continue;
}
@@ -927,7 +933,9 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
// global we are tracking, return information if we have it.
if (const GlobalValue *GV =
dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
- if (GV->hasLocalLinkage())
+ // If GV is internal to this IR and there is no function with local linkage
+ // that has had its address taken, keep looking for a tighter ModRefInfo.
+ if (GV->hasLocalLinkage() && !UnknownFunctionsWithLocalLinkage)
if (const Function *F = Call->getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
if (const FunctionInfo *FI = getFunctionInfo(F))
diff --git a/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp
index cad92f6e56bb..d48283279858 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp
@@ -13,19 +13,25 @@
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
bool llvm::isGuard(const User *U) {
- using namespace llvm::PatternMatch;
return match(U, m_Intrinsic<Intrinsic::experimental_guard>());
}
+bool llvm::isWidenableBranch(const User *U) {
+ Value *Condition, *WidenableCondition;
+ BasicBlock *GuardedBB, *DeoptBB;
+ return parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB,
+ DeoptBB);
+}
+
bool llvm::isGuardAsWidenableBranch(const User *U) {
Value *Condition, *WidenableCondition;
BasicBlock *GuardedBB, *DeoptBB;
if (!parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB,
DeoptBB))
return false;
- using namespace llvm::PatternMatch;
for (auto &Insn : *DeoptBB) {
if (match(&Insn, m_Intrinsic<Intrinsic::experimental_deoptimize>()))
return true;
@@ -38,12 +44,63 @@ bool llvm::isGuardAsWidenableBranch(const User *U) {
bool llvm::parseWidenableBranch(const User *U, Value *&Condition,
Value *&WidenableCondition,
BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB) {
- using namespace llvm::PatternMatch;
- if (!match(U, m_Br(m_And(m_Value(Condition), m_Value(WidenableCondition)),
- IfTrueBB, IfFalseBB)))
+
+ Use *C, *WC;
+ if (parseWidenableBranch(const_cast<User*>(U), C, WC, IfTrueBB, IfFalseBB)) {
+ if (C)
+ Condition = C->get();
+ else
+ Condition = ConstantInt::getTrue(IfTrueBB->getContext());
+ WidenableCondition = WC->get();
+ return true;
+ }
+ return false;
+}
+
+bool llvm::parseWidenableBranch(User *U, Use *&C,Use *&WC,
+ BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB) {
+
+ auto *BI = dyn_cast<BranchInst>(U);
+ if (!BI || !BI->isConditional())
+ return false;
+ auto *Cond = BI->getCondition();
+ if (!Cond->hasOneUse())
+ return false;
+
+ IfTrueBB = BI->getSuccessor(0);
+ IfFalseBB = BI->getSuccessor(1);
+
+ if (match(Cond, m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
+ WC = &BI->getOperandUse(0);
+ C = nullptr;
+ return true;
+ }
+
+ // Check for two cases:
+ // 1) br (i1 (and A, WC())), label %IfTrue, label %IfFalse
+ // 2) br (i1 (and WC(), B)), label %IfTrue, label %IfFalse
+ // We do not check for more generalized and trees as we should canonicalize
+ // to the form above in instcombine. (TODO)
+ Value *A, *B;
+ if (!match(Cond, m_And(m_Value(A), m_Value(B))))
return false;
- // TODO: At the moment, we only recognize the branch if the WC call in this
- // specific position. We should generalize!
- return match(WidenableCondition,
- m_Intrinsic<Intrinsic::experimental_widenable_condition>());
+ auto *And = dyn_cast<Instruction>(Cond);
+ if (!And)
+ // Could be a constexpr
+ return false;
+
+ if (match(A, m_Intrinsic<Intrinsic::experimental_widenable_condition>()) &&
+ A->hasOneUse()) {
+ WC = &And->getOperandUse(0);
+ C = &And->getOperandUse(1);
+ return true;
+ }
+
+ if (match(B, m_Intrinsic<Intrinsic::experimental_widenable_condition>()) &&
+ B->hasOneUse()) {
+ WC = &And->getOperandUse(1);
+ C = &And->getOperandUse(0);
+ return true;
+ }
+ return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
index 6fb600114bc6..ac81cba836f8 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -699,25 +698,48 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
// Ensure every user of the phi node is dominated by the previous value.
// The dominance requirement ensures the loop vectorizer will not need to
// vectorize the initial value prior to the first iteration of the loop.
- // TODO: Consider extending this sinking to handle other kinds of instructions
- // and expressions, beyond sinking a single cast past Previous.
+ // TODO: Consider extending this sinking to handle memory instructions and
+ // phis with multiple users.
+
+ // Returns true if all users of I are dominated by DominatedBy.
+ auto allUsesDominatedBy = [DT](Instruction *I, Instruction *DominatedBy) {
+ return all_of(I->uses(), [DT, DominatedBy](Use &U) {
+ return DT->dominates(DominatedBy, U);
+ });
+ };
+
if (Phi->hasOneUse()) {
- auto *I = Phi->user_back();
- if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
- DT->dominates(Previous, I->user_back())) {
- if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
- SinkAfter[I] = Previous;
+ Instruction *I = Phi->user_back();
+
+ // If the user of the PHI is also the incoming value, we potentially have a
+ // reduction, which cannot be handled by sinking.
+ if (Previous == I)
+ return false;
+
+ // We cannot sink terminator instructions.
+ if (I->getParent()->getTerminator() == I)
+ return false;
+
+ // Do not try to sink an instruction multiple times (if multiple operands
+ // are first order recurrences).
+ // TODO: We can support this case, by sinking the instruction after the
+ // 'deepest' previous instruction.
+ if (SinkAfter.find(I) != SinkAfter.end())
+ return false;
+
+ if (DT->dominates(Previous, I)) // We already are good w/o sinking.
return true;
- }
- }
- for (User *U : Phi->users())
- if (auto *I = dyn_cast<Instruction>(U)) {
- if (!DT->dominates(Previous, I))
- return false;
+ // We can sink any instruction without side effects, as long as all users
+ // are dominated by the instruction we are sinking after.
+ if (I->getParent() == Phi->getParent() && !I->mayHaveSideEffects() &&
+ allUsesDominatedBy(I, Previous)) {
+ SinkAfter[I] = Previous;
+ return true;
}
+ }
- return true;
+ return allUsesDominatedBy(Phi, Previous);
}
/// This function returns the identity element (or neutral element) for
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp
index 681a0cf7e981..9432696b5a26 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index 68153de8219f..dc4cbc371ef4 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <string>
#include <utility>
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp
index 89811ec0e377..de83a48aad16 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp
@@ -18,9 +18,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -36,6 +36,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -50,7 +51,7 @@ static cl::opt<int> InlineThreshold(
cl::desc("Control the amount of inlining to perform (default = 225)"));
static cl::opt<int> HintThreshold(
- "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
+ "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
cl::desc("Threshold for inlining functions with inline hint"));
static cl::opt<int>
@@ -62,7 +63,7 @@ static cl::opt<int>
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
- "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
+ "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
cl::desc("Threshold for inlining functions with cold attribute"));
static cl::opt<int>
@@ -92,11 +93,13 @@ static cl::opt<bool> OptComputeFullInlineCost(
"exceeds the threshold."));
namespace {
-
+class InlineCostCallAnalyzer;
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
typedef InstVisitor<CallAnalyzer, bool> Base;
friend class InstVisitor<CallAnalyzer, bool>;
+protected:
+ virtual ~CallAnalyzer() {}
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
@@ -123,20 +126,86 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// easily cacheable. Instead, use the cover function paramHasAttr.
CallBase &CandidateCall;
- /// Tunable parameters that control the analysis.
- const InlineParams &Params;
+ /// Extension points for handling callsite features.
+ /// Called after a basic block was analyzed.
+ virtual void onBlockAnalyzed(const BasicBlock *BB) {}
- /// Upper bound for the inlining cost. Bonuses are being applied to account
- /// for speculative "expected profit" of the inlining decision.
- int Threshold;
+ /// Called at the end of the analysis of the callsite. Return the outcome of
+ /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
+ /// the reason it can't.
+ virtual InlineResult finalizeAnalysis() { return true; }
- /// Inlining cost measured in abstract units, accounts for all the
- /// instructions expected to be executed for a given function invocation.
- /// Instructions that are statically proven to be dead based on call-site
- /// arguments are not counted here.
- int Cost = 0;
+ /// Called when we're about to start processing a basic block, and every time
+ /// we are done processing an instruction. Return true if there is no point in
+ /// continuing the analysis (e.g. we've determined already the call site is
+ /// too expensive to inline)
+ virtual bool shouldStop() { return false; }
+
+ /// Called before the analysis of the callee body starts (with callsite
+ /// contexts propagated). It checks callsite-specific information. Return a
+ /// reason analysis can't continue if that's the case, or 'true' if it may
+ /// continue.
+ virtual InlineResult onAnalysisStart() { return true; }
+
+ /// Called if the analysis engine decides SROA cannot be done for the given
+ /// alloca.
+ virtual void onDisableSROA(AllocaInst *Arg) {}
+
+ /// Called when the analysis engine determines load elimination won't happen.
+ virtual void onDisableLoadElimination() {}
+
+ /// Called to account for a call.
+ virtual void onCallPenalty() {}
+
+ /// Called to account for the expectation the inlining would result in a load
+ /// elimination.
+ virtual void onLoadEliminationOpportunity() {}
- bool ComputeFullInlineCost;
+ /// Called to account for the cost of argument setup for the Call in the
+ /// callee's body (not the callsite currently under analysis).
+ virtual void onCallArgumentSetup(const CallBase &Call) {}
+
+ /// Called to account for a load relative intrinsic.
+ virtual void onLoadRelativeIntrinsic() {}
+
+ /// Called to account for a lowered call.
+ virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
+ }
+
+ /// Account for a jump table of given size. Return false to stop further
+ /// processing of the switch instruction.
+ virtual bool onJumpTable(unsigned JumpTableSize) { return true; }
+
+ /// Account for a case cluster of given size. Return false to stop further
+ /// processing of the instruction.
+ virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }
+
+ /// Called at the end of processing a switch instruction, with the given
+ /// number of case clusters.
+ virtual void onFinalizeSwitch(unsigned JumpTableSize,
+ unsigned NumCaseCluster) {}
+
+ /// Called to account for any other instruction not specifically accounted
+ /// for.
+ virtual void onCommonInstructionSimplification() {}
+
+ /// Start accounting potential benefits due to SROA for the given alloca.
+ virtual void onInitializeSROAArg(AllocaInst *Arg) {}
+
+ /// Account SROA savings for the AllocaInst value.
+ virtual void onAggregateSROAUse(AllocaInst *V) {}
+
+ bool handleSROA(Value *V, bool DoNotDisable) {
+ // Check for SROA candidates in comparisons.
+ if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
+ if (DoNotDisable) {
+ onAggregateSROAUse(SROAArg);
+ return true;
+ }
+ disableSROAForArg(SROAArg);
+ }
+ return false;
+ }
bool IsCallerRecursive = false;
bool IsRecursiveCall = false;
@@ -153,12 +222,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
unsigned NumInstructions = 0;
unsigned NumVectorInstructions = 0;
- /// Bonus to be applied when percentage of vector instructions in callee is
- /// high (see more details in updateThreshold).
- int VectorBonus = 0;
- /// Bonus to be applied when the callee has only one reachable basic block.
- int SingleBBBonus = 0;
-
/// While we walk the potentially-inlined instructions, we build up and
/// maintain a mapping of simplified values specific to this callsite. The
/// idea is to propagate any special information we have about arguments to
@@ -170,12 +233,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Keep track of the values which map back (through function arguments) to
/// allocas on the caller stack which could be simplified through SROA.
- DenseMap<Value *, Value *> SROAArgValues;
+ DenseMap<Value *, AllocaInst *> SROAArgValues;
- /// The mapping of caller Alloca values to their accumulated cost savings. If
- /// we have to disable SROA for one of the allocas, this tells us how much
- /// cost must be added.
- DenseMap<Value *, int> SROAArgCosts;
+ /// Keep track of Allocas for which we believe we may get SROA optimization.
+ /// We don't delete entries in SROAArgValue because we still want
+ /// isAllocaDerivedArg to function correctly.
+ DenseSet<AllocaInst *> EnabledSROAArgValues;
/// Keep track of values which map to a pointer base and constant offset.
DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
@@ -192,17 +255,20 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// loads.
bool EnableLoadElimination;
SmallPtrSet<Value *, 16> LoadAddrSet;
- int LoadEliminationCost = 0;
+
+ AllocaInst *getSROAArgForValueOrNull(Value *V) const {
+ auto It = SROAArgValues.find(V);
+ if (It == SROAArgValues.end() ||
+ EnabledSROAArgValues.count(It->second) == 0)
+ return nullptr;
+ return It->second;
+ }
// Custom simplification helper routines.
bool isAllocaDerivedArg(Value *V);
- bool lookupSROAArgAndCost(Value *V, Value *&Arg,
- DenseMap<Value *, int>::iterator &CostIt);
- void disableSROA(DenseMap<Value *, int>::iterator CostIt);
+ void disableSROAForArg(AllocaInst *SROAArg);
void disableSROA(Value *V);
void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
- void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
- int InstructionCost);
void disableLoadElimination();
bool isGEPFree(GetElementPtrInst &GEP);
bool canFoldInboundsGEP(GetElementPtrInst &I);
@@ -223,32 +289,13 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// inlined through this particular callsite.
bool isKnownNonNullInCallee(Value *V);
- /// Update Threshold based on callsite properties such as callee
- /// attributes and callee hotness for PGO builds. The Callee is explicitly
- /// passed to support analyzing indirect calls whose target is inferred by
- /// analysis.
- void updateThreshold(CallBase &Call, Function &Callee);
-
/// Return true if size growth is allowed when inlining the callee at \p Call.
bool allowSizeGrowth(CallBase &Call);
- /// Return true if \p Call is a cold callsite.
- bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
-
- /// Return a higher threshold if \p Call is a hot callsite.
- Optional<int> getHotCallSiteThreshold(CallBase &Call,
- BlockFrequencyInfo *CallerBFI);
-
// Custom analysis routines.
InlineResult analyzeBlock(BasicBlock *BB,
SmallPtrSetImpl<const Value *> &EphValues);
- /// Handle a capped 'int' increment for Cost.
- void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
- assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
- Cost = (int)std::min(UpperBound, Cost + Inc);
- }
-
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
void visit(Module *);
@@ -294,18 +341,12 @@ public:
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
- Function &Callee, CallBase &Call, const InlineParams &Params)
+ Function &Callee, CallBase &Call)
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
- CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
- ComputeFullInlineCost(OptComputeFullInlineCost ||
- Params.ComputeFullInlineCost || ORE),
- EnableLoadElimination(true) {}
+ CandidateCall(Call), EnableLoadElimination(true) {}
- InlineResult analyzeCall(CallBase &Call);
-
- int getThreshold() { return Threshold; }
- int getCost() { return Cost; }
+ InlineResult analyze();
// Keep a bunch of stats about the cost savings found so we can print them
// out when debugging.
@@ -315,12 +356,291 @@ public:
unsigned NumConstantPtrCmps = 0;
unsigned NumConstantPtrDiffs = 0;
unsigned NumInstructionsSimplified = 0;
+
+ void dump();
+};
+
+/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
+/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
+class InlineCostCallAnalyzer final : public CallAnalyzer {
+ const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
+ const bool ComputeFullInlineCost;
+ int LoadEliminationCost = 0;
+ /// Bonus to be applied when percentage of vector instructions in callee is
+ /// high (see more details in updateThreshold).
+ int VectorBonus = 0;
+ /// Bonus to be applied when the callee has only one reachable basic block.
+ int SingleBBBonus = 0;
+
+ /// Tunable parameters that control the analysis.
+ const InlineParams &Params;
+
+ /// Upper bound for the inlining cost. Bonuses are being applied to account
+ /// for speculative "expected profit" of the inlining decision.
+ int Threshold = 0;
+
+ /// Attempt to evaluate indirect calls to boost their inline cost.
+ const bool BoostIndirectCalls;
+
+ /// Inlining cost measured in abstract units, accounts for all the
+ /// instructions expected to be executed for a given function invocation.
+ /// Instructions that are statically proven to be dead based on call-site
+ /// arguments are not counted here.
+ int Cost = 0;
+
+ bool SingleBB = true;
+
unsigned SROACostSavings = 0;
unsigned SROACostSavingsLost = 0;
+ /// The mapping of caller Alloca values to their accumulated cost savings. If
+ /// we have to disable SROA for one of the allocas, this tells us how much
+ /// cost must be added.
+ DenseMap<AllocaInst *, int> SROAArgCosts;
+
+ /// Return true if \p Call is a cold callsite.
+ bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
+
+ /// Update Threshold based on callsite properties such as callee
+ /// attributes and callee hotness for PGO builds. The Callee is explicitly
+ /// passed to support analyzing indirect calls whose target is inferred by
+ /// analysis.
+ void updateThreshold(CallBase &Call, Function &Callee);
+ /// Return a higher threshold if \p Call is a hot callsite.
+ Optional<int> getHotCallSiteThreshold(CallBase &Call,
+ BlockFrequencyInfo *CallerBFI);
+
+ /// Handle a capped 'int' increment for Cost.
+ void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
+ assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
+ Cost = (int)std::min(UpperBound, Cost + Inc);
+ }
+
+ void onDisableSROA(AllocaInst *Arg) override {
+ auto CostIt = SROAArgCosts.find(Arg);
+ if (CostIt == SROAArgCosts.end())
+ return;
+ addCost(CostIt->second);
+ SROACostSavings -= CostIt->second;
+ SROACostSavingsLost += CostIt->second;
+ SROAArgCosts.erase(CostIt);
+ }
+
+ void onDisableLoadElimination() override {
+ addCost(LoadEliminationCost);
+ LoadEliminationCost = 0;
+ }
+ void onCallPenalty() override { addCost(InlineConstants::CallPenalty); }
+ void onCallArgumentSetup(const CallBase &Call) override {
+ // Pay the price of the argument setup. We account for the average 1
+ // instruction per call argument setup here.
+ addCost(Call.arg_size() * InlineConstants::InstrCost);
+ }
+ void onLoadRelativeIntrinsic() override {
+ // This is normally lowered to 4 LLVM instructions.
+ addCost(3 * InlineConstants::InstrCost);
+ }
+ void onLoweredCall(Function *F, CallBase &Call,
+ bool IsIndirectCall) override {
+ // We account for the average 1 instruction per call argument setup here.
+ addCost(Call.arg_size() * InlineConstants::InstrCost);
+
+ // If we have a constant that we are calling as a function, we can peer
+ // through it and see the function target. This happens not infrequently
+ // during devirtualization and so we want to give it a hefty bonus for
+ // inlining, but cap that bonus in the event that inlining wouldn't pan out.
+ // Pretend to inline the function, with a custom threshold.
+ if (IsIndirectCall && BoostIndirectCalls) {
+ auto IndirectCallParams = Params;
+ IndirectCallParams.DefaultThreshold =
+ InlineConstants::IndirectCallThreshold;
+ /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
+ /// to instantiate the derived class.
+ InlineCostCallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F,
+ Call, IndirectCallParams, false);
+ if (CA.analyze()) {
+ // We were able to inline the indirect call! Subtract the cost from the
+ // threshold to get the bonus we want to apply, but don't go below zero.
+ Cost -= std::max(0, CA.getThreshold() - CA.getCost());
+ }
+ } else
+ // Otherwise simply add the cost for merely making the call.
+ addCost(InlineConstants::CallPenalty);
+ }
+
+ void onFinalizeSwitch(unsigned JumpTableSize,
+ unsigned NumCaseCluster) override {
+ // If suitable for a jump table, consider the cost for the table size and
+ // branch to destination.
+ // Maximum valid cost increased in this function.
+ if (JumpTableSize) {
+ int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
+ 4 * InlineConstants::InstrCost;
+
+ addCost(JTCost, (int64_t)CostUpperBound);
+ return;
+ }
+ // Considering forming a binary search, we should find the number of nodes
+ // which is same as the number of comparisons when lowered. For a given
+ // number of clusters, n, we can define a recursive function, f(n), to find
+ // the number of nodes in the tree. The recursion is :
+ // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
+ // and f(n) = n, when n <= 3.
+ // This will lead a binary tree where the leaf should be either f(2) or f(3)
+ // when n > 3. So, the number of comparisons from leaves should be n, while
+ // the number of non-leaf should be :
+ // 2^(log2(n) - 1) - 1
+ // = 2^log2(n) * 2^-1 - 1
+ // = n / 2 - 1.
+ // Considering comparisons from leaf and non-leaf nodes, we can estimate the
+ // number of comparisons in a simple closed form :
+ // n + n / 2 - 1 = n * 3 / 2 - 1
+ if (NumCaseCluster <= 3) {
+ // Suppose a comparison includes one compare and one conditional branch.
+ addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
+ return;
+ }
+
+ int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
+ int64_t SwitchCost =
+ ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
+
+ addCost(SwitchCost, (int64_t)CostUpperBound);
+ }
+ void onCommonInstructionSimplification() override {
+ addCost(InlineConstants::InstrCost);
+ }
+
+ void onInitializeSROAArg(AllocaInst *Arg) override {
+ assert(Arg != nullptr &&
+ "Should not initialize SROA costs for null value.");
+ SROAArgCosts[Arg] = 0;
+ EnabledSROAArgValues.insert(Arg);
+ }
+
+ void onAggregateSROAUse(AllocaInst *SROAArg) override {
+ auto CostIt = SROAArgCosts.find(SROAArg);
+ assert(CostIt != SROAArgCosts.end() &&
+ "expected this argument to have a cost");
+ CostIt->second += InlineConstants::InstrCost;
+ SROACostSavings += InlineConstants::InstrCost;
+ }
+
+ void onBlockAnalyzed(const BasicBlock *BB) override {
+ auto *TI = BB->getTerminator();
+ // If we had any successors at this point, then post-inlining is likely to
+ // have them as well. Note that we assume any basic blocks which existed
+ // due to branches or switches which folded above will also fold after
+ // inlining.
+ if (SingleBB && TI->getNumSuccessors() > 1) {
+ // Take off the bonus we applied to the threshold.
+ Threshold -= SingleBBBonus;
+ SingleBB = false;
+ }
+ }
+ InlineResult finalizeAnalysis() override {
+ // Loops generally act a lot like calls in that they act as barriers to
+ // movement, require a certain amount of setup, etc. So when optimising for
+ // size, we penalise any call sites that perform loops. We do this after all
+ // other costs here, so will likely only be dealing with relatively small
+ // functions (and hence DT and LI will hopefully be cheap).
+ auto *Caller = CandidateCall.getFunction();
+ if (Caller->hasMinSize()) {
+ DominatorTree DT(F);
+ LoopInfo LI(DT);
+ int NumLoops = 0;
+ for (Loop *L : LI) {
+ // Ignore loops that will not be executed
+ if (DeadBlocks.count(L->getHeader()))
+ continue;
+ NumLoops++;
+ }
+ addCost(NumLoops * InlineConstants::CallPenalty);
+ }
+
+ // We applied the maximum possible vector bonus at the beginning. Now,
+ // subtract the excess bonus, if any, from the Threshold before
+ // comparing against Cost.
+ if (NumVectorInstructions <= NumInstructions / 10)
+ Threshold -= VectorBonus;
+ else if (NumVectorInstructions <= NumInstructions / 2)
+ Threshold -= VectorBonus / 2;
+
+ return Cost < std::max(1, Threshold);
+ }
+ bool shouldStop() override {
+ // Bail out the moment we cross the threshold. This means we'll under-count
+ // the cost, but only when undercounting doesn't matter.
+ return Cost >= Threshold && !ComputeFullInlineCost;
+ }
+
+ void onLoadEliminationOpportunity() override {
+ LoadEliminationCost += InlineConstants::InstrCost;
+ }
+
+ InlineResult onAnalysisStart() override {
+ // Perform some tweaks to the cost and threshold based on the direct
+ // callsite information.
+
+ // We want to more aggressively inline vector-dense kernels, so up the
+ // threshold, and we'll lower it if the % of vector instructions gets too
+ // low. Note that these bonuses are somewhat arbitrary and evolved over
+ // time by accident as much as because they are principled bonuses.
+ //
+ // FIXME: It would be nice to remove all such bonuses. At least it would be
+ // nice to base the bonus values on something more scientific.
+ assert(NumInstructions == 0);
+ assert(NumVectorInstructions == 0);
+
+ // Update the threshold based on callsite properties
+ updateThreshold(CandidateCall, F);
+
+ // While Threshold depends on commandline options that can take negative
+ // values, we want to enforce the invariant that the computed threshold and
+ // bonuses are non-negative.
+ assert(Threshold >= 0);
+ assert(SingleBBBonus >= 0);
+ assert(VectorBonus >= 0);
+
+ // Speculatively apply all possible bonuses to Threshold. If cost exceeds
+ // this Threshold any time, and cost cannot decrease, we can stop processing
+ // the rest of the function body.
+ Threshold += (SingleBBBonus + VectorBonus);
+
+ // Give out bonuses for the callsite, as the instructions setting them up
+ // will be gone after inlining.
+ addCost(-getCallsiteCost(this->CandidateCall, DL));
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (F.getCallingConv() == CallingConv::Cold)
+ Cost += InlineConstants::ColdccPenalty;
+
+ // Check if we're done. This can happen due to bonuses and penalties.
+ if (Cost >= Threshold && !ComputeFullInlineCost)
+ return "high cost";
+
+ return true;
+ }
+
+public:
+ InlineCostCallAnalyzer(
+ const TargetTransformInfo &TTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
+ ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
+ CallBase &Call, const InlineParams &Params, bool BoostIndirect = true)
+ : CallAnalyzer(TTI, GetAssumptionCache, GetBFI, PSI, ORE, Callee, Call),
+ ComputeFullInlineCost(OptComputeFullInlineCost ||
+ Params.ComputeFullInlineCost || ORE),
+ Params(Params), Threshold(Params.DefaultThreshold),
+ BoostIndirectCalls(BoostIndirect) {}
void dump();
-};
+ virtual ~InlineCostCallAnalyzer() {}
+ int getThreshold() { return Threshold; }
+ int getCost() { return Cost; }
+};
} // namespace
/// Test whether the given value is an Alloca-derived function argument.
@@ -328,55 +648,21 @@ bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
return SROAArgValues.count(V);
}
-/// Lookup the SROA-candidate argument and cost iterator which V maps to.
-/// Returns false if V does not map to a SROA-candidate.
-bool CallAnalyzer::lookupSROAArgAndCost(
- Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
- if (SROAArgValues.empty() || SROAArgCosts.empty())
- return false;
-
- DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
- if (ArgIt == SROAArgValues.end())
- return false;
-
- Arg = ArgIt->second;
- CostIt = SROAArgCosts.find(Arg);
- return CostIt != SROAArgCosts.end();
-}
-
-/// Disable SROA for the candidate marked by this cost iterator.
-///
-/// This marks the candidate as no longer viable for SROA, and adds the cost
-/// savings associated with it back into the inline cost measurement.
-void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
- // If we're no longer able to perform SROA we need to undo its cost savings
- // and prevent subsequent analysis.
- addCost(CostIt->second);
- SROACostSavings -= CostIt->second;
- SROACostSavingsLost += CostIt->second;
- SROAArgCosts.erase(CostIt);
+void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
+ onDisableSROA(SROAArg);
+ EnabledSROAArgValues.erase(SROAArg);
disableLoadElimination();
}
-
/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(V, SROAArg, CostIt))
- disableSROA(CostIt);
-}
-
-/// Accumulate the given cost for a particular SROA candidate.
-void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
- int InstructionCost) {
- CostIt->second += InstructionCost;
- SROACostSavings += InstructionCost;
+ if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
+ disableSROAForArg(SROAArg);
+ }
}
void CallAnalyzer::disableLoadElimination() {
if (EnableLoadElimination) {
- addCost(LoadEliminationCost);
- LoadEliminationCost = 0;
+ onDisableLoadElimination();
EnableLoadElimination = false;
}
}
@@ -422,9 +708,9 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
Operands.push_back(GEP.getOperand(0));
for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
- Operands.push_back(SimpleOp);
- else
- Operands.push_back(*I);
+ Operands.push_back(SimpleOp);
+ else
+ Operands.push_back(*I);
return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
}
@@ -445,8 +731,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Accumulate the allocated size.
if (I.isStaticAlloca()) {
Type *Ty = I.getAllocatedType();
- AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getFixedSize(),
- AllocatedSize);
+ AllocatedSize =
+ SaturatingAdd(DL.getTypeAllocSize(Ty).getFixedSize(), AllocatedSize);
}
// We will happily inline static alloca instructions.
@@ -548,9 +834,7 @@ bool CallAnalyzer::visitPHI(PHINode &I) {
if (FirstBaseAndOffset.first) {
ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
+ if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
SROAArgValues[&I] = SROAArg;
}
@@ -580,10 +864,7 @@ bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
}
bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- bool SROACandidate =
- lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
+ auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());
// Lambda to check whether a GEP's indices are all constant.
auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
@@ -594,7 +875,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
};
if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
- if (SROACandidate)
+ if (SROAArg)
SROAArgValues[&I] = SROAArg;
// Constant GEPs are modeled as free.
@@ -602,8 +883,8 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
}
// Variable GEPs will require math and will disable SROA.
- if (SROACandidate)
- disableSROA(CostIt);
+ if (SROAArg)
+ disableSROAForArg(SROAArg);
return isGEPFree(I);
}
@@ -643,9 +924,7 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) {
ConstantOffsetPtrs[&I] = BaseAndOffset;
// Also look for SROA candidates here.
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
SROAArgValues[&I] = SROAArg;
// Bitcasts are always zero cost.
@@ -677,9 +956,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// and so we can just add the integer in here. The only places where SROA is
// preserved either cannot fire on an integer, or won't in-and-of themselves
// disable SROA (ext) w/o some later use that we would see and disable.
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
SROAArgValues[&I] = SROAArg;
return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
@@ -703,9 +980,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
}
// "Propagate" SROA here in the same manner as we do for ptrtoint above.
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
+ if (auto *SROAArg = getSROAArgForValueOrNull(Op))
SROAArgValues[&I] = SROAArg;
return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
@@ -732,7 +1007,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
case Instruction::FPToUI:
case Instruction::FPToSI:
if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
- addCost(InlineConstants::CallPenalty);
+ onCallPenalty();
break;
default:
break;
@@ -805,8 +1080,8 @@ bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
return true;
}
-bool CallAnalyzer::isColdCallSite(CallBase &Call,
- BlockFrequencyInfo *CallerBFI) {
+bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
+ BlockFrequencyInfo *CallerBFI) {
// If global profile summary is available, then callsite's coldness is
// determined based on that.
if (PSI && PSI->hasProfileSummary())
@@ -829,8 +1104,8 @@ bool CallAnalyzer::isColdCallSite(CallBase &Call,
}
Optional<int>
-CallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
- BlockFrequencyInfo *CallerBFI) {
+InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
+ BlockFrequencyInfo *CallerBFI) {
// If global profile summary is available, then callsite's hotness is
// determined based on that.
@@ -857,7 +1132,7 @@ CallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
return None;
}
-void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
+void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
// If no size growth is allowed for this inlining, set Threshold to 0.
if (!allowSizeGrowth(Call)) {
Threshold = 0;
@@ -1019,19 +1294,7 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) {
: ConstantInt::getFalse(I.getType());
return true;
}
- // Finally check for SROA candidates in comparisons.
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
- if (isa<ConstantPointerNull>(I.getOperand(1))) {
- accumulateSROACost(CostIt, InlineConstants::InstrCost);
- return true;
- }
-
- disableSROA(CostIt);
- }
-
- return false;
+ return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
}
bool CallAnalyzer::visitSub(BinaryOperator &I) {
@@ -1072,8 +1335,8 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
Value *SimpleV = nullptr;
if (auto FI = dyn_cast<FPMathOperator>(&I))
- SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
- CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
+ SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
+ FI->getFastMathFlags(), DL);
else
SimpleV =
SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
@@ -1095,7 +1358,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
if (I.getType()->isFloatingPointTy() &&
TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
!match(&I, m_FNeg(m_Value())))
- addCost(InlineConstants::CallPenalty);
+ onCallPenalty();
return false;
}
@@ -1106,9 +1369,8 @@ bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
if (!COp)
COp = SimplifiedValues.lookup(Op);
- Value *SimpleV = SimplifyFNegInst(COp ? COp : Op,
- cast<FPMathOperator>(I).getFastMathFlags(),
- DL);
+ Value *SimpleV = SimplifyFNegInst(
+ COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
SimplifiedValues[&I] = C;
@@ -1123,23 +1385,15 @@ bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
}
bool CallAnalyzer::visitLoad(LoadInst &I) {
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
- if (I.isSimple()) {
- accumulateSROACost(CostIt, InlineConstants::InstrCost);
- return true;
- }
-
- disableSROA(CostIt);
- }
+ if (handleSROA(I.getPointerOperand(), I.isSimple()))
+ return true;
// If the data is already loaded from this address and hasn't been clobbered
// by any stores or calls, this load is likely to be redundant and can be
// eliminated.
if (EnableLoadElimination &&
!LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
- LoadEliminationCost += InlineConstants::InstrCost;
+ onLoadEliminationOpportunity();
return true;
}
@@ -1147,16 +1401,8 @@ bool CallAnalyzer::visitLoad(LoadInst &I) {
}
bool CallAnalyzer::visitStore(StoreInst &I) {
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
- if (I.isSimple()) {
- accumulateSROACost(CostIt, InlineConstants::InstrCost);
- return true;
- }
-
- disableSROA(CostIt);
- }
+ if (handleSROA(I.getPointerOperand(), I.isSimple()))
+ return true;
// The store can potentially clobber loads and prevent repeated loads from
// being eliminated.
@@ -1238,97 +1484,69 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
ContainsNoDuplicateCall = true;
- if (Function *F = Call.getCalledFunction()) {
- // When we have a concrete function, first try to simplify it directly.
- if (simplifyCallSite(F, Call))
- return true;
-
- // Next check if it is an intrinsic we know about.
- // FIXME: Lift this into part of the InstVisitor.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
- switch (II->getIntrinsicID()) {
- default:
- if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
- disableLoadElimination();
- return Base::visitCallBase(Call);
-
- case Intrinsic::load_relative:
- // This is normally lowered to 4 LLVM instructions.
- addCost(3 * InlineConstants::InstrCost);
- return false;
-
- case Intrinsic::memset:
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
+ Value *Callee = Call.getCalledOperand();
+ Function *F = dyn_cast_or_null<Function>(Callee);
+ bool IsIndirectCall = !F;
+ if (IsIndirectCall) {
+ // Check if this happens to be an indirect function call to a known function
+ // in this inline context. If not, we've done all we can.
+ F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
+ if (!F) {
+ onCallArgumentSetup(Call);
+
+ if (!Call.onlyReadsMemory())
disableLoadElimination();
- // SROA can usually chew through these intrinsics, but they aren't free.
- return false;
- case Intrinsic::icall_branch_funnel:
- case Intrinsic::localescape:
- HasUninlineableIntrinsic = true;
- return false;
- case Intrinsic::vastart:
- InitsVargArgs = true;
- return false;
- }
+ return Base::visitCallBase(Call);
}
+ }
- if (F == Call.getFunction()) {
- // This flag will fully abort the analysis, so don't bother with anything
- // else.
- IsRecursiveCall = true;
- return false;
- }
+ assert(F && "Expected a call to a known function");
- if (TTI.isLoweredToCall(F)) {
- // We account for the average 1 instruction per call argument setup
- // here.
- addCost(Call.arg_size() * InlineConstants::InstrCost);
+ // When we have a concrete function, first try to simplify it directly.
+ if (simplifyCallSite(F, Call))
+ return true;
- // Everything other than inline ASM will also have a significant cost
- // merely from making the call.
- if (!isa<InlineAsm>(Call.getCalledValue()))
- addCost(InlineConstants::CallPenalty);
- }
+ // Next check if it is an intrinsic we know about.
+ // FIXME: Lift this into part of the InstVisitor.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
+ disableLoadElimination();
+ return Base::visitCallBase(Call);
+
+ case Intrinsic::load_relative:
+ onLoadRelativeIntrinsic();
+ return false;
- if (!Call.onlyReadsMemory())
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
disableLoadElimination();
- return Base::visitCallBase(Call);
+ // SROA can usually chew through these intrinsics, but they aren't free.
+ return false;
+ case Intrinsic::icall_branch_funnel:
+ case Intrinsic::localescape:
+ HasUninlineableIntrinsic = true;
+ return false;
+ case Intrinsic::vastart:
+ InitsVargArgs = true;
+ return false;
+ }
}
- // Otherwise we're in a very special case -- an indirect function call. See
- // if we can be particularly clever about this.
- Value *Callee = Call.getCalledValue();
-
- // First, pay the price of the argument setup. We account for the average
- // 1 instruction per call argument setup here.
- addCost(Call.arg_size() * InlineConstants::InstrCost);
-
- // Next, check if this happens to be an indirect function call to a known
- // function in this inline context. If not, we've done all we can.
- Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
- if (!F) {
- if (!Call.onlyReadsMemory())
- disableLoadElimination();
- return Base::visitCallBase(Call);
+ if (F == Call.getFunction()) {
+ // This flag will fully abort the analysis, so don't bother with anything
+ // else.
+ IsRecursiveCall = true;
+ return false;
}
- // If we have a constant that we are calling as a function, we can peer
- // through it and see the function target. This happens not infrequently
- // during devirtualization and so we want to give it a hefty bonus for
- // inlining, but cap that bonus in the event that inlining wouldn't pan
- // out. Pretend to inline the function, with a custom threshold.
- auto IndirectCallParams = Params;
- IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
- CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
- IndirectCallParams);
- if (CA.analyzeCall(Call)) {
- // We were able to inline the indirect call! Subtract the cost from the
- // threshold to get the bonus we want to apply, but don't go below zero.
- Cost -= std::max(0, CA.getThreshold() - CA.getCost());
+ if (TTI.isLoweredToCall(F)) {
+ onLoweredCall(F, Call, IsIndirectCall);
}
- if (!F->onlyReadsMemory())
+ if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
disableLoadElimination();
return Base::visitCallBase(Call);
}
@@ -1381,9 +1599,7 @@ bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
+ if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
SROAArgValues[&SI] = SROAArg;
return true;
}
@@ -1422,9 +1638,7 @@ bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
if (BaseAndOffset.first) {
ConstantOffsetPtrs[&SI] = BaseAndOffset;
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
+ if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
SROAArgValues[&SI] = SROAArg;
}
@@ -1452,49 +1666,12 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
// inlining those. It will prevent inlining in cases where the optimization
// does not (yet) fire.
- // Maximum valid cost increased in this function.
- int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
-
unsigned JumpTableSize = 0;
+ BlockFrequencyInfo *BFI = GetBFI ? &((*GetBFI)(F)) : nullptr;
unsigned NumCaseCluster =
- TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
+ TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
- // If suitable for a jump table, consider the cost for the table size and
- // branch to destination.
- if (JumpTableSize) {
- int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
- 4 * InlineConstants::InstrCost;
-
- addCost(JTCost, (int64_t)CostUpperBound);
- return false;
- }
-
- // Considering forming a binary search, we should find the number of nodes
- // which is same as the number of comparisons when lowered. For a given
- // number of clusters, n, we can define a recursive function, f(n), to find
- // the number of nodes in the tree. The recursion is:
- // f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
- // and f(n) = n, when n <= 3.
- // This will lead to a binary tree where the leaf should be either f(2) or f(3)
- // when n > 3. So, the number of comparisons from leaves should be n, while
- // the number of non-leaf nodes should be:
- // 2^(log2(n) - 1) - 1
- // = 2^log2(n) * 2^-1 - 1
- // = n / 2 - 1.
- // Considering comparisons from leaf and non-leaf nodes, we can estimate the
- // number of comparisons in a simple closed form :
- // n + n / 2 - 1 = n * 3 / 2 - 1
- if (NumCaseCluster <= 3) {
- // Suppose a comparison includes one compare and one conditional branch.
- addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
- return false;
- }
-
- int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
- int64_t SwitchCost =
- ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
-
- addCost(SwitchCost, (int64_t)CostUpperBound);
+ onFinalizeSwitch(JumpTableSize, NumCaseCluster);
return false;
}
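The closed-form estimate in the removed comment is easy to sanity-check. A standalone sketch (plain C++, hypothetical helper name) evaluating the recursion f(n) = 1 + f(n/2) + f(n - n/2) for n > 3 and f(n) = n otherwise:

#include <cassert>

// Counts the nodes of the lowered binary-search tree over n case clusters.
static int numTreeNodes(int n) {
  return n <= 3 ? n : 1 + numTreeNodes(n / 2) + numTreeNodes(n - n / 2);
}

int main() {
  // For n = 8 the estimate 3 * n / 2 - 1 = 11 is exact:
  assert(numTreeNodes(8) == 11); // f(8) = 1 + f(4) + f(4) = 1 + 5 + 5
  return 0;
}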
@@ -1587,7 +1764,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (Base::visit(&*I))
++NumInstructionsSimplified;
else
- addCost(InlineConstants::InstrCost);
+ onCommonInstructionSimplification();
using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
@@ -1632,9 +1809,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
return IR;
}
- // Check if we've passed the maximum possible threshold so we don't spin in
- // huge basic blocks that will never inline.
- if (Cost >= Threshold && !ComputeFullInlineCost)
+ if (shouldStop())
return false;
}
@@ -1676,8 +1851,8 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
} while (Visited.insert(V).second);
- Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
- return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
+ Type *IdxPtrTy = DL.getIndexType(V->getType());
+ return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
}
/// Find dead blocks due to deleted CFG edges during inlining.
@@ -1725,54 +1900,17 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
-InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
+InlineResult CallAnalyzer::analyze() {
++NumCallsAnalyzed;
- // Perform some tweaks to the cost and threshold based on the direct
- // callsite information.
-
- // We want to more aggressively inline vector-dense kernels, so up the
- // threshold, and we'll lower it if the % of vector instructions gets too
- // low. Note that these bonuses are somewhat arbitrary and evolved over time
- // by accident as much as because they are principled bonuses.
- //
- // FIXME: It would be nice to remove all such bonuses. At least it would be
- // nice to base the bonus values on something more scientific.
- assert(NumInstructions == 0);
- assert(NumVectorInstructions == 0);
-
- // Update the threshold based on callsite properties
- updateThreshold(Call, F);
-
- // While Threshold depends on commandline options that can take negative
- // values, we want to enforce the invariant that the computed threshold and
- // bonuses are non-negative.
- assert(Threshold >= 0);
- assert(SingleBBBonus >= 0);
- assert(VectorBonus >= 0);
-
- // Speculatively apply all possible bonuses to Threshold. If cost exceeds
- // this Threshold any time, and cost cannot decrease, we can stop processing
- // the rest of the function body.
- Threshold += (SingleBBBonus + VectorBonus);
-
- // Give out bonuses for the callsite, as the instructions setting them up
- // will be gone after inlining.
- addCost(-getCallsiteCost(Call, DL));
-
- // If this function uses the coldcc calling convention, prefer not to inline
- // it.
- if (F.getCallingConv() == CallingConv::Cold)
- Cost += InlineConstants::ColdccPenalty;
-
- // Check if we're done. This can happen due to bonuses and penalties.
- if (Cost >= Threshold && !ComputeFullInlineCost)
- return "high cost";
+ auto Result = onAnalysisStart();
+ if (!Result)
+ return Result;
if (F.empty())
return true;
- Function *Caller = Call.getFunction();
+ Function *Caller = CandidateCall.getFunction();
// Check if the caller function is recursive itself.
for (User *U : Caller->users()) {
CallBase *Call = dyn_cast<CallBase>(U);
@@ -1784,10 +1922,10 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
// Populate our simplified values by mapping from function arguments to call
// arguments with known important simplifications.
- auto CAI = Call.arg_begin();
+ auto CAI = CandidateCall.arg_begin();
for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
FAI != FAE; ++FAI, ++CAI) {
- assert(CAI != Call.arg_end());
+ assert(CAI != CandidateCall.arg_end());
if (Constant *C = dyn_cast<Constant>(CAI))
SimplifiedValues[&*FAI] = C;
@@ -1796,9 +1934,9 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
// We can SROA any pointer arguments derived from alloca instructions.
- if (isa<AllocaInst>(PtrArg)) {
- SROAArgValues[&*FAI] = PtrArg;
- SROAArgCosts[PtrArg] = 0;
+ if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
+ SROAArgValues[&*FAI] = SROAArg;
+ onInitializeSROAArg(SROAArg);
}
}
}
@@ -1824,12 +1962,10 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
BBSetVector;
BBSetVector BBWorklist;
BBWorklist.insert(&F.getEntryBlock());
- bool SingleBB = true;
+
// Note that we *must not* cache the size, this loop grows the worklist.
for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
- // Bail out the moment we cross the threshold. This means we'll under-count
- // the cost, but only when undercounting doesn't matter.
- if (Cost >= Threshold && !ComputeFullInlineCost)
+ if (shouldStop())
break;
BasicBlock *BB = BBWorklist[Idx];
@@ -1889,57 +2025,23 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
++TIdx)
BBWorklist.insert(TI->getSuccessor(TIdx));
- // If we had any successors at this point, than post-inlining is likely to
- // have them as well. Note that we assume any basic blocks which existed
- // due to branches or switches which folded above will also fold after
- // inlining.
- if (SingleBB && TI->getNumSuccessors() > 1) {
- // Take off the bonus we applied to the threshold.
- Threshold -= SingleBBBonus;
- SingleBB = false;
- }
+ onBlockAnalyzed(BB);
}
- bool OnlyOneCallAndLocalLinkage =
- F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
+ bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
+ &F == CandidateCall.getCalledFunction();
// If this is a noduplicate call, we can still inline as long as
// inlining this would cause the removal of the caller (so the instruction
// is not actually duplicated, just moved).
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
return "noduplicate";
- // Loops generally act a lot like calls in that they act like barriers to
- // movement, require a certain amount of setup, etc. So when optimising for
- // size, we penalise any call sites that perform loops. We do this after all
- // other costs here, so will likely only be dealing with relatively small
- // functions (and hence DT and LI will hopefully be cheap).
- if (Caller->hasMinSize()) {
- DominatorTree DT(F);
- LoopInfo LI(DT);
- int NumLoops = 0;
- for (Loop *L : LI) {
- // Ignore loops that will not be executed
- if (DeadBlocks.count(L->getHeader()))
- continue;
- NumLoops++;
- }
- addCost(NumLoops * InlineConstants::CallPenalty);
- }
-
- // We applied the maximum possible vector bonus at the beginning. Now,
- // subtract the excess bonus, if any, from the Threshold before
- // comparing against Cost.
- if (NumVectorInstructions <= NumInstructions / 10)
- Threshold -= VectorBonus;
- else if (NumVectorInstructions <= NumInstructions / 2)
- Threshold -= VectorBonus/2;
-
- return Cost < std::max(1, Threshold);
+ return finalizeAnalysis();
}
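finalizeAnalysis() now owns the end-of-analysis bookkeeping shown in the removed lines, including trimming the speculative vector bonus. A standalone sketch of that pattern (hypothetical numbers and names, not the LLVM API):

#include <algorithm>

// The full vector bonus was added to Threshold up front so the walk can
// stop early; the unearned portion is subtracted again before the final
// cost-versus-threshold decision.
bool finalDecision(int Cost, int Threshold, int VectorBonus,
                   int NumVectorInsts, int NumInsts) {
  if (NumVectorInsts <= NumInsts / 10)
    Threshold -= VectorBonus;     // earned none of the bonus
  else if (NumVectorInsts <= NumInsts / 2)
    Threshold -= VectorBonus / 2; // earned half of it
  return Cost < std::max(1, Threshold);
}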
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Dump stats about this call's analysis.
-LLVM_DUMP_METHOD void CallAnalyzer::dump() {
+LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() {
#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
DEBUG_PRINT_STAT(NumConstantArgs);
DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
@@ -2073,9 +2175,9 @@ InlineCost llvm::getInlineCost(
LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "... (caller:" << Caller->getName() << ")\n");
- CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee,
- Call, Params);
- InlineResult ShouldInline = CA.analyzeCall(Call);
+ InlineCostCallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE,
+ *Callee, Call, Params);
+ InlineResult ShouldInline = CA.analyze();
LLVM_DEBUG(CA.dump());
@@ -2121,16 +2223,17 @@ InlineResult llvm::isInlineViable(Function &F) {
switch (Call->getCalledFunction()->getIntrinsicID()) {
default:
break;
- // Disallow inlining of @llvm.icall.branch.funnel because current
- // backend can't separate call targets from call arguments.
case llvm::Intrinsic::icall_branch_funnel:
+ // Disallow inlining of @llvm.icall.branch.funnel because current
+ // backend can't separate call targets from call arguments.
return "disallowed inlining of @llvm.icall.branch.funnel";
- // Disallow inlining functions that call @llvm.localescape. Doing this
- // correctly would require major changes to the inliner.
case llvm::Intrinsic::localescape:
+ // Disallow inlining functions that call @llvm.localescape. Doing this
+ // correctly would require major changes to the inliner.
return "disallowed inlining of @llvm.localescape";
- // Disallow inlining of functions that initialize VarArgs with va_start.
case llvm::Intrinsic::vastart:
+ // Disallow inlining of functions that initialize VarArgs with
+ // va_start.
return "contains VarArgs initialized with va_start";
}
}
@@ -2173,7 +2276,8 @@ InlineParams llvm::getInlineParams(int Threshold) {
if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
- // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
+ // Set the ColdCallSiteThreshold knob from the
+ // -inline-cold-callsite-threshold.
Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
// Set the OptMinSizeThreshold and OptSizeThreshold params only if the
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp
index 943a99a5f46d..bb9c7b7eb11f 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
index 35190ce3e11a..415797d6a378 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
index cb8987721700..d7510c899101 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -137,6 +137,71 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
CRHS == LHS;
}
+/// Simplify comparison with true or false branch of select:
+/// %sel = select i1 %cond, i32 %tv, i32 %fv
+/// %cmp = icmp sle i32 %sel, %rhs
+/// Compose a new comparison by substituting %sel with either %tv or %fv
+/// and see if it simplifies.
+static Value *simplifyCmpSelCase(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, Value *Cond,
+ const SimplifyQuery &Q, unsigned MaxRecurse,
+ Constant *TrueOrFalse) {
+ Value *SimplifiedCmp = SimplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse);
+ if (SimplifiedCmp == Cond) {
+ // %cmp simplified to the select condition (%cond).
+ return TrueOrFalse;
+ } else if (!SimplifiedCmp && isSameCompare(Cond, Pred, LHS, RHS)) {
+ // It didn't simplify. However, if the composed comparison is equivalent
+ // to the select condition (%cond), then we can replace it.
+ return TrueOrFalse;
+ }
+ return SimplifiedCmp;
+}
+
+/// Simplify comparison with true branch of select
+static Value *simplifyCmpSelTrueCase(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, Value *Cond,
+ const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse,
+ getTrue(Cond->getType()));
+}
+
+/// Simplify comparison with false branch of select
+static Value *simplifyCmpSelFalseCase(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, Value *Cond,
+ const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse,
+ getFalse(Cond->getType()));
+}
+
+/// We know the comparison with both branches of the select can be
+/// simplified, but the results are not equal. This routine handles some
+/// logical simplifications.
+static Value *handleOtherCmpSelSimplifications(Value *TCmp, Value *FCmp,
+ Value *Cond,
+ const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ // If the false value simplified to false, then the result of the compare
+ // is equal to "Cond && TCmp". This also catches the case when the false
+ // value simplified to false and the true value to true, returning "Cond".
+ if (match(FCmp, m_Zero()))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
+ return V;
+ // If the true value simplified to true, then the result of the compare
+ // is equal to "Cond || FCmp".
+ if (match(TCmp, m_One()))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
+ return V;
+ // Finally, if the false value simplified to true and the true value to
+ // false, then the result of the compare is equal to "!Cond".
+ if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+ if (Value *V = SimplifyXorInst(
+ Cond, Constant::getAllOnesValue(Cond->getType()), Q, MaxRecurse))
+ return V;
+ return nullptr;
+}
+
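The three folds above are plain boolean identities on the select result; modeling i1 as bool makes them easy to verify (a standalone model, not LLVM code):

// select Cond, TCmp, false  ==  Cond && TCmp
bool selWithFalseArm(bool Cond, bool TCmp) { return Cond ? TCmp : false; }
// select Cond, true, FCmp   ==  Cond || FCmp
bool selWithTrueArm(bool Cond, bool FCmp) { return Cond ? true : FCmp; }
// select Cond, false, true  ==  !Cond
bool selInverted(bool Cond) { return Cond ? false : true; }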
/// Does the given value dominate the specified phi node?
static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
Instruction *I = dyn_cast<Instruction>(V);
@@ -398,6 +463,12 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
/// In the case of a comparison with a select instruction, try to simplify the
/// comparison by seeing whether both branches of the select result in the same
/// value. Returns the common value if so, otherwise returns null.
+/// For example, if we have:
+/// %tmp = select i1 %cmp, i32 1, i32 2
+/// %cmp1 = icmp sle i32 %tmp, 3
+/// We can simplify %cmp1 to true, because both branches of the select are
+/// less than 3. We compose a new comparison by substituting %tmp with both
+/// branches of the select and see if it can be simplified.
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
@@ -418,32 +489,14 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
// Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
// Does "cmp TV, RHS" simplify?
- Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse);
- if (TCmp == Cond) {
- // It not only simplified, it simplified to the select condition. Replace
- // it with 'true'.
- TCmp = getTrue(Cond->getType());
- } else if (!TCmp) {
- // It didn't simplify. However if "cmp TV, RHS" is equal to the select
- // condition then we can replace it with 'true'. Otherwise give up.
- if (!isSameCompare(Cond, Pred, TV, RHS))
- return nullptr;
- TCmp = getTrue(Cond->getType());
- }
+ Value *TCmp = simplifyCmpSelTrueCase(Pred, TV, RHS, Cond, Q, MaxRecurse);
+ if (!TCmp)
+ return nullptr;
// Does "cmp FV, RHS" simplify?
- Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse);
- if (FCmp == Cond) {
- // It not only simplified, it simplified to the select condition. Replace
- // it with 'false'.
- FCmp = getFalse(Cond->getType());
- } else if (!FCmp) {
- // It didn't simplify. However if "cmp FV, RHS" is equal to the select
- // condition then we can replace it with 'false'. Otherwise give up.
- if (!isSameCompare(Cond, Pred, FV, RHS))
- return nullptr;
- FCmp = getFalse(Cond->getType());
- }
+ Value *FCmp = simplifyCmpSelFalseCase(Pred, FV, RHS, Cond, Q, MaxRecurse);
+ if (!FCmp)
+ return nullptr;
// If both sides simplified to the same value, then use it as the result of
// the original comparison.
@@ -452,26 +505,8 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
// The remaining cases only make sense if the select condition has the same
// type as the result of the comparison, so bail out if this is not so.
- if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy())
- return nullptr;
- // If the false value simplified to false, then the result of the compare
- // is equal to "Cond && TCmp". This also catches the case when the false
- // value simplified to false and the true value to true, returning "Cond".
- if (match(FCmp, m_Zero()))
- if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
- return V;
- // If the true value simplified to true, then the result of the compare
- // is equal to "Cond || FCmp".
- if (match(TCmp, m_One()))
- if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
- return V;
- // Finally, if the false value simplified to true and the true value to
- // false, then the result of the compare is equal to "!Cond".
- if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
- if (Value *V =
- SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
- Q, MaxRecurse))
- return V;
+ if (Cond->getType()->isVectorTy() == RHS->getType()->isVectorTy())
+ return handleOtherCmpSelSimplifications(TCmp, FCmp, Cond, Q, MaxRecurse);
return nullptr;
}
@@ -543,10 +578,16 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
// Evaluate the BinOp on the incoming phi values.
Value *CommonValue = nullptr;
- for (Value *Incoming : PI->incoming_values()) {
+ for (unsigned u = 0, e = PI->getNumIncomingValues(); u < e; ++u) {
+ Value *Incoming = PI->getIncomingValue(u);
+ Instruction *InTI = PI->getIncomingBlock(u)->getTerminator();
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
- Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse);
+ // Change the context instruction to the "edge" that flows into the phi.
+ // This is important because that is where Incoming is actually
+ // "evaluated", even though it is used later somewhere else.
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI),
+ MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// to previously, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -656,16 +697,16 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
bool AllowNonInbounds = false) {
assert(V->getType()->isPtrOrPtrVectorTy());
- Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
- APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
+ Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
+ APInt Offset = APInt::getNullValue(IntIdxTy->getIntegerBitWidth());
V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
// As that strip may trace through `addrspacecast`, need to sext or trunc
// the offset calculated.
- IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
- Offset = Offset.sextOrTrunc(IntPtrTy->getIntegerBitWidth());
+ IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
+ Offset = Offset.sextOrTrunc(IntIdxTy->getIntegerBitWidth());
- Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
+ Constant *OffsetIntPtr = ConstantInt::get(IntIdxTy, Offset);
if (V->getType()->isVectorTy())
return ConstantVector::getSplat(V->getType()->getVectorNumElements(),
OffsetIntPtr);
@@ -3903,18 +3944,21 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
/// Try to simplify a select instruction when its condition operand is a
/// floating-point comparison.
-static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F) {
+static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F,
+ const SimplifyQuery &Q) {
FCmpInst::Predicate Pred;
if (!match(Cond, m_FCmp(Pred, m_Specific(T), m_Specific(F))) &&
!match(Cond, m_FCmp(Pred, m_Specific(F), m_Specific(T))))
return nullptr;
- // TODO: The transform may not be valid with -0.0. An incomplete way of
- // testing for that possibility is to check if at least one operand is a
- // non-zero constant.
+ // This transform is safe if we do not have (do not care about) -0.0 or if
+ // at least one operand is known to not be -0.0. Otherwise, the select can
+ // change the sign of a zero operand.
+ bool HasNoSignedZeros = Q.CxtI && isa<FPMathOperator>(Q.CxtI) &&
+ Q.CxtI->hasNoSignedZeros();
const APFloat *C;
- if ((match(T, m_APFloat(C)) && C->isNonZero()) ||
- (match(F, m_APFloat(C)) && C->isNonZero())) {
+ if (HasNoSignedZeros || (match(T, m_APFloat(C)) && C->isNonZero()) ||
+ (match(F, m_APFloat(C)) && C->isNonZero())) {
// (T == F) ? T : F --> F
// (F == T) ? T : F --> F
if (Pred == FCmpInst::FCMP_OEQ)
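The signed-zero caveat is concrete: +0.0 and -0.0 compare equal under fcmp oeq, yet they are distinct values, so folding the select can flip a sign bit. A standalone demonstration in plain C++:

#include <cassert>
#include <cmath>

int main() {
  double T = -0.0, F = +0.0;
  assert(T == F);                              // oeq would fold the select
  assert(std::signbit(T) && !std::signbit(F)); // but the signs differ
  return 0;
}

Hence the fold is gated on the nsz fast-math flag or on one operand being a known non-zero constant.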
@@ -3952,6 +3996,15 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
return FalseVal;
}
+ // select i1 Cond, i1 true, i1 false --> i1 Cond
+ assert(Cond->getType()->isIntOrIntVectorTy(1) &&
+ "Select must have bool or bool vector condition");
+ assert(TrueVal->getType() == FalseVal->getType() &&
+ "Select must have same types for true/false ops");
+ if (Cond->getType() == TrueVal->getType() &&
+ match(TrueVal, m_One()) && match(FalseVal, m_ZeroInt()))
+ return Cond;
+
// select ?, X, X -> X
if (TrueVal == FalseVal)
return TrueVal;
@@ -3965,7 +4018,7 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse))
return V;
- if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal))
+ if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal, Q))
return V;
if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal))
@@ -4023,7 +4076,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
// The following transforms are only safe if the ptrtoint cast
// doesn't truncate the pointers.
if (Ops[1]->getType()->getScalarSizeInBits() ==
- Q.DL.getIndexSizeInBits(AS)) {
+ Q.DL.getPointerSizeInBits(AS)) {
auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * {
if (match(P, m_Zero()))
return Constant::getNullValue(GEPTy);
@@ -4408,6 +4461,30 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts);
}
+ // A splat of an inserted scalar constant becomes a vector constant:
+ // shuf (inselt ?, C, IndexC), undef, <IndexC, IndexC...> --> <C, C...>
+ // NOTE: We may have commuted above, so analyze the updated Indices, not the
+ // original mask constant.
+ Constant *C;
+ ConstantInt *IndexC;
+ if (match(Op0, m_InsertElement(m_Value(), m_Constant(C),
+ m_ConstantInt(IndexC)))) {
+ // Match a splat shuffle mask of the insert index allowing undef elements.
+ int InsertIndex = IndexC->getZExtValue();
+ if (all_of(Indices, [InsertIndex](int MaskElt) {
+ return MaskElt == InsertIndex || MaskElt == -1;
+ })) {
+ assert(isa<UndefValue>(Op1) && "Expected undef operand 1 for splat");
+
+ // Shuffle mask undefs become undefined constant result elements.
+ SmallVector<Constant *, 16> VecC(MaskNumElts, C);
+ for (unsigned i = 0; i != MaskNumElts; ++i)
+ if (Indices[i] == -1)
+ VecC[i] = UndefValue::get(C->getType());
+ return ConstantVector::get(VecC);
+ }
+ }
+
// A shuffle of a splat is always the splat itself. Legal if the shuffle's
// value type is same as the input vectors' type.
if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0))
@@ -5083,6 +5160,16 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
return Op0;
}
break;
+ case Intrinsic::copysign:
+ // copysign X, X --> X
+ if (Op0 == Op1)
+ return Op0;
+ // copysign -X, X --> X
+ // copysign X, -X --> -X
+ if (match(Op0, m_FNeg(m_Specific(Op1))) ||
+ match(Op1, m_FNeg(m_Specific(Op0))))
+ return Op1;
+ break;
case Intrinsic::maxnum:
case Intrinsic::minnum:
case Intrinsic::maximum:
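The copysign folds added above mirror the C library semantics, so they can be sanity-checked directly (standalone, not LLVM code):

#include <cassert>
#include <cmath>

int main() {
  double X = -3.5;
  assert(std::copysign(X, X) == X);   // copysign X, X  --> X
  assert(std::copysign(-X, X) == X);  // copysign -X, X --> X
  assert(std::copysign(X, -X) == -X); // copysign X, -X --> -X
  return 0;
}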
@@ -5232,6 +5319,19 @@ Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI);
}
+/// Given operands for a Freeze, see if we can fold the result.
+static Value *SimplifyFreezeInst(Value *Op0) {
+ // Use a utility function defined in ValueTracking.
+ if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0))
+ return Op0;
+ // We have room for improvement.
+ return nullptr;
+}
+
+Value *llvm::SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
+ return ::SimplifyFreezeInst(Op0);
+}
+
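An IR-level illustration of the new fold, following the comment style used elsewhere in this file (hypothetical names):
  ; %x is a plain argument and may be undef or poison, so the freeze stays:
  %f1 = freeze i32 %x
  ; a simple constant is guaranteed not to be undef or poison, so %f2
  ; folds to 7:
  %f2 = freeze i32 7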
/// See if we can compute a simplified version of this instruction.
/// If not, this returns null.
@@ -5374,6 +5474,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
Result = SimplifyCall(cast<CallInst>(I), Q);
break;
}
+ case Instruction::Freeze:
+ Result = SimplifyFreezeInst(I->getOperand(0), Q);
+ break;
#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
#include "llvm/IR/Instruction.def"
#undef HANDLE_CAST_INST
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp
index d12db010db6a..23ff4fd6f85e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/IntervalPartition.h"
#include "llvm/Analysis/Interval.h"
#include "llvm/Analysis/IntervalIterator.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include <cassert>
#include <utility>
@@ -22,6 +23,10 @@ using namespace llvm;
char IntervalPartition::ID = 0;
+IntervalPartition::IntervalPartition() : FunctionPass(ID) {
+ initializeIntervalPartitionPass(*PassRegistry::getPassRegistry());
+}
+
INITIALIZE_PASS(IntervalPartition, "intervals",
"Interval Partition Construction", true, true)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
index 439758560284..6107cacb9533 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp
index e727de468a0d..83698598e156 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
index 96722f32e355..bad2de9e5f5e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -19,8 +19,8 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/ValueLattice.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
@@ -33,6 +33,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
@@ -47,6 +48,9 @@ using namespace PatternMatch;
static const unsigned MaxProcessedPerValue = 500;
char LazyValueInfoWrapperPass::ID = 0;
+LazyValueInfoWrapperPass::LazyValueInfoWrapperPass() : FunctionPass(ID) {
+ initializeLazyValueInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
INITIALIZE_PASS_BEGIN(LazyValueInfoWrapperPass, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
@@ -432,6 +436,8 @@ namespace {
BasicBlock *BB);
bool solveBlockValueOverflowIntrinsic(
ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB);
+ bool solveBlockValueSaturatingIntrinsic(ValueLatticeElement &BBLV,
+ SaturatingInst *SI, BasicBlock *BB);
bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II,
BasicBlock *BB);
bool solveBlockValueExtractValue(ValueLatticeElement &BBLV,
@@ -1090,8 +1096,22 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
return true;
}
- return solveBlockValueBinaryOpImpl(BBLV, BO, BB,
- [BO](const ConstantRange &CR1, const ConstantRange &CR2) {
+ if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(BO)) {
+ unsigned NoWrapKind = 0;
+ if (OBO->hasNoUnsignedWrap())
+ NoWrapKind |= OverflowingBinaryOperator::NoUnsignedWrap;
+ if (OBO->hasNoSignedWrap())
+ NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap;
+
+ return solveBlockValueBinaryOpImpl(
+ BBLV, BO, BB,
+ [BO, NoWrapKind](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.overflowingBinaryOp(BO->getOpcode(), CR2, NoWrapKind);
+ });
+ }
+
+ return solveBlockValueBinaryOpImpl(
+ BBLV, BO, BB, [BO](const ConstantRange &CR1, const ConstantRange &CR2) {
return CR1.binaryOp(BO->getOpcode(), CR2);
});
}
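With nowrap flags the transfer function can stay precise, because inputs that would wrap are undefined behavior rather than wrapped results. A minimal usage sketch of the ConstantRange API assumed above (illustrative, not part of this patch):

#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
using namespace llvm;

// Range of "add nsw" over two input ranges; with NoSignedWrap the result
// excludes would-wrap values instead of widening to the full set.
ConstantRange addRangeNSW(const ConstantRange &A, const ConstantRange &B) {
  return A.overflowingBinaryOp(Instruction::Add, B,
                               OverflowingBinaryOperator::NoSignedWrap);
}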
@@ -1104,37 +1124,46 @@ bool LazyValueInfoImpl::solveBlockValueOverflowIntrinsic(
});
}
-bool LazyValueInfoImpl::solveBlockValueIntrinsic(
- ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB) {
- switch (II->getIntrinsicID()) {
+bool LazyValueInfoImpl::solveBlockValueSaturatingIntrinsic(
+ ValueLatticeElement &BBLV, SaturatingInst *SI, BasicBlock *BB) {
+ switch (SI->getIntrinsicID()) {
case Intrinsic::uadd_sat:
- return solveBlockValueBinaryOpImpl(BBLV, II, BB,
- [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return solveBlockValueBinaryOpImpl(
+ BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) {
return CR1.uadd_sat(CR2);
});
case Intrinsic::usub_sat:
- return solveBlockValueBinaryOpImpl(BBLV, II, BB,
- [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return solveBlockValueBinaryOpImpl(
+ BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) {
return CR1.usub_sat(CR2);
});
case Intrinsic::sadd_sat:
- return solveBlockValueBinaryOpImpl(BBLV, II, BB,
- [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return solveBlockValueBinaryOpImpl(
+ BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) {
return CR1.sadd_sat(CR2);
});
case Intrinsic::ssub_sat:
- return solveBlockValueBinaryOpImpl(BBLV, II, BB,
- [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return solveBlockValueBinaryOpImpl(
+ BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) {
return CR1.ssub_sat(CR2);
});
default:
- LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - overdefined (unknown intrinsic).\n");
- BBLV = ValueLatticeElement::getOverdefined();
- return true;
+ llvm_unreachable("All llvm.sat intrinsics are handled.");
}
}
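Each case defers to the matching ConstantRange saturating operation, which clamps at the type's boundary instead of wrapping. A small usage sketch (illustrative values):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// i8 ranges: [100, 121) uadd_sat {200} saturates every sum to 255.
ConstantRange saturatedSum() {
  ConstantRange A(APInt(8, 100), APInt(8, 121));
  ConstantRange B(APInt(8, 200), APInt(8, 201));
  return A.uadd_sat(B); // the single value 255
}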
+bool LazyValueInfoImpl::solveBlockValueIntrinsic(ValueLatticeElement &BBLV,
+ IntrinsicInst *II,
+ BasicBlock *BB) {
+ if (auto *SI = dyn_cast<SaturatingInst>(II))
+ return solveBlockValueSaturatingIntrinsic(BBLV, SI, BB);
+
+ LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined (unknown intrinsic).\n");
+ BBLV = ValueLatticeElement::getOverdefined();
+ return true;
+}
+
bool LazyValueInfoImpl::solveBlockValueExtractValue(
ValueLatticeElement &BBLV, ExtractValueInst *EVI, BasicBlock *BB) {
if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
index 7de9d2cbfddb..0f274429f11f 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -64,10 +64,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -75,6 +75,8 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
@@ -281,6 +283,9 @@ void DivergencePropagator::propagate() {
// Register this pass.
char LegacyDivergenceAnalysis::ID = 0;
+LegacyDivergenceAnalysis::LegacyDivergenceAnalysis() : FunctionPass(ID) {
+ initializeLegacyDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
+}
INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence",
"Legacy Divergence Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
index db18716c64cf..ba945eb4318f 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
@@ -66,6 +66,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp
index 641e92eac781..a7d07c0b6183 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp
@@ -150,7 +150,7 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
// Require ABI alignment for loads without alignment specification
const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty);
- APInt AccessSize(DL.getIndexTypeSizeInBits(V->getType()),
+ APInt AccessSize(DL.getPointerTypeSizeInBits(V->getType()),
DL.getTypeStoreSize(Ty));
return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI,
DT);
@@ -383,10 +383,6 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
MaxInstsToScan = ~0U;
const DataLayout &DL = ScanBB->getModule()->getDataLayout();
-
- // Try to get the store size for the type.
- auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy));
-
Value *StrippedPtr = Ptr->stripPointerCasts();
while (ScanFrom != ScanBB->begin()) {
@@ -425,6 +421,9 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
return LI;
}
+ // Try to get the store size for the type.
+ auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy));
+
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
// If this is a store through Ptr, the value is available!
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 3d8f77675f3a..26fa5112c29a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -52,6 +52,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -2397,6 +2398,10 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
PSE->print(OS, Depth);
}
+LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
+ initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
+}
+
const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
auto &LAI = LoopAccessInfoMap[L];
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 10d2fe07884a..25325ec1be02 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/BreadthFirstIterator.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -283,6 +284,9 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
const SCEV *ElemSize = Sizes.back();
const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
+ Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
+ Stride = SE.getNoopOrSignExtend(Stride, WiderType);
+ TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
LLVM_DEBUG(dbgs().indent(4)
@@ -313,7 +317,7 @@ bool IndexedReference::delinearize(const LoopInfo &LI) {
const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
const BasicBlock *BB = StoreOrLoadInst.getParent();
- for (Loop *L = LI.getLoopFor(BB); L != nullptr; L = L->getParentLoop()) {
+ if (Loop *L = LI.getLoopFor(BB)) {
const SCEV *AccessFn =
SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
@@ -342,7 +346,7 @@ bool IndexedReference::delinearize(const LoopInfo &LI) {
<< "ERROR: failed to delinearize reference\n");
Subscripts.clear();
Sizes.clear();
- break;
+ return false;
}
const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
@@ -453,7 +457,7 @@ CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
AliasAnalysis &AA, DependenceInfo &DI,
Optional<unsigned> TRT)
: Loops(Loops), TripCounts(), LoopCosts(),
- TRT(TRT == None ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
+ TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) {
assert(!Loops.empty() && "Expecting a non-empty loop vector.");
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
index dbab5db7dbc2..3dc29b40834c 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp
@@ -34,6 +34,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -105,7 +106,8 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
I->moveBefore(InsertPt);
if (MSSAU)
if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(I))
- MSSAU->moveToPlace(MUD, InsertPt->getParent(), MemorySSA::End);
+ MSSAU->moveToPlace(MUD, InsertPt->getParent(),
+ MemorySSA::BeforeTerminator);
// There is possibility of hoisting this instruction above some arbitrary
// condition. Any metadata defined on it can be control dependent on this
@@ -364,12 +366,11 @@ BranchInst *Loop::getLoopGuardBranch() const {
return nullptr;
BasicBlock *Preheader = getLoopPreheader();
- BasicBlock *Latch = getLoopLatch();
- assert(Preheader && Latch &&
+ assert(Preheader && getLoopLatch() &&
"Expecting a loop with valid preheader and latch");
// Loop should be in rotate form.
- if (!isLoopExiting(Latch))
+ if (!isRotatedForm())
return nullptr;
// Disallow loops with more than one unique exit block, as we do not verify
@@ -1051,6 +1052,10 @@ MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context,
// LoopInfo implementation
//
+LoopInfoWrapperPass::LoopInfoWrapperPass() : FunctionPass(ID) {
+ initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
char LoopInfoWrapperPass::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInfoWrapperPass, "loops", "Natural Loop Information",
true, true)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp
index 4ab3798039d8..507f5f442865 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp
@@ -20,9 +20,10 @@
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Timer.h"
#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -409,6 +410,10 @@ bool LoopPass::skipLoop(const Loop *L) const {
return false;
}
+LCSSAVerificationPass::LCSSAVerificationPass() : FunctionPass(ID) {
+ initializeLCSSAVerificationPassPass(*PassRegistry::getPassRegistry());
+}
+
char LCSSAVerificationPass::ID = 0;
INITIALIZE_PASS(LCSSAVerificationPass, "lcssa-verification", "LCSSA Verifier",
false, false)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp
index 6e1bb50e8893..2c57e63251c6 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp
index 5cf516a538b5..5d824067df53 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
index 172c86eb4646..427e6fd3ace2 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -544,6 +544,7 @@ Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
Builder.CreateSub(SizeOffsetPair.first, SizeOffsetPair.second);
Value *UseZero =
Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second);
+ ResultSize = Builder.CreateZExtOrTrunc(ResultSize, ResultType);
return Builder.CreateSelect(UseZero, ConstantInt::get(ResultType, 0),
ResultSize);
}
@@ -576,7 +577,7 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
}
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
- IntTyBits = DL.getPointerTypeSizeInBits(V->getType());
+ IntTyBits = DL.getIndexTypeSizeInBits(V->getType());
Zero = APInt::getNullValue(IntTyBits);
V = V->stripPointerCasts();
@@ -746,7 +747,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
SizeOffsetType PtrData = compute(GEP.getPointerOperand());
- APInt Offset(IntTyBits, 0);
+ APInt Offset(DL.getIndexTypeSizeInBits(GEP.getPointerOperand()->getType()), 0);
if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset))
return unknown();
@@ -834,7 +835,7 @@ ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
// XXX - Are vectors of pointers possible here?
- IntTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
+ IntTy = cast<IntegerType>(DL.getIndexType(V->getType()));
Zero = ConstantInt::get(IntTy, 0);
SizeOffsetEvalType Result = compute_(V);
@@ -938,12 +939,12 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) {
}
Value *FirstArg = CS.getArgument(FnData->FstParam);
- FirstArg = Builder.CreateZExt(FirstArg, IntTy);
+ FirstArg = Builder.CreateZExtOrTrunc(FirstArg, IntTy);
if (FnData->SndParam < 0)
return std::make_pair(FirstArg, Zero);
Value *SecondArg = CS.getArgument(FnData->SndParam);
- SecondArg = Builder.CreateZExt(SecondArg, IntTy);
+ SecondArg = Builder.CreateZExtOrTrunc(SecondArg, IntTy);
Value *Size = Builder.CreateMul(FirstArg, SecondArg);
return std::make_pair(Size, Zero);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 884587e020bb..a97a56e25805 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -47,6 +47,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
@@ -1493,7 +1494,7 @@ void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
- for (const auto &entry : toRemoveIt->second)
+ for (const auto *entry : toRemoveIt->second)
NonLocalDefsCache.erase(entry);
ReverseNonLocalDefsCache.erase(toRemoveIt);
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
index 163830eee797..103cdea148e5 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
@@ -12,6 +12,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
index cfb8b7e7dcb5..bf8dc94bfbf9 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
@@ -1229,6 +1230,7 @@ MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
// safe because there are no CFG changes while building MemorySSA and can
// significantly reduce the time spent by the compiler in AA, because we will
// make queries about all the instructions in the Function.
+ assert(AA && "No alias analysis?");
BatchAAResults BatchAA(*AA);
buildMemorySSA(BatchAA);
// Intentionally leave AA as nullptr while building so we don't accidentally
@@ -1868,9 +1870,7 @@ LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
#endif
void MemorySSA::verifyMemorySSA() const {
- verifyDefUses(F);
- verifyDomination(F);
- verifyOrdering(F);
+ verifyOrderingDominationAndDefUses(F);
verifyDominationNumbers(F);
verifyPrevDefInPhis(F);
// Previously, the verification used to also verify that the clobberingAccess
@@ -1957,10 +1957,14 @@ void MemorySSA::verifyDominationNumbers(const Function &F) const {
#endif
}
-/// Verify that the order and existence of MemoryAccesses matches the
+/// Verify ordering: the order and existence of MemoryAccesses matches the
/// order and existence of memory affecting instructions.
-void MemorySSA::verifyOrdering(Function &F) const {
-#ifndef NDEBUG
+/// Verify domination: each definition dominates all of its uses.
+/// Verify def-uses: the immediate use information. Walk all the memory
+/// accesses, verifying that, for each use, it appears in the appropriate
+/// def's use list.
+void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const {
+#if !defined(NDEBUG)
// Walk all the blocks, comparing what the lookups think and what the access
// lists think, as well as the order in the blocks vs the order in the access
// lists.
@@ -1969,29 +1973,56 @@ void MemorySSA::verifyOrdering(Function &F) const {
for (BasicBlock &B : F) {
const AccessList *AL = getBlockAccesses(&B);
const auto *DL = getBlockDefs(&B);
- MemoryAccess *Phi = getMemoryAccess(&B);
+ MemoryPhi *Phi = getMemoryAccess(&B);
if (Phi) {
+ // Verify ordering.
ActualAccesses.push_back(Phi);
ActualDefs.push_back(Phi);
+ // Verify domination
+ for (const Use &U : Phi->uses())
+ assert(dominates(Phi, U) && "Memory PHI does not dominate its uses");
+#if defined(EXPENSIVE_CHECKS)
+ // Verify def-uses.
+ assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
+ pred_begin(&B), pred_end(&B))) &&
+ "Incomplete MemoryPhi Node");
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
+ verifyUseInDefs(Phi->getIncomingValue(I), Phi);
+ assert(find(predecessors(&B), Phi->getIncomingBlock(I)) !=
+ pred_end(&B) &&
+ "Incoming phi block not a block predecessor");
+ }
+#endif
}
for (Instruction &I : B) {
- MemoryAccess *MA = getMemoryAccess(&I);
+ MemoryUseOrDef *MA = getMemoryAccess(&I);
assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) &&
"We have memory affecting instructions "
"in this block but they are not in the "
"access list or defs list");
if (MA) {
+ // Verify ordering.
ActualAccesses.push_back(MA);
- if (isa<MemoryDef>(MA))
+ if (MemoryAccess *MD = dyn_cast<MemoryDef>(MA)) {
+ // Verify ordering.
ActualDefs.push_back(MA);
+ // Verify domination.
+ for (const Use &U : MD->uses())
+ assert(dominates(MD, U) &&
+ "Memory Def does not dominate it's uses");
+ }
+#if defined(EXPENSIVE_CHECKS)
+ // Verify def-uses.
+ verifyUseInDefs(MA->getDefiningAccess(), MA);
+#endif
}
}
// Either we hit the assert, really have no accesses, or we have both
- // accesses and an access list.
- // Same with defs.
+ // accesses and an access list. Same with defs.
if (!AL && !DL)
continue;
+ // Verify ordering.
assert(AL->size() == ActualAccesses.size() &&
"We don't have the same number of accesses in the block as on the "
"access list");
@@ -2022,28 +2053,6 @@ void MemorySSA::verifyOrdering(Function &F) const {
#endif
}
-/// Verify the domination properties of MemorySSA by checking that each
-/// definition dominates all of its uses.
-void MemorySSA::verifyDomination(Function &F) const {
-#ifndef NDEBUG
- for (BasicBlock &B : F) {
- // Phi nodes are attached to basic blocks
- if (MemoryPhi *MP = getMemoryAccess(&B))
- for (const Use &U : MP->uses())
- assert(dominates(MP, U) && "Memory PHI does not dominate its uses");
-
- for (Instruction &I : B) {
- MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
- if (!MD)
- continue;
-
- for (const Use &U : MD->uses())
- assert(dominates(MD, U) && "Memory Def does not dominate its uses");
- }
- }
-#endif
-}
-
/// Verify the def-use lists in MemorySSA, by verifying that \p Use
/// appears in the use list of \p Def.
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
@@ -2058,34 +2067,6 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
#endif
}
-/// Verify the immediate use information, by walking all the memory
-/// accesses and verifying that, for each use, it appears in the
-/// appropriate def's use list
-void MemorySSA::verifyDefUses(Function &F) const {
-#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
- for (BasicBlock &B : F) {
- // Phi nodes are attached to basic blocks
- if (MemoryPhi *Phi = getMemoryAccess(&B)) {
- assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
- pred_begin(&B), pred_end(&B))) &&
- "Incomplete MemoryPhi Node");
- for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
- verifyUseInDefs(Phi->getIncomingValue(I), Phi);
- assert(find(predecessors(&B), Phi->getIncomingBlock(I)) !=
- pred_end(&B) &&
- "Incoming phi block not a block predecessor");
- }
- }
-
- for (Instruction &I : B) {
- if (MemoryUseOrDef *MA = getMemoryAccess(&I)) {
- verifyUseInDefs(MA->getDefiningAccess(), MA);
- }
- }
- }
-#endif
-}
-
/// Perform a local numbering on blocks so that instruction ordering can be
/// determined in constant time.
/// TODO: We currently just number in order. If we numbered by N, we could
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
index f2d56b05d968..473268982f2d 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -1159,7 +1159,13 @@ void MemorySSAUpdater::moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where) {
void MemorySSAUpdater::moveToPlace(MemoryUseOrDef *What, BasicBlock *BB,
MemorySSA::InsertionPlace Where) {
- return moveTo(What, BB, Where);
+ if (Where != MemorySSA::InsertionPlace::BeforeTerminator)
+ return moveTo(What, BB, Where);
+
+ if (auto *Where = MSSA->getMemoryAccess(BB->getTerminator()))
+ return moveBefore(What, Where);
+ else
+ return moveTo(What, BB, MemorySSA::InsertionPlace::End);
}
// All accesses in To used to be in From. Move to end and update access lists.
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 519242759824..52b884fb88e0 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 8232bf07cafc..8a1206f49c21 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -44,6 +44,7 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Pass.h"
@@ -466,7 +467,8 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(),
// FIXME: refactor this to use the same code that inliner is using.
// Don't try to import functions with noinline attribute.
- F.getAttributes().hasFnAttribute(Attribute::NoInline)};
+ F.getAttributes().hasFnAttribute(Attribute::NoInline),
+ F.hasFnAttribute(Attribute::AlwaysInline)};
auto FuncSummary = std::make_unique<FunctionSummary>(
Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
CallGraphEdges.takeVector(), TypeTests.takeVector(),
@@ -703,7 +705,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
F->hasFnAttribute(Attribute::ReadOnly),
F->hasFnAttribute(Attribute::NoRecurse),
F->returnDoesNotAlias(),
- /* NoInline = */ false},
+ /* NoInline = */ false,
+ F->hasFnAttribute(Attribute::AlwaysInline)},
/*EntryCount=*/0, ArrayRef<ValueInfo>{},
ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{},
@@ -817,7 +820,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (EC)
report_fatal_error(Twine("Failed to open dot file ") +
ModuleSummaryDotFile + ": " + EC.message() + "\n");
- Index.exportToDot(OSDot);
+ Index.exportToDot(OSDot, {});
}
return Index;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp b/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp
index 44527773115d..952c2cbfec4e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp
@@ -12,12 +12,14 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
@@ -353,7 +355,25 @@ ModulePass *llvm::createMustBeExecutedContextPrinter() {
}
bool MustBeExecutedContextPrinter::runOnModule(Module &M) {
- MustBeExecutedContextExplorer Explorer(true);
+ // We provide non-PM analysis here because the old PM doesn't like to query
+ // function passes from a module pass.
+ SmallVector<PostDominatorTree *, 8> PDTs;
+ SmallVector<DominatorTree *, 8> DTs;
+ SmallVector<LoopInfo *, 8> LIs;
+
+ GetterTy<LoopInfo> LIGetter = [&](const Function &F) {
+ DominatorTree *DT = new DominatorTree(const_cast<Function &>(F));
+ LoopInfo *LI = new LoopInfo(*DT);
+ DTs.push_back(DT);
+ LIs.push_back(LI);
+ return LI;
+ };
+ GetterTy<PostDominatorTree> PDTGetter = [&](const Function &F) {
+ PostDominatorTree *PDT = new PostDominatorTree(const_cast<Function &>(F));
+ PDTs.push_back(PDT);
+ return PDT;
+ };
+ MustBeExecutedContextExplorer Explorer(true, LIGetter, PDTGetter);
for (Function &F : M) {
for (Instruction &I : instructions(F)) {
dbgs() << "-- Explore context of: " << I << "\n";
@@ -363,6 +383,9 @@ bool MustBeExecutedContextPrinter::runOnModule(Module &M) {
}
}
+ DeleteContainerPointers(PDTs);
+ DeleteContainerPointers(LIs);
+ DeleteContainerPointers(DTs);
return false;
}
@@ -443,6 +466,173 @@ bool MustExecutePrinter::runOnFunction(Function &F) {
return false;
}
+/// Return true if \p L might be an endless loop.
+static bool maybeEndlessLoop(const Loop &L) {
+ if (L.getHeader()->getParent()->hasFnAttribute(Attribute::WillReturn))
+ return false;
+ // TODO: Actually try to prove it is not.
+ // TODO: If maybeEndlessLoop is going to be expensive, cache it.
+ return true;
+}
+
+static bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI) {
+ if (!LI)
+ return false;
+ using RPOTraversal = ReversePostOrderTraversal<const Function *>;
+ RPOTraversal FuncRPOT(&F);
+ return containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
+ const LoopInfo>(FuncRPOT, *LI);
+}
+
+/// Lookup \p Key in \p Map and return the result, potentially after
+/// initializing the optional through \p Fn(\p args).
+template <typename K, typename V, typename FnTy, typename... ArgsTy>
+static V getOrCreateCachedOptional(K Key, DenseMap<K, Optional<V>> &Map,
+ FnTy &&Fn, ArgsTy&&... args) {
+ Optional<V> &OptVal = Map[Key];
+ if (!OptVal.hasValue())
+ OptVal = Fn(std::forward<ArgsTy>(args)...);
+ return OptVal.getValue();
+}
+
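getOrCreateCachedOptional is a compute-once cache: the map owns one Optional slot per key and the callable runs only while the slot is still empty. The same pattern in standalone C++17, with std::unordered_map and std::optional standing in for DenseMap and llvm::Optional:

```cpp
#include <cstdio>
#include <optional>
#include <unordered_map>
#include <utility>

// Compute-once cache: the functor runs only the first time a key is seen;
// every later query for the same key returns the memoized value.
template <typename K, typename V, typename FnTy, typename... ArgsTy>
V getOrCreateCachedOptional(K Key, std::unordered_map<K, std::optional<V>> &Map,
                            FnTy &&Fn, ArgsTy &&...Args) {
  std::optional<V> &OptVal = Map[Key];
  if (!OptVal)
    OptVal = Fn(std::forward<ArgsTy>(Args)...);
  return *OptVal;
}

int main() {
  std::unordered_map<int, std::optional<int>> Cache;
  auto Expensive = [](int X) { std::puts("computed"); return X * X; };
  std::printf("%d\n", getOrCreateCachedOptional(3, Cache, Expensive, 3)); // computes
  std::printf("%d\n", getOrCreateCachedOptional(3, Cache, Expensive, 3)); // cached
}
```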
+const BasicBlock *
+MustBeExecutedContextExplorer::findForwardJoinPoint(const BasicBlock *InitBB) {
+ const LoopInfo *LI = LIGetter(*InitBB->getParent());
+ const PostDominatorTree *PDT = PDTGetter(*InitBB->getParent());
+
+ LLVM_DEBUG(dbgs() << "\tFind forward join point for " << InitBB->getName()
+ << (LI ? " [LI]" : "") << (PDT ? " [PDT]" : ""));
+
+ const Function &F = *InitBB->getParent();
+ const Loop *L = LI ? LI->getLoopFor(InitBB) : nullptr;
+ const BasicBlock *HeaderBB = L ? L->getHeader() : InitBB;
+ bool WillReturnAndNoThrow = (F.hasFnAttribute(Attribute::WillReturn) ||
+ (L && !maybeEndlessLoop(*L))) &&
+ F.doesNotThrow();
+ LLVM_DEBUG(dbgs() << (L ? " [in loop]" : "")
+ << (WillReturnAndNoThrow ? " [WillReturn] [NoUnwind]" : "")
+ << "\n");
+
+ // Determine the adjacent blocks in the given direction but exclude (self)
+ // loops under certain circumstances.
+ SmallVector<const BasicBlock *, 8> Worklist;
+ for (const BasicBlock *SuccBB : successors(InitBB)) {
+ bool IsLatch = SuccBB == HeaderBB;
+ // Loop latches are ignored in forward propagation if the loop cannot be
+ // endless and may not throw: control has to go somewhere.
+ if (!WillReturnAndNoThrow || !IsLatch)
+ Worklist.push_back(SuccBB);
+ }
+ LLVM_DEBUG(dbgs() << "\t\t#Worklist: " << Worklist.size() << "\n");
+
+ // If there are no other adjacent blocks, there is no join point.
+ if (Worklist.empty())
+ return nullptr;
+
+ // If there is one adjacent block, it is the join point.
+ if (Worklist.size() == 1)
+ return Worklist[0];
+
+ // Try to determine a join block through the help of the post-dominance
+ // tree. If no tree was provided, we perform simple pattern matching for one
+ // block conditionals and one block loops only.
+ const BasicBlock *JoinBB = nullptr;
+ if (PDT)
+ if (const auto *InitNode = PDT->getNode(InitBB))
+ if (const auto *IDomNode = InitNode->getIDom())
+ JoinBB = IDomNode->getBlock();
+
+ if (!JoinBB && Worklist.size() == 2) {
+ const BasicBlock *Succ0 = Worklist[0];
+ const BasicBlock *Succ1 = Worklist[1];
+ const BasicBlock *Succ0UniqueSucc = Succ0->getUniqueSuccessor();
+ const BasicBlock *Succ1UniqueSucc = Succ1->getUniqueSuccessor();
+ if (Succ0UniqueSucc == InitBB) {
+ // InitBB -> Succ0 -> InitBB
+ // InitBB -> Succ1 = JoinBB
+ JoinBB = Succ1;
+ } else if (Succ1UniqueSucc == InitBB) {
+ // InitBB -> Succ1 -> InitBB
+ // InitBB -> Succ0 = JoinBB
+ JoinBB = Succ0;
+ } else if (Succ0 == Succ1UniqueSucc) {
+ // InitBB -> Succ0 = JoinBB
+ // InitBB -> Succ1 -> Succ0 = JoinBB
+ JoinBB = Succ0;
+ } else if (Succ1 == Succ0UniqueSucc) {
+ // InitBB -> Succ0 -> Succ1 = JoinBB
+ // InitBB -> Succ1 = JoinBB
+ JoinBB = Succ1;
+ } else if (Succ0UniqueSucc == Succ1UniqueSucc) {
+ // InitBB -> Succ0 -> JoinBB
+ // InitBB -> Succ1 -> JoinBB
+ JoinBB = Succ0UniqueSucc;
+ }
+ }
+
+ if (!JoinBB && L)
+ JoinBB = L->getUniqueExitBlock();
+
+ if (!JoinBB)
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "\t\tJoin block candidate: " << JoinBB->getName() << "\n");
+
+ // In forward direction we check if control will for sure reach JoinBB from
+ // InitBB, thus it cannot be "stopped" along the way. Ways to "stop" control
+ // are: infinite loops and instructions that do not necessarily transfer
+ // execution to their successor. To check for them we traverse the CFG from
+ // the adjacent blocks to the JoinBB, looking at all intermediate blocks.
+
+ // If we know the function is "will-return" and "no-throw" there is no need
+ // for further checks.
+ if (!F.hasFnAttribute(Attribute::WillReturn) || !F.doesNotThrow()) {
+
+ auto BlockTransfersExecutionToSuccessor = [](const BasicBlock *BB) {
+ return isGuaranteedToTransferExecutionToSuccessor(BB);
+ };
+
+ SmallPtrSet<const BasicBlock *, 16> Visited;
+ while (!Worklist.empty()) {
+ const BasicBlock *ToBB = Worklist.pop_back_val();
+ if (ToBB == JoinBB)
+ continue;
+
+ // Make sure all loops in-between are finite.
+ if (!Visited.insert(ToBB).second) {
+ if (!F.hasFnAttribute(Attribute::WillReturn)) {
+ if (!LI)
+ return nullptr;
+
+ bool MayContainIrreducibleControl = getOrCreateCachedOptional(
+ &F, IrreducibleControlMap, mayContainIrreducibleControl, F, LI);
+ if (MayContainIrreducibleControl)
+ return nullptr;
+
+ const Loop *L = LI->getLoopFor(ToBB);
+ if (L && maybeEndlessLoop(*L))
+ return nullptr;
+ }
+
+ continue;
+ }
+
+ // Make sure the block has no instructions that could stop control
+ // transfer.
+ bool TransfersExecution = getOrCreateCachedOptional(
+ ToBB, BlockTransferMap, BlockTransfersExecutionToSuccessor, ToBB);
+ if (!TransfersExecution)
+ return nullptr;
+
+ for (const BasicBlock *AdjacentBB : successors(ToBB))
+ Worklist.push_back(AdjacentBB);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "\tJoin block: " << JoinBB->getName() << "\n");
+ return JoinBB;
+}
+
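When no post-dominator tree is available, findForwardJoinPoint falls back to local pattern matching over the two-successor shapes: one-block self loops on either side, triangles, and diamonds. A standalone sketch of just that case analysis, with a hypothetical Block type in place of BasicBlock:

```cpp
#include <cstdio>

struct Block { const char *Name; const Block *UniqueSucc; };

// The Worklist.size() == 2 case analysis: detect one-block loops back to
// Init, triangles, and diamonds formed by Init and its two successors.
const Block *findJoin(const Block *Init, const Block *S0, const Block *S1) {
  const Block *S0U = S0->UniqueSucc;
  const Block *S1U = S1->UniqueSucc;
  if (S0U == Init) return S1;        // Init -> S0 -> Init; S1 is the join
  if (S1U == Init) return S0;        // symmetric one-block loop
  if (S0 == S1U) return S0;          // triangle through S1 into S0
  if (S1 == S0U) return S1;          // triangle through S0 into S1
  if (S0U && S0U == S1U) return S0U; // diamond: both funnel into one block
  return nullptr;                    // nothing recognizable locally
}

int main() {
  Block Join{"join", nullptr};
  Block Then{"then", &Join}, Else{"else", &Join}, Entry{"entry", nullptr};
  const Block *J = findJoin(&Entry, &Then, &Else);
  std::printf("join: %s\n", J ? J->Name : "<none>"); // join
}
```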
const Instruction *
MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction(
MustBeExecutedIterator &It, const Instruction *PP) {
@@ -490,6 +680,12 @@ MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction(
return &PP->getSuccessor(0)->front();
}
+ // Multiple successors mean we need to find the join point where control flow
+ // converges again. We use the findForwardJoinPoint helper function with
+ // information about the function and helper analyses, if available.
+ if (const BasicBlock *JoinBB = findForwardJoinPoint(PP->getParent()))
+ return &JoinBB->front();
+
LLVM_DEBUG(dbgs() << "\tNo join point found\n");
return nullptr;
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
index 07a5619a35b9..44e6637f6337 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp b/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp
index 49749bc44746..198647dafbef 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -47,25 +48,28 @@ bool PhiValues::invalidate(Function &, const PreservedAnalyses &PA,
// we're ultimately interested in, and all of the reachable values, i.e.
// including phis, as that makes invalidateValue easier.
void PhiValues::processPhi(const PHINode *Phi,
- SmallVector<const PHINode *, 8> &Stack) {
+ SmallVectorImpl<const PHINode *> &Stack) {
// Initialize the phi with the next depth number.
assert(DepthMap.lookup(Phi) == 0);
assert(NextDepthNumber != UINT_MAX);
- unsigned int DepthNumber = ++NextDepthNumber;
- DepthMap[Phi] = DepthNumber;
+ unsigned int RootDepthNumber = ++NextDepthNumber;
+ DepthMap[Phi] = RootDepthNumber;
// Recursively process the incoming phis of this phi.
TrackedValues.insert(PhiValuesCallbackVH(const_cast<PHINode *>(Phi), this));
for (Value *PhiOp : Phi->incoming_values()) {
if (PHINode *PhiPhiOp = dyn_cast<PHINode>(PhiOp)) {
// Recurse if the phi has not yet been visited.
- if (DepthMap.lookup(PhiPhiOp) == 0)
+ unsigned int OpDepthNumber = DepthMap.lookup(PhiPhiOp);
+ if (OpDepthNumber == 0) {
processPhi(PhiPhiOp, Stack);
- assert(DepthMap.lookup(PhiPhiOp) != 0);
+ OpDepthNumber = DepthMap.lookup(PhiPhiOp);
+ assert(OpDepthNumber != 0);
+ }
// If the phi did not become part of a component then this phi and that
// phi are part of the same component, so adjust the depth number.
- if (!ReachableMap.count(DepthMap[PhiPhiOp]))
- DepthMap[Phi] = std::min(DepthMap[Phi], DepthMap[PhiPhiOp]);
+ if (!ReachableMap.count(OpDepthNumber))
+ DepthMap[Phi] = std::min(DepthMap[Phi], OpDepthNumber);
} else {
TrackedValues.insert(PhiValuesCallbackVH(PhiOp, this));
}
@@ -76,48 +80,59 @@ void PhiValues::processPhi(const PHINode *Phi,
// If the depth number has not changed then we've finished collecting the phis
// of a strongly connected component.
- if (DepthMap[Phi] == DepthNumber) {
+ if (DepthMap[Phi] == RootDepthNumber) {
// Collect the reachable values for this component. The phis of this
- // component will be those on top of the depth stach with the same or
+ // component will be those on top of the depth stack with the same or
// greater depth number.
- ConstValueSet Reachable;
- while (!Stack.empty() && DepthMap[Stack.back()] >= DepthNumber) {
+ ConstValueSet &Reachable = ReachableMap[RootDepthNumber];
+ while (true) {
const PHINode *ComponentPhi = Stack.pop_back_val();
Reachable.insert(ComponentPhi);
- DepthMap[ComponentPhi] = DepthNumber;
+
for (Value *Op : ComponentPhi->incoming_values()) {
if (PHINode *PhiOp = dyn_cast<PHINode>(Op)) {
// If this phi is not part of the same component then that component
// is guaranteed to have been completed before this one. Therefore we
// can just add its reachable values to the reachable values of this
// component.
- auto It = ReachableMap.find(DepthMap[PhiOp]);
- if (It != ReachableMap.end())
- Reachable.insert(It->second.begin(), It->second.end());
- } else {
+ unsigned int OpDepthNumber = DepthMap[PhiOp];
+ if (OpDepthNumber != RootDepthNumber) {
+ auto It = ReachableMap.find(OpDepthNumber);
+ if (It != ReachableMap.end())
+ Reachable.insert(It->second.begin(), It->second.end());
+ }
+ } else
Reachable.insert(Op);
- }
}
+
+ if (Stack.empty())
+ break;
+
+ unsigned int &ComponentDepthNumber = DepthMap[Stack.back()];
+ if (ComponentDepthNumber < RootDepthNumber)
+ break;
+
+ ComponentDepthNumber = RootDepthNumber;
}
- ReachableMap.insert({DepthNumber,Reachable});
// Filter out phis to get the non-phi reachable values.
- ValueSet NonPhi;
+ ValueSet &NonPhi = NonPhiReachableMap[RootDepthNumber];
for (const Value *V : Reachable)
if (!isa<PHINode>(V))
- NonPhi.insert(const_cast<Value*>(V));
- NonPhiReachableMap.insert({DepthNumber,NonPhi});
+ NonPhi.insert(const_cast<Value *>(V));
}
}
const PhiValues::ValueSet &PhiValues::getValuesForPhi(const PHINode *PN) {
- if (DepthMap.count(PN) == 0) {
+ unsigned int DepthNumber = DepthMap.lookup(PN);
+ if (DepthNumber == 0) {
SmallVector<const PHINode *, 8> Stack;
processPhi(PN, Stack);
+ DepthNumber = DepthMap.lookup(PN);
assert(Stack.empty());
+ assert(DepthNumber != 0);
}
- assert(DepthMap.lookup(PN) != 0);
- return NonPhiReachableMap[DepthMap[PN]];
+ return NonPhiReachableMap[DepthNumber];
}
void PhiValues::invalidateValue(const Value *V) {
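The reworked processPhi above is a Tarjan-style walk: each phi gets a depth number, cycles collapse onto the smallest number in the component, and once the root's number survives unchanged the whole component is popped off the stack and shares one reachable set. A simplified standalone model of the scheme, with plain STL containers and int leaves in place of non-phi Values:

```cpp
#include <algorithm>
#include <cstdio>
#include <map>
#include <set>
#include <vector>

// A phi is a node with non-phi "leaf" operands and phi operands.
struct Phi { std::vector<int> Leaves; std::vector<Phi *> PhiOps; };

std::map<Phi *, unsigned> Depth;             // models DepthMap
std::map<unsigned, std::set<int>> Reachable; // models ReachableMap
unsigned NextDepth = 0;

void process(Phi *P, std::vector<Phi *> &Stack) {
  unsigned Root = ++NextDepth;
  Depth[P] = Root;
  Stack.push_back(P);
  for (Phi *Op : P->PhiOps) {
    if (!Depth.count(Op))
      process(Op, Stack);
    // An unfinished operand component means Op and P share a component.
    if (!Reachable.count(Depth[Op]))
      Depth[P] = std::min(Depth[P], Depth[Op]);
  }
  if (Depth[P] != Root)
    return; // not the component root; an outer call completes the component
  std::set<int> &R = Reachable[Root];
  while (true) {
    Phi *Member = Stack.back();
    Stack.pop_back();
    R.insert(Member->Leaves.begin(), Member->Leaves.end());
    for (Phi *Op : Member->PhiOps) {
      auto It = Reachable.find(Depth[Op]);
      if (Depth[Op] != Root && It != Reachable.end())
        R.insert(It->second.begin(), It->second.end()); // completed component
    }
    if (Stack.empty() || Depth[Stack.back()] < Root)
      break;
    Depth[Stack.back()] = Root; // normalize members onto the root's number
  }
}

int main() {
  Phi A, B;
  A.Leaves = {1}; B.Leaves = {2};
  A.PhiOps = {&B}; B.PhiOps = {&A}; // a two-phi cycle: one component
  std::vector<Phi *> Stack;
  process(&A, Stack);
  for (int L : Reachable[Depth[&A]])
    std::printf("%d ", L); // 1 2
}
```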
diff --git a/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp
index 4afe22bd5342..f01d51504d7c 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp
@@ -12,7 +12,9 @@
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
@@ -32,6 +34,11 @@ static constexpr bool ExpensiveChecksEnabled = false;
char PostDominatorTreeWrapperPass::ID = 0;
+PostDominatorTreeWrapperPass::PostDominatorTreeWrapperPass()
+ : FunctionPass(ID) {
+ initializePostDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
INITIALIZE_PASS(PostDominatorTreeWrapperPass, "postdomtree",
"Post-Dominator Tree Construction", true, true)
@@ -44,6 +51,28 @@ bool PostDominatorTree::invalidate(Function &F, const PreservedAnalyses &PA,
PAC.preservedSet<CFGAnalyses>());
}
+bool PostDominatorTree::dominates(const Instruction *I1,
+ const Instruction *I2) const {
+ assert(I1 && I2 && "Expecting valid I1 and I2");
+
+ const BasicBlock *BB1 = I1->getParent();
+ const BasicBlock *BB2 = I2->getParent();
+
+ if (BB1 != BB2)
+ return Base::dominates(BB1, BB2);
+
+ // PHINodes in a block are unordered.
+ if (isa<PHINode>(I1) && isa<PHINode>(I2))
+ return false;
+
+ // Loop through the basic block until we find I1 or I2.
+ BasicBlock::const_iterator I = BB1->begin();
+ for (; &*I != I1 && &*I != I2; ++I)
+ /*empty*/;
+
+ return &*I == I2;
+}
+
bool PostDominatorTreeWrapperPass::runOnFunction(Function &F) {
DT.recalculate(F);
return false;
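The new PostDominatorTree::dominates handles two instructions in the same block with a single forward scan: I1 post-dominates I2 exactly when I1 executes later, i.e. when the scan from the block start reaches I2 first (PHI nodes are treated as unordered). The same ordering test in standalone form, with ints standing in for instructions:

```cpp
#include <cstdio>
#include <vector>

// Same-block post-dominance: I1 post-dominates I2 iff I1 executes after I2,
// i.e. a forward scan from the block start reaches I2 first.
bool postDominatesInBlock(const std::vector<int> &Block, int I1, int I2) {
  for (int Inst : Block) {
    if (Inst == I1)
      return false; // I1 comes first: it runs earlier, so it can't post-dominate
    if (Inst == I2)
      return true;  // I2 comes first: I1 lies on every path from I2 onward
  }
  return false; // callers guarantee both instructions are in the block
}

int main() {
  std::vector<int> Block = {10, 20, 30};
  std::printf("%d\n", postDominatesInBlock(Block, 30, 10)); // 1
  std::printf("%d\n", postDominatesInBlock(Block, 10, 30)); // 0
}
```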
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index b99b75715025..911d39d9a263 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -18,6 +18,8 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfileSummary.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
// The following two parameters determine the threshold for a count to be
diff --git a/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp
index 8ba38adfb0d2..88629517d484 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp
@@ -10,6 +10,7 @@
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/InitializePasses.h"
#ifndef NDEBUG
#include "llvm/Analysis/RegionPrinter.h"
#endif
diff --git a/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp
index 5bdcb31fbe99..020ff85d1b98 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
index 5ce0a1adeaa0..26a9a5ddf1ea 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -112,6 +112,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -220,6 +221,12 @@ static cl::opt<unsigned>
cl::desc("Size of the expression which is considered huge"),
cl::init(4096));
+static cl::opt<bool>
+ClassifyExpressions("scalar-evolution-classify-expressions",
+ cl::Hidden, cl::init(true),
+ cl::desc("When printing analysis, include information on every instruction"));
+
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -3488,7 +3495,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand());
// getSCEV(Base)->getType() has the same address space as Base->getType()
// because SCEV::getType() preserves the address space.
- Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
+ Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType());
// FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
// instruction to its SCEV, because the Instruction may be guarded by control
// flow and the no-overflow bits may not be valid for the expression in any
@@ -3497,7 +3504,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW
: SCEV::FlagAnyWrap;
- const SCEV *TotalOffset = getZero(IntPtrTy);
+ const SCEV *TotalOffset = getZero(IntIdxTy);
// The array size is unimportant. The first thing we do on CurTy is getting
// its element type.
Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0);
@@ -3507,7 +3514,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
// For a struct, add the member offset.
ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
unsigned FieldNo = Index->getZExtValue();
- const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
+ const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo);
// Add the field offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, FieldOffset);
@@ -3518,9 +3525,9 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
// Update CurTy to its element type.
CurTy = cast<SequentialType>(CurTy)->getElementType();
// For an array, add the element offset, explicitly scaled.
- const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy);
+ const SCEV *ElementSize = getSizeOfExpr(IntIdxTy, CurTy);
// Getelementptr indices are signed.
- IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy);
+ IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy);
// Multiply the index by the element size to compute the element offset.
const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
@@ -3779,7 +3786,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
/// Return a type with the same bitwidth as the given type and which represents
/// how SCEV will treat the given type, for which isSCEVable must return
-/// true. For pointer types, this is the pointer-sized integer type.
+/// true. For pointer types, this is the pointer index sized integer type.
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
@@ -3788,7 +3795,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
// The only other support type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
- return getDataLayout().getIntPtrType(Ty);
+ return getDataLayout().getIndexType(Ty);
}
Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
@@ -4568,6 +4575,12 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
break;
}
+ // Recognise intrinsic loop.decrement.reg, and as this has exactly the same
+ // semantics as a Sub, return a binary sub expression.
+ if (auto *II = dyn_cast<IntrinsicInst>(V))
+ if (II->getIntrinsicID() == Intrinsic::loop_decrement_reg)
+ return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1));
+
return None;
}
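The MatchBinaryOp addition treats a call to llvm.loop.decrement.reg as the subtraction it semantically is, so downstream SCEV logic can reason about it like any other sub. A standalone sketch of that shape of matching, with a hypothetical Call record instead of IntrinsicInst:

```cpp
#include <cstdio>
#include <optional>
#include <string>

// Hypothetical stand-ins: a call with a callee name and two operands.
struct Call { std::string Callee; long Op0, Op1; };
enum class BinOpKind { Sub };
struct BinaryOp { BinOpKind Kind; long LHS, RHS; };

// A loop.decrement.reg call has exactly the semantics of a subtraction, so
// expose it to the caller as one.
std::optional<BinaryOp> matchBinaryOp(const Call &C) {
  if (C.Callee == "llvm.loop.decrement.reg")
    return BinaryOp{BinOpKind::Sub, C.Op0, C.Op1};
  return std::nullopt; // not a recognized disguised binary operation
}

int main() {
  Call C{"llvm.loop.decrement.reg", 100, 4};
  if (auto BO = matchBinaryOp(C))
    std::printf("sub %ld, %ld\n", BO->LHS, BO->RHS); // sub 100, 4
}
```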
@@ -5553,6 +5566,7 @@ ScalarEvolution::getRangeRef(const SCEV *S,
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+ using OBO = OverflowingBinaryOperator;
// If the value has known zeros, the maximum value will have those known zeros
// as well.
@@ -5570,8 +5584,14 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
ConstantRange X = getRangeRef(Add->getOperand(0), SignHint);
+ unsigned WrapType = OBO::AnyWrap;
+ if (Add->hasNoSignedWrap())
+ WrapType |= OBO::NoSignedWrap;
+ if (Add->hasNoUnsignedWrap())
+ WrapType |= OBO::NoUnsignedWrap;
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
- X = X.add(getRangeRef(Add->getOperand(i), SignHint));
+ X = X.addWithNoWrap(getRangeRef(Add->getOperand(i), SignHint),
+ WrapType, RangeType);
return setRange(Add, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
@@ -5647,29 +5667,38 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
// If there's no unsigned wrap, the value will never be less than its
// initial value.
- if (AddRec->hasNoUnsignedWrap())
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
- if (!C->getValue()->isZero())
- ConservativeResult = ConservativeResult.intersectWith(
- ConstantRange(C->getAPInt(), APInt(BitWidth, 0)), RangeType);
-
- // If there's no signed wrap, and all the operands have the same sign or
- // zero, the value won't ever change sign.
+ if (AddRec->hasNoUnsignedWrap()) {
+ APInt UnsignedMinValue = getUnsignedRangeMin(AddRec->getStart());
+ if (!UnsignedMinValue.isNullValue())
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(UnsignedMinValue, APInt(BitWidth, 0)), RangeType);
+ }
+
+ // If there's no signed wrap, and all the operands except the initial value
+ // have the same sign or zero, the value won't ever be:
+ // 1: smaller than the initial value if the operands are non-negative,
+ // 2: bigger than the initial value if the operands are non-positive.
+ // In both cases, the value cannot cross the signed min/max boundary.
if (AddRec->hasNoSignedWrap()) {
bool AllNonNeg = true;
bool AllNonPos = true;
- for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
- if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
- if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
+ for (unsigned i = 1, e = AddRec->getNumOperands(); i != e; ++i) {
+ if (!isKnownNonNegative(AddRec->getOperand(i)))
+ AllNonNeg = false;
+ if (!isKnownNonPositive(AddRec->getOperand(i)))
+ AllNonPos = false;
}
if (AllNonNeg)
ConservativeResult = ConservativeResult.intersectWith(
- ConstantRange(APInt(BitWidth, 0),
- APInt::getSignedMinValue(BitWidth)), RangeType);
+ ConstantRange::getNonEmpty(getSignedRangeMin(AddRec->getStart()),
+ APInt::getSignedMinValue(BitWidth)),
+ RangeType);
else if (AllNonPos)
ConservativeResult = ConservativeResult.intersectWith(
- ConstantRange(APInt::getSignedMinValue(BitWidth),
- APInt(BitWidth, 1)), RangeType);
+ ConstantRange::getNonEmpty(
+ APInt::getSignedMinValue(BitWidth),
+ getSignedRangeMax(AddRec->getStart()) + 1),
+ RangeType);
}
// TODO: non-affine addrec
@@ -5710,14 +5739,26 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
// For a SCEVUnknown, ask ValueTracking.
KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
- if (Known.One != ~Known.Zero + 1)
- ConservativeResult =
- ConservativeResult.intersectWith(
- ConstantRange(Known.One, ~Known.Zero + 1), RangeType);
+ if (Known.getBitWidth() != BitWidth)
+ Known = Known.zextOrTrunc(BitWidth, true);
+ // If Known does not result in full-set, intersect with it.
+ if (Known.getMinValue() != Known.getMaxValue() + 1)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(Known.getMinValue(), Known.getMaxValue() + 1),
+ RangeType);
} else {
assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
"generalize as needed!");
unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
+ // If the pointer size is larger than the index size, NS can end up
+ // larger than BitWidth, so compensate for that.
+ if (U->getType()->isPointerTy()) {
+ unsigned ptrSize = DL.getPointerTypeSizeInBits(U->getType());
+ int ptrIdxDiff = ptrSize - BitWidth;
+ if (ptrIdxDiff > 0 && ptrSize > BitWidth && NS > (unsigned)ptrIdxDiff)
+ NS -= ptrIdxDiff;
+ }
+
if (NS > 1)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
@@ -6599,12 +6640,16 @@ ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
return (unsigned)Result->getZExtValue();
}
-/// Get the expression for the number of loop iterations for which this loop is
-/// guaranteed not to exit via ExitingBlock. Otherwise return
-/// SCEVCouldNotCompute.
const SCEV *ScalarEvolution::getExitCount(const Loop *L,
- BasicBlock *ExitingBlock) {
- return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
+ BasicBlock *ExitingBlock,
+ ExitCountKind Kind) {
+ switch (Kind) {
+ case Exact:
+ return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
+ case ConstantMaximum:
+ return getBackedgeTakenInfo(L).getMax(ExitingBlock, this);
+ };
+ llvm_unreachable("Invalid ExitCountKind!");
}
const SCEV *
@@ -6613,14 +6658,15 @@ ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L,
return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds);
}
-const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
- return getBackedgeTakenInfo(L).getExact(L, this);
-}
-
-/// Similar to getBackedgeTakenCount, except return the least SCEV value that is
-/// known never to be less than the actual backedge taken count.
-const SCEV *ScalarEvolution::getConstantMaxBackedgeTakenCount(const Loop *L) {
- return getBackedgeTakenInfo(L).getMax(this);
+const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L,
+ ExitCountKind Kind) {
+ switch (Kind) {
+ case Exact:
+ return getBackedgeTakenInfo(L).getExact(L, this);
+ case ConstantMaximum:
+ return getBackedgeTakenInfo(L).getMax(this);
+ };
+ llvm_unreachable("Invalid ExitCountKind!");
}
bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
@@ -6929,6 +6975,16 @@ ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
return SE->getCouldNotCompute();
}
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getMax(BasicBlock *ExitingBlock,
+ ScalarEvolution *SE) const {
+ for (auto &ENT : ExitNotTaken)
+ if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
+ return ENT.MaxNotTaken;
+
+ return SE->getCouldNotCompute();
+}
+
/// getMax - Get the max backedge taken count for the loop.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
@@ -7020,13 +7076,15 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
BasicBlock *ExitBB = EEI.first;
const ExitLimit &EL = EEI.second;
if (EL.Predicates.empty())
- return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, nullptr);
+ return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
+ nullptr);
std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate);
for (auto *Pred : EL.Predicates)
Predicate->add(Pred);
- return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate));
+ return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
+ std::move(Predicate));
});
assert((isa<SCEVCouldNotCompute>(MaxCount) || isa<SCEVConstant>(MaxCount)) &&
"No point in having a non-constant max backedge taken count!");
@@ -7058,6 +7116,17 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
// Do a union of all the predicates here.
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitBB = ExitingBlocks[i];
+
+ // We canonicalize untaken exits to br (constant); ignore them so that
+ // proving an exit untaken doesn't negatively impact our ability to
+ // reason about the loop as a whole.
+ if (auto *BI = dyn_cast<BranchInst>(ExitBB->getTerminator()))
+ if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
+ bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
+ if ((ExitIfTrue && CI->isZero()) || (!ExitIfTrue && CI->isOne()))
+ continue;
+ }
+
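The guard above filters exiting blocks whose branch condition is a constant that always keeps control inside the loop, e.g. a `br i1 false` toward the exit. The predicate reduces to a two-bool check, sketched standalone:

```cpp
#include <cstdio>

// ExitIfTrue: the branch's true-successor leaves the loop.
// CondIsTrue: the constant branch condition. The exit is statically untaken
// when the constant always sends control to the in-loop successor.
bool exitIsStaticallyUntaken(bool ExitIfTrue, bool CondIsTrue) {
  return (ExitIfTrue && !CondIsTrue) || (!ExitIfTrue && CondIsTrue);
}

int main() {
  // br i1 false, label %exit, label %body -> this exit never fires
  std::printf("%d\n", exitIsStaticallyUntaken(true, false)); // 1
  // br i1 true, label %exit, label %body -> this exit always fires
  std::printf("%d\n", exitIsStaticallyUntaken(true, true)); // 0
}
```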
ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);
assert((AllowPredicates || EL.Predicates.empty()) &&
@@ -7217,6 +7286,11 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
ExitLimit EL1 = computeExitLimitFromCondCached(
Cache, L, BO->getOperand(1), ExitIfTrue,
ControlsExit && !EitherMayExit, AllowPredicates);
+ // Be robust against unsimplified IR for the form "and i1 X, true"
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ return CI->isOne() ? EL0 : EL1;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(0)))
+ return CI->isOne() ? EL1 : EL0;
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
@@ -7265,6 +7339,11 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
ExitLimit EL1 = computeExitLimitFromCondCached(
Cache, L, BO->getOperand(1), ExitIfTrue,
ControlsExit && !EitherMayExit, AllowPredicates);
+ // Be robust against unsimplified IR for the form "or i1 X, true"
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ return CI->isZero() ? EL0 : EL1;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(0)))
+ return CI->isZero() ? EL1 : EL0;
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
@@ -10976,7 +11055,7 @@ struct SCEVCollectAddRecMultiplies {
} else if (Unknown) {
HasAddRec = true;
} else {
- bool ContainsAddRec;
+ bool ContainsAddRec = false;
SCEVHasAddRec ContainsAddRecVisitor(ContainsAddRec);
visitAll(Op, ContainsAddRecVisitor);
HasAddRec |= ContainsAddRec;
@@ -11544,77 +11623,79 @@ void ScalarEvolution::print(raw_ostream &OS) const {
// const isn't dangerous.
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
- OS << "Classifying expressions for: ";
- F.printAsOperand(OS, /*PrintType=*/false);
- OS << "\n";
- for (Instruction &I : instructions(F))
- if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
- OS << I << '\n';
- OS << " --> ";
- const SCEV *SV = SE.getSCEV(&I);
- SV->print(OS);
- if (!isa<SCEVCouldNotCompute>(SV)) {
- OS << " U: ";
- SE.getUnsignedRange(SV).print(OS);
- OS << " S: ";
- SE.getSignedRange(SV).print(OS);
- }
-
- const Loop *L = LI.getLoopFor(I.getParent());
-
- const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
- if (AtUse != SV) {
+ if (ClassifyExpressions) {
+ OS << "Classifying expressions for: ";
+ F.printAsOperand(OS, /*PrintType=*/false);
+ OS << "\n";
+ for (Instruction &I : instructions(F))
+ if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
+ OS << I << '\n';
OS << " --> ";
- AtUse->print(OS);
- if (!isa<SCEVCouldNotCompute>(AtUse)) {
+ const SCEV *SV = SE.getSCEV(&I);
+ SV->print(OS);
+ if (!isa<SCEVCouldNotCompute>(SV)) {
OS << " U: ";
- SE.getUnsignedRange(AtUse).print(OS);
+ SE.getUnsignedRange(SV).print(OS);
OS << " S: ";
- SE.getSignedRange(AtUse).print(OS);
+ SE.getSignedRange(SV).print(OS);
}
- }
- if (L) {
- OS << "\t\t" "Exits: ";
- const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
- if (!SE.isLoopInvariant(ExitValue, L)) {
- OS << "<<Unknown>>";
- } else {
- OS << *ExitValue;
+ const Loop *L = LI.getLoopFor(I.getParent());
+
+ const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
+ if (AtUse != SV) {
+ OS << " --> ";
+ AtUse->print(OS);
+ if (!isa<SCEVCouldNotCompute>(AtUse)) {
+ OS << " U: ";
+ SE.getUnsignedRange(AtUse).print(OS);
+ OS << " S: ";
+ SE.getSignedRange(AtUse).print(OS);
+ }
}
- bool First = true;
- for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
- if (First) {
- OS << "\t\t" "LoopDispositions: { ";
- First = false;
+ if (L) {
+ OS << "\t\t" "Exits: ";
+ const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
+ if (!SE.isLoopInvariant(ExitValue, L)) {
+ OS << "<<Unknown>>";
} else {
- OS << ", ";
+ OS << *ExitValue;
}
- Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false);
- OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter));
- }
+ bool First = true;
+ for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
+ if (First) {
+ OS << "\t\t" "LoopDispositions: { ";
+ First = false;
+ } else {
+ OS << ", ";
+ }
- for (auto *InnerL : depth_first(L)) {
- if (InnerL == L)
- continue;
- if (First) {
- OS << "\t\t" "LoopDispositions: { ";
- First = false;
- } else {
- OS << ", ";
+ Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter));
}
- InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
- OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL));
+ for (auto *InnerL : depth_first(L)) {
+ if (InnerL == L)
+ continue;
+ if (First) {
+ OS << "\t\t" "LoopDispositions: { ";
+ First = false;
+ } else {
+ OS << ", ";
+ }
+
+ InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL));
+ }
+
+ OS << " }";
}
- OS << " }";
+ OS << "\n";
}
-
- OS << "\n";
- }
+ }
OS << "Determining loop execution counts for: ";
F.printAsOperand(OS, /*PrintType=*/false);
@@ -11994,6 +12075,12 @@ ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
}
PreservedAnalyses
+ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) {
+ AM.getResult<ScalarEvolutionAnalysis>(F).verify();
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses
ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
return PreservedAnalyses::all();
@@ -12462,7 +12549,7 @@ PredicatedScalarEvolution::PredicatedScalarEvolution(
const PredicatedScalarEvolution &Init)
: RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
- for (const auto &I : Init.FlagsMap)
+ for (auto I : Init.FlagsMap)
FlagsMap.insert(I);
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 96da0a24cddd..79640256f695 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -19,6 +19,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index bceec921188e..dc5d02aa3a3c 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -414,7 +414,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// without the other.
SplitAddRecs(Ops, Ty, SE);
- Type *IntPtrTy = DL.getIntPtrType(PTy);
+ Type *IntIdxTy = DL.getIndexType(PTy);
// Descend down the pointer's type and attempt to convert the other
// operands into GEP indices, at each level. The first index in a GEP
@@ -426,7 +426,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// array indexing.
SmallVector<const SCEV *, 8> ScaledOps;
if (ElTy->isSized()) {
- const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
+ const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
if (!ElSize->isZero()) {
SmallVector<const SCEV *, 8> NewOps;
for (const SCEV *Op : Ops) {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index 094e4a3d5dc8..8928678d6ab2 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index 1b3638698950..7f5bedabbd80 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -13,6 +13,8 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 230969698054..c7238db43aab 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -378,10 +379,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
case Triple::TvOS:
case Triple::WatchOS:
TLI.setUnavailable(LibFunc_exp10l);
- if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) ||
- (T.isOSVersionLT(9, 0) &&
- (T.getArch() == Triple::x86 ||
- T.getArch() == Triple::x86_64)))) {
+ if (!T.isWatchOS() &&
+ (T.isOSVersionLT(7, 0) || (T.isOSVersionLT(9, 0) && T.isX86()))) {
TLI.setUnavailable(LibFunc_exp10);
TLI.setUnavailable(LibFunc_exp10f);
} else {
@@ -1587,22 +1586,12 @@ StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F,
return I->ScalarFnName;
}
-TargetLibraryInfo TargetLibraryAnalysis::run(Function &F,
+TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F,
FunctionAnalysisManager &) {
- if (PresetInfoImpl)
- return TargetLibraryInfo(*PresetInfoImpl);
-
- return TargetLibraryInfo(
- lookupInfoImpl(Triple(F.getParent()->getTargetTriple())));
-}
-
-TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(const Triple &T) {
- std::unique_ptr<TargetLibraryInfoImpl> &Impl =
- Impls[T.normalize()];
- if (!Impl)
- Impl.reset(new TargetLibraryInfoImpl(T));
-
- return *Impl;
+ if (!BaselineInfoImpl)
+ BaselineInfoImpl =
+ TargetLibraryInfoImpl(Triple(F.getParent()->getTargetTriple()));
+ return TargetLibraryInfo(*BaselineInfoImpl, &F);
}
unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const {
@@ -1613,18 +1602,18 @@ unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const {
}
TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass()
- : ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) {
+ : ImmutablePass(ID), TLA(TargetLibraryInfoImpl()) {
initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(const Triple &T)
- : ImmutablePass(ID), TLIImpl(T), TLI(TLIImpl) {
+ : ImmutablePass(ID), TLA(TargetLibraryInfoImpl(T)) {
initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(
const TargetLibraryInfoImpl &TLIImpl)
- : ImmutablePass(ID), TLIImpl(TLIImpl), TLI(this->TLIImpl) {
+ : ImmutablePass(ID), TLA(TLIImpl) {
initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
@@ -1636,3 +1625,19 @@ INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo",
char TargetLibraryInfoWrapperPass::ID = 0;
void TargetLibraryInfoWrapperPass::anchor() {}
+
+unsigned TargetLibraryInfoImpl::getWidestVF(StringRef ScalarF) const {
+ ScalarF = sanitizeFunctionName(ScalarF);
+ if (ScalarF.empty())
+ return 1;
+
+ unsigned VF = 1;
+ std::vector<VecDesc>::const_iterator I =
+ llvm::lower_bound(VectorDescs, ScalarF, compareWithScalarFnName);
+ while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == ScalarF) {
+ if (I->VectorizationFactor > VF)
+ VF = I->VectorizationFactor;
+ ++I;
+ }
+ return VF;
+}
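getWidestVF leans on VectorDescs being sorted by scalar function name: lower_bound lands on the first mapping and a linear scan covers the equal range, keeping the largest vectorization factor. The same scan over a plain sorted std::vector:

```cpp
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

struct VecDesc { std::string ScalarFnName; unsigned VectorizationFactor; };

// Descs must be sorted by ScalarFnName. lower_bound finds the first mapping
// for ScalarF; the loop walks the equal range and tracks the widest factor.
unsigned getWidestVF(const std::vector<VecDesc> &Descs,
                     const std::string &ScalarF) {
  unsigned VF = 1;
  auto I = std::lower_bound(Descs.begin(), Descs.end(), ScalarF,
                            [](const VecDesc &D, const std::string &S) {
                              return D.ScalarFnName < S;
                            });
  for (; I != Descs.end() && I->ScalarFnName == ScalarF; ++I)
    VF = std::max(VF, I->VectorizationFactor);
  return VF;
}

int main() {
  std::vector<VecDesc> Descs = {{"cosf", 4}, {"sinf", 4}, {"sinf", 8}};
  std::printf("%u\n", getWidestVF(Descs, "sinf")); // 8
}
```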
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
index c9c294873ea6..f2c63f789d89 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -7,9 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -17,10 +19,9 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/LoopIterator.h"
#include <utility>
using namespace llvm;
@@ -194,9 +195,10 @@ int TargetTransformInfo::getIntrinsicCost(
}
unsigned
-TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
- unsigned &JTSize) const {
- return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
+TargetTransformInfo::getEstimatedNumberOfCaseClusters(
+ const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) const {
+ return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}
int TargetTransformInfo::getUserCost(const User *U,
@@ -242,6 +244,12 @@ bool TargetTransformInfo::isHardwareLoopProfitable(
return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
+bool TargetTransformInfo::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+ ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT, const LoopAccessInfo *LAI) const {
+ return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
+}
+
void TargetTransformInfo::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
return TTIImpl->getUnrollingPreferences(L, SE, UP);
@@ -307,12 +315,14 @@ bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
return TTIImpl->isLegalNTLoad(DataType, Alignment);
}
-bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
- return TTIImpl->isLegalMaskedGather(DataType);
+bool TargetTransformInfo::isLegalMaskedGather(Type *DataType,
+ MaybeAlign Alignment) const {
+ return TTIImpl->isLegalMaskedGather(DataType, Alignment);
}
-bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
- return TTIImpl->isLegalMaskedScatter(DataType);
+bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType,
+ MaybeAlign Alignment) const {
+ return TTIImpl->isLegalMaskedScatter(DataType, Alignment);
}
bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
@@ -454,16 +464,16 @@ int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
return Cost;
}
-int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
- int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
+int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
-int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
- int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
+int TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -584,10 +594,10 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
int TargetTransformInfo::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo,
- ArrayRef<const Value *> Args) const {
- int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo, Args);
+ OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+ const Instruction *CxtI) const {
+ int Cost = TTIImpl->getArithmeticInstrCost(
+ Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -639,7 +649,7 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
}
int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
+ MaybeAlign Alignment,
unsigned AddressSpace,
const Instruction *I) const {
assert ((I == nullptr || I->getOpcode() == Opcode) &&
@@ -1175,7 +1185,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
SmallVector<const Value *, 2> Operands(I->operand_values());
return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
- Op1VP, Op2VP, Operands);
+ Op1VP, Op2VP, Operands, I);
}
case Instruction::FNeg: {
TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
@@ -1185,7 +1195,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
Op2VP = OP_None;
SmallVector<const Value *, 2> Operands(I->operand_values());
return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
- Op1VP, Op2VP, Operands);
+ Op1VP, Op2VP, Operands, I);
}
case Instruction::Select: {
const SelectInst *SI = cast<SelectInst>(I);
@@ -1201,14 +1211,14 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
const StoreInst *SI = cast<StoreInst>(I);
Type *ValTy = SI->getValueOperand()->getType();
return getMemoryOpCost(I->getOpcode(), ValTy,
- SI->getAlignment(),
- SI->getPointerAddressSpace(), I);
+ MaybeAlign(SI->getAlignment()),
+ SI->getPointerAddressSpace(), I);
}
case Instruction::Load: {
const LoadInst *LI = cast<LoadInst>(I);
return getMemoryOpCost(I->getOpcode(), I->getType(),
- LI->getAlignment(),
- LI->getPointerAddressSpace(), I);
+ MaybeAlign(LI->getAlignment()),
+ LI->getPointerAddressSpace(), I);
}
case Instruction::ZExt:
case Instruction::SExt:
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 3b9040aa0f52..da4520066b46 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -114,6 +114,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -520,23 +521,20 @@ static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) {
}
void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
- if (Merge)
+ if (Merge) {
N.TBAA =
MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa));
- else
- N.TBAA = getMetadata(LLVMContext::MD_tbaa);
-
- if (Merge)
+ N.TBAAStruct = nullptr;
N.Scope = MDNode::getMostGenericAliasScope(
N.Scope, getMetadata(LLVMContext::MD_alias_scope));
- else
- N.Scope = getMetadata(LLVMContext::MD_alias_scope);
-
- if (Merge)
N.NoAlias =
MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias));
- else
+ } else {
+ N.TBAA = getMetadata(LLVMContext::MD_tbaa);
+ N.TBAAStruct = getMetadata(LLVMContext::MD_tbaa_struct);
+ N.Scope = getMetadata(LLVMContext::MD_alias_scope);
N.NoAlias = getMetadata(LLVMContext::MD_noalias);
+ }
}
static const MDNode *createAccessTag(const MDNode *AccessType) {
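In the rewritten getAAMetadata above, merge mode combines each metadata kind with what the instruction carries; for !noalias that combination is an intersection, since only scopes asserted by both tags remain trustworthy. A toy model of that intersection step, with integer sets standing in for MDNode scope lists:

```cpp
#include <cstdio>
#include <optional>
#include <set>

// nullopt plays the role of a null MDNode (no metadata of that kind).
using Tag = std::optional<std::set<int>>;

// Models the !noalias merge: only scopes asserted by both tags survive, and
// an empty intersection degrades to "no metadata".
Tag intersectTags(const Tag &A, const Tag &B) {
  if (!A || !B)
    return std::nullopt;
  std::set<int> Out;
  for (int Scope : *A)
    if (B->count(Scope))
      Out.insert(Scope);
  return Out.empty() ? Tag{} : Tag{Out};
}

int main() {
  Tag A = std::set<int>{1, 2, 3};
  Tag B = std::set<int>{2, 3, 4};
  if (Tag M = intersectTags(A, B))
    for (int Scope : *M)
      std::printf("%d ", Scope); // 2 3
}
```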
diff --git a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
index 6fd8ae63f5f0..a331b95e818b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
@@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;
@@ -26,16 +28,20 @@ ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) {
if (MangledName.empty())
return ParseRet::Error;
- ISA = StringSwitch<VFISAKind>(MangledName.take_front(1))
- .Case("n", VFISAKind::AdvancedSIMD)
- .Case("s", VFISAKind::SVE)
- .Case("b", VFISAKind::SSE)
- .Case("c", VFISAKind::AVX)
- .Case("d", VFISAKind::AVX2)
- .Case("e", VFISAKind::AVX512)
- .Default(VFISAKind::Unknown);
-
- MangledName = MangledName.drop_front(1);
+ if (MangledName.startswith(VFABI::_LLVM_)) {
+ MangledName = MangledName.drop_front(strlen(VFABI::_LLVM_));
+ ISA = VFISAKind::LLVM;
+ } else {
+ ISA = StringSwitch<VFISAKind>(MangledName.take_front(1))
+ .Case("n", VFISAKind::AdvancedSIMD)
+ .Case("s", VFISAKind::SVE)
+ .Case("b", VFISAKind::SSE)
+ .Case("c", VFISAKind::AVX)
+ .Case("d", VFISAKind::AVX2)
+ .Case("e", VFISAKind::AVX512)
+ .Default(VFISAKind::Unknown);
+ MangledName = MangledName.drop_front(1);
+ }
return ParseRet::OK;
}
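tryParseISA now peels either the vendor-neutral single-character ISA token or the new `_LLVM_` marker off the front of the mangled name (the `_ZGV` prefix is assumed to be consumed already). A standalone re-implementation of the token split over std::string:

```cpp
#include <cstdio>
#include <cstring>
#include <string>

enum class ISAKind { AdvancedSIMD, SVE, SSE, AVX, AVX2, AVX512, LLVM, Unknown };

// Consumes the ISA token from the front of a mangled name whose _ZGV prefix
// has already been stripped: either the "_LLVM_" marker or one character.
bool tryParseISA(std::string &Mangled, ISAKind &ISA) {
  if (Mangled.empty())
    return false;
  const char *LLVMTag = "_LLVM_";
  if (Mangled.compare(0, std::strlen(LLVMTag), LLVMTag) == 0) {
    Mangled.erase(0, std::strlen(LLVMTag));
    ISA = ISAKind::LLVM;
    return true;
  }
  switch (Mangled[0]) {
  case 'n': ISA = ISAKind::AdvancedSIMD; break;
  case 's': ISA = ISAKind::SVE; break;
  case 'b': ISA = ISAKind::SSE; break;
  case 'c': ISA = ISAKind::AVX; break;
  case 'd': ISA = ISAKind::AVX2; break;
  case 'e': ISA = ISAKind::AVX512; break;
  default:  ISA = ISAKind::Unknown; break;
  }
  Mangled.erase(0, 1);
  return true;
}

int main() {
  std::string Name = "bN4v_sinf"; // SSE, unmasked, VF=4, one vector parameter
  ISAKind ISA;
  if (tryParseISA(Name, ISA))
    std::printf("isa=%d rest=%s\n", static_cast<int>(ISA), Name.c_str());
}
```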
@@ -286,6 +292,7 @@ ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) {
// Format of the ABI name:
// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName) {
+ const StringRef OriginalName = MangledName;
// Assume there is no custom name <redirection>, and therefore the
// vector name consists of
// _ZGV<isa><mask><vlen><parameters>_<scalarname>.
@@ -338,7 +345,7 @@ Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName) {
}
} while (ParamFound == ParseRet::OK);
- // A valid MangledName mus have at least one valid entry in the
+ // A valid MangledName must have at least one valid entry in the
// <parameters>.
if (Parameters.empty())
return None;
@@ -369,6 +376,11 @@ Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName) {
return None;
}
+ // LLVM internal mapping via the TargetLibraryInfo (TLI) must be
+ // redirected to an existing name.
+ if (ISA == VFISAKind::LLVM && VectorName == OriginalName)
+ return None;
+
// When <mask> is "M", we need to add a parameter that is used as
// global predicate for the function.
if (IsMasked) {
@@ -390,8 +402,8 @@ Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName) {
assert(Parameters.back().ParamKind == VFParamKind::GlobalPredicate &&
"The global predicate must be the last parameter");
- const VFShape Shape({VF, IsScalable, ISA, Parameters});
- return VFInfo({Shape, ScalarName, VectorName});
+ const VFShape Shape({VF, IsScalable, Parameters});
+ return VFInfo({Shape, ScalarName, VectorName, ISA});
}
VFParamKind VFABI::getVFParamKindFromString(const StringRef Token) {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
index bbf389991836..ad6765e2514b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
@@ -51,6 +51,8 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -88,7 +90,7 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
- return DL.getIndexTypeSizeInBits(Ty);
+ return DL.getPointerTypeSizeInBits(Ty);
}
namespace {
@@ -564,17 +566,83 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
if (Inv == CxtI)
return false;
- // The context comes first, but they're both in the same block. Make sure
- // there is nothing in between that might interrupt the control flow.
- for (BasicBlock::const_iterator I =
- std::next(BasicBlock::const_iterator(CxtI)), IE(Inv);
- I != IE; ++I)
+ // The context comes first, but they're both in the same block.
+ // Make sure there is nothing in between that might interrupt
+ // the control flow, not even CxtI itself.
+ for (BasicBlock::const_iterator I(CxtI), IE(Inv); I != IE; ++I)
if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
return false;
return !isEphemeralValueOf(Inv, CxtI);
}
+static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
+ // Use of assumptions is context-sensitive. If we don't have a context, we
+ // cannot use them!
+ if (!Q.AC || !Q.CxtI)
+ return false;
+
+ // Note that the patterns below need to be kept in sync with the code
+ // in AssumptionCache::updateAffectedValues.
+
+ auto CmpExcludesZero = [V](ICmpInst *Cmp) {
+ auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
+
+ Value *RHS;
+ CmpInst::Predicate Pred;
+ if (!match(Cmp, m_c_ICmp(Pred, m_V, m_Value(RHS))))
+ return false;
+ // Canonicalize 'v' to be on the LHS of the comparison.
+ if (Cmp->getOperand(1) != RHS)
+ Pred = CmpInst::getSwappedPredicate(Pred);
+
+ // assume(v u> y) -> assume(v != 0)
+ if (Pred == ICmpInst::ICMP_UGT)
+ return true;
+
+ // assume(v != 0)
+ // We special-case this one to ensure that we handle `assume(v != null)`.
+ if (Pred == ICmpInst::ICMP_NE)
+ return match(RHS, m_Zero());
+
+ // All other predicates - rely on generic ConstantRange handling.
+ ConstantInt *CI;
+ if (!match(RHS, m_ConstantInt(CI)))
+ return false;
+ ConstantRange RHSRange(CI->getValue());
+ ConstantRange TrueValues =
+ ConstantRange::makeAllowedICmpRegion(Pred, RHSRange);
+ return !TrueValues.contains(APInt::getNullValue(CI->getBitWidth()));
+ };
+
+ for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
+ if (!AssumeVH)
+ continue;
+ CallInst *I = cast<CallInst>(AssumeVH);
+ assert(I->getFunction() == Q.CxtI->getFunction() &&
+ "Got assumption for the wrong function!");
+ if (Q.isExcluded(I))
+ continue;
+
+ // Warning: This loop can end up being somewhat performance-sensitive.
+ // We run this loop once for each value queried, resulting in a runtime
+ // of ~O(#assumes * #values).
+
+ assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
+ "must be an assume intrinsic");
+
+ Value *Arg = I->getArgOperand(0);
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
+ if (!Cmp)
+ continue;
+
+ if (CmpExcludesZero(Cmp) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
+ return true;
+ }
+
+ return false;
+}
+
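For illustration, a standalone sketch of the range check performed by CmpExcludesZero above; the helper name and the constant are ours, but the API calls are the same ones the lambda uses:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // assume(icmp ugt %v, 7): the allowed region for %v is [8, 2^32), which
    // does not contain 0, so %v must be non-zero.
    static bool excludesZeroSketch() {
      APInt RHS(/*numBits=*/32, /*val=*/7);
      ConstantRange TrueValues = ConstantRange::makeAllowedICmpRegion(
          ICmpInst::ICMP_UGT, ConstantRange(RHS));
      return !TrueValues.contains(APInt::getNullValue(32)); // true
    }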
static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
unsigned Depth, const Query &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
@@ -915,7 +983,7 @@ static void computeKnownBitsFromShiftOperator(
// If the shift amount could be greater than or equal to the bit-width of the
// LHS, the value could be poison, but bail out because the check below is
// expensive. TODO: Should we just carry on?
- if ((~Known.Zero).uge(BitWidth)) {
+ if (Known.getMaxValue().uge(BitWidth)) {
Known.resetAll();
return;
}
@@ -1135,7 +1203,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// which fall through here.
Type *ScalarTy = SrcTy->getScalarType();
SrcBitWidth = ScalarTy->isPointerTy() ?
- Q.DL.getIndexTypeSizeInBits(ScalarTy) :
+ Q.DL.getPointerTypeSizeInBits(ScalarTy) :
Q.DL.getTypeSizeInBits(ScalarTy);
assert(SrcBitWidth && "SrcBitWidth can't be zero");
@@ -1353,6 +1421,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
for (unsigned i = 0; i != 2; ++i) {
Value *L = P->getIncomingValue(i);
Value *R = P->getIncomingValue(!i);
+ Instruction *RInst = P->getIncomingBlock(!i)->getTerminator();
+ Instruction *LInst = P->getIncomingBlock(i)->getTerminator();
Operator *LU = dyn_cast<Operator>(L);
if (!LU)
continue;
@@ -1374,13 +1444,22 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
L = LL;
else
continue; // Check for recurrence with L and R flipped.
+
+ // Change the context instruction to the "edge" that flows into the
+ // phi. This is important because that is where the value is actually
+ // "evaluated" even though it is used later somewhere else. (see also
+ // D69571).
+ Query RecQ = Q;
+
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
- computeKnownBits(R, Known2, Depth + 1, Q);
+ RecQ.CxtI = RInst;
+ computeKnownBits(R, Known2, Depth + 1, RecQ);
// We need to take the minimum number of known bits
KnownBits Known3(Known);
- computeKnownBits(L, Known3, Depth + 1, Q);
+ RecQ.CxtI = LInst;
+ computeKnownBits(L, Known3, Depth + 1, RecQ);
Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
Known3.countMinTrailingZeros()));
@@ -1436,14 +1515,22 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
Known.Zero.setAllBits();
Known.One.setAllBits();
- for (Value *IncValue : P->incoming_values()) {
+ for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
+ Value *IncValue = P->getIncomingValue(u);
// Skip direct self references.
if (IncValue == P) continue;
+ // Change the context instruction to the "edge" that flows into the
+ // phi. This is important because that is where the value is actually
+ // "evaluated" even though it is used later somewhere else. (see also
+ // D69571).
+ Query RecQ = Q;
+ RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
+
Known2 = KnownBits(BitWidth);
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
- computeKnownBits(IncValue, Known2, MaxDepth - 1, Q);
+ computeKnownBits(IncValue, Known2, MaxDepth - 1, RecQ);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
// If all bits have been ruled out, there's no need to check
@@ -1643,7 +1730,7 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
Type *ScalarTy = V->getType()->getScalarType();
unsigned ExpectedWidth = ScalarTy->isPointerTy() ?
- Q.DL.getIndexTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy);
+ Q.DL.getPointerTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy);
assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth");
(void)BitWidth;
(void)ExpectedWidth;
@@ -1902,8 +1989,8 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
static bool isKnownNonNullFromDominatingCondition(const Value *V,
const Instruction *CtxI,
const DominatorTree *DT) {
- assert(V->getType()->isPointerTy() && "V must be pointer type");
- assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull");
+ if (isa<Constant>(V))
+ return false;
if (!CtxI || !DT)
return false;
@@ -1924,6 +2011,15 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI))
return true;
+  // If the value is used as a load/store pointer, then it must be non-null.
+ if (V == getLoadStorePointerOperand(U)) {
+ const Instruction *I = cast<Instruction>(U);
+ if (!NullPointerIsDefined(I->getFunction(),
+ V->getType()->getPointerAddressSpace()) &&
+ DT->dominates(I, CtxI))
+ return true;
+ }
+
// Consider only compare instructions uniquely controlling a branch
CmpInst::Predicate Pred;
if (!match(const_cast<User *>(U),
@@ -2050,6 +2146,9 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
}
+ if (isKnownNonZeroFromAssume(V, Q))
+ return true;
+
// Some of the tests below are recursive, so bail out if we hit the limit.
if (Depth++ >= MaxDepth)
return false;
@@ -2078,12 +2177,11 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
}
+ if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
+ return true;
// Check for recursive pointer simplifications.
if (V->getType()->isPointerTy()) {
- if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
- return true;
-
// Look through bitcast operations, GEPs, and int2ptr instructions as they
// do not alter the value, or at least not the nullness property of the
// value, e.g., int2ptr is allowed to zero/sign extend the value.
@@ -2380,7 +2478,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
Type *ScalarTy = V->getType()->getScalarType();
unsigned TyBits = ScalarTy->isPointerTy() ?
- Q.DL.getIndexTypeSizeInBits(ScalarTy) :
+ Q.DL.getPointerTypeSizeInBits(ScalarTy) :
Q.DL.getTypeSizeInBits(ScalarTy);
unsigned Tmp, Tmp2;
@@ -3095,6 +3193,58 @@ bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
}
+bool llvm::isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI,
+ unsigned Depth) {
+ assert(V->getType()->isFPOrFPVectorTy() && "Querying for Inf on non-FP type");
+
+ // If we're told that infinities won't happen, assume they won't.
+ if (auto *FPMathOp = dyn_cast<FPMathOperator>(V))
+ if (FPMathOp->hasNoInfs())
+ return true;
+
+ // Handle scalar constants.
+ if (auto *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->isInfinity();
+
+ if (Depth == MaxDepth)
+ return false;
+
+ if (auto *Inst = dyn_cast<Instruction>(V)) {
+ switch (Inst->getOpcode()) {
+ case Instruction::Select: {
+ return isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1) &&
+ isKnownNeverInfinity(Inst->getOperand(2), TLI, Depth + 1);
+ }
+ case Instruction::UIToFP:
+      // If the input type fits into the floating-point type, the result is finite.
+ return ilogb(APFloat::getLargest(
+ Inst->getType()->getScalarType()->getFltSemantics())) >=
+ (int)Inst->getOperand(0)->getType()->getScalarSizeInBits();
+ default:
+ break;
+ }
+ }
+
+ // Bail out for constant expressions, but try to handle vector constants.
+ if (!V->getType()->isVectorTy() || !isa<Constant>(V))
+ return false;
+
+ // For vectors, verify that each element is not infinity.
+ unsigned NumElts = V->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
+ if (!Elt)
+ return false;
+ if (isa<UndefValue>(Elt))
+ continue;
+ auto *CElt = dyn_cast<ConstantFP>(Elt);
+ if (!CElt || CElt->isInfinity())
+ return false;
+ }
+ // All elements were confirmed non-infinity or undefined.
+ return true;
+}
+
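A standalone sketch of the UIToFP bound used above; the helper name is ours, the APFloat calls are the ones in the case:

    #include "llvm/ADT/APFloat.h"
    using namespace llvm;

    // ilogb of the largest IEEE single is 127, so a uitofp from any unsigned
    // integer of at most 127 bits (e.g. i32) can never produce infinity,
    // while an i128 input can round up to +inf.
    static bool uitofpToFloatNeverInf(unsigned IntBits) {
      return ilogb(APFloat::getLargest(APFloat::IEEEsingle())) >= (int)IntBits;
    }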
bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
unsigned Depth) {
assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type");
@@ -3114,13 +3264,26 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
if (auto *Inst = dyn_cast<Instruction>(V)) {
switch (Inst->getOpcode()) {
case Instruction::FAdd:
- case Instruction::FMul:
case Instruction::FSub:
+ // Adding positive and negative infinity produces NaN.
+ return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) &&
+ isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
+ (isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) ||
+ isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1));
+
+ case Instruction::FMul:
+      // Zero multiplied by infinity produces NaN.
+      // FIXME: If neither side can be zero, fmul never produces NaN.
+ return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) &&
+ isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) &&
+ isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
+ isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1);
+
case Instruction::FDiv:
- case Instruction::FRem: {
- // TODO: Need isKnownNeverInfinity
+ case Instruction::FRem:
+      // FIXME: Only 0/0, Inf/Inf, Inf REM x, and x REM 0 produce NaN.
return false;
- }
+
case Instruction::Select: {
return isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
isKnownNeverNaN(Inst->getOperand(2), TLI, Depth + 1);
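As a concrete reminder of why the FAdd/FSub case above also requires a never-infinity operand, not just never-NaN operands (plain C++, values arbitrary):

    #include <cmath>
    #include <limits>

    // inf + (-inf) is NaN, so two operands that are merely known not-NaN can
    // still add up to NaN unless at least one is also known not-infinity.
    static const double Inf = std::numeric_limits<double>::infinity();
    static const bool ProducesNaN = std::isnan(Inf + -Inf); // true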
@@ -4222,6 +4385,20 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
}
+bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V) {
+ // If the value is a freeze instruction, then it can never
+ // be undef or poison.
+ if (isa<FreezeInst>(V))
+ return true;
+ // TODO: Some instructions are guaranteed to return neither undef
+ // nor poison if their arguments are not poison/undef.
+
+ // TODO: Deal with other Constant subclasses.
+ if (isa<ConstantInt>(V) || isa<GlobalVariable>(V))
+ return true;
+
+ return false;
+}
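A minimal sketch of the contract this helper establishes; BB and X are illustrative, and CreateFreeze is the IRBuilder helper added alongside the freeze instruction:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    static void sketch(BasicBlock *BB, Value *X) {
      IRBuilder<> B(BB);
      Value *F = B.CreateFreeze(X); // freeze result: never undef or poison
      Value *C = B.getInt32(42);    // ConstantInt: never undef or poison
      // Any other value is conservatively rejected by the helper for now.
      (void)F; (void)C;
    }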
OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
const DataLayout &DL,
diff --git a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
index 600f57ab9d71..c45ab941a142 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
#define DEBUG_TYPE "vectorutils"
@@ -1159,3 +1160,69 @@ void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
propagateMetadata(NewInst, VL);
}
}
+
+void VFABI::getVectorVariantNames(
+ const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
+ const StringRef S =
+ CI.getAttribute(AttributeList::FunctionIndex, VFABI::MappingsAttrName)
+ .getValueAsString();
+ if (S.empty())
+ return;
+
+ SmallVector<StringRef, 8> ListAttr;
+ S.split(ListAttr, ",");
+
+ for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
+#ifndef NDEBUG
+ Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S);
+ assert(Info.hasValue() && "Invalid name for a VFABI variant.");
+ assert(CI.getModule()->getFunction(Info.getValue().VectorName) &&
+ "Vector function is missing.");
+#endif
+ VariantMappings.push_back(S);
+ }
+}
+
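A hypothetical use of the new helper; the attribute string and function names are illustrative:

    // Given a call site CI whose function attributes carry, for example:
    //   "vector-function-abi-variant"="_ZGVnN2v_foo(foo_vec2)"
    SmallVector<std::string, 8> Mappings;
    VFABI::getVectorVariantNames(CI, Mappings);
    // Mappings now holds {"_ZGVnN2v_foo(foo_vec2)"}; each entry must demangle
    // via VFABI::tryDemangleForVFABI and name a function in the module.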
+bool VFShape::hasValidParameterList() const {
+ for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
+ ++Pos) {
+ assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");
+
+ switch (Parameters[Pos].ParamKind) {
+ default: // Nothing to check.
+ break;
+ case VFParamKind::OMP_Linear:
+ case VFParamKind::OMP_LinearRef:
+ case VFParamKind::OMP_LinearVal:
+ case VFParamKind::OMP_LinearUVal:
+      // Compile-time linear steps must be non-zero.
+ if (Parameters[Pos].LinearStepOrPos == 0)
+ return false;
+ break;
+ case VFParamKind::OMP_LinearPos:
+ case VFParamKind::OMP_LinearRefPos:
+ case VFParamKind::OMP_LinearValPos:
+ case VFParamKind::OMP_LinearUValPos:
+      // The runtime linear step must refer to some other parameter in the
+      // signature.
+ if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
+ return false;
+ // The linear step parameter must be marked as uniform.
+ if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind !=
+ VFParamKind::OMP_Uniform)
+ return false;
+ // The linear step parameter can't point at itself.
+ if (Parameters[Pos].LinearStepOrPos == int(Pos))
+ return false;
+ break;
+ case VFParamKind::GlobalPredicate:
+      // The global predicate must be unique. It can be placed anywhere in
+      // the signature.
+ for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)
+ if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate)
+ return false;
+ break;
+ }
+ }
+ return true;
+}
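In terms of the checks above, a few hypothetical parameter lists and their verdicts:

    // {OMP_Linear with LinearStepOrPos == 0}              -> invalid (zero step)
    // {OMP_LinearPos at pos 0 -> pos 1, OMP_Uniform at 1} -> valid
    // {OMP_LinearPos referring to itself or to a
    //  non-uniform parameter}                             -> invalid
    // {GlobalPredicate, GlobalPredicate}                  -> invalid (not unique)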
diff --git a/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp
index 5292b0e62744..d96b5e0bff5a 100644
--- a/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp
+++ b/contrib/llvm-project/llvm/lib/AsmParser/LLLexer.cpp
@@ -585,6 +585,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(ccc);
KEYWORD(fastcc);
KEYWORD(coldcc);
+ KEYWORD(cfguard_checkcc);
KEYWORD(x86_stdcallcc);
KEYWORD(x86_fastcallcc);
KEYWORD(x86_thiscallcc);
@@ -593,6 +594,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
KEYWORD(aarch64_vector_pcs);
+ KEYWORD(aarch64_sve_vector_pcs);
KEYWORD(msp430_intrcc);
KEYWORD(avr_intrcc);
KEYWORD(avr_signalcc);
@@ -749,6 +751,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(noRecurse);
KEYWORD(returnDoesNotAlias);
KEYWORD(noInline);
+ KEYWORD(alwaysInline);
KEYWORD(calls);
KEYWORD(callee);
KEYWORD(hotness);
@@ -892,6 +895,8 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(catchpad, CatchPad);
INSTKEYWORD(cleanuppad, CleanupPad);
+ INSTKEYWORD(freeze, Freeze);
+
#undef INSTKEYWORD
#define DWKEYWORD(TYPE, TOKEN) \
diff --git a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
index 594537307d00..1a17f633ae16 100644
--- a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
@@ -1636,6 +1636,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_nest: B.addAttribute(Attribute::Nest); break;
case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break;
+ case lltok::kw_nofree: B.addAttribute(Attribute::NoFree); break;
case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break;
case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break;
case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break;
@@ -1921,6 +1922,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'fastcc'
/// ::= 'intel_ocl_bicc'
/// ::= 'coldcc'
+/// ::= 'cfguard_checkcc'
/// ::= 'x86_stdcallcc'
/// ::= 'x86_fastcallcc'
/// ::= 'x86_thiscallcc'
@@ -1929,6 +1931,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
/// ::= 'aarch64_vector_pcs'
+/// ::= 'aarch64_sve_vector_pcs'
/// ::= 'msp430_intrcc'
/// ::= 'avr_intrcc'
/// ::= 'avr_signalcc'
@@ -1965,6 +1968,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_ccc: CC = CallingConv::C; break;
case lltok::kw_fastcc: CC = CallingConv::Fast; break;
case lltok::kw_coldcc: CC = CallingConv::Cold; break;
+ case lltok::kw_cfguard_checkcc: CC = CallingConv::CFGuard_Check; break;
case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
case lltok::kw_x86_regcallcc: CC = CallingConv::X86_RegCall; break;
@@ -1974,6 +1978,9 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
case lltok::kw_aarch64_vector_pcs:CC = CallingConv::AArch64_VectorCall; break;
+ case lltok::kw_aarch64_sve_vector_pcs:
+ CC = CallingConv::AArch64_SVE_VectorCall;
+ break;
case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
case lltok::kw_avr_intrcc: CC = CallingConv::AVR_INTR; break;
case lltok::kw_avr_signalcc: CC = CallingConv::AVR_SIGNAL; break;
@@ -4814,19 +4821,19 @@ bool LLParser::ParseDIMacroFile(MDNode *&Result, bool IsDistinct) {
/// ParseDIModule:
/// ::= !DIModule(scope: !0, name: "SomeModule", configMacros: "-DNDEBUG",
-/// includePath: "/usr/include", isysroot: "/")
+/// includePath: "/usr/include", sysroot: "/")
bool LLParser::ParseDIModule(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(scope, MDField, ); \
REQUIRED(name, MDStringField, ); \
OPTIONAL(configMacros, MDStringField, ); \
OPTIONAL(includePath, MDStringField, ); \
- OPTIONAL(isysroot, MDStringField, );
+ OPTIONAL(sysroot, MDStringField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
Result = GET_OR_DISTINCT(DIModule, (Context, scope.Val, name.Val,
- configMacros.Val, includePath.Val, isysroot.Val));
+ configMacros.Val, includePath.Val, sysroot.Val));
return false;
}
@@ -5797,7 +5804,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
if (Res != 0)
return Res;
if (FMF.any()) {
- if (!Inst->getType()->isFPOrFPVectorTy())
+ if (!isa<FPMathOperator>(Inst))
return Error(Loc, "fast-math-flags specified for select without "
"floating-point scalar or vector return type");
Inst->setFastMathFlags(FMF);
@@ -5814,7 +5821,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
if (Res != 0)
return Res;
if (FMF.any()) {
- if (!Inst->getType()->isFPOrFPVectorTy())
+ if (!isa<FPMathOperator>(Inst))
return Error(Loc, "fast-math-flags specified for phi without "
"floating-point scalar or vector return type");
Inst->setFastMathFlags(FMF);
@@ -5822,6 +5829,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
return 0;
}
case lltok::kw_landingpad: return ParseLandingPad(Inst, PFS);
+ case lltok::kw_freeze: return ParseFreeze(Inst, PFS);
// Call.
case lltok::kw_call: return ParseCall(Inst, PFS, CallInst::TCK_None);
case lltok::kw_tail: return ParseCall(Inst, PFS, CallInst::TCK_Tail);
@@ -6745,6 +6753,18 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
return false;
}
+/// ParseFreeze
+/// ::= 'freeze' Type Value
+bool LLParser::ParseFreeze(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op;
+ if (ParseTypeAndValue(Op, Loc, PFS))
+ return true;
+
+ Inst = new FreezeInst(Op);
+ return false;
+}
+
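For reference, the textual form this rule accepts, plus the equivalent programmatic construction (a sketch; the result name is illustrative):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Equivalent of parsing "%frozen = freeze i32 %val".
    static Instruction *buildFreeze(Value *Op) {
      return new FreezeInst(Op, "frozen");
    }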
/// ParseCall
/// ::= 'call' OptionalFastMathFlags OptionalCallingConv
/// OptionalAttrs Type Value ParameterList OptionalAttrs
@@ -6785,10 +6805,6 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
ParseOptionalOperandBundles(BundleList, PFS))
return true;
- if (FMF.any() && !RetType->isFPOrFPVectorTy())
- return Error(CallLoc, "fast-math-flags specified for call without "
- "floating-point scalar or vector return type");
-
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
@@ -6851,8 +6867,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
CallInst *CI = CallInst::Create(Ty, Callee, Args, BundleList);
CI->setTailCallKind(TCK);
CI->setCallingConv(CC);
- if (FMF.any())
+ if (FMF.any()) {
+ if (!isa<FPMathOperator>(CI))
+ return Error(CallLoc, "fast-math-flags specified for call without "
+ "floating-point scalar or vector return type");
CI->setFastMathFlags(FMF);
+ }
CI->setAttributes(PAL);
ForwardRefAttrGroups[CI] = FwdRefAttrGrps;
Inst = CI;
@@ -6917,8 +6937,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
if (Size && !Size->getType()->isIntegerTy())
return Error(SizeLoc, "element count must have integer type");
- AllocaInst *AI =
- new AllocaInst(Ty, AddrSpace, Size, Alignment ? Alignment->value() : 0);
+ AllocaInst *AI = new AllocaInst(Ty, AddrSpace, Size, Alignment);
AI->setUsedWithInAlloca(IsInAlloca);
AI->setSwiftError(IsSwiftError);
Inst = AI;
@@ -8244,6 +8263,8 @@ bool LLParser::ParseFlag(unsigned &Val) {
/// [',' 'readOnly' ':' Flag]? [',' 'noRecurse' ':' Flag]?
/// [',' 'returnDoesNotAlias' ':' Flag]? ')'
/// [',' 'noInline' ':' Flag]? ')'
+/// [',' 'alwaysInline' ':' Flag]? ')'
+
bool LLParser::ParseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
assert(Lex.getKind() == lltok::kw_funcFlags);
Lex.Lex();
@@ -8285,6 +8306,12 @@ bool LLParser::ParseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
return true;
FFlags.NoInline = Val;
break;
+ case lltok::kw_alwaysInline:
+ Lex.Lex();
+ if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Val))
+ return true;
+ FFlags.AlwaysInline = Val;
+ break;
default:
return Error(Lex.getLoc(), "expected function flag type");
}
diff --git a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.h b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.h
index abc423b4e3cd..cf2121dcc70a 100644
--- a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.h
+++ b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.h
@@ -600,6 +600,7 @@ namespace llvm {
int ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS);
int ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS);
int ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseFreeze(Instruction *&I, PerFunctionState &PFS);
// Use-list order directives.
bool ParseUseListOrder(PerFunctionState *PFS = nullptr);
diff --git a/contrib/llvm-project/llvm/lib/AsmParser/LLToken.h b/contrib/llvm-project/llvm/lib/AsmParser/LLToken.h
index f49feb2dc14d..e430e0f6faa0 100644
--- a/contrib/llvm-project/llvm/lib/AsmParser/LLToken.h
+++ b/contrib/llvm-project/llvm/lib/AsmParser/LLToken.h
@@ -132,6 +132,7 @@ enum Kind {
kw_fastcc,
kw_coldcc,
kw_intel_ocl_bicc,
+ kw_cfguard_checkcc,
kw_x86_stdcallcc,
kw_x86_fastcallcc,
kw_x86_thiscallcc,
@@ -141,6 +142,7 @@ enum Kind {
kw_arm_aapcscc,
kw_arm_aapcs_vfpcc,
kw_aarch64_vector_pcs,
+ kw_aarch64_sve_vector_pcs,
kw_msp430_intrcc,
kw_avr_intrcc,
kw_avr_signalcc,
@@ -352,6 +354,8 @@ enum Kind {
kw_insertvalue,
kw_blockaddress,
+ kw_freeze,
+
// Metadata types.
kw_distinct,
@@ -380,6 +384,7 @@ enum Kind {
kw_noRecurse,
kw_returnDoesNotAlias,
kw_noInline,
+ kw_alwaysInline,
kw_calls,
kw_callee,
kw_hotness,
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 3f36dff9f55c..d927171d556c 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -119,6 +119,7 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
.Case("hidden_global_offset_z", true)
.Case("hidden_none", true)
.Case("hidden_printf_buffer", true)
+ .Case("hidden_hostcall_buffer", true)
.Case("hidden_default_queue", true)
.Case("hidden_completion_action", true)
.Case("hidden_multigrid_sync_arg", true)
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/Dwarf.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/Dwarf.cpp
index d06cccdf0dfd..9ca3317418ce 100644
--- a/contrib/llvm-project/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -274,6 +274,19 @@ StringRef llvm::dwarf::AccessibilityString(unsigned Access) {
return StringRef();
}
+StringRef llvm::dwarf::DefaultedMemberString(unsigned DefaultedEncodings) {
+ switch (DefaultedEncodings) {
+  // Defaulted member encoding codes.
+ case DW_DEFAULTED_no:
+ return "DW_DEFAULTED_no";
+ case DW_DEFAULTED_in_class:
+ return "DW_DEFAULTED_in_class";
+ case DW_DEFAULTED_out_of_class:
+ return "DW_DEFAULTED_out_of_class";
+ }
+ return StringRef();
+}
+
StringRef llvm::dwarf::VisibilityString(unsigned Visibility) {
switch (Visibility) {
case DW_VIS_local:
@@ -615,6 +628,8 @@ StringRef llvm::dwarf::AttributeValueString(uint16_t Attr, unsigned Val) {
return ArrayOrderString(Val);
case DW_AT_APPLE_runtime_class:
return LanguageString(Val);
+ case DW_AT_defaulted:
+ return DefaultedMemberString(Val);
}
return StringRef();
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/XCOFF.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/XCOFF.cpp
new file mode 100644
index 000000000000..29ccbaea3584
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/XCOFF.cpp
@@ -0,0 +1,34 @@
+//===-- llvm/BinaryFormat/XCOFF.cpp - The XCOFF file format ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/XCOFF.h"
+
+using namespace llvm;
+
+StringRef XCOFF::getMappingClassString(XCOFF::StorageMappingClass SMC) {
+ switch (SMC) {
+ case XCOFF::XMC_DS:
+ return "DS";
+ case XCOFF::XMC_RW:
+ return "RW";
+ case XCOFF::XMC_PR:
+ return "PR";
+ case XCOFF::XMC_TC0:
+ return "TC0";
+ case XCOFF::XMC_BS:
+ return "BS";
+ case XCOFF::XMC_RO:
+ return "RO";
+ case XCOFF::XMC_UA:
+ return "UA";
+ case XCOFF::XMC_TC:
+ return "TC";
+ default:
+ report_fatal_error("Unhandled storage-mapping class.");
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 15eead1de31a..33464412edc5 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -960,6 +960,7 @@ static FunctionSummary::FFlags getDecodedFFlags(uint64_t RawFlags) {
Flags.NoRecurse = (RawFlags >> 2) & 0x1;
Flags.ReturnDoesNotAlias = (RawFlags >> 3) & 0x1;
Flags.NoInline = (RawFlags >> 4) & 0x1;
+ Flags.AlwaysInline = (RawFlags >> 5) & 0x1;
return Flags;
}
@@ -1660,6 +1661,7 @@ Error BitcodeReader::parseAttributeGroupBlock() {
}
}
+ UpgradeFramePointerAttributes(B);
MAttributeGroups[GrpID] = AttributeList::get(Context, Idx, B);
break;
}
@@ -3936,6 +3938,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) {
if (Temp) {
InstructionList.push_back(Temp);
+ assert(CurBB && "No current BB?");
CurBB->getInstList().push_back(Temp);
}
} else {
@@ -4641,10 +4644,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// There is an optional final record for fast-math-flags if this phi has a
// floating-point type.
size_t NumArgs = (Record.size() - 1) / 2;
- if ((Record.size() - 1) % 2 == 1 && !Ty->isFPOrFPVectorTy())
- return error("Invalid record");
-
PHINode *PN = PHINode::Create(Ty, NumArgs);
+ if ((Record.size() - 1) % 2 == 1 && !isa<FPMathOperator>(PN))
+ return error("Invalid record");
InstructionList.push_back(PN);
for (unsigned i = 0; i != NumArgs; i++) {
@@ -4761,7 +4763,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
const DataLayout &DL = TheModule->getDataLayout();
unsigned AS = DL.getAllocaAddrSpace();
- AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align ? Align->value() : 0);
+ AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align);
AI->setUsedWithInAlloca(InAlloca);
AI->setSwiftError(SwiftError);
I = AI;
@@ -5118,6 +5120,19 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
OperandBundles.emplace_back(BundleTags[Record[0]], std::move(Inputs));
continue;
}
+
+ case bitc::FUNC_CODE_INST_FREEZE: { // FREEZE: [opty,opval]
+ unsigned OpNum = 0;
+ Value *Op = nullptr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, &FullTy))
+ return error("Invalid record");
+ if (OpNum != Record.size())
+ return error("Invalid record");
+
+ I = new FreezeInst(Op);
+ InstructionList.push_back(I);
+ break;
+ }
}
// Add instruction to end of current BB. If there is no current BB, reject
@@ -5139,7 +5154,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
}
// Non-void values get registered in the value table for future use.
- if (I && !I->getType()->isVoidTy()) {
+ if (!I->getType()->isVoidTy()) {
if (!FullTy) {
FullTy = I->getType();
assert(
@@ -5764,9 +5779,11 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
}
const uint64_t Version = Record[0];
const bool IsOldProfileFormat = Version == 1;
- if (Version < 1 || Version > 7)
+ if (Version < 1 || Version > ModuleSummaryIndex::BitcodeSummaryVersion)
return error("Invalid summary version " + Twine(Version) +
- ". Version should be in the range [1-7].");
+ ". Version should be in the range [1-" +
+ Twine(ModuleSummaryIndex::BitcodeSummaryVersion) +
+ "].");
Record.clear();
// Keep around the last seen summary to be used when we see an optional
@@ -5817,7 +5834,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
case bitc::FS_FLAGS: { // [flags]
uint64_t Flags = Record[0];
// Scan flags.
- assert(Flags <= 0x1f && "Unexpected bits in flag");
+ assert(Flags <= 0x3f && "Unexpected bits in flag");
// 1 bit: WithGlobalValueDeadStripping flag.
// Set on combined index only.
@@ -5840,6 +5857,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
// Set on combined index only.
if (Flags & 0x10)
TheIndex.setPartiallySplitLTOUnits();
+ // 1 bit: WithAttributePropagation flag.
+ // Set on combined index only.
+ if (Flags & 0x20)
+ TheIndex.setWithAttributePropagation();
break;
}
case bitc::FS_VALUE_GUID: { // [valueid, refguid]
@@ -5905,11 +5926,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
std::move(PendingTypeCheckedLoadConstVCalls));
- PendingTypeTests.clear();
- PendingTypeTestAssumeVCalls.clear();
- PendingTypeCheckedLoadVCalls.clear();
- PendingTypeTestAssumeConstVCalls.clear();
- PendingTypeCheckedLoadConstVCalls.clear();
auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID);
FS->setModulePath(getThisModule()->first());
FS->setOriginalName(VIAndOriginalGUID.second);
@@ -6050,11 +6066,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
std::move(PendingTypeCheckedLoadConstVCalls));
- PendingTypeTests.clear();
- PendingTypeTestAssumeVCalls.clear();
- PendingTypeCheckedLoadVCalls.clear();
- PendingTypeTestAssumeConstVCalls.clear();
- PendingTypeCheckedLoadConstVCalls.clear();
LastSeenSummary = FS.get();
LastSeenGUID = VI.getGUID();
FS->setModulePath(ModuleIdMap[ModuleId]);
@@ -6555,7 +6566,7 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
case bitc::FS_FLAGS: { // [flags]
uint64_t Flags = Record[0];
// Scan flags.
- assert(Flags <= 0x1f && "Unexpected bits in flag");
+ assert(Flags <= 0x3f && "Unexpected bits in flag");
return Flags & 0x8;
}
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 4da51dda8b74..d16c3b0ff59d 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -411,7 +411,7 @@ void PlaceholderQueue::flush(BitcodeReaderMetadataList &MetadataList) {
}
}
-} // anonynous namespace
+} // anonymous namespace
static Error error(const Twine &Message) {
return make_error<StringError>(
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index deb4019ea8ba..dcff7c421fc4 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -24,9 +24,10 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Bitstream/BitCodes.h"
#include "llvm/Bitstream/BitstreamWriter.h"
-#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -1005,6 +1006,7 @@ static uint64_t getEncodedFFlags(FunctionSummary::FFlags Flags) {
RawFlags |= (Flags.NoRecurse << 2);
RawFlags |= (Flags.ReturnDoesNotAlias << 3);
RawFlags |= (Flags.NoInline << 4);
+ RawFlags |= (Flags.AlwaysInline << 5);
return RawFlags;
}
@@ -3033,6 +3035,10 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
pushValue(I.getOperand(0), InstID, Vals); // valist.
Vals.push_back(VE.getTypeID(I.getType())); // restype.
break;
+ case Instruction::Freeze:
+ Code = bitc::FUNC_CODE_INST_FREEZE;
+ pushValueAndType(I.getOperand(0), InstID, Vals);
+ break;
}
Stream.EmitRecord(Code, Vals, AbbrevToUse);
@@ -3720,11 +3726,6 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
NameVals.clear();
}
-// Current version for the summary.
-// This is bumped whenever we introduce changes in the way some record are
-// interpreted, like flags for instance.
-static const uint64_t INDEX_VERSION = 7;
-
/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
@@ -3738,7 +3739,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
: bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID,
4);
- Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
+ Stream.EmitRecord(
+ bitc::FS_VERSION,
+ ArrayRef<uint64_t>{ModuleSummaryIndex::BitcodeSummaryVersion});
// Write the index flags.
uint64_t Flags = 0;
@@ -3885,7 +3888,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
/// Emit the combined summary section into the combined index file.
void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3);
- Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
+ Stream.EmitRecord(
+ bitc::FS_VERSION,
+ ArrayRef<uint64_t>{ModuleSummaryIndex::BitcodeSummaryVersion});
// Write the index flags.
uint64_t Flags = 0;
@@ -3899,6 +3904,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Flags |= 0x8;
if (Index.partiallySplitLTOUnits())
Flags |= 0x10;
+ if (Index.withAttributePropagation())
+ Flags |= 0x20;
Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
for (const auto &GVI : valueIds()) {
@@ -4671,3 +4678,125 @@ void llvm::WriteThinLinkBitcodeToFile(const Module &M, raw_ostream &Out,
Out.write((char *)&Buffer.front(), Buffer.size());
}
+
+static const char *getSectionNameForBitcode(const Triple &T) {
+ switch (T.getObjectFormat()) {
+ case Triple::MachO:
+ return "__LLVM,__bitcode";
+ case Triple::COFF:
+ case Triple::ELF:
+ case Triple::Wasm:
+ case Triple::UnknownObjectFormat:
+ return ".llvmbc";
+ case Triple::XCOFF:
+ llvm_unreachable("XCOFF is not yet implemented");
+ break;
+ }
+ llvm_unreachable("Unimplemented ObjectFormatType");
+}
+
+static const char *getSectionNameForCommandline(const Triple &T) {
+ switch (T.getObjectFormat()) {
+ case Triple::MachO:
+ return "__LLVM,__cmdline";
+ case Triple::COFF:
+ case Triple::ELF:
+ case Triple::Wasm:
+ case Triple::UnknownObjectFormat:
+ return ".llvmcmd";
+ case Triple::XCOFF:
+ llvm_unreachable("XCOFF is not yet implemented");
+ break;
+ }
+ llvm_unreachable("Unimplemented ObjectFormatType");
+}
+
+void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
+ bool EmbedBitcode, bool EmbedMarker,
+ const std::vector<uint8_t> *CmdArgs) {
+ // Save llvm.compiler.used and remove it.
+ SmallVector<Constant *, 2> UsedArray;
+ SmallPtrSet<GlobalValue *, 4> UsedGlobals;
+ Type *UsedElementType = Type::getInt8Ty(M.getContext())->getPointerTo(0);
+ GlobalVariable *Used = collectUsedGlobalVariables(M, UsedGlobals, true);
+ for (auto *GV : UsedGlobals) {
+ if (GV->getName() != "llvm.embedded.module" &&
+ GV->getName() != "llvm.cmdline")
+ UsedArray.push_back(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
+ }
+ if (Used)
+ Used->eraseFromParent();
+
+ // Embed the bitcode for the llvm module.
+ std::string Data;
+ ArrayRef<uint8_t> ModuleData;
+ Triple T(M.getTargetTriple());
+ // Create a constant that contains the bitcode.
+  // When embedding only a marker, ignore the input Buf and use an empty
+  // ArrayRef. It is also legal to create a marker even if Buf is empty.
+ if (EmbedBitcode) {
+ if (!isBitcode((const unsigned char *)Buf.getBufferStart(),
+ (const unsigned char *)Buf.getBufferEnd())) {
+ // If the input is LLVM Assembly, bitcode is produced by serializing
+      // the module. Use-list order needs to be preserved in this case.
+ llvm::raw_string_ostream OS(Data);
+ llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true);
+ ModuleData =
+ ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size());
+ } else
+ // If the input is LLVM bitcode, write the input byte stream directly.
+ ModuleData = ArrayRef<uint8_t>((const uint8_t *)Buf.getBufferStart(),
+ Buf.getBufferSize());
+ }
+ llvm::Constant *ModuleConstant =
+ llvm::ConstantDataArray::get(M.getContext(), ModuleData);
+ llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+ M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage,
+ ModuleConstant);
+ GV->setSection(getSectionNameForBitcode(T));
+ UsedArray.push_back(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
+ if (llvm::GlobalVariable *Old =
+ M.getGlobalVariable("llvm.embedded.module", true)) {
+ assert(Old->hasOneUse() &&
+ "llvm.embedded.module can only be used once in llvm.compiler.used");
+ GV->takeName(Old);
+ Old->eraseFromParent();
+ } else {
+ GV->setName("llvm.embedded.module");
+ }
+
+ // Skip if only bitcode needs to be embedded.
+ if (EmbedMarker) {
+ // Embed command-line options.
+ ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CmdArgs->data()),
+ CmdArgs->size());
+ llvm::Constant *CmdConstant =
+ llvm::ConstantDataArray::get(M.getContext(), CmdData);
+ GV = new llvm::GlobalVariable(M, CmdConstant->getType(), true,
+ llvm::GlobalValue::PrivateLinkage,
+ CmdConstant);
+ GV->setSection(getSectionNameForCommandline(T));
+ UsedArray.push_back(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
+ if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.cmdline", true)) {
+ assert(Old->hasOneUse() &&
+ "llvm.cmdline can only be used once in llvm.compiler.used");
+ GV->takeName(Old);
+ Old->eraseFromParent();
+ } else {
+ GV->setName("llvm.cmdline");
+ }
+ }
+
+ if (UsedArray.empty())
+ return;
+
+ // Recreate llvm.compiler.used.
+ ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size());
+ auto *NewUsed = new GlobalVariable(
+ M, ATy, false, llvm::GlobalValue::AppendingLinkage,
+ llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used");
+ NewUsed->setSection("llvm.metadata");
+}
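A hypothetical call site for the new entry point; flag values are illustrative, and CmdArgs must be non-null whenever EmbedMarker is set, since it is dereferenced above:

    std::vector<uint8_t> CmdArgs = {/* serialized command line */};
    llvm::EmbedBitcodeInModule(M, Buf, /*EmbedBitcode=*/true,
                               /*EmbedMarker=*/true, &CmdArgs);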
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 6796cf8cee54..d884415aafd5 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp b/contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp
index a4a97ced5457..92d7c91a1d35 100644
--- a/contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp
@@ -88,37 +88,6 @@ static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
llvm_unreachable("invalid abbreviation encoding");
}
-static Error skipAbbreviatedField(BitstreamCursor &Cursor,
- const BitCodeAbbrevOp &Op) {
- assert(!Op.isLiteral() && "Not to be used with literals!");
-
- // Decode the value as we are commanded.
- switch (Op.getEncoding()) {
- case BitCodeAbbrevOp::Array:
- case BitCodeAbbrevOp::Blob:
- llvm_unreachable("Should not reach here");
- case BitCodeAbbrevOp::Fixed:
- assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
- if (Expected<unsigned> Res = Cursor.Read((unsigned)Op.getEncodingData()))
- break;
- else
- return Res.takeError();
- case BitCodeAbbrevOp::VBR:
- assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
- if (Expected<uint64_t> Res =
- Cursor.ReadVBR64((unsigned)Op.getEncodingData()))
- break;
- else
- return Res.takeError();
- case BitCodeAbbrevOp::Char6:
- if (Expected<unsigned> Res = Cursor.Read(6))
- break;
- else
- return Res.takeError();
- }
- return ErrorSuccess();
-}
-
/// skipRecord - Read the current record and discard it.
Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
// Skip unabbreviated records by reading past their entries.
@@ -163,9 +132,10 @@ Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
Op.getEncoding() != BitCodeAbbrevOp::Blob) {
- if (Error Err = skipAbbreviatedField(*this, Op))
- return std::move(Err);
- continue;
+ if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
+ continue;
+ else
+ return MaybeField.takeError();
}
if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index 4f24f077d120..1632895fe5fa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -262,7 +262,7 @@ static bool isNoopBitcast(Type *T1, Type *T2,
/// Look through operations that will be free to find the earliest source of
/// this value.
///
-/// @param ValLoc If V has aggegate type, we will be interested in a particular
+/// @param ValLoc If V has aggregate type, we will be interested in a particular
/// scalar component. This records its address; the reverse of this list gives a
/// sequence of indices appropriate for an extractvalue to locate the important
/// value. This value is updated during the function and on exit will indicate
@@ -567,12 +567,16 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
AttributeList::ReturnIndex);
- // NoAlias and NonNull are completely benign as far as calling convention
+  // The following attributes are completely benign as far as the calling convention
// goes, they shouldn't affect whether the call is a tail call.
CallerAttrs.removeAttribute(Attribute::NoAlias);
CalleeAttrs.removeAttribute(Attribute::NoAlias);
CallerAttrs.removeAttribute(Attribute::NonNull);
CalleeAttrs.removeAttribute(Attribute::NonNull);
+ CallerAttrs.removeAttribute(Attribute::Dereferenceable);
+ CalleeAttrs.removeAttribute(Attribute::Dereferenceable);
+ CallerAttrs.removeAttribute(Attribute::DereferenceableOrNull);
+ CalleeAttrs.removeAttribute(Attribute::DereferenceableOrNull);
if (CallerAttrs.contains(Attribute::ZExt)) {
if (!CalleeAttrs.contains(Attribute::ZExt))
@@ -611,6 +615,22 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
return CallerAttrs == CalleeAttrs;
}
+/// Check whether B is a bitcast from one pointer type to another whose
+/// source operand is equal to A.
+static bool isPointerBitcastEqualTo(const Value *A, const Value *B) {
+ assert(A && B && "Expected non-null inputs!");
+
+ auto *BitCastIn = dyn_cast<BitCastInst>(B);
+
+ if (!BitCastIn)
+ return false;
+
+ if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
+ return false;
+
+ return A == BitCastIn->getOperand(0);
+}
+
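The shape this now accepts, sketched as IR in comments (types and names illustrative): the returned pointer reaches the lowered memcpy through a bitcast.

    //   %cast = bitcast i32* %dst to i8*
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %cast, i8* %src, i64 %n, i1 false)
    //   ret i32* %dst
    // Previously only exact equality between the returned value and the
    // first argument qualified; the bitcast case above was rejected.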
bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
const Instruction *I,
const ReturnInst *Ret,
@@ -643,7 +663,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
(IID == Intrinsic::memset &&
TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
- RetVal == Call->getArgOperand(0))
+ (RetVal == Call->getArgOperand(0) ||
+ isPointerBitcastEqualTo(RetVal, Call->getArgOperand(0))))
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 73c53d6c4af5..6f9aa4dd79fd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -31,13 +31,16 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -52,6 +55,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -81,7 +85,6 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCCodePadder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCDwarf.h"
@@ -139,18 +142,13 @@ static const char *const DbgTimerDescription = "Debug Info Emission";
static const char *const EHTimerName = "write_exception";
static const char *const EHTimerDescription = "DWARF Exception Writer";
static const char *const CFGuardName = "Control Flow Guard";
-static const char *const CFGuardDescription = "Control Flow Guard Tables";
+static const char *const CFGuardDescription = "Control Flow Guard";
static const char *const CodeViewLineTablesGroupName = "linetables";
static const char *const CodeViewLineTablesGroupDescription =
"CodeView Line Tables";
STATISTIC(EmittedInsts, "Number of machine instrs printed");
-static cl::opt<bool> EnableRemarksSection(
- "remarks-section",
- cl::desc("Emit a section containing remark diagnostics metadata"),
- cl::init(false));
-
char AsmPrinter::ID = 0;
using gcp_map_type = DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>>;
@@ -253,6 +251,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -381,12 +381,12 @@ bool AsmPrinter::doInitialization(Module &M) {
EHTimerDescription, DWARFGroupName,
DWARFGroupDescription);
+  // Emit tables for any value of the cfguard flag (i.e. cfguard=1 or cfguard=2).
if (mdconst::extract_or_null<ConstantInt>(
- MMI->getModule()->getModuleFlag("cfguardtable")))
+ MMI->getModule()->getModuleFlag("cfguard")))
Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName,
CFGuardDescription, DWARFGroupName,
DWARFGroupDescription);
-
return false;
}
@@ -879,6 +879,10 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << MI->getOperand(0).getImm();
} else if (MI->getOperand(0).isCImm()) {
MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
+ } else if (MI->getOperand(0).isTargetIndex()) {
+ auto Op = MI->getOperand(0);
+ OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")";
+ return true;
} else {
unsigned Reg;
if (MI->getOperand(0).isReg()) {
@@ -940,7 +944,7 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const {
MF->getFunction().needsUnwindTableEntry())
return CFI_M_EH;
- if (MMI->hasDebugInfo())
+ if (MMI->hasDebugInfo() || MF->getTarget().Options.ForceDwarfFrameSection)
return CFI_M_Debug;
return CFI_M_None;
@@ -1065,13 +1069,9 @@ void AsmPrinter::EmitFunctionBody() {
++NumInstsInFunction;
}
- // If there is a pre-instruction symbol, emit a label for it here. If the
- // instruction was duplicated and the label has already been emitted,
- // don't re-emit the same label.
- // FIXME: Consider strengthening that to an assertion.
+ // If there is a pre-instruction symbol, emit a label for it here.
if (MCSymbol *S = MI.getPreInstrSymbol())
- if (S->isUndefined())
- OutStreamer->EmitLabel(S);
+ OutStreamer->EmitLabel(S);
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
@@ -1124,13 +1124,9 @@ void AsmPrinter::EmitFunctionBody() {
break;
}
- // If there is a post-instruction symbol, emit a label for it here. If
- // the instruction was duplicated and the label has already been emitted,
- // don't re-emit the same label.
- // FIXME: Consider strengthening that to an assertion.
+ // If there is a post-instruction symbol, emit a label for it here.
if (MCSymbol *S = MI.getPostInstrSymbol())
- if (S->isUndefined())
- OutStreamer->EmitLabel(S);
+ OutStreamer->EmitLabel(S);
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
@@ -1226,6 +1222,8 @@ void AsmPrinter::EmitFunctionBody() {
// Emit section containing stack size metadata.
emitStackSizeSection(*MF);
+ emitPatchableFunctionEntries();
+
if (isVerbose())
OutStreamer->GetCommentOS() << "-- End function\n";
@@ -1365,14 +1363,14 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
}
}
-void AsmPrinter::emitRemarksSection(Module &M) {
- RemarkStreamer *RS = M.getContext().getRemarkStreamer();
- if (!RS)
+void AsmPrinter::emitRemarksSection(RemarkStreamer &RS) {
+ if (!RS.needsSection())
return;
- remarks::RemarkSerializer &RemarkSerializer = RS->getSerializer();
+
+ remarks::RemarkSerializer &RemarkSerializer = RS.getSerializer();
Optional<SmallString<128>> Filename;
- if (Optional<StringRef> FilenameRef = RS->getFilename()) {
+ if (Optional<StringRef> FilenameRef = RS.getFilename()) {
Filename = *FilenameRef;
sys::fs::make_absolute(*Filename);
assert(!Filename->empty() && "The filename can't be empty.");
@@ -1385,7 +1383,7 @@ void AsmPrinter::emitRemarksSection(Module &M) {
: RemarkSerializer.metaSerializer(OS);
MetaSerializer->emit();
- // Switch to the right section: .remarks/__remarks.
+ // Switch to the remarks section.
MCSection *RemarksSection =
OutContext.getObjectFileInfo()->getRemarksSection();
OutStreamer->SwitchSection(RemarksSection);
@@ -1427,8 +1425,8 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit the remarks section contents.
// FIXME: Figure out the safest time to emit this section. It should
// not come after debug info.
- if (EnableRemarksSection)
- emitRemarksSection(M);
+ if (RemarkStreamer *RS = M.getContext().getRemarkStreamer())
+ emitRemarksSection(*RS);
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
@@ -1503,8 +1501,6 @@ bool AsmPrinter::doFinalization(Module &M) {
}
}
- OutStreamer->AddBlankLine();
-
// Print aliases in topological order, that is, for each alias a = b,
// b must be printed before a.
// This is because on some targets (e.g. PowerPC) the linker expects aliases in
@@ -1666,6 +1662,7 @@ MCSymbol *AsmPrinter::getCurExceptionSym() {
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
+ const Function &F = MF.getFunction();
// Get the function symbol.
if (MAI->needsFunctionDescriptors()) {
@@ -1678,7 +1675,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnSym =
OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName());
- const Function &F = MF.getFunction();
MCSectionXCOFF *FnEntryPointSec =
cast<MCSectionXCOFF>(getObjFileLowering().SectionForGlobal(&F, TM));
// Set the containing csect.
@@ -1691,7 +1687,8 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnBegin = nullptr;
CurExceptionSym = nullptr;
bool NeedsLocalForSize = MAI->needsLocalForSize();
- if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize ||
+ if (F.hasFnAttribute("patchable-function-entry") ||
+ needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize ||
MF.getTarget().Options.EmitStackSizeSection) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
@@ -1699,6 +1696,13 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
}
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ MBFI = (PSI && PSI->hasProfileSummary()) ?
+ // ORE conditionally computes MBFI. If available, use it, otherwise
+ // request it.
+ (ORE->getBFI() ? ORE->getBFI() :
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()) :
+ nullptr;
}
namespace {
@@ -1769,6 +1773,11 @@ void AsmPrinter::EmitConstantPool() {
if (!Sym->isUndefined())
continue;
+ if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
+ cast<MCSymbolXCOFF>(Sym)->setContainingCsect(
+ cast<MCSectionXCOFF>(CPSections[i].S));
+ }
+
if (CurSection != CPSections[i].S) {
OutStreamer->SwitchSection(CPSections[i].S);
EmitAlignment(Align(CPSections[i].Alignment));
@@ -1858,10 +1867,16 @@ void AsmPrinter::EmitJumpTableInfo() {
// second label is actually referenced by the code.
if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix())
// FIXME: This doesn't have to have any specific name, just any randomly
- // named and numbered 'l' label would work. Simplify GetJTISymbol.
+      // named and numbered local label starting with 'l' would work. Simplify
+ // GetJTISymbol.
OutStreamer->EmitLabel(GetJTISymbol(JTI, true));
- OutStreamer->EmitLabel(GetJTISymbol(JTI));
+ MCSymbol* JTISymbol = GetJTISymbol(JTI);
+ if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
+ cast<MCSymbolXCOFF>(JTISymbol)->setContainingCsect(
+ cast<MCSectionXCOFF>(TLOF.getSectionForJumpTable(F, TM)));
+ }
+ OutStreamer->EmitLabel(JTISymbol);
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
@@ -2914,19 +2929,6 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
}
-void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
- MCCodePaddingContext &Context) const {
- assert(MF != nullptr && "Machine function must be valid");
- Context.IsPaddingActive = !MF->hasInlineAsm() &&
- !MF->getFunction().hasOptSize() &&
- TM.getOptLevel() != CodeGenOpt::None;
- Context.IsBasicBlockReachableViaFallthrough =
- std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=
- MBB.pred_end();
- Context.IsBasicBlockReachableViaBranch =
- MBB.pred_size() > 0 && !isBlockOnlyReachableByFallthrough(&MBB);
-}
-
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
@@ -2943,9 +2945,6 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
const Align Alignment = MBB.getAlignment();
if (Alignment != Align::None())
EmitAlignment(Alignment);
- MCCodePaddingContext Context;
- setupCodePaddingContext(MBB, Context);
- OutStreamer->EmitCodePaddingBasicBlockStart(Context);
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
@@ -2993,11 +2992,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
}
}
-void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {
- MCCodePaddingContext Context;
- setupCodePaddingContext(MBB, Context);
- OutStreamer->EmitCodePaddingBasicBlockEnd(Context);
-}
+void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {}
void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
bool IsDefinition) const {
@@ -3202,6 +3197,41 @@ void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
AlwaysInstrument, &F, Version});
}
+void AsmPrinter::emitPatchableFunctionEntries() {
+ const Function &F = MF->getFunction();
+ if (!F.hasFnAttribute("patchable-function-entry"))
+ return;
+ const unsigned PointerSize = getPointerSize();
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC;
+
+ // As of binutils 2.33, GNU as does not support section flag "o" or linkage
+ // field "unique". Use SHF_LINK_ORDER if we are using the integrated
+ // assembler.
+ if (MAI->useIntegratedAssembler()) {
+ Flags |= ELF::SHF_LINK_ORDER;
+ std::string GroupName;
+ if (F.hasComdat()) {
+ Flags |= ELF::SHF_GROUP;
+ GroupName = F.getComdat()->getName();
+ }
+ MCSection *Section = getObjFileLowering().SectionForGlobal(&F, TM);
+ unsigned UniqueID =
+ PatchableFunctionEntryID
+ .try_emplace(Section, PatchableFunctionEntryID.size())
+ .first->second;
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0,
+ GroupName, UniqueID, cast<MCSymbolELF>(CurrentFnSym)));
+ } else {
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ "__patchable_function_entries", ELF::SHT_PROGBITS, Flags));
+ }
+ EmitAlignment(Align(PointerSize));
+ OutStreamer->EmitSymbolValue(CurrentFnBegin, PointerSize);
+ }
+}
+
uint16_t AsmPrinter::getDwarfVersion() const {
return OutStreamer->getContext().getDwarfVersion();
}
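
The try_emplace pattern above is what keeps the unique IDs stable: the value inserted is the map's size before the insertion, so the first section seen gets ID 0, each new section gets the next dense ID, and a repeated section keeps the ID it already has. A minimal standalone sketch of that assignment, with std::string keys standing in for MCSection pointers (names here are hypothetical, not LLVM's):

#include <cstdio>
#include <map>
#include <string>

// Stand-in for the PatchableFunctionEntryID map: the value inserted is the
// map's size *before* insertion, so IDs are dense and first-come-first-served.
static std::map<std::string, unsigned> PatchableFunctionEntryID;

static unsigned getUniqueID(const std::string &Section) {
  // try_emplace only inserts if the key is absent; a repeated section keeps
  // the ID it was given the first time.
  return PatchableFunctionEntryID
      .try_emplace(Section, PatchableFunctionEntryID.size())
      .first->second;
}

int main() {
  std::printf("%u\n", getUniqueID(".text.foo")); // 0
  std::printf("%u\n", getUniqueID(".text.bar")); // 1
  std::printf("%u\n", getUniqueID(".text.foo")); // 0 again
}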
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 420df26a2b8b..c631cc5360b8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -207,11 +207,17 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
}
if (Done) break;
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
// If we have ${:foo}, then this is not a real operand reference, it is a
// "magic" string reference, just like in .td files. Arrange to call
// PrintSpecial.
- if (LastEmitted[0] == '{' && LastEmitted[1] == ':') {
- LastEmitted += 2;
+ if (HasCurlyBraces && LastEmitted[0] == ':') {
+ ++LastEmitted;
const char *StrStart = LastEmitted;
const char *StrEnd = strchr(StrStart, '}');
if (!StrEnd)
@@ -238,6 +244,27 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
report_fatal_error("Invalid $ operand number in inline asm string: '" +
Twine(AsmStr) + "'");
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+ // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0)
+ report_fatal_error("Bad ${:} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}')
+ report_fatal_error("Bad ${} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ ++LastEmitted; // Consume '}' character.
+ }
+
// Okay, we finally have a value number. Ask the target to print this
// operand!
unsigned OpNo = InlineAsm::MIOp_FirstOperand;
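
Taken together, the hunks above accept $N, ${N}, ${:special}, and the new ${N:m} form with a single modifier character. A self-contained sketch of that grammar as a standalone parser (a hypothetical helper, not the LLVM code), returning false where EmitMSInlineAsmStr calls report_fatal_error:

#include <cassert>
#include <cctype>

// Hypothetical sketch of the operand-reference grammar handled above: an
// optional '{', a decimal operand number, an optional ':' plus one modifier
// character, and a matching '}'.
static bool parseOperandRef(const char *&P, unsigned &OpNo, char &Modifier) {
  bool Braced = false;
  if (*P == '{') { // ${variable}
    ++P;
    Braced = true;
  }
  if (!isdigit((unsigned char)*P))
    return false;
  OpNo = 0;
  while (isdigit((unsigned char)*P))
    OpNo = OpNo * 10 + unsigned(*P++ - '0');
  Modifier = 0;
  if (Braced) {
    if (*P == ':') { // ${0:u} style modifier
      ++P;
      if (*P == 0 || *P == '}')
        return false;
      Modifier = *P++;
    }
    if (*P != '}') // the brace must close
      return false;
    ++P;
  }
  return true;
}

int main() {
  const char *S = "{0:u}"; // as seen after the '$' is consumed
  unsigned N;
  char M;
  assert(parseOperandRef(S, N, M) && N == 0 && M == 'u');
}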
@@ -262,9 +289,11 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
++OpNo; // Skip over the ID number.
if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(MI, OpNo, /*Modifier*/ nullptr, OS);
+ Error = AP->PrintAsmMemoryOperand(
+ MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
} else {
- Error = AP->PrintAsmOperand(MI, OpNo, /*Modifier*/ nullptr, OS);
+ Error = AP->PrintAsmOperand(MI, OpNo,
+ Modifier[0] ? Modifier : nullptr, OS);
}
}
if (Error) {
@@ -427,26 +456,23 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// FIXME: Shouldn't arch-independent output template handling go into
// PrintAsmOperand?
- if (Modifier[0] == 'l') { // Labels are target independent.
- if (MI->getOperand(OpNo).isBlockAddress()) {
- const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
- MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
- Sym->print(OS, AP->MAI);
- MMI->getContext().registerInlineAsmLabel(Sym);
- } else if (MI->getOperand(OpNo).isMBB()) {
- const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
- Sym->print(OS, AP->MAI);
- } else {
- Error = true;
- }
+ // Labels are target independent.
+ if (MI->getOperand(OpNo).isBlockAddress()) {
+ const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
+ MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
+ Sym->print(OS, AP->MAI);
+ MMI->getContext().registerInlineAsmLabel(Sym);
+ } else if (MI->getOperand(OpNo).isMBB()) {
+ const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
+ Sym->print(OS, AP->MAI);
+ } else if (Modifier[0] == 'l') {
+ Error = true;
+ } else if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(
+ MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
} else {
- if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(
- MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
- } else {
- Error = AP->PrintAsmOperand(MI, OpNo,
- Modifier[0] ? Modifier : nullptr, OS);
- }
+ Error = AP->PrintAsmOperand(MI, OpNo,
+ Modifier[0] ? Modifier : nullptr, OS);
}
}
if (Error) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index c6457f3626d1..62ad356e7f8f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1100,14 +1100,8 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
}
for (auto HeapAllocSite : FI.HeapAllocSites) {
- MCSymbol *BeginLabel = std::get<0>(HeapAllocSite);
- MCSymbol *EndLabel = std::get<1>(HeapAllocSite);
-
- // The labels might not be defined if the instruction was replaced
- // somewhere in the codegen pipeline.
- if (!BeginLabel->isDefined() || !EndLabel->isDefined())
- continue;
-
+ const MCSymbol *BeginLabel = std::get<0>(HeapAllocSite);
+ const MCSymbol *EndLabel = std::get<1>(HeapAllocSite);
const DIType *DITy = std::get<2>(HeapAllocSite);
MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE);
OS.AddComment("Call site offset");
@@ -1427,6 +1421,16 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
maybeRecordLocation(FnStartDL, MF);
}
+
+ // Find heap alloc sites and emit labels around them.
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (MI.getHeapAllocMarker()) {
+ requestLabelBeforeInsn(&MI);
+ requestLabelAfterInsn(&MI);
+ }
+ }
+ }
}
static bool shouldEmitUdt(const DIType *T) {
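
The labels themselves appear in two phases: beginFunctionImpl (above) requests a label before and after every instruction carrying a heap-alloc marker, and endFunctionImpl (further down) reads them back with getLabelBeforeInsn/getLabelAfterInsn once function-body emission has created them. A toy sketch of that request/lookup protocol, with hypothetical names and integer instruction ids:

#include <cassert>
#include <map>
#include <set>
#include <string>

// Hypothetical sketch: phase one records which instructions need labels, the
// labels appear while the body is emitted, and lookups only succeed after.
struct LabelTable {
  std::set<int> Requested;           // instruction ids needing a label
  std::map<int, std::string> Labels; // filled in during body emission

  void requestLabelBeforeInsn(int InsnId) { Requested.insert(InsnId); }

  void materialize() { // stands in for EmitFunctionBody planting MCSymbols
    for (int Id : Requested)
      Labels[Id] = ".Ltmp" + std::to_string(Id);
  }

  const std::string *getLabelBeforeInsn(int InsnId) const {
    auto It = Labels.find(InsnId);
    return It == Labels.end() ? nullptr : &It->second;
  }
};

int main() {
  LabelTable T;
  T.requestLabelBeforeInsn(42); // beginFunctionImpl phase
  T.materialize();              // function body emission
  assert(T.getLabelBeforeInsn(42) && *T.getLabelBeforeInsn(42) == ".Ltmp42");
}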
@@ -2850,8 +2854,18 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
return;
}
+ // Find heap alloc sites and add to list.
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (MDNode *MD = MI.getHeapAllocMarker()) {
+ CurFn->HeapAllocSites.push_back(std::make_tuple(getLabelBeforeInsn(&MI),
+ getLabelAfterInsn(&MI),
+ dyn_cast<DIType>(MD)));
+ }
+ }
+ }
+
CurFn->Annotations = MF->getCodeViewAnnotations();
- CurFn->HeapAllocSites = MF->getCodeViewHeapAllocSites();
CurFn->End = Asm->getFunctionEnd();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 7ffd77926cf7..b56b9047e1a9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -148,7 +148,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LexicalBlock *, 1> ChildBlocks;
std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
- std::vector<std::tuple<MCSymbol *, MCSymbol *, const DIType *>>
+ std::vector<std::tuple<const MCSymbol *, const MCSymbol *, const DIType *>>
HeapAllocSites;
const MCSymbol *Begin = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index f4134da48caa..84b86a71fa5f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -319,8 +319,10 @@ DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag)
{
Die.Owner = this;
assert((UnitTag == dwarf::DW_TAG_compile_unit ||
+ UnitTag == dwarf::DW_TAG_skeleton_unit ||
UnitTag == dwarf::DW_TAG_type_unit ||
- UnitTag == dwarf::DW_TAG_partial_unit) && "expected a unit TAG");
+ UnitTag == dwarf::DW_TAG_partial_unit) &&
+ "expected a unit TAG");
}
void DIEValue::EmitValue(const AsmPrinter *AP) const {
@@ -798,6 +800,8 @@ void DIEBlock::print(raw_ostream &O) const {
//===----------------------------------------------------------------------===//
unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_loclistx)
+ return getULEB128Size(Index);
if (Form == dwarf::DW_FORM_data4)
return 4;
if (Form == dwarf::DW_FORM_sec_offset)
@@ -808,6 +812,10 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
/// EmitValue - Emit label value.
///
void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_loclistx) {
+ AP->EmitULEB128(Index);
+ return;
+ }
DwarfDebug *DD = AP->getDwarfDebug();
MCSymbol *Label = DD->getDebugLocs().getList(Index).Label;
AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf());
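
DW_FORM_loclistx stores the location-list index as a ULEB128, which is why SizeOf now returns getULEB128Size(Index) rather than a fixed 4-byte width. A minimal sketch of the encoding itself (seven payload bits per byte, high bit set while more bytes follow):

#include <cassert>
#include <cstdint>
#include <vector>

// ULEB128 sketch: the variable-length encoding behind DW_FORM_loclistx.
// Each byte carries 7 payload bits; the high bit marks continuation.
static std::vector<uint8_t> encodeULEB128(uint64_t V) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (V != 0);
  return Out;
}

int main() {
  assert(encodeULEB128(0).size() == 1);   // a single 0x00 byte
  assert(encodeULEB128(127).size() == 1); // still fits in 7 bits
  assert(encodeULEB128(128).size() == 2); // 0x80 0x01
}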
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 7f9d6c618ad3..170fc8b6d49f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -262,7 +262,9 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
DbgLabels.addInstr(L, MI);
}
- if (MI.isDebugInstr())
+ // Meta instructions have no output and do not change any values, so they
+ // can be safely ignored.
+ if (MI.isMetaInstruction())
continue;
// Not a DBG_VALUE instruction. It may clobber registers which describe
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 17e39b3d3268..36278f2e9e2d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -20,13 +20,33 @@
namespace llvm {
class AsmPrinter;
+/// This struct describes a target-specific location.
+struct TargetIndexLocation {
+ int Index;
+ int Offset;
+
+ TargetIndexLocation() = default;
+ TargetIndexLocation(unsigned Idx, int64_t Offset)
+ : Index(Idx), Offset(Offset) {}
+
+ bool operator==(const TargetIndexLocation &Other) const {
+ return Index == Other.Index && Offset == Other.Offset;
+ }
+};
+
/// A single location or constant.
class DbgValueLoc {
/// Any complex address location expression for this DbgValueLoc.
const DIExpression *Expression;
/// Type of entry that this represents.
- enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
+ enum EntryType {
+ E_Location,
+ E_Integer,
+ E_ConstantFP,
+ E_ConstantInt,
+ E_TargetIndexLocation
+ };
enum EntryType EntryKind;
/// Either a constant,
@@ -36,8 +56,12 @@ class DbgValueLoc {
const ConstantInt *CIP;
} Constant;
- /// Or a location in the machine frame.
- MachineLocation Loc;
+ union {
+ /// Or a location in the machine frame.
+ MachineLocation Loc;
+ /// Or a target-specific location.
+ TargetIndexLocation TIL;
+ };
public:
DbgValueLoc(const DIExpression *Expr, int64_t i)
@@ -56,8 +80,13 @@ public:
: Expression(Expr), EntryKind(E_Location), Loc(Loc) {
assert(cast<DIExpression>(Expr)->isValid());
}
+ DbgValueLoc(const DIExpression *Expr, TargetIndexLocation Loc)
+ : Expression(Expr), EntryKind(E_TargetIndexLocation), TIL(Loc) {}
bool isLocation() const { return EntryKind == E_Location; }
+ bool isTargetIndexLocation() const {
+ return EntryKind == E_TargetIndexLocation;
+ }
bool isInt() const { return EntryKind == E_Integer; }
bool isConstantFP() const { return EntryKind == E_ConstantFP; }
bool isConstantInt() const { return EntryKind == E_ConstantInt; }
@@ -65,6 +94,7 @@ public:
const ConstantFP *getConstantFP() const { return Constant.CFP; }
const ConstantInt *getConstantInt() const { return Constant.CIP; }
MachineLocation getLoc() const { return Loc; }
+ TargetIndexLocation getTargetIndexLocation() const { return TIL; }
bool isFragment() const { return getExpression()->isFragment(); }
bool isEntryVal() const { return getExpression()->isEntryValue(); }
const DIExpression *getExpression() const { return Expression; }
@@ -162,6 +192,8 @@ inline bool operator==(const DbgValueLoc &A,
switch (A.EntryKind) {
case DbgValueLoc::E_Location:
return A.Loc == B.Loc;
+ case DbgValueLoc::E_TargetIndexLocation:
+ return A.TIL == B.TIL;
case DbgValueLoc::E_Integer:
return A.Constant.Int == B.Constant.Int;
case DbgValueLoc::E_ConstantFP:
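
DbgValueLoc now keeps MachineLocation and TargetIndexLocation in an anonymous union discriminated by EntryKind, so equality has to dispatch on the kind before reading either member. The same pattern in a stripped-down, self-contained form (hypothetical names, not the LLVM types):

#include <cassert>

// One discriminant says which union member is live; comparisons must
// dispatch on it before touching the union.
struct TIL {
  int Index;
  int Offset;
};

struct Loc {
  enum Kind { Register, TargetIndex } K;
  union {
    unsigned Reg; // live when K == Register
    TIL TI;       // live when K == TargetIndex
  };
  static Loc makeReg(unsigned R) {
    Loc L;
    L.K = Register;
    L.Reg = R;
    return L;
  }
  static Loc makeTI(int I, int O) {
    Loc L;
    L.K = TargetIndex;
    L.TI = {I, O};
    return L;
  }
};

static bool operator==(const Loc &A, const Loc &B) {
  if (A.K != B.K)
    return false;
  switch (A.K) {
  case Loc::Register:
    return A.Reg == B.Reg;
  case Loc::TargetIndex:
    return A.TI.Index == B.TI.Index && A.TI.Offset == B.TI.Offset;
  }
  return false; // unreachable
}

int main() {
  assert(Loc::makeTI(1, 8) == Loc::makeTI(1, 8));
  assert(!(Loc::makeReg(5) == Loc::makeTI(1, 8)));
}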
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
index f483d532ff07..8c6109880afc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
@@ -42,4 +42,6 @@ DebugLocStream::ListBuilder::~ListBuilder() {
return;
V.initializeDbgValue(&MI);
V.setDebugLocListIndex(ListIndex);
+ if (TagOffset)
+ V.setDebugLocListTagOffset(*TagOffset);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 0db86b09d19a..10019a4720e6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -159,11 +159,17 @@ class DebugLocStream::ListBuilder {
DbgVariable &V;
const MachineInstr &MI;
size_t ListIndex;
+ Optional<uint8_t> TagOffset;
public:
ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm,
DbgVariable &V, const MachineInstr &MI)
- : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)) {}
+ : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)),
+ TagOffset(None) {}
+
+ void setTagOffset(uint8_t TO) {
+ TagOffset = TO;
+ }
/// Finalize the list.
///
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 207a7284dafa..facbf22946e4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -133,6 +134,8 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
if (!hasEmittedCFISections) {
if (Asm->needsOnlyDebugCFIMoves())
Asm->OutStreamer->EmitCFISections(false, true);
+ else if (Asm->TM.Options.ForceDwarfFrameSection)
+ Asm->OutStreamer->EmitCFISections(true, true);
hasEmittedCFISections = true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index a61c98ec1c18..38011102c7b3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -52,10 +52,23 @@
using namespace llvm;
+static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) {
+
+ // According to DWARF Debugging Information Format Version 5,
+ // 3.1.2 Skeleton Compilation Unit Entries:
+ // "When generating a split DWARF object file (see Section 7.3.2
+ // on page 187), the compilation unit in the .debug_info section
+ // is a "skeleton" compilation unit with the tag DW_TAG_skeleton_unit"
+ if (DW->getDwarfVersion() >= 5 && Kind == UnitKind::Skeleton)
+ return dwarf::DW_TAG_skeleton_unit;
+
+ return dwarf::DW_TAG_compile_unit;
+}
+
DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW,
- DwarfFile *DWU)
- : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID) {
+ DwarfFile *DWU, UnitKind Kind)
+ : DwarfUnit(GetCompileUnitType(Kind, DW), Node, A, DW, DWU), UniqueID(UID) {
insertDIE(Node, &getUnitDie());
MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin");
}
@@ -65,10 +78,6 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label) {
// Don't use the address pool in non-fission or in the skeleton unit itself.
- // FIXME: Once GDB supports this, it's probably worthwhile using the address
- // pool from the skeleton - maybe even in non-fission (possibly fewer
- // relocations by sharing them in the pool, but we have other ideas about how
- // to reduce the number of relocations as well/instead).
if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5)
return addLocalLabelAddress(Die, Attribute, Label);
@@ -490,10 +499,10 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
const MCSymbol *RangeSectionSym =
TLOF.getDwarfRangesSection()->getBeginSymbol();
if (isDwoUnit())
- addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.Label,
RangeSectionSym);
else
- addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.Label,
RangeSectionSym);
}
}
@@ -602,6 +611,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
unsigned Offset = DV.getDebugLocListIndex();
if (Offset != ~0U) {
addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
+ auto TagOffset = DV.getDebugLocListTagOffset();
+ if (TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *TagOffset);
return VariableDie;
}
@@ -619,6 +632,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
DwarfExpr.addUnsignedConstant(DVal->getInt());
DwarfExpr.addExpression(Expr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ if (DwarfExpr.TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset,
+ dwarf::DW_FORM_data1, *DwarfExpr.TagOffset);
+
} else
addConstantValue(*VariableDie, DVal->getInt(), DV.getType());
} else if (DVal->isConstantFP()) {
@@ -951,8 +968,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(
addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
MachineLocation(CallReg));
} else {
- DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
- assert(CalleeDIE && "Could not create DIE for call site entry origin");
+ DIE *CalleeDIE = getDIE(CalleeSP);
+ assert(CalleeDIE && "Could not find DIE for call site entry origin");
addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
*CalleeDIE);
}
@@ -1185,6 +1202,10 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
// Now attach the location information to the DIE.
addBlock(Die, Attribute, DwarfExpr.finalize());
+
+ if (DwarfExpr.TagOffset)
+ addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
}
/// Start with the address based on the location provided, and generate the
@@ -1215,13 +1236,20 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
// Now attach the location information to the DIE.
addBlock(Die, Attribute, DwarfExpr.finalize());
+
+ if (DwarfExpr.TagOffset)
+ addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
}
/// Add a Dwarf loclistptr attribute data and value.
void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
unsigned Index) {
- dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4;
+ dwarf::Form Form = dwarf::DW_FORM_data4;
+ if (DD->getDwarfVersion() == 4)
+ Form = dwarf::DW_FORM_sec_offset;
+ if (DD->getDwarfVersion() >= 5)
+ Form = dwarf::DW_FORM_loclistx;
Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 1b7ea2673ac0..8491d078ed89 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -40,6 +40,8 @@ class MCExpr;
class MCSymbol;
class MDNode;
+enum class UnitKind { Skeleton, Full };
+
class DwarfCompileUnit final : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
unsigned UniqueID;
@@ -104,7 +106,8 @@ class DwarfCompileUnit final : public DwarfUnit {
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
- DwarfDebug *DW, DwarfFile *DWU);
+ DwarfDebug *DW, DwarfFile *DWU,
+ UnitKind Kind = UnitKind::Full);
bool hasRangeLists() const { return HasRangeLists; }
unsigned getUniqueID() const { return UniqueID; }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index c505e77e5acd..fa6800de7955 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -241,6 +241,11 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
MachineLocation MLoc(RegOp.getReg(), Op1.isImm());
return DbgValueLoc(Expr, MLoc);
}
+ if (MI->getOperand(0).isTargetIndex()) {
+ auto Op = MI->getOperand(0);
+ return DbgValueLoc(Expr,
+ TargetIndexLocation(Op.getIndex(), Op.getOffset()));
+ }
if (MI->getOperand(0).isImm())
return DbgValueLoc(Expr, MI->getOperand(0).getImm());
if (MI->getOperand(0).isFPImm())
@@ -535,6 +540,14 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
+DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) {
+ DICompileUnit *Unit = SP->getUnit();
+ assert(SP->isDefinition() && "Subprogram not a definition");
+ assert(Unit && "Subprogram definition without parent unit");
+ auto &CU = getOrCreateDwarfCompileUnit(Unit);
+ return *CU.getOrCreateSubprogramDIE(SP);
+}
+
/// Try to interpret values loaded into registers that forward parameters
/// for \p CallMI. Store parameters with interpreted value into \p Params.
static void collectCallSiteParameters(const MachineInstr *CallMI,
@@ -595,7 +608,6 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
Implicit.push_back(FwdReg);
else
Explicit.push_back(FwdReg);
- break;
}
}
}
@@ -615,8 +627,12 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
++NumCSParams;
};
- // Search for a loading value in forwaring registers.
+ // Search for values loaded into the forwarding registers.
for (; I != MBB->rend(); ++I) {
+ // Skip bundle headers.
+ if (I->isBundle())
+ continue;
+
// If the next instruction is a call we can not interpret parameter's
// forwarding registers or we finished the interpretation of all parameters.
if (I->isCall())
@@ -636,32 +652,33 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
for (auto Reg : concat<unsigned>(ExplicitFwdRegDefs, ImplicitFwdRegDefs))
ForwardedRegWorklist.erase(Reg);
- // The describeLoadedValue() hook currently does not have any information
- // about which register it should describe in case of multiple defines, so
- // for now we only handle instructions where a forwarded register is (at
- // least partially) defined by the instruction's single explicit define.
- if (I->getNumExplicitDefs() != 1 || ExplicitFwdRegDefs.empty())
- continue;
- unsigned Reg = ExplicitFwdRegDefs[0];
-
- if (auto ParamValue = TII->describeLoadedValue(*I)) {
- if (ParamValue->first.isImm()) {
- int64_t Val = ParamValue->first.getImm();
- DbgValueLoc DbgLocVal(ParamValue->second, Val);
- finishCallSiteParam(DbgLocVal, Reg);
- } else if (ParamValue->first.isReg()) {
- Register RegLoc = ParamValue->first.getReg();
- unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
- Register FP = TRI->getFrameRegister(*MF);
- bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
- if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
- DbgValueLoc DbgLocVal(ParamValue->second,
- MachineLocation(RegLoc,
- /*IsIndirect=*/IsSPorFP));
- finishCallSiteParam(DbgLocVal, Reg);
- } else if (ShouldTryEmitEntryVals) {
- ForwardedRegWorklist.insert(RegLoc);
- RegsForEntryValues[RegLoc] = Reg;
+ for (auto ParamFwdReg : ExplicitFwdRegDefs) {
+ if (auto ParamValue = TII->describeLoadedValue(*I, ParamFwdReg)) {
+ if (ParamValue->first.isImm()) {
+ int64_t Val = ParamValue->first.getImm();
+ DbgValueLoc DbgLocVal(ParamValue->second, Val);
+ finishCallSiteParam(DbgLocVal, ParamFwdReg);
+ } else if (ParamValue->first.isReg()) {
+ Register RegLoc = ParamValue->first.getReg();
+ // TODO: For now, there is no use in describing the value loaded into a
+ // register that is also a source register (e.g. $r0 = add $r0, x).
+ if (ParamFwdReg == RegLoc)
+ continue;
+
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register FP = TRI->getFrameRegister(*MF);
+ bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
+ if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
+ DbgValueLoc DbgLocVal(ParamValue->second,
+ MachineLocation(RegLoc,
+ /*IsIndirect=*/IsSPorFP));
+ finishCallSiteParam(DbgLocVal, ParamFwdReg);
+ // TODO: Add support for entry value plus an expression.
+ } else if (ShouldTryEmitEntryVals &&
+ ParamValue->second->getNumElements() == 0) {
+ ForwardedRegWorklist.insert(RegLoc);
+ RegsForEntryValues[RegLoc] = ParamFwdReg;
+ }
}
}
}
@@ -707,6 +724,12 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
// Emit call site entries for each call or tail call in the function.
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB.instrs()) {
+ // Bundles with a call in them will pass the isCall() test below but do
+ // not have callee operand information, so skip them here. The iterator
+ // will eventually reach the call MI.
+ if (MI.isBundle())
+ continue;
+
// Skip instructions which aren't calls. Both calls and tail-calling jump
// instructions (e.g TAILJMPd64) are classified correctly here.
if (!MI.isCall())
@@ -735,25 +758,45 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
if (!CalleeDecl || !CalleeDecl->getSubprogram())
continue;
CalleeSP = CalleeDecl->getSubprogram();
+
+ if (CalleeSP->isDefinition()) {
+ // Ensure that a subprogram DIE for the callee is available in the
+ // appropriate CU.
+ constructSubprogramDefinitionDIE(CalleeSP);
+ } else {
+ // Create the declaration DIE if it is missing. This is required to
+ // support compilation of old bitcode with an incomplete list of
+ // retained metadata.
+ CU.getOrCreateSubprogramDIE(CalleeSP);
+ }
}
// TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
bool IsTail = TII->isTailCall(MI);
+ // If MI is in a bundle, the label was created after the bundle since
+ // EmitFunctionBody iterates over top-level MIs. Get that top-level MI
+ // to search for that label below.
+ const MachineInstr *TopLevelCallMI =
+ MI.isInsideBundle() ? &*getBundleStart(MI.getIterator()) : &MI;
+
// For tail calls, for non-gdb tuning, no return PC information is needed.
// For regular calls (and tail calls in GDB tuning), the return PC
// is needed to disambiguate paths in the call graph which could lead to
// some target function.
const MCExpr *PCOffset =
- (IsTail && !tuneForGDB()) ? nullptr
- : getFunctionLocalOffsetAfterInsn(&MI);
+ (IsTail && !tuneForGDB())
+ ? nullptr
+ : getFunctionLocalOffsetAfterInsn(TopLevelCallMI);
- // Address of a call-like instruction for a normal call or a jump-like
- // instruction for a tail call. This is needed for GDB + DWARF 4 tuning.
+ // Return address of a call-like instruction for a normal call or a
+ // jump-like instruction for a tail call. This is needed for
+ // GDB + DWARF 4 tuning.
const MCSymbol *PCAddr =
- ApplyGNUExtensions ? const_cast<MCSymbol*>(getLabelAfterInsn(&MI))
- : nullptr;
+ ApplyGNUExtensions
+ ? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI))
+ : nullptr;
assert((IsTail || PCOffset || PCAddr) &&
"Call without return PC information");
@@ -837,10 +880,13 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
// This CU is either a clang module DWO or a skeleton CU.
NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
DIUnit->getDWOId());
- if (!DIUnit->getSplitDebugFilename().empty())
+ if (!DIUnit->getSplitDebugFilename().empty()) {
// This is a prefabricated skeleton CU.
- NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
- DIUnit->getSplitDebugFilename());
+ dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
+ ? dwarf::DW_AT_dwo_name
+ : dwarf::DW_AT_GNU_dwo_name;
+ NewCU.addString(Die, attrDWOName, DIUnit->getSplitDebugFilename());
+ }
}
}
// Create new DwarfCompileUnit for the given metadata node with tag
@@ -878,11 +924,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
}
- // Create DIEs for function declarations used for call site debug info.
- for (auto Scope : DIUnit->getRetainedTypes())
- if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
- NewCU.getOrCreateSubprogramDIE(SP);
-
CUMap.insert({DIUnit, &NewCU});
CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
@@ -974,6 +1015,7 @@ void DwarfDebug::beginModule() {
// Create the symbol that points to the first entry following the debug
// address table (.debug_addr) header.
AddrPool.setLabel(Asm->createTempSymbol("addr_table_base"));
+ DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
for (DICompileUnit *CUNode : M->debug_compile_units()) {
// FIXME: Move local imported entities into a list attached to the
@@ -1077,11 +1119,17 @@ void DwarfDebug::finalizeModuleInfo() {
// If we're splitting the dwarf out now that we've got the entire
// CU then add the dwo id to it.
auto *SkCU = TheCU.getSkeleton();
- if (useSplitDwarf() && !TheCU.getUnitDie().children().empty()) {
+
+ bool HasSplitUnit = SkCU && !TheCU.getUnitDie().children().empty();
+
+ if (HasSplitUnit) {
+ dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
+ ? dwarf::DW_AT_dwo_name
+ : dwarf::DW_AT_GNU_dwo_name;
finishUnitAttributes(TheCU.getCUNode(), TheCU);
- TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
+ TheCU.addString(TheCU.getUnitDie(), attrDWOName,
Asm->TM.Options.MCOptions.SplitDwarfFile);
- SkCU->addString(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
+ SkCU->addString(SkCU->getUnitDie(), attrDWOName,
Asm->TM.Options.MCOptions.SplitDwarfFile);
// Emit a unique identifier for this CU.
uint64_t ID =
@@ -1127,29 +1175,34 @@ void DwarfDebug::finalizeModuleInfo() {
// We don't keep track of which addresses are used in which CU so this
// is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty() &&
- (getDwarfVersion() >= 5 ||
- (SkCU && !TheCU.getUnitDie().children().empty())))
+ if ((!AddrPool.isEmpty() || TheCU.hasRangeLists()) &&
+ (getDwarfVersion() >= 5 || HasSplitUnit))
U.addAddrTableBase();
if (getDwarfVersion() >= 5) {
if (U.hasRangeLists())
U.addRnglistsBase();
- if (!DebugLocs.getLists().empty() && !useSplitDwarf()) {
- DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
- U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
- DebugLocs.getSym(),
- TLOF.getDwarfLoclistsSection()->getBeginSymbol());
+ if (!DebugLocs.getLists().empty()) {
+ if (!useSplitDwarf())
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
+ DebugLocs.getSym(),
+ TLOF.getDwarfLoclistsSection()->getBeginSymbol());
}
}
auto *CUNode = cast<DICompileUnit>(P.first);
// If compile Unit has macros, emit "DW_AT_macro_info" attribute.
- if (CUNode->getMacros())
- U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
- U.getMacroLabelBegin(),
- TLOF.getDwarfMacinfoSection()->getBeginSymbol());
+ if (CUNode->getMacros()) {
+ if (useSplitDwarf())
+ TheCU.addSectionDelta(TheCU.getUnitDie(), dwarf::DW_AT_macro_info,
+ U.getMacroLabelBegin(),
+ TLOF.getDwarfMacinfoDWOSection()->getBeginSymbol());
+ else
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
+ U.getMacroLabelBegin(),
+ TLOF.getDwarfMacinfoSection()->getBeginSymbol());
+ }
}
// Emit all frontend-produced Skeleton CUs, i.e., Clang modules.
@@ -1185,9 +1238,10 @@ void DwarfDebug::endModule() {
emitDebugStr();
if (useSplitDwarf())
+ // Emit debug_loc.dwo/debug_loclists.dwo section.
emitDebugLocDWO();
else
- // Emit info into a debug loc section.
+ // Emit debug_loc/debug_loclists section.
emitDebugLoc();
// Corresponding abbreviations into a abbrev section.
@@ -1203,8 +1257,12 @@ void DwarfDebug::endModule() {
// Emit info into a debug ranges section.
emitDebugRanges();
+ if (useSplitDwarf())
+ // Emit info into a debug macinfo.dwo section.
+ emitDebugMacinfoDWO();
+ else
// Emit info into a debug macinfo section.
- emitDebugMacinfo();
+ emitDebugMacinfo();
if (useSplitDwarf()) {
emitDebugStrDWO();
@@ -2208,6 +2266,11 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
return DwarfExpr.addExpression(std::move(Cursor));
+ } else if (Value.isTargetIndexLocation()) {
+ TargetIndexLocation Loc = Value.getTargetIndexLocation();
+ // TODO: TargetIndexLocation is target-independent. Currently only the
+ // WebAssembly-specific encoding is supported.
+ DwarfExpr.addWasmLocation(Loc.Index, Loc.Offset);
} else if (Value.isConstantFP()) {
APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
DwarfExpr.addUnsignedConstant(RawBytes);
@@ -2242,6 +2305,8 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr);
}
DwarfExpr.finalize();
+ if (DwarfExpr.TagOffset)
+ List.setTagOffset(*DwarfExpr.TagOffset);
}
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
@@ -2296,7 +2361,7 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
Asm->OutStreamer->EmitLabel(Holder.getRnglistsTableBaseSym());
for (const RangeSpanList &List : Holder.getRangeLists())
- Asm->EmitLabelDifference(List.getSym(), Holder.getRnglistsTableBaseSym(),
+ Asm->EmitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(),
4);
return TableEnd;
@@ -2313,12 +2378,13 @@ static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
const auto &DebugLocs = DD.getDebugLocs();
- // FIXME: Generate the offsets table and use DW_FORM_loclistx with the
- // DW_AT_loclists_base attribute. Until then set the number of offsets to 0.
Asm->OutStreamer->AddComment("Offset entry count");
- Asm->emitInt32(0);
+ Asm->emitInt32(DebugLocs.getLists().size());
Asm->OutStreamer->EmitLabel(DebugLocs.getSym());
+ for (const auto &List : DebugLocs.getLists())
+ Asm->EmitLabelDifference(List.Label, DebugLocs.getSym(), 4);
+
return TableEnd;
}
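
With the count no longer hard-coded to zero, the v5 header is followed by one 4-byte offset per list, each measured from the base label emitted just before the array, so the first list begins immediately after the array itself. A small sketch of the arithmetic the assembler's label differences perform, assuming DWARF32 (4-byte entries) and hypothetical per-list encoded sizes:

#include <cstdint>
#include <cstdio>
#include <vector>

// Each offset entry is 4 bytes and the base label sits at the start of the
// array, so the first list's offset equals the size of the array itself.
static std::vector<uint32_t> offsetTable(const std::vector<uint32_t> &Sizes) {
  std::vector<uint32_t> Offsets;
  uint32_t Cur = 4 * uint32_t(Sizes.size()); // skip over the offset array
  for (uint32_t Size : Sizes) {
    Offsets.push_back(Cur);
    Cur += Size; // the next list starts where this one's entries end
  }
  return Offsets;
}

int main() {
  for (uint32_t O : offsetTable({12, 30, 7}))
    std::printf("%u\n", O); // prints 12, 24, 54
}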
@@ -2418,32 +2484,27 @@ static void emitRangeList(
}
}
+// Handles emission of both debug_loclists / debug_loclists.dwo sections.
static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) {
- emitRangeList(
- DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), *List.CU,
- dwarf::DW_LLE_base_addressx, dwarf::DW_LLE_offset_pair,
- dwarf::DW_LLE_startx_length, dwarf::DW_LLE_end_of_list,
- llvm::dwarf::LocListEncodingString,
- /* ShouldUseBaseAddress */ true,
- [&](const DebugLocStream::Entry &E) {
- DD.emitDebugLocEntryLocation(E, List.CU);
- });
+ emitRangeList(DD, Asm, List.Label, DD.getDebugLocs().getEntries(List),
+ *List.CU, dwarf::DW_LLE_base_addressx,
+ dwarf::DW_LLE_offset_pair, dwarf::DW_LLE_startx_length,
+ dwarf::DW_LLE_end_of_list, llvm::dwarf::LocListEncodingString,
+ /* ShouldUseBaseAddress */ true,
+ [&](const DebugLocStream::Entry &E) {
+ DD.emitDebugLocEntryLocation(E, List.CU);
+ });
}
-// Emit locations into the .debug_loc/.debug_rnglists section.
-void DwarfDebug::emitDebugLoc() {
+void DwarfDebug::emitDebugLocImpl(MCSection *Sec) {
if (DebugLocs.getLists().empty())
return;
+ Asm->OutStreamer->SwitchSection(Sec);
+
MCSymbol *TableEnd = nullptr;
- if (getDwarfVersion() >= 5) {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLoclistsSection());
+ if (getDwarfVersion() >= 5)
TableEnd = emitLoclistsTableHeader(Asm, *this);
- } else {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLocSection());
- }
for (const auto &List : DebugLocs.getLists())
emitLocList(*this, Asm, List);
@@ -2452,11 +2513,28 @@ void DwarfDebug::emitDebugLoc() {
Asm->OutStreamer->EmitLabel(TableEnd);
}
+// Emit locations into the .debug_loc/.debug_loclists section.
+void DwarfDebug::emitDebugLoc() {
+ emitDebugLocImpl(
+ getDwarfVersion() >= 5
+ ? Asm->getObjFileLowering().getDwarfLoclistsSection()
+ : Asm->getObjFileLowering().getDwarfLocSection());
+}
+
+// Emit locations into the .debug_loc.dwo/.debug_loclists.dwo section.
void DwarfDebug::emitDebugLocDWO() {
+ if (getDwarfVersion() >= 5) {
+ emitDebugLocImpl(
+ Asm->getObjFileLowering().getDwarfLoclistsDWOSection());
+
+ return;
+ }
+
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocDWOSection());
Asm->OutStreamer->EmitLabel(List.Label);
+
for (const auto &Entry : DebugLocs.getEntries(List)) {
// GDB only supports startx_length in pre-standard split-DWARF.
// (in v5 standard loclists, it currently* /only/ supports base_address +
@@ -2468,8 +2546,9 @@ void DwarfDebug::emitDebugLocDWO() {
Asm->emitInt8(dwarf::DW_LLE_startx_length);
unsigned idx = AddrPool.getIndex(Entry.Begin);
Asm->EmitULEB128(idx);
+ // Also the pre-standard encoding is slightly different, emitting this as
+ // an address-length entry here, but it's a ULEB128 in DWARFv5 loclists.
Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4);
-
emitDebugLocEntryLocation(Entry, List.CU);
}
Asm->emitInt8(dwarf::DW_LLE_end_of_list);
@@ -2639,19 +2718,33 @@ void DwarfDebug::emitDebugARanges() {
/// Emit a single range list. We handle both DWARF v5 and earlier.
static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm,
const RangeSpanList &List) {
- emitRangeList(DD, Asm, List.getSym(), List.getRanges(), List.getCU(),
+ emitRangeList(DD, Asm, List.Label, List.Ranges, *List.CU,
dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair,
dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list,
llvm::dwarf::RangeListEncodingString,
- List.getCU().getCUNode()->getRangesBaseAddress() ||
+ List.CU->getCUNode()->getRangesBaseAddress() ||
DD.getDwarfVersion() >= 5,
[](auto) {});
}
-static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm,
- const DwarfFile &Holder, MCSymbol *TableEnd) {
+void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section) {
+ if (Holder.getRangeLists().empty())
+ return;
+
+ assert(useRangesSection());
+ assert(!CUMap.empty());
+ assert(llvm::any_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return !Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }));
+
+ Asm->OutStreamer->SwitchSection(Section);
+
+ MCSymbol *TableEnd = nullptr;
+ if (getDwarfVersion() >= 5)
+ TableEnd = emitRnglistsTableHeader(Asm, Holder);
+
for (const RangeSpanList &List : Holder.getRangeLists())
- emitRangeList(DD, Asm, List);
+ emitRangeList(*this, Asm, List);
if (TableEnd)
Asm->OutStreamer->EmitLabel(TableEnd);
@@ -2660,55 +2753,17 @@ static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm,
/// Emit address ranges into the .debug_ranges section or into the DWARF v5
/// .debug_rnglists section.
void DwarfDebug::emitDebugRanges() {
- if (CUMap.empty())
- return;
-
const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- if (Holder.getRangeLists().empty())
- return;
-
- assert(useRangesSection());
- assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
- return Pair.second->getCUNode()->isDebugDirectivesOnly();
- }));
-
- // Start the dwarf ranges section.
- MCSymbol *TableEnd = nullptr;
- if (getDwarfVersion() >= 5) {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfRnglistsSection());
- TableEnd = emitRnglistsTableHeader(Asm, Holder);
- } else
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfRangesSection());
-
- emitDebugRangesImpl(*this, Asm, Holder, TableEnd);
+ emitDebugRangesImpl(Holder,
+ getDwarfVersion() >= 5
+ ? Asm->getObjFileLowering().getDwarfRnglistsSection()
+ : Asm->getObjFileLowering().getDwarfRangesSection());
}
void DwarfDebug::emitDebugRangesDWO() {
- assert(useSplitDwarf());
-
- if (CUMap.empty())
- return;
-
- const auto &Holder = InfoHolder;
-
- if (Holder.getRangeLists().empty())
- return;
-
- assert(getDwarfVersion() >= 5);
- assert(useRangesSection());
- assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
- return Pair.second->getCUNode()->isDebugDirectivesOnly();
- }));
-
- // Start the dwarf ranges section.
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
- MCSymbol *TableEnd = emitRnglistsTableHeader(Asm, Holder);
-
- emitDebugRangesImpl(*this, Asm, Holder, TableEnd);
+ emitDebugRangesImpl(InfoHolder,
+ Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
}
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
@@ -2745,35 +2800,30 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
Asm->EmitULEB128(dwarf::DW_MACINFO_end_file);
}
-/// Emit macros into a debug macinfo section.
-void DwarfDebug::emitDebugMacinfo() {
- if (CUMap.empty())
- return;
-
- if (llvm::all_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
- return Pair.second->getCUNode()->isDebugDirectivesOnly();
- }))
- return;
-
- // Start the dwarf macinfo section.
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfMacinfoSection());
-
+void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
- if (TheCU.getCUNode()->isDebugDirectivesOnly())
- continue;
auto *SkCU = TheCU.getSkeleton();
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
auto *CUNode = cast<DICompileUnit>(P.first);
DIMacroNodeArray Macros = CUNode->getMacros();
- if (!Macros.empty()) {
- Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin());
- handleMacroNodes(Macros, U);
- }
+ if (Macros.empty())
+ continue;
+ Asm->OutStreamer->SwitchSection(Section);
+ Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin());
+ handleMacroNodes(Macros, U);
+ Asm->OutStreamer->AddComment("End Of Macro List Mark");
+ Asm->emitInt8(0);
}
- Asm->OutStreamer->AddComment("End Of Macro List Mark");
- Asm->emitInt8(0);
+}
+
+/// Emit macros into a debug macinfo section.
+void DwarfDebug::emitDebugMacinfo() {
+ emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoSection());
+}
+
+void DwarfDebug::emitDebugMacinfoDWO() {
+ emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoDWOSection());
}
// DWARF5 Experimental Separate Dwarf emitters.
@@ -2792,7 +2842,8 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
- CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
+ CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder,
+ UnitKind::Skeleton);
DwarfCompileUnit &NewCU = *OwnedUnit;
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index c8c511f67c2a..fd82b1f98055 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -118,6 +118,9 @@ public:
class DbgVariable : public DbgEntity {
/// Offset in DebugLocs.
unsigned DebugLocListIndex = ~0u;
+ /// DW_OP_LLVM_tag_offset value from DebugLocs.
+ Optional<uint8_t> DebugLocListTagOffset;
+
/// Single value location description.
std::unique_ptr<DbgValueLoc> ValueLoc = nullptr;
@@ -174,6 +177,8 @@ public:
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
+ void setDebugLocListTagOffset(uint8_t O) { DebugLocListTagOffset = O; }
+ Optional<uint8_t> getDebugLocListTagOffset() const { return DebugLocListTagOffset; }
StringRef getName() const { return getVariable()->getName(); }
const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
/// Get the FI entries, sorted by fragment offset.
@@ -437,6 +442,9 @@ class DwarfDebug : public DebugHandlerBase {
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
+ /// Construct a DIE for the subprogram definition \p SP and return it.
+ DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP);
+
/// Construct DIEs for call site entries describing the calls in \p MF.
void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
DIE &ScopeDIE, const MachineFunction &MF);
@@ -498,15 +506,21 @@ class DwarfDebug : public DebugHandlerBase {
/// Emit variable locations into a debug loc dwo section.
void emitDebugLocDWO();
+ void emitDebugLocImpl(MCSection *Sec);
+
/// Emit address ranges into a debug aranges section.
void emitDebugARanges();
/// Emit address ranges into a debug ranges section.
void emitDebugRanges();
void emitDebugRangesDWO();
+ void emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section);
/// Emit macros into a debug macinfo section.
void emitDebugMacinfo();
+ /// Emit macros into a debug macinfo.dwo section.
+ void emitDebugMacinfoDWO();
+ void emitDebugMacinfoImpl(MCSection *Section);
void emitMacro(DIMacro &M);
void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U);
void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 1c5a244d7c5d..310647f15a5e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -155,20 +155,18 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
CurSubReg.set(Offset, Offset + Size);
// If this sub-register has a DWARF number and we haven't covered
- // its range, emit a DWARF piece for it.
- if (CurSubReg.test(Coverage)) {
+ // its range, and its range covers the value, emit a DWARF piece for it.
+ if (Offset < MaxSize && CurSubReg.test(Coverage)) {
// Emit a piece for any gap in the coverage.
if (Offset > CurPos)
- DwarfRegs.push_back({-1, Offset - CurPos, "no DWARF register encoding"});
+ DwarfRegs.push_back(
+ {-1, Offset - CurPos, "no DWARF register encoding"});
DwarfRegs.push_back(
{Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"});
- if (Offset >= MaxSize)
- break;
-
- // Mark it as emitted.
- Coverage.set(Offset, Offset + Size);
- CurPos = Offset + Size;
}
+ // Mark it as emitted.
+ Coverage.set(Offset, Offset + Size);
+ CurPos = Offset + Size;
}
// Failed to find any DWARF encoding.
if (CurPos == 0)
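
The fix above emits a sub-register piece only when its offset lies inside the value (Offset < MaxSize), clamps the final piece, and still advances the coverage so overlapping sub-registers past the end are skipped rather than re-emitted. A simplified standalone sketch of that walk; it assumes sub-registers sorted by offset and uses a single cursor where the real code tracks covered bits with a bit vector:

#include <algorithm>
#include <cstdio>
#include <vector>

struct SubReg {
  unsigned Offset, Size; // bit range within the super-register
  int DwarfNo;           // register's DWARF number
};

static void emitPieces(const std::vector<SubReg> &Regs, unsigned MaxSize) {
  unsigned CurPos = 0;
  for (const SubReg &R : Regs) {
    if (R.Offset < CurPos) // already covered by an earlier sub-register
      continue;
    if (R.Offset < MaxSize) {
      if (R.Offset > CurPos) // gap with no DWARF register encoding
        std::printf("piece(undef, %u bits)\n", R.Offset - CurPos);
      std::printf("piece(reg %d, %u bits)\n", R.DwarfNo,
                  std::min(R.Size, MaxSize - R.Offset));
    }
    CurPos = R.Offset + R.Size; // advance coverage even past MaxSize
  }
}

int main() {
  // A 48-bit value in a register pair: bits 0..31 and 32..63; the second
  // half is clamped to the remaining 16 bits.
  emitPieces({{0, 32, 10}, {32, 32, 11}}, 48);
}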
@@ -246,8 +244,8 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// a call site parameter expression and if that expression is just a register
// location, emit it with addBReg and offset 0, because we should emit a DWARF
// expression representing a value, rather than a location.
- if (!isMemoryLocation() && !HasComplexExpression &&
- (!isParameterValue() || isEntryValue())) {
+ if (!isMemoryLocation() && !HasComplexExpression && (!isParameterValue() ||
+ isEntryValue())) {
for (auto &Reg : DwarfRegs) {
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
@@ -391,6 +389,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
// empty DW_OP_piece / DW_OP_bit_piece before we emitted the base
// location.
assert(OffsetInBits >= FragmentOffset && "fragment offset not added?");
+ assert(SizeInBits >= OffsetInBits - FragmentOffset && "size underflow");
// If addMachineReg already emitted DW_OP_piece operations to represent
// a super-register by splicing together sub-registers, subtract the size
@@ -436,9 +435,6 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
break;
case dwarf::DW_OP_deref:
assert(!isRegisterLocation());
- // For more detailed explanation see llvm.org/PR43343.
- assert(!isParameterValue() && "Parameter entry values should not be "
- "dereferenced due to safety reasons.");
if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor))
// Turning this into a memory location description makes the deref
// implicit.
@@ -576,3 +572,11 @@ void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
emitUnsigned((1ULL << FromBits) - 1);
emitOp(dwarf::DW_OP_and);
}
+
+void DwarfExpression::addWasmLocation(unsigned Index, int64_t Offset) {
+ assert(LocationKind == Implicit || LocationKind == Unknown);
+ LocationKind = Implicit;
+ emitOp(dwarf::DW_OP_WASM_location);
+ emitUnsigned(Index);
+ emitSigned(Offset);
+}
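
addWasmLocation writes the operator byte followed by an unsigned LEB128 index and a signed LEB128 offset. The SLEB128 termination test is the subtle part: emission stops once the remaining value is all sign bits and the sign bit of the last byte agrees with it. A sketch, using 0xed for DW_OP_WASM_location (the vendor-extension opcode value LLVM assigns; treat it as an assumption here):

#include <cassert>
#include <cstdint>
#include <vector>

// SLEB128 sketch: 7 bits per byte, arithmetic shift keeps the sign, and the
// loop ends once the remaining bits match the sign bit of the emitted byte.
static void encodeSLEB128(int64_t V, std::vector<uint8_t> &Out) {
  bool More = true;
  while (More) {
    uint8_t Byte = V & 0x7f;
    V >>= 7; // arithmetic shift on signed values
    if ((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)))
      More = false;
    else
      Byte |= 0x80; // continuation bit
    Out.push_back(Byte);
  }
}

int main() {
  std::vector<uint8_t> Op;
  Op.push_back(0xed);    // DW_OP_WASM_location (assumed extension opcode)
  Op.push_back(2);       // ULEB128 index 2 fits in one byte
  encodeSLEB128(-8, Op); // signed offset
  assert(Op.size() == 3 && Op[2] == 0x78); // -8 -> 0x78, no continuation
}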
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 1ad46669f9b2..46c07b1d5b6b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -337,6 +337,10 @@ public:
void emitLegacySExt(unsigned FromBits);
void emitLegacyZExt(unsigned FromBits);
+
+ /// Emit location information expressed via a WebAssembly location + offset.
+ /// The Index is an identifier for locals, globals or the operand stack.
+ void addWasmLocation(unsigned Index, int64_t Offset);
};
/// DwarfExpression implementation for .debug_loc entries.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index e3c9095d1343..e5c4db58f477 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -126,6 +126,6 @@ void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) {
std::pair<uint32_t, RangeSpanList *>
DwarfFile::addRange(const DwarfCompileUnit &CU, SmallVector<RangeSpan, 2> R) {
CURangeLists.push_back(
- RangeSpanList(Asm->createTempSymbol("debug_ranges"), CU, std::move(R)));
+ RangeSpanList{Asm->createTempSymbol("debug_ranges"), &CU, std::move(R)});
return std::make_pair(CURangeLists.size() - 1, &CURangeLists.back());
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 35fa51fb24c4..cf293d7534d0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -37,21 +37,12 @@ struct RangeSpan {
const MCSymbol *End;
};
-class RangeSpanList {
-private:
+struct RangeSpanList {
// Index for locating within the debug_range section this particular span.
- MCSymbol *RangeSym;
+ MCSymbol *Label;
const DwarfCompileUnit *CU;
// List of ranges.
SmallVector<RangeSpan, 2> Ranges;
-
-public:
- RangeSpanList(MCSymbol *Sym, const DwarfCompileUnit &CU,
- SmallVector<RangeSpan, 2> Ranges)
- : RangeSym(Sym), CU(&CU), Ranges(std::move(Ranges)) {}
- MCSymbol *getSym() const { return RangeSym; }
- const DwarfCompileUnit &getCU() const { return *CU; }
- const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
};
class DwarfFile {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 37c68c085792..1aba956c48de 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -188,8 +188,9 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
/// Check whether the DIE for this MDNode can be shared across CUs.
bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
- // When the MDNode can be part of the type system, the DIE can be shared
- // across CUs.
+ // When the MDNode can be part of the type system (this includes subprogram
+ // declarations *and* subprogram definitions, even local definitions), the
+ // DIE must be shared across CUs.
// Combining type units and cross-CU DIE sharing is lower value (since
// cross-CU DIE sharing is used in LTO and removes type redundancy at that
// level already) but may be implementable for some value in projects
@@ -197,9 +198,7 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
// together.
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return false;
- return (isa<DIType>(D) ||
- (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
- !DD->generateTypeUnits();
+ return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits();
}
DIE *DwarfUnit::getDIE(const DINode *D) const {
@@ -800,6 +799,15 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
+ // If alignment is specified for a typedef, create and insert the
+ // DW_AT_alignment attribute in the DW_TAG_typedef DIE.
+ if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) {
+ uint32_t AlignInBytes = DTy->getAlignInBytes();
+ if (AlignInBytes > 0)
+ addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+ }
+
// Add size if non-zero (derived types might be zero-sized.)
if (Size && Tag != dwarf::DW_TAG_pointer_type
&& Tag != dwarf::DW_TAG_ptr_to_member_type
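
Concretely, input along the lines of the following is what the new branch covers, assuming the frontend records the explicit alignment on the typedef's DIDerivedType; at DWARF v5 the DW_TAG_typedef DIE then carries DW_AT_alignment with value 16 (naturally aligned typedefs report getAlignInBytes() == 0 and are unaffected):

// Hypothetical input, using the GNU aligned attribute: built with -g and
// -gdwarf-5, the typedef's DIE gains DW_AT_alignment (16).
typedef int AlignedInt __attribute__((aligned(16)));

AlignedInt Value; // referenced so debug info for the typedef is emitted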
@@ -1114,8 +1122,8 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
M->getConfigurationMacros());
if (!M->getIncludePath().empty())
addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath());
- if (!M->getISysRoot().empty())
- addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot());
+ if (!M->getSysRoot().empty())
+ addString(MDie, dwarf::DW_AT_LLVM_sysroot, M->getSysRoot());
return &MDie;
}
@@ -1224,6 +1232,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
+ if (SP->isObjCDirect())
+ addFlag(SPDie, dwarf::DW_AT_APPLE_objc_direct);
+
unsigned CC = 0;
DITypeRefArray Args;
if (const DISubroutineType *SPTy = SP->getType()) {
@@ -1307,6 +1318,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
addFlag(SPDie, dwarf::DW_AT_elemental);
if (SP->isRecursive())
addFlag(SPDie, dwarf::DW_AT_recursive);
+
+ if (DD->getDwarfVersion() >= 5 && SP->isDeleted())
+ addFlag(SPDie, dwarf::DW_AT_deleted);
}
void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index 290be81c6baa..914308d9147e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains support for writing Win64 exception info into asm files.
+// This file contains support for writing the metadata for Windows Control Flow
+// Guard, including address-taken functions and valid longjmp targets.
//
//===----------------------------------------------------------------------===//
@@ -17,6 +18,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -29,16 +31,69 @@ WinCFGuard::WinCFGuard(AsmPrinter *A) : AsmPrinterHandler(), Asm(A) {}
WinCFGuard::~WinCFGuard() {}
+void WinCFGuard::endFunction(const MachineFunction *MF) {
+
+ // Skip functions without any longjmp targets.
+ if (MF->getLongjmpTargets().empty())
+ return;
+
+ // Copy the function's longjmp targets to a module-level list.
+ LongjmpTargets.insert(LongjmpTargets.end(), MF->getLongjmpTargets().begin(),
+ MF->getLongjmpTargets().end());
+}
+
+/// Returns true if this function's address is escaped in a way that might make
+/// it an indirect call target. Function::hasAddressTaken gives different
+/// results when a function is called directly with a function prototype
+/// mismatch, which requires a cast.
+static bool isPossibleIndirectCallTarget(const Function *F) {
+ SmallVector<const Value *, 4> Users{F};
+ while (!Users.empty()) {
+ const Value *FnOrCast = Users.pop_back_val();
+ for (const Use &U : FnOrCast->uses()) {
+ const User *FnUser = U.getUser();
+ if (isa<BlockAddress>(FnUser))
+ continue;
+ if (const auto *Call = dyn_cast<CallBase>(FnUser)) {
+ if (!Call->isCallee(&U))
+ return true;
+ } else if (isa<Instruction>(FnUser)) {
+ // Consider any other instruction to be an escape. This has some weird
+ // consequences like no-op intrinsics being an escape or a store *to* a
+ // function address being an escape.
+ return true;
+ } else if (const auto *C = dyn_cast<Constant>(FnUser)) {
+ // If this is a constant pointer cast of the function, don't consider
+ // this escape. Analyze the uses of the cast as well. This ensures that
+ // direct calls with mismatched prototypes don't end up in the CFG
+ // table. Consider other constants, such as vtable initializers, to
+ // escape the function.
+ if (C->stripPointerCasts() == F)
+ Users.push_back(FnUser);
+ else
+ return true;
+ }
+ }
+ }
+ return false;
+}
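As a rough source-level illustration (hypothetical functions) of what the worklist walk above treats as an escape versus a plain direct call through a cast:

    void callee(int);

    void examples() {
      // Storing the address is an instruction-level use: an escape, so
      // 'callee' lands in the CFG table.
      void (*fp)(int) = &callee;
      (void)fp;
      // A constant pointer cast used only as the callee of a call is not
      // an escape; stripPointerCasts() above sees through it, so this
      // prototype-mismatched direct call stays out of the table.
      ((void (*)(long))&callee)(0L);
    }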
+
void WinCFGuard::endModule() {
const Module *M = Asm->MMI->getModule();
std::vector<const Function *> Functions;
for (const Function &F : *M)
- if (F.hasAddressTaken())
+ if (isPossibleIndirectCallTarget(&F))
Functions.push_back(&F);
- if (Functions.empty())
+ if (Functions.empty() && LongjmpTargets.empty())
return;
auto &OS = *Asm->OutStreamer;
OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection());
for (const Function *F : Functions)
OS.EmitCOFFSymbolIndex(Asm->getSymbol(F));
+
+ // Emit the symbol index of each longjmp target.
+ OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection());
+ for (const MCSymbol *S : LongjmpTargets) {
+ OS.EmitCOFFSymbolIndex(S);
+ }
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
index def0a59ab007..494a153b05ba 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains support for writing windows exception info into asm files.
+// This file contains support for writing the metadata for Windows Control Flow
+// Guard, including address-taken functions, and valid longjmp targets.
//
//===----------------------------------------------------------------------===//
@@ -15,12 +16,14 @@
#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/Support/Compiler.h"
+#include <vector>
namespace llvm {
class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler {
/// Target of directive emission.
AsmPrinter *Asm;
+ std::vector<const MCSymbol *> LongjmpTargets;
public:
WinCFGuard(AsmPrinter *A);
@@ -28,7 +31,7 @@ public:
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
- /// Emit the Control Flow Guard function ID table
+ /// Emit the Control Flow Guard function ID table.
void endModule() override;
/// Gather pre-function debug information.
@@ -39,7 +42,7 @@ public:
/// Gather post-function debug information.
/// Please note that some AsmPrinter implementations may not call
/// beginFunction at all.
- void endFunction(const MachineFunction *MF) override {}
+ void endFunction(const MachineFunction *MF) override;
/// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 27b298dcf6af..37a50cde6391 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
@@ -674,7 +675,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
Ret.Mask = Builder.CreateShl(
- ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
+ ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt,
"Mask");
Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
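The added parentheses only make the existing precedence explicit (multiplication already binds tighter than the shift), so the computed mask is unchanged. A self-contained sketch with assumed example values:

    #include <cstdint>
    // A 16-bit value at bit offset 16 within a 32-bit word:
    constexpr unsigned ValueSize = 2;   // bytes
    constexpr unsigned ShiftAmt = 16;   // bits
    constexpr uint32_t Mask = ((1u << (ValueSize * 8)) - 1) << ShiftAmt;
    constexpr uint32_t InvMask = ~Mask;
    static_assert(Mask == 0xFFFF0000u, "covers the upper halfword");
    static_assert(InvMask == 0x0000FFFFu, "covers everything else");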
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index 455916eeb82f..4b9c50aeb1d3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -38,6 +39,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -46,6 +48,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -102,6 +105,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -128,7 +132,8 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
BranchFolder::MBFIWrapper MBBFreqInfo(
getAnalysis<MachineBlockFrequencyInfo>());
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
- getAnalysis<MachineBranchProbabilityInfo>());
+ getAnalysis<MachineBranchProbabilityInfo>(),
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
return Folder.OptimizeFunction(
MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(),
@@ -138,9 +143,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
MBFIWrapper &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo,
+ ProfileSummaryInfo *PSI,
unsigned MinTailLength)
: EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
- MBBFreqInfo(FreqInfo), MBPI(ProbInfo) {
+ MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) {
if (MinCommonTailLength == 0)
MinCommonTailLength = TailMergeSize;
switch (FlagEnableTailMerge) {
@@ -301,113 +307,56 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
return HashMachineInstr(*I);
}
-/// Whether MI should be counted as an instruction when calculating common tail.
+/// Whether MI should be counted as an instruction when calculating common tail.
static bool countsAsInstruction(const MachineInstr &MI) {
return !(MI.isDebugInstr() || MI.isCFIInstruction());
}
-/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
-/// of instructions they actually have in common together at their end. Return
-/// iterators for the first shared instruction in each block.
+/// Iterate backwards from the given iterator \p I, towards the beginning of the
+/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator
+/// pointing to that MI. If no such MI is found, return the end iterator.
+static MachineBasicBlock::iterator
+skipBackwardPastNonInstructions(MachineBasicBlock::iterator I,
+ MachineBasicBlock *MBB) {
+ while (I != MBB->begin()) {
+ --I;
+ if (countsAsInstruction(*I))
+ return I;
+ }
+ return MBB->end();
+}
+
+/// Given two machine basic blocks, return the number of instructions they
+/// have in common at their end. If a common tail of at least one instruction
+/// is found, iterators to the first shared instruction in each block are
+/// returned as well.
+///
+/// Non-instructions according to countsAsInstruction are ignored.
static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2,
MachineBasicBlock::iterator &I1,
MachineBasicBlock::iterator &I2) {
- I1 = MBB1->end();
- I2 = MBB2->end();
+ MachineBasicBlock::iterator MBBI1 = MBB1->end();
+ MachineBasicBlock::iterator MBBI2 = MBB2->end();
unsigned TailLen = 0;
- while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
- --I1; --I2;
- // Skip debugging pseudos; necessary to avoid changing the code.
- while (!countsAsInstruction(*I1)) {
- if (I1==MBB1->begin()) {
- while (!countsAsInstruction(*I2)) {
- if (I2==MBB2->begin()) {
- // I1==DBG at begin; I2==DBG at begin
- goto SkipTopCFIAndReturn;
- }
- --I2;
- }
- ++I2;
- // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
- goto SkipTopCFIAndReturn;
- }
- --I1;
- }
- // I1==first (untested) non-DBG preceding known match
- while (!countsAsInstruction(*I2)) {
- if (I2==MBB2->begin()) {
- ++I1;
- // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
- goto SkipTopCFIAndReturn;
- }
- --I2;
- }
- // I1, I2==first (untested) non-DBGs preceding known match
- if (!I1->isIdenticalTo(*I2) ||
+ while (true) {
+ MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1);
+ MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2);
+ if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end())
+ break;
+ if (!MBBI1->isIdenticalTo(*MBBI2) ||
// FIXME: This check is dubious. It's used to get around a problem where
// people incorrectly expect inline asm directives to remain in the same
// relative order. This is untenable because normal compiler
// optimizations (like this one) may reorder and/or merge these
// directives.
- I1->isInlineAsm()) {
- ++I1; ++I2;
+ MBBI1->isInlineAsm()) {
break;
}
++TailLen;
- }
- // Back past possible debugging pseudos at beginning of block. This matters
- // when one block differs from the other only by whether debugging pseudos
- // are present at the beginning. (This way, the various checks later for
- // I1==MBB1->begin() work as expected.)
- if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
- --I2;
- while (I2->isDebugInstr()) {
- if (I2 == MBB2->begin())
- return TailLen;
- --I2;
- }
- ++I2;
- }
- if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
- --I1;
- while (I1->isDebugInstr()) {
- if (I1 == MBB1->begin())
- return TailLen;
- --I1;
- }
- ++I1;
- }
-
-SkipTopCFIAndReturn:
- // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if
- // I1 and I2 are non-identical when compared and then one or both of them ends
- // up pointing to a CFI instruction after being incremented. For example:
- /*
- BB1:
- ...
- INSTRUCTION_A
- ADD32ri8 <- last common instruction
- ...
- BB2:
- ...
- INSTRUCTION_B
- CFI_INSTRUCTION
- ADD32ri8 <- last common instruction
- ...
- */
- // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after
- // incrementing the iterators, I1 will point to ADD, however I2 will point to
- // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the
- // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI
- // instruction.
- while (I1 != MBB1->end() && I1->isCFIInstruction()) {
- ++I1;
- }
-
- while (I2 != MBB2->end() && I2->isCFIInstruction()) {
- ++I2;
+ I1 = MBBI1;
+ I2 = MBBI2;
}
return TailLen;
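A self-contained sketch of the simplified loop, with strings standing in for machine instructions and "DBG"/"CFI" for entries that countsAsInstruction rejects (illustrative only; the real function also reports the iterators and special-cases inline asm):

    #include <string>
    #include <vector>

    static bool counts(const std::string &I) { return I != "DBG" && I != "CFI"; }

    unsigned commonTailLength(const std::vector<std::string> &B1,
                              const std::vector<std::string> &B2) {
      auto I1 = B1.rbegin(), I2 = B2.rbegin();
      unsigned Len = 0;
      while (true) {
        // Skip entries that don't count, mirroring
        // skipBackwardPastNonInstructions.
        while (I1 != B1.rend() && !counts(*I1)) ++I1;
        while (I2 != B2.rend() && !counts(*I2)) ++I2;
        if (I1 == B1.rend() || I2 == B2.rend() || *I1 != *I2)
          break;
        ++Len; ++I1; ++I2;
      }
      return Len;  // e.g. {A,X,Y} vs {B,DBG,X,Y} -> 2
    }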
@@ -500,7 +449,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
continue;
if (I->isCall())
Time += 10;
- else if (I->mayLoad() || I->mayStore())
+ else if (I->mayLoadOrStore())
Time += 2;
else
++Time;
@@ -641,7 +590,9 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB,
DenseMap<const MachineBasicBlock *, int> &EHScopeMembership,
- bool AfterPlacement) {
+ bool AfterPlacement,
+ BranchFolder::MBFIWrapper &MBBFreqInfo,
+ ProfileSummaryInfo *PSI) {
// It is never profitable to tail-merge blocks from two different EH scopes.
if (!EHScopeMembership.empty()) {
auto EHScope1 = EHScopeMembership.find(MBB1);
@@ -659,6 +610,17 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
<< " and " << printMBBReference(*MBB2) << " is "
<< CommonTailLen << '\n');
+ // Move the iterators to the beginning of the MBB if only debug
+ // instructions precede the tail. This avoids splitting a block merely
+ // because of leading debug instructions (keeping the result invariant
+ // under -g).
+ if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end()) == I1)
+ I1 = MBB1->begin();
+ if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end()) == I2)
+ I2 = MBB2->begin();
+
+ bool FullBlockTail1 = I1 == MBB1->begin();
+ bool FullBlockTail2 = I2 == MBB2->begin();
+
// It's almost always profitable to merge any number of non-terminator
// instructions with the block that falls through into the common successor.
// This is true only for a single successor. For multiple successors, we are
@@ -677,7 +639,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// are unlikely to become a fallthrough target after machine block placement.
// Tail merging these blocks is unlikely to create additional unconditional
// branches, and will reduce the size of this cold code.
- if (I1 == MBB1->begin() && I2 == MBB2->begin() &&
+ if (FullBlockTail1 && FullBlockTail2 &&
blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
return true;
@@ -685,16 +647,16 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// a position where the other could fall through into it, merge any number
// of instructions, because it can be done without a branch.
// TODO: If the blocks are not adjacent, move one of them so that they are?
- if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2)
return true;
- if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1)
return true;
// If both blocks are identical and end in a branch, merge them unless they
// both have a fallthrough predecessor and successor.
// We can only do this after block placement because it depends on whether
// there are fallthroughs, and we don't know until after layout.
- if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) {
+ if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {
auto BothFallThrough = [](MachineBasicBlock *MBB) {
if (MBB->succ_size() != 0 && !MBB->canFallThrough())
return false;
@@ -727,8 +689,12 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// branch instruction, which is likely to be smaller than the 2
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
- return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
- (I1 == MBB1->begin() || I2 == MBB2->begin());
+ bool OptForSize =
+ MF->getFunction().hasOptSize() ||
+ (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) &&
+ llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI()));
+ return EffectiveTailLen >= 2 && OptForSize &&
+ (FullBlockTail1 || FullBlockTail2);
}
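The OptForSize computation above is the profile-guided size-optimization (PGSO) pattern that recurs throughout this change: treat code as size-optimized when the whole function is, or when profile data marks the block cold (here both blocks must qualify before the tail merge is costed that way). A minimal sketch of the shape, with hypothetical stand-ins for the two inputs:

    bool optForSize(bool FunctionHasOptSize, bool ProfileSaysBlockIsCold) {
      // The function-level attribute wins; otherwise defer to profile data.
      return FunctionHasOptSize || ProfileSaysBlockIsCold;
    }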
unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
@@ -749,7 +715,7 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
CommonTailLen, TrialBBI1, TrialBBI2,
SuccBB, PredBB,
EHScopeMembership,
- AfterBlockPlacement)) {
+ AfterBlockPlacement, MBBFreqInfo, PSI)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
@@ -869,7 +835,7 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!");
// Merge MMOs from memory operations in the common block.
- if (MBBICommon->mayLoad() || MBBICommon->mayStore())
+ if (MBBICommon->mayLoadOrStore())
MBBICommon->cloneMergedMemRefs(*MBB->getParent(), {&*MBBICommon, &*MBBI});
// Drop undef flags if they aren't present in all merged instructions.
for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
@@ -1579,8 +1545,10 @@ ReoptimizeBlock:
}
}
- if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
- MF.getFunction().hasOptSize()) {
+ bool OptForSize =
+ MF.getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI());
+ if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
// direction, thereby defeating careful block placement and regressing
// performance. Therefore, only consider this for optsize functions.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
index 761ff9c7d54e..7a4c68ea09f5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
@@ -27,6 +27,7 @@ class MachineFunction;
class MachineLoopInfo;
class MachineModuleInfo;
class MachineRegisterInfo;
+class ProfileSummaryInfo;
class raw_ostream;
class TargetInstrInfo;
class TargetRegisterInfo;
@@ -39,6 +40,7 @@ class TargetRegisterInfo;
bool CommonHoist,
MBFIWrapper &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo,
+ ProfileSummaryInfo *PSI,
// Min tail length to merge. Defaults to commandline
// flag. Ignored for optsize.
unsigned MinTailLength = 0);
@@ -145,6 +147,7 @@ class TargetRegisterInfo;
const BlockFrequency Freq) const;
void view(const Twine &Name, bool isSimple = true);
uint64_t getEntryFreq() const;
+ const MachineBlockFrequencyInfo &getMBFI() { return MBFI; }
private:
const MachineBlockFrequencyInfo &MBFI;
@@ -154,6 +157,7 @@ class TargetRegisterInfo;
private:
MBFIWrapper &MBBFreqInfo;
const MachineBranchProbabilityInfo &MBPI;
+ ProfileSummaryInfo *PSI;
bool TailMergeBlocks(MachineFunction &MF);
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
index 6efdc9efa968..f05517d178ae 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 709164e5f178..9bae9d36add1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -19,13 +19,13 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
-
using namespace llvm;
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp
new file mode 100644
index 000000000000..c3bf93855111
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp
@@ -0,0 +1,120 @@
+//===-- CFGuardLongjmp.cpp - Longjmp symbols for CFGuard --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains a machine function pass to insert a symbol after each
+/// call to _setjmp and store this in the MachineFunction's LongjmpTargets
+/// vector. This will be used to emit the table of valid longjmp targets used
+/// by Control Flow Guard.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "cfguard-longjmp"
+
+STATISTIC(CFGuardLongjmpTargets,
+ "Number of Control Flow Guard longjmp targets");
+
+namespace {
+
+/// MachineFunction pass to insert a symbol after each call to _setjmp and store
+/// this in the MachineFunction's LongjmpTargets vector.
+class CFGuardLongjmp : public MachineFunctionPass {
+public:
+ static char ID;
+
+ CFGuardLongjmp() : MachineFunctionPass(ID) {
+ initializeCFGuardLongjmpPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Control Flow Guard longjmp targets";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+char CFGuardLongjmp::ID = 0;
+
+INITIALIZE_PASS(CFGuardLongjmp, "CFGuardLongjmp",
+ "Insert symbols at valid longjmp targets for /guard:cf", false,
+ false)
+FunctionPass *llvm::createCFGuardLongjmpPass() { return new CFGuardLongjmp(); }
+
+bool CFGuardLongjmp::runOnMachineFunction(MachineFunction &MF) {
+
+ // Skip modules for which the cfguard flag is not set.
+ if (!MF.getMMI().getModule()->getModuleFlag("cfguard"))
+ return false;
+
+ // Skip functions that do not have calls to _setjmp.
+ if (!MF.getFunction().callsFunctionThatReturnsTwice())
+ return false;
+
+ SmallVector<MachineInstr *, 8> SetjmpCalls;
+
+ // Iterate over all instructions in the function and add calls to functions
+ // that return twice to the list of targets.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+
+ // Skip instructions that are not calls.
+ if (!MI.isCall() || MI.getNumOperands() < 1)
+ continue;
+
+ // Iterate over operands to find calls to global functions.
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isGlobal())
+ continue;
+
+ auto *F = dyn_cast<Function>(MO.getGlobal());
+ if (!F)
+ continue;
+
+ // If the instruction calls a function that returns twice, add
+ // it to the list of targets.
+ if (F->hasFnAttribute(Attribute::ReturnsTwice)) {
+ SetjmpCalls.push_back(&MI);
+ break;
+ }
+ }
+ }
+ }
+
+ if (SetjmpCalls.empty())
+ return false;
+
+ unsigned SetjmpNum = 0;
+
+ // For each possible target, create a new symbol and insert it immediately
+ // after the call to setjmp. Add this symbol to the MachineFunction's list
+ // of longjmp targets.
+ for (MachineInstr *Setjmp : SetjmpCalls) {
+ SmallString<128> SymbolName;
+ raw_svector_ostream(SymbolName) << "$cfgsj_" << MF.getName() << SetjmpNum++;
+ MCSymbol *SjSymbol = MF.getContext().getOrCreateSymbol(SymbolName);
+
+ Setjmp->setPostInstrSymbol(MF, SjSymbol);
+ MF.addLongjmpTarget(SjSymbol);
+ CFGuardLongjmpTargets++;
+ }
+
+ return true;
+}
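A hypothetical input the pass transforms, assuming the translation unit was built with Control Flow Guard enabled so the "cfguard" module flag is present:

    #include <setjmp.h>

    static jmp_buf Env;

    int guarded(void) {
      // The pass attaches a post-instruction symbol ($cfgsj_guarded0, per
      // the naming scheme above) after this call and records it as a
      // valid longjmp target for the /guard:cf tables.
      if (setjmp(Env))
        return 1;
      return 0;
    }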
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 1a4d54231cfd..ef548c84d3c0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -48,8 +49,7 @@ class CFIInstrInserter : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override {
- if (!MF.getMMI().hasDebugInfo() &&
- !MF.getFunction().needsUnwindTableEntry())
+ if (!MF.needsFrameMoves())
return false;
MBBVector.resize(MF.getNumBlockIDs());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
index ad9525f927e8..20fc67cc66ae 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -22,6 +22,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
+ initializeCFGuardLongjmpPass(Registry);
initializeCFIInstrInserterPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
@@ -104,6 +105,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeTailDuplicatePass(Registry);
initializeTargetPassConfigPass(Registry);
initializeTwoAddressInstructionPassPass(Registry);
+ initializeTypePromotionPass(Registry);
initializeUnpackMachineBundlesPass(Registry);
initializeUnreachableBlockElimLegacyPassPass(Registry);
initializeUnreachableMachineBlockElimPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index fa4432ea23ec..f05afd058746 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -30,7 +30,6 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
@@ -61,6 +60,8 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
@@ -73,6 +74,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
@@ -88,7 +90,9 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -222,6 +226,10 @@ static cl::opt<bool>
cl::init(true),
cl::desc("Enable splitting large offset of GEP."));
+static cl::opt<bool> EnableICMP_EQToICMP_ST(
+ "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
+ cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
+
namespace {
enum ExtType {
@@ -251,6 +259,7 @@ class TypePromotionTransaction;
const LoopInfo *LI;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
+ ProfileSummaryInfo *PSI;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
@@ -293,7 +302,7 @@ class TypePromotionTransaction;
/// Keep track of SExt promoted.
ValueToSExts ValToSExtendedUses;
- /// True if optimizing for size.
+ /// True if the function has the OptSize attribute.
bool OptSize;
/// DataLayout for the Function being processed.
@@ -370,6 +379,7 @@ class TypePromotionTransaction;
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
+ bool fixupDbgValue(Instruction *I);
bool placeDbgValues(Function &F);
bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
@@ -429,10 +439,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.reset(new BranchProbabilityInfo(F, *LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
OptSize = F.hasOptSize();
-
- ProfileSummaryInfo *PSI =
- &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
if (PSI->isFunctionHotInCallGraph(&F, *BFI))
F.setSectionPrefix(".hot");
@@ -451,7 +459,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// bypassSlowDivision may create new BBs, but we don't want to reapply the
// optimization to those blocks.
BasicBlock* Next = BB->getNextNode();
- EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
+ // F.hasOptSize is already checked in the outer if statement.
+ if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
+ EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
BB = Next;
}
}
@@ -1049,7 +1059,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
// Collect all the relocate calls associated with a statepoint
AllRelocateCalls.push_back(Relocate);
- // We need atleast one base pointer relocation + one derived pointer
+ // We need at least one base pointer relocation + one derived pointer
// relocation to mangle
if (AllRelocateCalls.size() < 2)
return false;
@@ -1408,6 +1418,93 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
return MadeChange;
}
+/// For pattern like:
+///
+/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
+/// ...
+/// DomBB:
+/// ...
+/// br DomCond, TrueBB, CmpBB
+/// CmpBB: (with DomBB being the single predecessor)
+/// ...
+/// Cmp = icmp eq CmpOp0, CmpOp1
+/// ...
+///
+/// This requires two comparisons on targets where the lowering of icmp
+/// sgt/slt differs from that of icmp eq (e.g. PowerPC). This function tries
+/// to convert 'Cmp = icmp eq CmpOp0, CmpOp1' into
+/// 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'. After that, DomCond and Cmp can
+/// share the same comparison, saving one compare.
+///
+/// Return true if any changes are made.
+static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
+ const TargetLowering &TLI) {
+ if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
+ return false;
+
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred != ICmpInst::ICMP_EQ)
+ return false;
+
+ // If icmp eq has users other than BranchInst and SelectInst, converting it to
+ // icmp slt/sgt would introduce more redundant LLVM IR.
+ for (User *U : Cmp->users()) {
+ if (isa<BranchInst>(U))
+ continue;
+ if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
+ continue;
+ return false;
+ }
+
+ // This is a cheap/incomplete check for dominance - just match a single
+ // predecessor with a conditional branch.
+ BasicBlock *CmpBB = Cmp->getParent();
+ BasicBlock *DomBB = CmpBB->getSinglePredecessor();
+ if (!DomBB)
+ return false;
+
+ // We want to ensure that the only way control gets to the comparison of
+ // interest is that a less/greater than comparison on the same operands is
+ // false.
+ Value *DomCond;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
+ return false;
+ if (CmpBB != FalseBB)
+ return false;
+
+ Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
+ ICmpInst::Predicate DomPred;
+ if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
+ return false;
+ if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
+ return false;
+
+ // Convert the equality comparison to the opposite of the dominating
+ // comparison and swap the direction for all branch/select users.
+ // We have conceptually converted:
+ // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
+ // to
+ // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
+ // And similarly for branches.
+ for (User *U : Cmp->users()) {
+ if (auto *BI = dyn_cast<BranchInst>(U)) {
+ assert(BI->isConditional() && "Must be conditional");
+ BI->swapSuccessors();
+ continue;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(U)) {
+ // Swap operands
+ SI->swapValues();
+ SI->swapProfMetadata();
+ continue;
+ }
+ llvm_unreachable("Must be a branch or a select");
+ }
+ Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
+ return true;
+}
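Source-level shape of the rewrite (illustrative): the equality test adopts the swapped predicate of the dominating comparison and its users swap arms, so on targets such as PowerPC a single hardware compare can feed both tests.

    int classify(int a, int b, int LT, int EQ, int GT) {
      if (a < b)
        return LT;
      // 'a == b' is rewritten to 'a > b' with the select arms swapped:
      //   before: (a == b) ? EQ : GT
      //   after:  (a > b)  ? GT : EQ
      return (a == b) ? EQ : GT;
    }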
+
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
@@ -1418,6 +1515,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
if (combineToUSubWithOverflow(Cmp, ModifiedDT))
return true;
+ if (foldICmpWithDominatingICmp(Cmp, *TLI))
+ return true;
+
return false;
}
@@ -1842,7 +1942,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// cold block. This interacts with our handling for loads and stores to
// ensure that we can fold all uses of a potential addressing computation
// into their uses. TODO: generalize this to work over profiling data
- if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ bool OptForSize = OptSize || llvm::shouldOptimizeForSize(BB, PSI, BFI.get());
+ if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
for (auto &Arg : CI->arg_operands()) {
if (!Arg->getType()->isPointerTy())
continue;
@@ -1907,6 +2008,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
return despeculateCountZeros(II, TLI, DL, ModifiedDT);
+ case Intrinsic::dbg_value:
+ return fixupDbgValue(II);
}
if (TLI) {
@@ -2777,16 +2880,24 @@ class AddressingModeMatcher {
/// When true, IsProfitableToFoldIntoAddressingMode always returns true.
bool IgnoreProfitability;
+ /// True if we are optimizing for size.
+ bool OptSize;
+
+ ProfileSummaryInfo *PSI;
+ BlockFrequencyInfo *BFI;
+
AddressingModeMatcher(
SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
- std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
+ bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
- PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
+ PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
+ OptSize(OptSize), PSI(PSI), BFI(BFI) {
IgnoreProfitability = false;
}
@@ -2804,12 +2915,14 @@ public:
const TargetLowering &TLI, const TargetRegisterInfo &TRI,
const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT,
- std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
+ bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
ExtAddrMode Result;
bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
MemoryInst, Result, InsertedInsts,
- PromotedInsts, TPT, LargeOffsetGEP)
+ PromotedInsts, TPT, LargeOffsetGEP,
+ OptSize, PSI, BFI)
.matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
@@ -4420,7 +4533,8 @@ static bool FindAllMemoryUses(
Instruction *I,
SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
- const TargetRegisterInfo &TRI, int SeenInsts = 0) {
+ const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, int SeenInsts = 0) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -4429,8 +4543,6 @@ static bool FindAllMemoryUses(
if (!MightBeFoldableInst(I))
return true;
- const bool OptSize = I->getFunction()->hasOptSize();
-
// Loop over all the uses, recursively processing them.
for (Use &U : I->uses()) {
// Conservatively return true if we're seeing a large number or a deep chain
@@ -4471,7 +4583,9 @@ static bool FindAllMemoryUses(
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
// If this is a cold call, we can sink the addressing calculation into
// the cold path. See optimizeCallInst
- if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ bool OptForSize = OptSize ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+ if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
continue;
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
@@ -4483,8 +4597,8 @@ static bool FindAllMemoryUses(
continue;
}
- if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
- SeenInsts))
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+ PSI, BFI, SeenInsts))
return true;
}
@@ -4572,7 +4686,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// the use is just a particularly nice way of sinking it.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+ PSI, BFI))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -4608,7 +4723,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
TPT.getRestorationPoint();
AddressingModeMatcher Matcher(
MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
- InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
+ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.matchAddr(Address, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -4714,7 +4829,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
0);
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
- InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
+ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+ BFI.get());
GetElementPtrInst *GEP = LargeOffsetGEP.first;
if (GEP && !NewGEPBases.count(GEP)) {
@@ -5932,7 +6048,9 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// If branch conversion isn't desirable, exit early.
- if (DisableSelectToBranch || OptSize || !TLI)
+ if (DisableSelectToBranch ||
+ OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()) ||
+ !TLI)
return false;
// Find all consecutive select instructions that share the same condition.
@@ -7110,42 +7228,68 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
return MadeChange;
}
-// llvm.dbg.value is far away from the value then iSel may not be able
-// handle it properly. iSel will drop llvm.dbg.value if it can not
-// find a node corresponding to the value.
+// Some CGP optimizations may move or alter what's computed in a block. Check
+// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
+bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
+ assert(isa<DbgValueInst>(I));
+ DbgValueInst &DVI = *cast<DbgValueInst>(I);
+
+ // Does this dbg.value refer to a sunk address calculation?
+ Value *Location = DVI.getVariableLocation();
+ WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
+ Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+ if (SunkAddr) {
+ // Point dbg.value at locally computed address, which should give the best
+ // opportunity to be accurately lowered. This update may change the type of
+ // pointer being referred to; however this makes no difference to debugging
+ // information, and we can't generate bitcasts that may affect codegen.
+ DVI.setOperand(0, MetadataAsValue::get(DVI.getContext(),
+ ValueAsMetadata::get(SunkAddr)));
+ return true;
+ }
+ return false;
+}
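A source-level sketch of the situation being repaired (hypothetical): when CGP sinks an address computation next to its memory use, a dbg.value still naming the original computation is retargeted at the sunk copy recorded in SunkAddrs.

    void f(int *p, long i) {
      int *q = p + i;  // CGP may re-materialize this address at the load;
      int v = *q;      // the dbg.value for 'q' then points at the sunk copy
      (void)v;
    }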
+
+// A llvm.dbg.value may be using a value before its definition, due to
+// optimizations in this pass and others. Scan for such dbg.values, and rescue
+// them by moving the dbg.value to immediately after the value definition.
+// FIXME: Ideally this should never be necessary, and this has the potential
+// to re-order dbg.value intrinsics.
bool CodeGenPrepare::placeDbgValues(Function &F) {
bool MadeChange = false;
+ DominatorTree DT(F);
+
for (BasicBlock &BB : F) {
- Instruction *PrevNonDbgInst = nullptr;
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
Instruction *Insn = &*BI++;
DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
- // Leave dbg.values that refer to an alloca alone. These
- // intrinsics describe the address of a variable (= the alloca)
- // being taken. They should not be moved next to the alloca
- // (and to the beginning of the scope), but rather stay close to
- // where said address is used.
- if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
- PrevNonDbgInst = Insn;
+ if (!DVI)
continue;
- }
Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
- if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
- // If VI is a phi in a block with an EHPad terminator, we can't insert
- // after it.
- if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
- continue;
- LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
- << *DVI << ' ' << *VI);
- DVI->removeFromParent();
- if (isa<PHINode>(VI))
- DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
- else
- DVI->insertAfter(VI);
- MadeChange = true;
- ++NumDbgValueMoved;
- }
+
+ if (!VI || VI->isTerminator())
+ continue;
+
+ // If VI is a phi in a block with an EHPad terminator, we can't insert
+ // after it.
+ if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+ continue;
+
+ // If the defining instruction dominates the dbg.value, we do not need
+ // to move the dbg.value.
+ if (DT.dominates(VI, DVI))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
+ << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
}
}
return MadeChange;
@@ -7201,6 +7345,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
if (Br1->getMetadata(LLVMContext::MD_unpredictable))
continue;
+ // The merging of mostly-empty blocks can cause a degenerate branch.
+ if (TBB == FBB)
+ continue;
+
unsigned Opc;
Value *Cond1, *Cond2;
if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 702e7e244bce..8d9d48402b31 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -261,15 +261,25 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isRegMask())
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
- if (MO.clobbersPhysReg(i)) {
+ if (MO.isRegMask()) {
+ auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) {
+ for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI)
+ if (!MO.clobbersPhysReg(*SRI))
+ return false;
+
+ return true;
+ };
+
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ if (ClobbersPhysRegAndSubRegs(i)) {
DefIndices[i] = Count;
KillIndices[i] = ~0u;
KeepRegs.reset(i);
Classes[i] = nullptr;
RegRefs.erase(i);
}
+ }
+ }
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
index a169c3cb16b2..afcf014bca40 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -52,68 +52,22 @@ static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
static unsigned InstrCount = 0;
-// --------------------------------------------------------------------
-// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
-
-static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
- return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
-}
-
-/// Return the DFAInput for an instruction class input vector.
-/// This function is used in both DFAPacketizer.cpp and in
-/// DFAPacketizerEmitter.cpp.
-static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
- DFAInput InsnInput = 0;
- assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
- "Exceeded maximum number of DFA terms");
- for (auto U : InsnClass)
- InsnInput = addDFAFuncUnits(InsnInput, U);
- return InsnInput;
-}
-
-// --------------------------------------------------------------------
-
-// Make sure DFA types are large enough for the number of terms & resources.
-static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
- (8 * sizeof(DFAInput)),
- "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
-static_assert(
- (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
- "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
-
-// Return the DFAInput for an instruction class.
-DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
- // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
- DFAInput InsnInput = 0;
- unsigned i = 0;
- (void)i;
- for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
- *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) {
- InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
- assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
- }
- return InsnInput;
-}
-
-// Return the DFAInput for an instruction class input vector.
-DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
- return getDFAInsnInput(InsnClass);
-}
-
// Check if the resources occupied by a MCInstrDesc are available in the
// current state.
bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
- unsigned InsnClass = MID->getSchedClass();
- DFAInput InsnInput = getInsnInput(InsnClass);
- return A.canAdd(InsnInput);
+ unsigned Action = ItinActions[MID->getSchedClass()];
+ if (MID->getSchedClass() == 0 || Action == 0)
+ return false;
+ return A.canAdd(Action);
}
// Reserve the resources occupied by a MCInstrDesc and change the current
// state to reflect that change.
void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
- unsigned InsnClass = MID->getSchedClass();
- DFAInput InsnInput = getInsnInput(InsnClass);
- A.add(InsnInput);
+ unsigned Action = ItinActions[MID->getSchedClass()];
+ if (MID->getSchedClass() == 0 || Action == 0)
+ return;
+ A.add(Action);
}
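Typical call pattern for the two entry points above when greedily filling a packet ('Packetizer' and 'Candidates' are hypothetical). Note the new early-outs: a sched class of 0 or an action of 0 (no itinerary entry) now bypasses the automaton entirely.

    for (MachineInstr *MI : Candidates) {
      const MCInstrDesc &MID = MI->getDesc();
      if (!Packetizer.canReserveResources(&MID))
        break;  // the current packet cannot absorb this instruction class
      Packetizer.reserveResources(&MID);
    }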
// Check if the resources occupied by a machine instruction are available
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 9a537c859a67..d1529b08f708 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -81,6 +82,15 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
return false;
} else {
+ if (MO.isDead()) {
+#ifndef NDEBUG
+ // Sanity check on uses of this dead register. All of them should be
+ // 'undef'.
+ for (auto &U : MRI->use_nodbg_operands(Reg))
+ assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
+#endif
+ continue;
+ }
for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
if (&Use != MI)
// This def has a non-debug use. Don't delete the instruction!
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index ddd6cec5a178..af347fd7e73d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -17,7 +17,6 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -30,9 +29,11 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
index e5694218b5c3..d45e424184d7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -940,6 +941,7 @@ class EarlyIfPredicator : public MachineFunctionPass {
TargetSchedModel SchedModel;
MachineRegisterInfo *MRI;
MachineDominatorTree *DomTree;
+ MachineBranchProbabilityInfo *MBPI;
MachineLoopInfo *Loops;
SSAIfConv IfConv;
@@ -965,10 +967,12 @@ char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID;
INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator",
false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false,
false)
void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -978,6 +982,7 @@ void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {
/// Apply the target heuristic to decide if the transformation is profitable.
bool EarlyIfPredicator::shouldConvertIf() {
+ auto TrueProbability = MBPI->getEdgeProbability(IfConv.Head, IfConv.TBB);
if (IfConv.isTriangle()) {
MachineBasicBlock &IfBlock =
(IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB;
@@ -992,7 +997,7 @@ bool EarlyIfPredicator::shouldConvertIf() {
}
return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost,
- BranchProbability::getUnknown());
+ TrueProbability);
}
unsigned TExtra = 0;
unsigned FExtra = 0;
@@ -1011,8 +1016,7 @@ bool EarlyIfPredicator::shouldConvertIf() {
FExtra += TII->getPredicationCost(I);
}
return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB,
- FCycle, FExtra,
- BranchProbability::getUnknown());
+ FCycle, FExtra, TrueProbability);
}
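With a real edge probability threaded through (previously BranchProbability::getUnknown()), a heavily biased branch can now tip the target hook either way. A small sketch of the comparison this enables, assuming the usual numerator/denominator constructor:

    #include "llvm/Support/BranchProbability.h"
    using llvm::BranchProbability;

    bool biasedTowardTrue(BranchProbability TrueProb) {
      // Meaningless under getUnknown(); decisive for e.g. {97, 100}.
      return TrueProb > BranchProbability(1, 2);
    }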
/// Attempt repeated if-conversion on MBB, return true if successful.
@@ -1043,6 +1047,7 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {
SchedModel.init(&STI);
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
bool Changed = false;
IfConv.runOnMachineFunction(MF);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp
index 486720cadd27..dfaf7f584652 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 9916f2de0414..a1adf4ef9820 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -13,6 +13,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -20,6 +22,8 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
using namespace llvm;
@@ -264,9 +268,9 @@ Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,
uint64_t OffsetBytes) {
if (OffsetBytes > 0) {
auto *ByteType = Type::getInt8Ty(CI->getContext());
- Source = Builder.CreateGEP(
+ Source = Builder.CreateConstGEP1_64(
ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
- ConstantInt::get(ByteType, OffsetBytes));
+ OffsetBytes);
}
return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());
}
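Assuming IRBuilder's documented behavior, the functional difference is the index type: the old call built the offset as an i8 constant (ByteType), which wraps for offsets above 255, while CreateConstGEP1_64 always emits an i64 index. Sketch against the names above, with 'Base' standing in for the bitcast source pointer:

    // Old: index constant typed i8, wraps for OffsetBytes > 255.
    Value *Old = Builder.CreateGEP(
        ByteType, Base, ConstantInt::get(ByteType, OffsetBytes));
    // New: the index is always an i64 constant.
    Value *New = Builder.CreateConstGEP1_64(ByteType, Base, OffsetBytes);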
@@ -720,7 +724,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
/// ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
- const TargetLowering *TLI, const DataLayout *DL) {
+ const TargetLowering *TLI, const DataLayout *DL,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
@@ -741,18 +746,20 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
- auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(),
+ bool OptForSize = CI->getFunction()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+ auto Options = TTI->enableMemCmpExpansion(OptForSize,
IsUsedForZeroCmp);
if (!Options) return false;
if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
- if (CI->getFunction()->hasOptSize() &&
+ if (OptForSize &&
MaxLoadsPerMemcmpOptSize.getNumOccurrences())
Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
- if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences())
+ if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
Options.MaxNumLoads = MaxLoadsPerMemcmp;
MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
@@ -798,7 +805,11 @@ public:
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
const TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto PA = runImpl(F, TLI, TTI, TL);
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
+ auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI);
return !PA.areAllPreserved();
}
@@ -806,22 +817,26 @@ private:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
FunctionPass::getAnalysisUsage(AU);
}
PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const TargetLowering* TL);
+ const TargetLowering* TL,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI);
// Returns true if a change was made.
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, const TargetLowering* TL,
- const DataLayout& DL);
+ const DataLayout& DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI);
};
bool ExpandMemCmpPass::runOnBlock(
BasicBlock &BB, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, const TargetLowering* TL,
- const DataLayout& DL) {
+ const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
for (Instruction& I : BB) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI) {
@@ -830,7 +845,7 @@ bool ExpandMemCmpPass::runOnBlock(
LibFunc Func;
if (TLI->getLibFunc(ImmutableCallSite(CI), Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
- expandMemCmp(CI, TTI, TL, &DL)) {
+ expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) {
return true;
}
}
@@ -840,11 +855,12 @@ bool ExpandMemCmpPass::runOnBlock(
PreservedAnalyses ExpandMemCmpPass::runImpl(
Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
- const TargetLowering* TL) {
+ const TargetLowering* TL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
const DataLayout& DL = F.getParent()->getDataLayout();
bool MadeChanges = false;
for (auto BBIt = F.begin(); BBIt != F.end();) {
- if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) {
+ if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) {
MadeChanges = true;
// If changes were made, restart the function from the beginning, since
// the structure of the function was changed.
@@ -863,6 +879,8 @@ INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
"Expand memcmp() to load/stores", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
"Expand memcmp() to load/stores", false, false)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index 1fc57fac1489..842211c09134 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
index 1069a2423b8b..4ccf1d2c8c50 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -78,14 +79,32 @@ RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
bool Changed = false;
SmallVector<IntrinsicInst *, 4> Worklist;
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- if (auto II = dyn_cast<IntrinsicInst>(&*I))
- Worklist.push_back(II);
+ for (auto &I : instructions(F)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin:
+ if (TTI->shouldExpandReduction(II))
+ Worklist.push_back(II);
- for (auto *II : Worklist) {
- if (!TTI->shouldExpandReduction(II))
- continue;
+ break;
+ }
+ }
+ }
+ for (auto *II : Worklist) {
FastMathFlags FMF =
isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
Intrinsic::ID ID = II->getIntrinsicID();
@@ -96,6 +115,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
Builder.setFastMathFlags(FMF);
switch (ID) {
+ default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::experimental_vector_reduce_v2_fadd:
case Intrinsic::experimental_vector_reduce_v2_fmul: {
// FMFs must be attached to the call, otherwise it's an ordered reduction
@@ -105,11 +125,15 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
if (!FMF.allowReassoc())
Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
else {
+ if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
+ continue;
+
Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
Acc, Rdx, "bin.rdx");
}
- } break;
+ break;
+ }
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
case Intrinsic::experimental_vector_reduce_and:
@@ -122,10 +146,12 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
case Intrinsic::experimental_vector_reduce_fmax:
case Intrinsic::experimental_vector_reduce_fmin: {
Value *Vec = II->getArgOperand(0);
+ if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
+ continue;
+
Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
- } break;
- default:
- continue;
+ break;
+ }
}
II->replaceAllUsesWith(Rdx);
II->eraseFromParent();
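Note: getShuffleReduction lowers a vector reduction by repeatedly folding the high half of the vector onto the low half, which only terminates cleanly when the element count is a power of two; hence the new isPowerOf2_32 guards above. A scalar sketch of that halving strategy, with arrays standing in for vector lanes and assuming an associative operation:

#include <cassert>
#include <cstddef>

// Log2 "shuffle" reduction sketch: fold the upper half onto the lower
// half until one lane remains. This is why the expansion requires a
// power-of-two element count: each step must halve N exactly.
static int reduceAdd(int *Lanes, std::size_t N) {
  assert(N && (N & (N - 1)) == 0 && "power-of-two element count required");
  for (std::size_t Half = N / 2; Half >= 1; Half /= 2)
    for (std::size_t I = 0; I < Half; ++I)
      Lanes[I] += Lanes[I + Half];  // lane I <- lane I op lane I+Half
  return Lanes[0];
}

int main() {
  int V[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  assert(reduceAdd(V, 8) == 36);
  return 0;
}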
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
index a122f490884e..4c0f30bce820 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
index 600f72d320eb..de0b4fa87098 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
@@ -28,11 +28,9 @@ const char *FaultMaps::WFMP = "Fault Maps: ";
FaultMaps::FaultMaps(AsmPrinter &AP) : AP(AP) {}
void FaultMaps::recordFaultingOp(FaultKind FaultTy,
+ const MCSymbol *FaultingLabel,
const MCSymbol *HandlerLabel) {
MCContext &OutContext = AP.OutStreamer->getContext();
- MCSymbol *FaultingLabel = OutContext.createTempSymbol();
-
- AP.OutStreamer->EmitLabel(FaultingLabel);
const MCExpr *FaultingOffset = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(FaultingLabel, OutContext),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
index 772d7f71bb37..00040e92a829 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FuncletLayout.cpp
index 75f6d0b8f0bf..f1222a88b054 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FuncletLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FuncletLayout.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "funclet-layout"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
index c1d22ef89195..600d662e0f99 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
@@ -10,11 +10,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
index 0dc0a5bce747..90e5f32f53b3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 7d9d812d34bc..e6abfcdb92cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -10,11 +10,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "cseinfo"
using namespace llvm;
char llvm::GISelCSEAnalysisWrapperPass::ID = 0;
+GISelCSEAnalysisWrapperPass::GISelCSEAnalysisWrapperPass()
+ : MachineFunctionPass(ID) {
+ initializeGISelCSEAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
+}
INITIALIZE_PASS_BEGIN(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
"Analysis containing CSE Info", false, true)
INITIALIZE_PASS_END(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
@@ -52,7 +57,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_UNMERGE_VALUES:
case TargetOpcode::G_TRUNC:
- case TargetOpcode::G_GEP:
+ case TargetOpcode::G_PTR_ADD:
return true;
}
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index cdad92f7db4f..4c2dbdd905f3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -65,7 +65,11 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
Info.SwiftErrorVReg = SwiftErrorVReg;
Info.IsMustTailCall = CS.isMustTailCall();
Info.IsTailCall = CS.isTailCall() &&
- isInTailCallPosition(CS, MIRBuilder.getMF().getTarget());
+ isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()) &&
+ (MIRBuilder.getMF()
+ .getFunction()
+ .getFnAttribute("disable-tail-calls")
+ .getValueAsString() != "true");
Info.IsVarArg = CS.getFunctionType()->isVarArg();
return lowerCall(MIRBuilder, Info);
}
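Note: the added clause keeps GlobalISel from tail-calling out of functions built with tail calls disabled, mirroring what SelectionDAG already checks. The gate reduces to a string function-attribute lookup; a minimal sketch against the LLVM C++ API, assuming an llvm::Function &F in scope:

#include "llvm/IR/Function.h"

// Mirrors the added gate: a call may only be treated as a tail call if
// the caller does not carry "disable-tail-calls"="true". Sketch only.
static bool tailCallsAllowed(const llvm::Function &F) {
  return F.getFnAttribute("disable-tail-calls").getValueAsString() != "true";
}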
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 854769d283f7..a103e8e4e6e0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -74,12 +74,35 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
return false;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
+
+ // Give up if either DstReg or SrcReg is a physical register.
+ if (Register::isPhysicalRegister(DstReg) ||
+ Register::isPhysicalRegister(SrcReg))
+ return false;
+
+ // Give up if the types are incompatible.
LLT DstTy = MRI.getType(DstReg);
LLT SrcTy = MRI.getType(SrcReg);
- // Simple Copy Propagation.
- // a(sx) = COPY b(sx) -> Replace all uses of a with b.
- if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy)
+ // Give up if one has a valid LLT, but the other doesn't.
+ if (DstTy.isValid() != SrcTy.isValid())
+ return false;
+ // Give up if the types don't match.
+ if (DstTy.isValid() && SrcTy.isValid() && DstTy != SrcTy)
+ return false;
+
+ // Get the register banks and classes.
+ const RegisterBank *DstBank = MRI.getRegBankOrNull(DstReg);
+ const RegisterBank *SrcBank = MRI.getRegBankOrNull(SrcReg);
+ const TargetRegisterClass *DstRC = MRI.getRegClassOrNull(DstReg);
+ const TargetRegisterClass *SrcRC = MRI.getRegClassOrNull(SrcReg);
+
+ // Replace if the register constraints match.
+ if ((SrcRC == DstRC) && (SrcBank == DstBank))
+ return true;
+ // Replace if DstReg has no constraints.
+ if (!DstBank && !DstRC)
return true;
+
return false;
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
@@ -109,10 +132,7 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
// Walk over all the operands of concat vectors and check if they are
// build_vector themselves or undef.
// Then collect their operands in Ops.
- for (const MachineOperand &MO : MI.operands()) {
- // Skip the instruction definition.
- if (MO.isDef())
- continue;
+ for (const MachineOperand &MO : MI.uses()) {
Register Reg = MO.getReg();
MachineInstr *Def = MRI.getVRegDef(Reg);
assert(Def && "Operand not defined");
@@ -121,12 +141,8 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
IsUndef = false;
// Remember the operands of the build_vector to fold
// them into the yet-to-build flattened concat vectors.
- for (const MachineOperand &BuildVecMO : Def->operands()) {
- // Skip the definition.
- if (BuildVecMO.isDef())
- continue;
+ for (const MachineOperand &BuildVecMO : Def->uses())
Ops.push_back(BuildVecMO.getReg());
- }
break;
case TargetOpcode::G_IMPLICIT_DEF: {
LLT OpType = MRI.getType(Reg);
@@ -189,8 +205,11 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
Register Src1 = MI.getOperand(1).getReg();
LLT SrcType = MRI.getType(Src1);
- unsigned DstNumElts = DstType.getNumElements();
- unsigned SrcNumElts = SrcType.getNumElements();
+ // As bizarre as it may look, a shuffle vector can actually produce a
+ // scalar! This is because at the IR level a <1 x ty> shuffle
+ // vector is perfectly valid.
+ unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
+ unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
// If the resulting vector is smaller than the size of the source
// vectors being concatenated, we won't be able to replace the
@@ -199,7 +218,15 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
// Note: We may still be able to produce a concat_vectors fed by
// extract_vector_elt and so on. It is less clear that would
// be better though, so don't bother for now.
- if (DstNumElts < 2 * SrcNumElts)
+ //
+ // If the destination is a scalar, the sizes of the sources don't
+ // matter: we will lower the shuffle to a plain copy. This will
+ // work only if the source and destination have the same size. But
+ // that's covered by the next condition.
+ //
+ // TODO: If the sizes of the source and destination don't match,
+ // we could still emit an extract vector element in that case.
+ if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
return false;
// Check that the shuffle mask can be broken evenly between the
@@ -212,8 +239,7 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
// vectors.
unsigned NumConcat = DstNumElts / SrcNumElts;
SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
- SmallVector<int, 8> Mask;
- ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask);
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
for (unsigned i = 0; i != DstNumElts; ++i) {
int Idx = Mask[i];
// Undef value.
@@ -254,7 +280,10 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
Builder.setInsertPt(*MI.getParent(), MI);
Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
- Builder.buildConcatVectors(NewDstReg, Ops);
+ if (Ops.size() == 1)
+ Builder.buildCopy(NewDstReg, Ops[0]);
+ else
+ Builder.buildMerge(NewDstReg, Ops);
MI.eraseFromParent();
replaceRegWith(MRI, DstReg, NewDstReg);
@@ -571,7 +600,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
for (auto &Use : MRI.use_instructions(Base)) {
- if (Use.getOpcode() != TargetOpcode::G_GEP)
+ if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
continue;
Offset = Use.getOperand(2).getReg();
@@ -597,8 +626,8 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
// forming an indexed one.
bool MemOpDominatesAddrUses = true;
- for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
- if (!dominates(MI, GEPUse)) {
+ for (auto &PtrAddUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
+ if (!dominates(MI, PtrAddUse)) {
MemOpDominatesAddrUses = false;
break;
}
@@ -631,7 +660,7 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
#endif
Addr = MI.getOperand(1).getReg();
- MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
+ MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
if (!AddrDef || MRI.hasOneUse(Addr))
return false;
@@ -667,8 +696,8 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
}
}
- // FIXME: check whether all uses of the base pointer are constant GEPs. That
- // might allow us to end base's liveness here by adjusting the constant.
+ // FIXME: check whether all uses of the base pointer are constant PtrAdds.
+ // That might allow us to end base's liveness here by adjusting the constant.
for (auto &UseMI : MRI.use_instructions(Addr)) {
if (!dominates(MI, UseMI)) {
@@ -681,18 +710,36 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
}
bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
+ IndexedLoadStoreMatchInfo MatchInfo;
+ if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
+ applyCombineIndexedLoadStore(MI, MatchInfo);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
unsigned Opcode = MI.getOpcode();
if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
return false;
- bool IsStore = Opcode == TargetOpcode::G_STORE;
- Register Addr, Base, Offset;
- bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
- if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
+ MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
+ MatchInfo.Offset);
+ if (!MatchInfo.IsPre &&
+ !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
+ MatchInfo.Offset))
return false;
+ return true;
+}
+void CombinerHelper::applyCombineIndexedLoadStore(
+ MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
+ MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
+ MachineIRBuilder MIRBuilder(MI);
+ unsigned Opcode = MI.getOpcode();
+ bool IsStore = Opcode == TargetOpcode::G_STORE;
unsigned NewOpcode;
switch (Opcode) {
case TargetOpcode::G_LOAD:
@@ -711,25 +758,22 @@ bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
llvm_unreachable("Unknown load/store opcode");
}
- MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
- MachineIRBuilder MIRBuilder(MI);
auto MIB = MIRBuilder.buildInstr(NewOpcode);
if (IsStore) {
- MIB.addDef(Addr);
+ MIB.addDef(MatchInfo.Addr);
MIB.addUse(MI.getOperand(0).getReg());
} else {
MIB.addDef(MI.getOperand(0).getReg());
- MIB.addDef(Addr);
+ MIB.addDef(MatchInfo.Addr);
}
- MIB.addUse(Base);
- MIB.addUse(Offset);
- MIB.addImm(IsPre);
+ MIB.addUse(MatchInfo.Base);
+ MIB.addUse(MatchInfo.Offset);
+ MIB.addImm(MatchInfo.IsPre);
MI.eraseFromParent();
AddrDef.eraseFromParent();
LLVM_DEBUG(dbgs() << " Combined to indexed operation");
- return true;
}
bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
@@ -1016,7 +1060,7 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val
if (DstOff != 0) {
auto Offset =
MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
- Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
}
MIB.buildStore(Value, Ptr, *StoreMMO);
@@ -1121,13 +1165,13 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
if (CurrOffset != 0) {
Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
.getReg(0);
- LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
+ LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
}
auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
// Create the store.
Register StorePtr =
- CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
MIB.buildStore(LdVal, StorePtr, *StoreMMO);
CurrOffset += CopyTy.getSizeInBytes();
Size -= CopyTy.getSizeInBytes();
@@ -1218,7 +1262,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
if (CurrOffset != 0) {
auto Offset =
MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
+ LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
}
LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
CurrOffset += CopyTy.getSizeInBytes();
@@ -1235,7 +1279,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
if (CurrOffset != 0) {
auto Offset =
MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
}
MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
CurrOffset += CopyTy.getSizeInBytes();
@@ -1295,6 +1339,52 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
return false;
}
+bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
+ PtrAddChain &MatchInfo) {
+ // We're trying to match the following pattern:
+ // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
+ // %root = G_PTR_ADD %t1, G_CONSTANT imm2
+ // -->
+ // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
+
+ if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
+ return false;
+
+ Register Add2 = MI.getOperand(1).getReg();
+ Register Imm1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
+ if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
+ return false;
+
+ Register Base = Add2Def->getOperand(1).getReg();
+ Register Imm2 = Add2Def->getOperand(2).getReg();
+ auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ if (!MaybeImm2Val)
+ return false;
+
+ // Pass the combined immediate to the apply function.
+ MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
+ MatchInfo.Base = Base;
+ return true;
+}
+
+bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
+ PtrAddChain &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
+ MachineIRBuilder MIB(MI);
+ LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
+ auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(MatchInfo.Base);
+ MI.getOperand(2).setReg(NewOffset.getReg(0));
+ Observer.changedInstr(MI);
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
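Note: matchPtrAddImmedChain/applyPtrAddImmedChain above reassociate a chain of two constant-offset G_PTR_ADDs into one by summing the immediates. The arithmetic identity being exploited, as a host-C++ check with illustrative values:

#include <cassert>
#include <cstdint>

int main() {
  // %t1   = G_PTR_ADD %base, imm1 ; %root = G_PTR_ADD %t1, imm2
  // -->  %root = G_PTR_ADD %base, (imm1 + imm2)
  std::uint64_t Base = 0x1000;
  std::int64_t Imm1 = 16, Imm2 = -4;
  std::uint64_t Chained = (Base + Imm1) + Imm2; // the matched two-add chain
  std::uint64_t Folded = Base + (Imm1 + Imm2);  // MatchInfo.Imm = 12
  assert(Chained == Folded && Folded == 0x100C);
  return 0;
}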
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index be8efa8795f3..64023ecfad82 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -179,8 +179,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known.Zero = KnownZeroOut;
break;
}
- case TargetOpcode::G_GEP: {
- // G_GEP is like G_ADD. FIXME: Is this true for all targets?
+ case TargetOpcode::G_PTR_ADD: {
+ // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
LLT Ty = MRI.getType(MI.getOperand(1).getReg());
if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
break;
@@ -373,6 +373,76 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
<< Known.One.toString(16, false) << "\n");
}
+unsigned GISelKnownBits::computeNumSignBits(Register R,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ MachineInstr &MI = *MRI.getVRegDef(R);
+ unsigned Opcode = MI.getOpcode();
+
+ if (Opcode == TargetOpcode::G_CONSTANT)
+ return MI.getOperand(1).getCImm()->getValue().getNumSignBits();
+
+ if (Depth == getMaxDepth())
+ return 1;
+
+ if (!DemandedElts)
+ return 1; // No demanded elts, better to assume we don't know anything.
+
+ LLT DstTy = MRI.getType(R);
+
+ // Handle the case where this is called on a register that does not have a
+ // type constraint. This is unlikely to occur except by looking through copies
+ // but it is possible for the initial register being queried to be in this
+ // state.
+ if (!DstTy.isValid())
+ return 1;
+
+ switch (Opcode) {
+ case TargetOpcode::COPY: {
+ MachineOperand &Src = MI.getOperand(1);
+ if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
+ MRI.getType(Src.getReg()).isValid()) {
+ // Don't increment Depth for this one since we didn't do any work.
+ return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
+ }
+
+ return 1;
+ }
+ case TargetOpcode::G_SEXT: {
+ Register Src = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src);
+ unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
+ return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
+ }
+ case TargetOpcode::G_TRUNC: {
+ Register Src = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src);
+
+ // Check if the sign bits of source go down as far as the truncated value.
+ unsigned DstTyBits = DstTy.getScalarSizeInBits();
+ unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
+ unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
+ if (NumSrcSignBits > (NumSrcBits - DstTyBits))
+ return NumSrcSignBits - (NumSrcBits - DstTyBits);
+ break;
+ }
+ default:
+ break;
+ }
+
+ // TODO: Handle target instructions
+ // TODO: Fall back to known bits
+ return 1;
+}
+
+unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
+ LLT Ty = MRI.getType(R);
+ APInt DemandedElts = Ty.isVector()
+ ? APInt::getAllOnesValue(Ty.getNumElements())
+ : APInt(1, 1);
+ return computeNumSignBits(R, DemandedElts, Depth);
+}
+
void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
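Note: the new computeNumSignBits follows the same rules as SelectionDAG's: a G_SEXT gains DstBits - SrcBits extra sign bits, and a G_TRUNC keeps whatever sign bits reach down into the narrower type (NumSrcSignBits - (NumSrcBits - DstTyBits) when that is positive). A standalone check of both rules on concrete values, host C++ with two's complement assumed:

#include <cassert>
#include <cstdint>

// Number of redundant leading sign bits of a 32-bit value, counting the
// sign bit itself (so 0 and -1 give 32, as in APInt::getNumSignBits()).
static unsigned numSignBits32(int32_t V) {
  uint32_t U = static_cast<uint32_t>(V);  // unsigned math: fully defined
  unsigned Sign = (U >> 31) & 1;
  unsigned N = 1;
  while (N < 32 && ((U >> (31 - N)) & 1) == Sign)
    ++N;
  return N;
}

int main() {
  int32_t Src = -2;                        // ...11110 -> 31 sign bits
  assert(numSignBits32(Src) == 31);
  // G_SEXT to 64 bits adds DstBits - SrcBits = 32 more sign bits.
  assert(numSignBits32(Src) + 32 == 63);
  // G_TRUNC to 16 bits keeps 31 - (32 - 16) = 15 of them.
  assert(numSignBits32(Src) - (32 - 16) == 15);
  return 0;
}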
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 45cef4aca888..17eca2b0301c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -56,6 +56,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -224,12 +225,12 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
if (FrameIndices.find(&AI) != FrameIndices.end())
return FrameIndices[&AI];
- unsigned ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
- unsigned Size =
+ uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
+ uint64_t Size =
ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
// Always allocate at least one byte.
- Size = std::max(Size, 1u);
+ Size = std::max<uint64_t>(Size, 1u);
unsigned Alignment = AI.getAlignment();
if (!Alignment)
@@ -466,7 +467,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
return true;
}
- SL->findJumpTables(Clusters, &SI, DefaultMBB);
+ SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
LLVM_DEBUG({
dbgs() << "Case clusters: ";
@@ -885,13 +886,15 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
for (unsigned i = 0; i < Regs.size(); ++i) {
Register Addr;
- MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
+ MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
unsigned BaseAlign = getMemOpAlignment(LI);
+ AAMDNodes AAMetadata;
+ LI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8,
- MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), Ranges,
+ MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
LI.getSyncScopeID(), LI.getOrdering());
MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
}
@@ -926,13 +929,15 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
for (unsigned i = 0; i < Vals.size(); ++i) {
Register Addr;
- MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
+ MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
unsigned BaseAlign = getMemOpAlignment(SI);
+ AAMDNodes AAMetadata;
+ SI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8,
- MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+ MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
SI.getSyncScopeID(), SI.getOrdering());
MIRBuilder.buildStore(Vals[i], Addr, *MMO);
}
@@ -1080,8 +1085,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (Offset != 0) {
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
- BaseReg =
- MIRBuilder.buildGEP(PtrTy, BaseReg, OffsetMIB.getReg(0)).getReg(0);
+ BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
+ .getReg(0);
Offset = 0;
}
@@ -1100,14 +1105,14 @@ bool IRTranslator::translateGetElementPtr(const User &U,
} else
GepOffsetReg = IdxReg;
- BaseReg = MIRBuilder.buildGEP(PtrTy, BaseReg, GepOffsetReg).getReg(0);
+ BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0);
}
}
if (Offset != 0) {
auto OffsetMIB =
MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset);
- MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
+ MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
return true;
}
@@ -1251,6 +1256,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FSQRT;
case Intrinsic::trunc:
return TargetOpcode::G_INTRINSIC_TRUNC;
+ case Intrinsic::readcyclecounter:
+ return TargetOpcode::G_READCYCLECOUNTER;
}
return Intrinsic::not_intrinsic;
}
@@ -1412,7 +1419,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
+ TLI.isFMAFasterThanFMulAndFAdd(*MF,
+ TLI.getValueType(*DL, CI.getType()))) {
// TODO: Revisit this to see if we should move this part of the
// lowering to the combiner.
MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2},
@@ -1518,6 +1526,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::sideeffect:
// Discard annotate attributes, assumptions, and artificial side-effects.
return true;
+ case Intrinsic::read_register: {
+ Value *Arg = CI.getArgOperand(0);
+ MIRBuilder.buildInstr(TargetOpcode::G_READ_REGISTER)
+ .addDef(getOrCreateVReg(CI))
+ .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()));
+ return true;
+ }
}
return false;
}
@@ -1587,7 +1602,13 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
const Function *F = CI.getCalledFunction();
// FIXME: support Windows dllimport function calls.
- if (F && F->hasDLLImportStorageClass())
+ if (F && (F->hasDLLImportStorageClass() ||
+ (MF->getTarget().getTargetTriple().isOSWindows() &&
+ F->hasExternalWeakLinkage())))
+ return false;
+
+ // FIXME: support control flow guard targets.
+ if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
return false;
if (CI.isInlineAsm())
@@ -1683,6 +1704,10 @@ bool IRTranslator::translateInvoke(const User &U,
if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
return false;
+ // FIXME: support control flow guard targets.
+ if (I.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
+ return false;
+
// FIXME: support Windows exception handling.
if (!isa<LandingPadInst>(EHPadBB->front()))
return false;
@@ -1908,11 +1933,14 @@ bool IRTranslator::translateExtractElement(const User &U,
bool IRTranslator::translateShuffleVector(const User &U,
MachineIRBuilder &MIRBuilder) {
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(cast<Constant>(U.getOperand(2)), Mask);
+ ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR)
.addDef(getOrCreateVReg(U))
.addUse(getOrCreateVReg(*U.getOperand(0)))
.addUse(getOrCreateVReg(*U.getOperand(1)))
- .addShuffleMask(cast<Constant>(U.getOperand(2)));
+ .addShuffleMask(MaskAlloc);
return true;
}
@@ -1950,11 +1978,14 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
Register Cmp = getOrCreateVReg(*I.getCompareOperand());
Register NewVal = getOrCreateVReg(*I.getNewValOperand());
+ AAMDNodes AAMetadata;
+ I.getAAMetadata(AAMetadata);
+
MIRBuilder.buildAtomicCmpXchgWithSuccess(
OldValRes, SuccessRes, Addr, Cmp, NewVal,
*MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
Flags, DL->getTypeStoreSize(ValType),
- getMemOpAlignment(I), AAMDNodes(), nullptr,
+ getMemOpAlignment(I), AAMetadata, nullptr,
I.getSyncScopeID(), I.getSuccessOrdering(),
I.getFailureOrdering()));
return true;
@@ -2019,12 +2050,15 @@ bool IRTranslator::translateAtomicRMW(const User &U,
break;
}
+ AAMDNodes AAMetadata;
+ I.getAAMetadata(AAMetadata);
+
MIRBuilder.buildAtomicRMW(
Opcode, Res, Addr, Val,
*MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
Flags, DL->getTypeStoreSize(ResType),
- getMemOpAlignment(I), AAMDNodes(), nullptr,
- I.getSyncScopeID(), I.getOrdering()));
+ getMemOpAlignment(I), AAMetadata,
+ nullptr, I.getSyncScopeID(), I.getOrdering()));
return true;
}
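Note: the getOrCreateFrameIndex hunk above widens the alloca size computation to uint64_t because with the old unsigned math, ElementSize * ArraySize could wrap for large static allocas, after which std::max(Size, 1u) would silently allocate a single byte. A minimal demonstration of the wrap, with illustrative sizes:

#include <cstdint>
#include <iostream>

int main() {
  // e.g. alloca i8 with array size 2^32: 1 * 2^32 wraps to 0 in 32-bit
  // arithmetic, and std::max(0u, 1u) then "fixes" it to 1 byte.
  std::uint64_t ElementSize = 1, NumElements = 1ULL << 32;
  unsigned Narrow = static_cast<unsigned>(ElementSize) *
                    static_cast<unsigned>(NumElements);  // wraps to 0
  std::uint64_t Wide = ElementSize * NumElements;        // 4294967296
  std::cout << Narrow << " vs " << Wide << '\n';         // 0 vs 2^32
  return 0;
}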
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 28143b30d4e8..b9c90e69ddb2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -60,7 +60,7 @@ bool InstructionSelector::isBaseWithConstantOffset(
return false;
MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
- if (RootI->getOpcode() != TargetOpcode::G_GEP)
+ if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
MachineOperand &RHS = RootI->getOperand(2);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 1593e21fe07e..e789e4a333dc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
@@ -139,22 +140,13 @@ public:
};
} // namespace
-bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
- // If the ISel pipeline failed, do not bother running that pass.
- if (MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::FailedISel))
- return false;
- LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
- init(MF);
- const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
- GISelCSEAnalysisWrapper &Wrapper =
- getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
- MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
-
- const size_t NumBlocks = MF.size();
+Legalizer::MFResult
+Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
+ ArrayRef<GISelChangeObserver *> AuxObservers,
+ MachineIRBuilder &MIRBuilder) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- // Populate Insts
+ // Populate worklists.
InstListTy InstList;
ArtifactListTy ArtifactList;
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
@@ -177,48 +169,33 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
}
ArtifactList.finalize();
InstList.finalize();
- std::unique_ptr<MachineIRBuilder> MIRBuilder;
- GISelCSEInfo *CSEInfo = nullptr;
- bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences()
- ? EnableCSEInLegalizer
- : TPC.isGISelCSEEnabled();
- if (EnableCSE) {
- MIRBuilder = std::make_unique<CSEMIRBuilder>();
- CSEInfo = &Wrapper.get(TPC.getCSEConfig());
- MIRBuilder->setCSEInfo(CSEInfo);
- } else
- MIRBuilder = std::make_unique<MachineIRBuilder>();
- // This observer keeps the worklist updated.
+ // This observer keeps the worklists updated.
LegalizerWorkListManager WorkListObserver(InstList, ArtifactList);
- // We want both WorkListObserver as well as CSEInfo to observe all changes.
- // Use the wrapper observer.
+ // We want both WorkListObserver as well as all the auxiliary observers (e.g.
+ // CSEInfo) to observe all changes. Use the wrapper observer.
GISelObserverWrapper WrapperObserver(&WorkListObserver);
- if (EnableCSE && CSEInfo)
- WrapperObserver.addObserver(CSEInfo);
+ for (GISelChangeObserver *Observer : AuxObservers)
+ WrapperObserver.addObserver(Observer);
+
// Now install the observer as the delegate to MF.
// This will keep all the observers notified about new insertions/deletions.
RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
- LegalizerHelper Helper(MF, WrapperObserver, *MIRBuilder.get());
- const LegalizerInfo &LInfo(Helper.getLegalizerInfo());
- LegalizationArtifactCombiner ArtCombiner(*MIRBuilder.get(), MF.getRegInfo(),
- LInfo);
+ LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder);
+ LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) {
WrapperObserver.erasingInstr(*DeadMI);
};
- auto stopLegalizing = [&](MachineInstr &MI) {
- Helper.MIRBuilder.stopObservingChanges();
- reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
- "unable to legalize instruction", MI);
- };
bool Changed = false;
SmallVector<MachineInstr *, 128> RetryList;
do {
+ LLVM_DEBUG(dbgs() << "=== New Iteration ===\n");
assert(RetryList.empty() && "Expected no instructions in RetryList");
unsigned NumArtifacts = ArtifactList.size();
while (!InstList.empty()) {
MachineInstr &MI = *InstList.pop_back_val();
- assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
+ assert(isPreISelGenericOpcode(MI.getOpcode()) &&
+ "Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
MI.eraseFromParentAndMarkDBGValuesForRemoval();
@@ -234,11 +211,17 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// legalizing InstList may generate artifacts that allow
// ArtifactCombiner to combine away them.
if (isArtifact(MI)) {
+ LLVM_DEBUG(dbgs() << ".. Not legalized, moving to artifacts retry\n");
+ assert(NumArtifacts == 0 &&
+ "Artifacts are only expected in the instruction list from the "
+ "second iteration onwards, and every iteration after the first "
+ "must start with an empty artifact list");
+ (void)NumArtifacts;
RetryList.push_back(&MI);
continue;
}
- stopLegalizing(MI);
- return false;
+ Helper.MIRBuilder.stopObservingChanges();
+ return {Changed, &MI};
}
WorkListObserver.printNewInstrs();
Changed |= Res == LegalizerHelper::Legalized;
@@ -246,18 +229,19 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// Try to combine the instructions in RetryList again if there
// are new artifacts. If not, stop legalizing.
if (!RetryList.empty()) {
- if (ArtifactList.size() > NumArtifacts) {
+ if (!ArtifactList.empty()) {
while (!RetryList.empty())
ArtifactList.insert(RetryList.pop_back_val());
} else {
- MachineInstr *MI = *RetryList.begin();
- stopLegalizing(*MI);
- return false;
+ LLVM_DEBUG(dbgs() << "No new artifacts created, not retrying!\n");
+ Helper.MIRBuilder.stopObservingChanges();
+ return {Changed, RetryList.front()};
}
}
while (!ArtifactList.empty()) {
MachineInstr &MI = *ArtifactList.pop_back_val();
- assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
+ assert(isPreISelGenericOpcode(MI.getOpcode()) &&
+ "Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
LLVM_DEBUG(dbgs() << MI << "Is dead\n");
RemoveDeadInstFromLists(&MI);
@@ -265,6 +249,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
SmallVector<MachineInstr *, 4> DeadInstructions;
+ LLVM_DEBUG(dbgs() << "Trying to combine: " << MI);
if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions,
WrapperObserver)) {
WorkListObserver.printNewInstrs();
@@ -279,13 +264,58 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// If this was not an artifact (that could be combined away), this might
// need special handling. Add it to InstList, so when it's processed
// there, it has to be legal or specially handled.
- else
+ else {
+ LLVM_DEBUG(dbgs() << ".. Not combined, moving to instructions list\n");
InstList.insert(&MI);
+ }
}
} while (!InstList.empty());
+ return {Changed, /*FailedOn*/ nullptr};
+}
+
+bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running that pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
+ init(MF);
+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
+
+ const size_t NumBlocks = MF.size();
+
+ std::unique_ptr<MachineIRBuilder> MIRBuilder;
+ GISelCSEInfo *CSEInfo = nullptr;
+ bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences()
+ ? EnableCSEInLegalizer
+ : TPC.isGISelCSEEnabled();
+ if (EnableCSE) {
+ MIRBuilder = std::make_unique<CSEMIRBuilder>();
+ CSEInfo = &Wrapper.get(TPC.getCSEConfig());
+ MIRBuilder->setCSEInfo(CSEInfo);
+ } else
+ MIRBuilder = std::make_unique<MachineIRBuilder>();
+
+ SmallVector<GISelChangeObserver *, 1> AuxObservers;
+ if (EnableCSE && CSEInfo) {
+ // We want CSEInfo in addition to WorkListObserver to observe all changes.
+ AuxObservers.push_back(CSEInfo);
+ }
+
+ const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo();
+ MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, *MIRBuilder);
+
+ if (Result.FailedOn) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
+ "unable to legalize instruction", *Result.FailedOn);
+ return false;
+ }
// For now don't support if new blocks are inserted - we would need to fix the
- // outerloop for that.
+ // outer loop for that.
if (MF.size() != NumBlocks) {
MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure",
MF.getFunction().getSubprogram(),
@@ -294,6 +324,5 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
reportGISelFailure(MF, TPC, MORE, R);
return false;
}
-
- return Changed;
+ return Result.Changed;
}
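Note: this refactor pulls the Legalizer's main loop out into legalizeMachineFunction, which reports its outcome as data instead of emitting diagnostics itself; runOnMachineFunction then turns a non-null FailedOn back into a reportGISelFailure call. The shape of that pattern, with simplified stand-in types rather than LLVM's:

// Shape of the refactor: the worker returns what happened, the caller
// decides how to report it. Stand-ins only, not LLVM's actual types.
struct MFResult {
  bool Changed;            // did any instruction get legalized?
  const char *FailedOn;    // instruction we gave up on, or null on success
};

static MFResult legalize(/* worklists, observers, builder */) {
  bool Changed = false;
  // ... pop instructions, try to legalize each, update Changed ...
  return {Changed, /*FailedOn=*/nullptr};
}

static bool runPass() {
  MFResult R = legalize();
  if (R.FailedOn) { /* reportGISelFailure(...) */ return false; }
  return R.Changed;
}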
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 21512e543878..667e1a04dc34 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1075,6 +1075,28 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_BSWAP:
+ case TargetOpcode::G_BITREVERSE: {
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ SmallVector<Register, 2> SrcRegs, DstRegs;
+ unsigned NumParts = SizeOp0 / NarrowSize;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
+ {SrcRegs[NumParts - 1 - i]});
+ DstRegs.push_back(DstPart.getReg(0));
+ }
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+
+ Observer.changedInstr(MI);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
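Note: the narrowScalar case above splits a G_BSWAP/G_BITREVERSE operand into NarrowTy-sized parts, applies the same opcode to each part, and merges the parts back in reverse order (SrcRegs[NumParts - 1 - i]). The identity for a 64-bit byte swap built from two 32-bit halves, checkable on any host:

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) |
         (V << 24);
}

int main() {
  // bswap64(x) == concat(bswap32(lo half), bswap32(hi half)): the parts
  // swap places AND each part is byte-swapped, exactly the
  // "apply op to part NumParts-1-i" loop in the hunk above.
  uint64_t X = 0x0123456789ABCDEFull;
  uint32_t Lo = static_cast<uint32_t>(X);
  uint32_t Hi = static_cast<uint32_t>(X >> 32);
  uint64_t Swapped = (static_cast<uint64_t>(bswap32(Lo)) << 32) | bswap32(Hi);
  assert(Swapped == 0xEFCDAB8967452301ull);
  return 0;
}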
@@ -1675,7 +1697,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_CONSTANT: {
MachineOperand &SrcMO = MI.getOperand(1);
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
- const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
+ unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
+ MRI.getType(MI.getOperand(0).getReg()));
+ assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
+ ExtOpc == TargetOpcode::G_ANYEXT) &&
+ "Illegal Extend");
+ const APInt &SrcVal = SrcMO.getCImm()->getValue();
+ const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
+ ? SrcVal.sext(WideTy.getSizeInBits())
+ : SrcVal.zext(WideTy.getSizeInBits());
Observer.changingInstr(MI);
SrcMO.setCImm(ConstantInt::get(Ctx, Val));
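Note: widening a G_CONSTANT previously always sign-extended the immediate; with this hunk the target picks the extension via getExtOpcodeForWideningConstant and the value is extended to match. The choice is observable in the widened bit pattern; a host-C++ illustration:

#include <cassert>
#include <cstdint>

int main() {
  // The 8-bit constant pattern 0xFF widened to 32 bits:
  int8_t C = -1;                                                  // 0xFF
  uint32_t SExt = static_cast<uint32_t>(static_cast<int32_t>(C)); // G_SEXT
  uint32_t ZExt = static_cast<uint32_t>(static_cast<uint8_t>(C)); // G_ZEXT
  assert(SExt == 0xFFFFFFFFu && ZExt == 0xFFu);
  // A target whose instructions ignore the high bits of a widened
  // constant can pick the cheaper form to materialize; the assert in the
  // hunk above admits G_ZEXT, G_SEXT, and G_ANYEXT.
  return 0;
}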
@@ -1748,8 +1778,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_GEP:
- assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
+ case TargetOpcode::G_PTR_ADD:
+ assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
@@ -1789,10 +1819,35 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
if (TypeIdx != 2)
return UnableToLegalize;
Observer.changingInstr(MI);
+ // TODO: Probably should be zext
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ if (TypeIdx == 1) {
+ Observer.changingInstr(MI);
+
+ Register VecReg = MI.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
+
+ widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideVecTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (TypeIdx == 2) {
+ Observer.changingInstr(MI);
+ // TODO: Probably should be zext
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ }
+
+ return Legalized;
+ }
case TargetOpcode::G_FADD:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FSUB:
@@ -1998,6 +2053,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
case TargetOpcode::G_FMAD:
return lowerFMad(MI);
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ return lowerIntrinsicRound(MI);
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Register OldValRes = MI.getOperand(0).getReg();
Register SuccessRes = MI.getOperand(1).getReg();
@@ -2058,8 +2115,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
auto OffsetCst =
MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
- Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
*SmallMMO);
@@ -2083,7 +2141,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
default:
llvm_unreachable("Unexpected opcode");
case TargetOpcode::G_LOAD:
- MIRBuilder.buildAnyExt(DstReg, TmpReg);
+ MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
break;
case TargetOpcode::G_SEXTLOAD:
MIRBuilder.buildSExt(DstReg, TmpReg);
@@ -2126,12 +2184,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
- // Generate the GEP and truncating stores.
+ // Generate the PtrAdd and truncating stores.
LLT PtrTy = MRI.getType(PtrReg);
auto OffsetCst =
MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
- Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
MachineFunction &MF = MIRBuilder.getMF();
MachineMemOperand *LargeMMO =
@@ -2254,6 +2313,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerExtract(MI);
case G_INSERT:
return lowerInsert(MI);
+ case G_BSWAP:
+ return lowerBswap(MI);
+ case G_BITREVERSE:
+ return lowerBitreverse(MI);
+ case G_READ_REGISTER:
+ return lowerReadRegister(MI);
}
}
@@ -2883,7 +2948,7 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
unsigned ByteOffset = Offset / 8;
Register NewAddrReg;
- MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
+ MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
MachineMemOperand *NewMMO =
MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
@@ -2960,6 +3025,9 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_BSWAP:
case G_BITREVERSE:
case G_SDIV:
+ case G_UDIV:
+ case G_SREM:
+ case G_UREM:
case G_SMIN:
case G_SMAX:
case G_UMIN:
@@ -3259,7 +3327,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_SMIN:
case TargetOpcode::G_SMAX:
case TargetOpcode::G_UMIN:
- case TargetOpcode::G_UMAX: {
+ case TargetOpcode::G_UMAX:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM: {
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 1);
moreElementsVectorSrc(MI, MoreTy, 2);
@@ -3352,7 +3426,7 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
Factors.push_back(Umulh.getReg(0));
}
- // Add CarrySum from additons calculated for previous DstIdx.
+ // Add CarrySum from additions calculated for previous DstIdx.
if (DstIdx != 1) {
Factors.push_back(CarrySumPrevDstIdx);
}
@@ -3824,6 +3898,14 @@ LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
LLT DstTy = MRI.getType(Dst);
LLT SrcTy = MRI.getType(Src);
+ if (SrcTy == LLT::scalar(1)) {
+ auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
+ auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
+ MIRBuilder.buildSelect(Dst, Src, True, False);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
if (SrcTy != LLT::scalar(64))
return UnableToLegalize;
@@ -3849,6 +3931,14 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
const LLT S32 = LLT::scalar(32);
const LLT S1 = LLT::scalar(1);
+ if (SrcTy == S1) {
+ auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
+ auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
+ MIRBuilder.buildSelect(Dst, Src, True, False);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
if (SrcTy != S64)
return UnableToLegalize;
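Note: both s1 special cases above lower an i1-to-float conversion into a select between float constants. G_SITOFP selects -1.0 for true because a 1-bit signed value holding 1 is -1 after sign extension, while G_UITOFP selects 1.0. The equivalent host check:

#include <cassert>

int main() {
  bool B = true;
  // G_UITOFP s1: zero-extend then convert -> 1.0
  assert(static_cast<double>(static_cast<unsigned>(B)) == 1.0);
  // G_SITOFP s1: a 1-bit signed value set to 1 *is* -1 -> -1.0
  signed char SExt = B ? -1 : 0;  // what sign-extending an s1 yields
  assert(static_cast<double>(SExt) == -1.0);
  return 0;
}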
@@ -3910,8 +4000,10 @@ LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
+ const LLT S1 = LLT::scalar(1);
+
MachineInstrBuilder FCMP =
- MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold);
+ MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
MI.eraseFromParent();
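Note: the hunk itself only narrows the compare result to s1, but it sits inside the standard fptoui-via-fptosi lowering visible in the context: convert directly when the input is below 2^(bits-1); otherwise subtract that threshold, convert signed, XOR the top bit back in, and select between the two arms on the compare. A host sketch for double to uint64 (ternary guards are added here because out-of-range float-to-int casts are UB in C++, whereas the emitted IR computes both arms and selects):

#include <cassert>
#include <cstdint>

static uint64_t fptoui64(double Src) {
  const double Threshold = 9223372036854775808.0;  // 2^63
  int64_t FPTOSI = static_cast<int64_t>(Src < Threshold ? Src : 0);
  uint64_t ResLowBits = static_cast<uint64_t>(
      static_cast<int64_t>(Src >= Threshold ? Src - Threshold : 0));
  uint64_t Res = ResLowBits ^ (1ull << 63);  // XOR the top bit back in
  return (Src < Threshold) ? static_cast<uint64_t>(FPTOSI) : Res;
}

int main() {
  assert(fptoui64(1.0) == 1);
  assert(fptoui64(9223372036854775808.0) == (1ull << 63));
  assert(fptoui64(18446744073709549568.0) == 18446744073709549568ull);
  return 0;
}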
@@ -4042,6 +4134,33 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ unsigned Flags = MI.getFlags();
+ LLT Ty = MRI.getType(DstReg);
+ const LLT CondTy = Ty.changeElementSize(1);
+
+ // result = trunc(src);
+ // if (src < 0.0 && src != result)
+ // result += -1.0.
+
+ auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
+ auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
+
+ auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
+ SrcReg, Zero, Flags);
+ auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
+ SrcReg, Trunc, Flags);
+ auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
+ auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
+
+ MIRBuilder.buildFAdd(DstReg, Trunc, AddVal);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
const unsigned NumDst = MI.getNumOperands() - 1;
const Register SrcReg = MI.getOperand(NumDst).getReg();
@@ -4083,10 +4202,7 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
LLT DstTy = MRI.getType(DstReg);
LLT IdxTy = LLT::scalar(32);
- const Constant *ShufMask = MI.getOperand(3).getShuffleMask();
-
- SmallVector<int, 32> Mask;
- ShuffleVectorInst::getShuffleMask(ShufMask, Mask);
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
if (DstTy.isScalar()) {
if (Src0Ty.isVector())
@@ -4151,7 +4267,7 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
// Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
// have to generate an extra instruction to negate the alloc and then use
- // G_GEP to add the negative offset.
+ // G_PTR_ADD to add the negative offset.
auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
if (Align) {
APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
@@ -4275,3 +4391,99 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBswap(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ const LLT Ty = MRI.getType(Src);
+ unsigned SizeInBytes = Ty.getSizeInBytes();
+ unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
+
+ // Swap most and least significant byte, set remaining bytes in Res to zero.
+ auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
+ auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
+ auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
+ auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
+
+ // Set i-th high/low byte in Res to i-th low/high byte from Src.
+ for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
+ // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
+ APInt APMask(SizeInBytes * 8, 0xFFULL << (i * 8));
+ auto Mask = MIRBuilder.buildConstant(Ty, APMask);
+ auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
+ // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
+ auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
+ auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
+ Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
+ // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
+ auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
+ auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
+ Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
+ }
+ Res.getInstr()->getOperand(0).setReg(Dst);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
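
A hedged scalar sketch of this expansion specialized to 32 bits (SizeInBytes
= 4, BaseShiftAmt = 24, and a single loop iteration at i = 1):

    #include <cstdint>
    uint32_t bswapExpansionModel(uint32_t Src) {
      uint32_t Res = (Src << 24) | (Src >> 24); // swap outermost bytes
      uint32_t Mask = 0xFFu << 8;               // selects byte 1
      unsigned ShiftAmt = 24 - 16 * 1;          // 8
      Res |= (Src & Mask) << ShiftAmt;          // byte 1 into byte 2's slot
      Res |= (Src >> ShiftAmt) & Mask;          // byte 2 into byte 1's slot
      return Res;                               // 0x12345678 -> 0x78563412
    }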
+
+// { (Src & Mask) >> N } | { (Src << N) & Mask }
+static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
+ MachineInstrBuilder Src, APInt Mask) {
+ const LLT Ty = Dst.getLLTTy(*B.getMRI());
+ MachineInstrBuilder C_N = B.buildConstant(Ty, N);
+ MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
+ auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
+ auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
+ return B.buildOr(Dst, LHS, RHS);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ const LLT Ty = MRI.getType(Src);
+ unsigned Size = Ty.getSizeInBits();
+
+ MachineInstrBuilder BSWAP =
+ MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
+
+ // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
+ // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
+ // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
+ MachineInstrBuilder Swap4 =
+ SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
+
+ // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
+ // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
+ // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
+ MachineInstrBuilder Swap2 =
+ SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
+
+ // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
+ // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
+ // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
+ SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
+
+ MI.eraseFromParent();
+ return Legalized;
+}
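
The three SwapN rounds after the byte swap are the classic mask-and-shift
bit reversal; a hedged scalar sketch for 32 bits (__builtin_bswap32 stands
in for G_BSWAP and is a GCC/Clang builtin):

    #include <cstdint>
    uint32_t bitreverseModel(uint32_t V) {
      V = __builtin_bswap32(V);                                // G_BSWAP
      V = ((V & 0xF0F0F0F0u) >> 4) | ((V << 4) & 0xF0F0F0F0u); // SwapN(4)
      V = ((V & 0xCCCCCCCCu) >> 2) | ((V << 2) & 0xCCCCCCCCu); // SwapN(2)
      V = ((V & 0xAAAAAAAAu) >> 1) | ((V << 1) & 0xAAAAAAAAu); // SwapN(1)
      return V;
    }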
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerReadRegister(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ const LLT Ty = MRI.getType(Dst);
+ const MDString *RegStr = cast<MDString>(
+ cast<MDNode>(MI.getOperand(1).getMetadata())->getOperand(0));
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetLowering *TLI = STI.getTargetLowering();
+ Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
+ if (!Reg.isValid())
+ return UnableToLegalize;
+
+ MIRBuilder.buildCopy(Dst, Reg);
+ MI.eraseFromParent();
+ return Legalized;
+}
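
This lowering succeeds only if the target recognizes the register name; a
hedged sketch of the hook it consults (MyTargetLowering and MyTarget::SP are
hypothetical):

    #include "llvm/ADT/StringSwitch.h"
    Register
    MyTargetLowering::getRegisterByName(const char *RegName, LLT Ty,
                                        const MachineFunction &MF) const {
      // Returning an invalid (default) Register makes lowerReadRegister
      // bail out with UnableToLegalize.
      return StringSwitch<Register>(RegName)
          .Case("sp", MyTarget::SP)
          .Default(Register());
    }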
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 70045512fae5..02f6b39e0905 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -128,24 +128,26 @@ static bool mutationIsSane(const LegalizeRule &Rule,
switch (Rule.getAction()) {
case FewerElements:
- case MoreElements: {
if (!OldTy.isVector())
return false;
-
+ LLVM_FALLTHROUGH;
+ case MoreElements: {
+ // MoreElements can go from scalar to vector.
+ const unsigned OldElts = OldTy.isVector() ? OldTy.getNumElements() : 1;
if (NewTy.isVector()) {
if (Rule.getAction() == FewerElements) {
// Make sure the element count really decreased.
- if (NewTy.getNumElements() >= OldTy.getNumElements())
+ if (NewTy.getNumElements() >= OldElts)
return false;
} else {
// Make sure the element count really increased.
- if (NewTy.getNumElements() <= OldTy.getNumElements())
+ if (NewTy.getNumElements() <= OldElts)
return false;
}
}
// Make sure the element type didn't change.
- return NewTy.getScalarType() == OldTy.getElementType();
+ return NewTy.getScalarType() == OldTy.getScalarType();
}
case NarrowScalar:
case WidenScalar: {
@@ -685,6 +687,10 @@ bool LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
return true;
}
+unsigned LegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const {
+ return SmallTy.isByteSized() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+}
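
The default widens byte-sized constants with sign-extension and everything
else with zero-extension; a target with different immediate-materialization
costs can override the new hook (MyLegalizerInfo is hypothetical):

    unsigned MyLegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const {
      // Always widen G_CONSTANT by zero-extension on this target.
      return TargetOpcode::G_ZEXT;
    }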
+
/// \pre Type indices of every opcode form a dense set starting from 0.
void LegalizerInfo::verify(const MCInstrInfo &MII) const {
#ifndef NDEBUG
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index f882ecbf5db3..1c4a668e5f31 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -10,9 +10,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Localizer.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "localizer"
@@ -28,7 +29,11 @@ INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
"Move/duplicate certain instructions close to their use",
false, false)
-Localizer::Localizer() : MachineFunctionPass(ID) { }
+Localizer::Localizer(std::function<bool(const MachineFunction &)> F)
+ : MachineFunctionPass(ID), DoNotRunPass(F) {}
+
+Localizer::Localizer()
+ : Localizer([](const MachineFunction &) { return false; }) {}
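
A hedged sketch of how a target's pass configuration might use the new
predicate constructor, e.g. to skip localization for optnone functions (the
predicate shown is illustrative):

    addPass(new Localizer([](const MachineFunction &MF) {
      return MF.getFunction().hasFnAttribute(Attribute::OptimizeNone);
    }));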
void Localizer::init(MachineFunction &MF) {
MRI = &MF.getRegInfo();
@@ -211,6 +216,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
MachineFunctionProperties::Property::FailedISel))
return false;
+ // Don't run the pass if the target asked so.
+ if (DoNotRunPass(MF))
+ return false;
+
LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n');
init(MF);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index df770f6664ca..67d9dacda61b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -219,19 +219,19 @@ void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0,
assert((Res == Op0) && "type mismatch");
}
-MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res,
- const SrcOp &Op0,
- const SrcOp &Op1) {
+MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
assert(Res.getLLTTy(*getMRI()).isPointer() &&
Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type");
- return buildInstr(TargetOpcode::G_GEP, {Res}, {Op0, Op1});
+ return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1});
}
Optional<MachineInstrBuilder>
-MachineIRBuilder::materializeGEP(Register &Res, Register Op0,
- const LLT &ValueTy, uint64_t Value) {
+MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0,
+ const LLT &ValueTy, uint64_t Value) {
assert(Res == 0 && "Res is a result argument");
assert(ValueTy.isScalar() && "invalid offset type");
@@ -242,7 +242,7 @@ MachineIRBuilder::materializeGEP(Register &Res, Register Op0,
Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0));
auto Cst = buildConstant(ValueTy, Value);
- return buildGEP(Res, Op0, Cst.getReg(0));
+ return buildPtrAdd(Res, Op0, Cst.getReg(0));
}
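
A call-site sketch of the renamed API (types and the base register are
illustrative); the builder now emits G_PTR_ADD where G_GEP used to appear:

    LLT P0 = LLT::pointer(0, 64);
    LLT S64 = LLT::scalar(64);
    auto Offset = MIRBuilder.buildConstant(S64, 16);
    auto Addr = MIRBuilder.buildPtrAdd(P0, BasePtrReg, Offset);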
MachineInstrBuilder MachineIRBuilder::buildPtrMask(const DstOp &Res,
@@ -698,8 +698,9 @@ MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
}
MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res,
- const SrcOp &Op) {
- return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op);
+ const SrcOp &Op,
+ Optional<unsigned> Flags) {
+ return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags);
}
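
A call-site sketch for the widened signature (the flag and types are
illustrative):

    LLT S32 = LLT::scalar(32);
    MIRBuilder.buildFPTrunc(S32, SrcReg, MachineInstr::FmNoNans);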
MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index f0e35c65c53b..98e48f5fc1d5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -32,6 +32,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
@@ -118,16 +119,16 @@ bool RegBankSelect::assignmentMatch(
return false;
const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
- const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank;
+ const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
// Reg is free of assignment, a simple assignment will make the
// register bank to match.
OnlyAssign = CurRegBank == nullptr;
LLVM_DEBUG(dbgs() << "Does assignment already match: ";
if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none";
dbgs() << " against ";
- assert(DesiredRegBrank && "The mapping must be valid");
- dbgs() << *DesiredRegBrank << '\n';);
- return CurRegBank == DesiredRegBrank;
+ assert(DesiredRegBank && "The mapping must be valid");
+ dbgs() << *DesiredRegBank << '\n';);
+ return CurRegBank == DesiredRegBank;
}
bool RegBankSelect::repairReg(
@@ -259,11 +260,11 @@ uint64_t RegBankSelect::getRepairCost(
return RBI->getBreakDownCost(ValMapping, CurRegBank);
if (IsSameNumOfValues) {
- const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank;
+ const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
// If we repair a definition, swap the source and destination for
// the repairing.
if (MO.isDef())
- std::swap(CurRegBank, DesiredRegBrank);
+ std::swap(CurRegBank, DesiredRegBank);
// TODO: It may be possible to actually avoid the copy.
// If we repair something where the source is defined by a copy
// and the source of that copy is on the right bank, we can reuse
@@ -275,7 +276,7 @@ uint64_t RegBankSelect::getRepairCost(
// into a new virtual register.
// We would also need to propagate this information in the
// repairing placement.
- unsigned Cost = RBI->copyCost(*DesiredRegBrank, *CurRegBank,
+ unsigned Cost = RBI->copyCost(*DesiredRegBank, *CurRegBank,
RBI->getSizeInBits(MO.getReg(), *MRI, *TRI));
// TODO: use a dedicated constant for ImpossibleCost.
if (Cost != std::numeric_limits<unsigned>::max())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 3fcc55286beb..255ea693b5c4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -82,15 +82,18 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
const RegisterBank *
RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
- if (Register::isPhysicalRegister(Reg))
- return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI));
+ if (Register::isPhysicalRegister(Reg)) {
+ // FIXME: This was probably a copy to a virtual register that does have a
+ // type we could use.
+ return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT());
+ }
assert(Reg && "NoRegister does not have a register bank");
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>())
return RB;
if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
- return &getRegBankFromRegClass(*RC);
+ return &getRegBankFromRegClass(*RC, MRI.getType(Reg));
return nullptr;
}
@@ -108,15 +111,18 @@ RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI) const {
+ const MachineRegisterInfo &MRI) const {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+
// The mapping of the registers may be available via the
// register class constraints.
- const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, &TRI);
+ const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, TRI);
if (!RC)
return nullptr;
- const RegisterBank &RegBank = getRegBankFromRegClass(*RC);
+ Register Reg = MI.getOperand(OpIdx).getReg();
+ const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg));
// Sanity check that the target properly implemented getRegBankFromRegClass.
assert(RegBank.covers(*RC) &&
"The mapping of the register bank does not make sense");
@@ -195,7 +201,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
if (!CurRegBank) {
// If this is a target specific instruction, we can deduce
// the register bank from the encoding constraints.
- CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI);
+ CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, MRI);
if (!CurRegBank) {
// All our attempts failed, give up.
CompleteMapping = false;
@@ -444,7 +450,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
continue;
}
if (!MO.getReg()) {
- LLVM_DEBUG(dbgs() << " is %%noreg, nothing to be done\n");
+ LLVM_DEBUG(dbgs() << " is $noreg, nothing to be done\n");
continue;
}
assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns !=
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 45618d7992ad..eeec2a5d536a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -431,20 +431,3 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1,
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
-
-MVT llvm::getMVTForLLT(LLT Ty) {
- if (!Ty.isVector())
- return MVT::getIntegerVT(Ty.getSizeInBits());
-
- return MVT::getVectorVT(
- MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
- Ty.getNumElements());
-}
-
-LLT llvm::getLLTForMVT(MVT Ty) {
- if (!Ty.isVector())
- return LLT::scalar(Ty.getSizeInBits());
-
- return LLT::vector(Ty.getVectorNumElements(),
- Ty.getVectorElementType().getSizeInBits());
-}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
index d4fa45fcb405..5870e20d4227 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -82,6 +82,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
index 6a0f98d2e2b4..65c2a37e5d43 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -15,25 +15,28 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
-#include "llvm/PassSupport.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
@@ -75,8 +78,44 @@ ForceGuardLoopEntry(
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+#ifndef NDEBUG
+static void debugHWLoopFailure(const StringRef DebugMsg,
+ Instruction *I) {
+ dbgs() << "HWLoops: " << DebugMsg;
+ if (I)
+ dbgs() << ' ' << *I;
+ else
+ dbgs() << '.';
+ dbgs() << '\n';
+}
+#endif
+
+static OptimizationRemarkAnalysis
+createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
+ Value *CodeRegion = L->getHeader();
+ DebugLoc DL = L->getStartLoc();
+
+ if (I) {
+ CodeRegion = I->getParent();
+ // If there is no debug location attached to the instruction, fall back to
+ // using the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ }
+
+ OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
+ R << "hardware-loop not created: ";
+ return R;
+}
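
Usage sketch: the returned remark is completed by streaming the concrete
message into it and handing it to the emitter, which is exactly what the
helper defined next does at each reject site (assuming ORE was obtained from
OptimizationRemarkEmitterWrapperPass):

    ORE->emit(createHWLoopAnalysis("HWLoopNoCandidate", L, nullptr)
              << "loop is not a candidate");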
+
namespace {
+ void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
+ OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
+ LLVM_DEBUG(debugHWLoopFailure(Msg, I));
+ ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
+ }
+
using TTI = TargetTransformInfo;
class HardwareLoops : public FunctionPass {
@@ -97,6 +136,7 @@ namespace {
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
// Try to convert the given Loop into a hardware loop.
@@ -110,6 +150,7 @@ namespace {
ScalarEvolution *SE = nullptr;
LoopInfo *LI = nullptr;
const DataLayout *DL = nullptr;
+ OptimizationRemarkEmitter *ORE = nullptr;
const TargetTransformInfo *TTI = nullptr;
DominatorTree *DT = nullptr;
bool PreserveLCSSA = false;
@@ -143,8 +184,9 @@ namespace {
public:
HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
- const DataLayout &DL) :
- SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()),
+ const DataLayout &DL,
+ OptimizationRemarkEmitter *ORE) :
+ SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
ExitCount(Info.ExitCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
@@ -157,6 +199,7 @@ namespace {
private:
ScalarEvolution &SE;
const DataLayout &DL;
+ OptimizationRemarkEmitter *ORE = nullptr;
Loop *L = nullptr;
Module *M = nullptr;
const SCEV *ExitCount = nullptr;
@@ -182,6 +225,7 @@ bool HardwareLoops::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DL = &F.getParent()->getDataLayout();
+ ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
@@ -201,31 +245,39 @@ bool HardwareLoops::runOnFunction(Function &F) {
// converted and the parent loop doesn't support containing a hardware loop.
bool HardwareLoops::TryConvertLoop(Loop *L) {
// Process nested loops first.
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- if (TryConvertLoop(*I))
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ if (TryConvertLoop(*I)) {
+ reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
+ ORE, L);
return true; // Stop search.
+ }
+ }
HardwareLoopInfo HWLoopInfo(L);
- if (!HWLoopInfo.canAnalyze(*LI))
+ if (!HWLoopInfo.canAnalyze(*LI)) {
+ reportHWLoopFailure("cannot analyze loop, irreducible control flow",
+ "HWLoopCannotAnalyze", ORE, L);
return false;
+ }
- if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) ||
- ForceHardwareLoops) {
-
- // Allow overriding of the counter width and loop decrement value.
- if (CounterBitWidth.getNumOccurrences())
- HWLoopInfo.CountType =
- IntegerType::get(M->getContext(), CounterBitWidth);
+ if (!ForceHardwareLoops &&
+ !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
+ reportHWLoopFailure("it's not profitable to create a hardware-loop",
+ "HWLoopNotProfitable", ORE, L);
+ return false;
+ }
- if (LoopDecrement.getNumOccurrences())
- HWLoopInfo.LoopDecrement =
- ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
+ // Allow overriding of the counter width and loop decrement value.
+ if (CounterBitWidth.getNumOccurrences())
+ HWLoopInfo.CountType =
+ IntegerType::get(M->getContext(), CounterBitWidth);
- MadeChange |= TryConvertLoop(HWLoopInfo);
- return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
- }
+ if (LoopDecrement.getNumOccurrences())
+ HWLoopInfo.LoopDecrement =
+ ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
- return false;
+ MadeChange |= TryConvertLoop(HWLoopInfo);
+ return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
}
bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
@@ -234,8 +286,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
- ForceHardwareLoopPHI))
+ ForceHardwareLoopPHI)) {
+ // TODO: there can be many reasons a loop is not considered a
+ // candidate, so we should let isHardwareLoopCandidate fill in the
+ // reason and then report a better message here.
+ reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
return false;
+ }
assert(
(HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
@@ -249,7 +306,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
if (!Preheader)
return false;
- HardwareLoop HWLoop(HWLoopInfo, *SE, *DL);
+ HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
HWLoop.Create();
++NumHWLoops;
return true;
@@ -257,10 +314,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
void HardwareLoop::Create() {
LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
-
+
Value *LoopCountInit = InitLoopCount();
- if (!LoopCountInit)
+ if (!LoopCountInit) {
+ reportHWLoopFailure("could not safely create a loop count expression",
+ "HWLoopNotSafe", ORE, L);
return;
+ }
InsertIterationSetup(LoopCountInit);
@@ -458,6 +518,7 @@ INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
index d9caa5660695..7d64828aa482 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -35,7 +36,9 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
@@ -211,6 +214,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -432,6 +436,7 @@ char &llvm::IfConverterID = IfConverter::ID;
INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
@@ -444,6 +449,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TRI = ST.getRegisterInfo();
BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ ProfileSummaryInfo *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
MRI = &MF.getRegInfo();
SchedModel.init(&ST);
@@ -454,7 +461,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
bool BFChange = false;
if (!PreRegAlloc) {
// Tail merge tend to expose more if-conversion opportunities.
- BranchFolder BF(true, false, MBFI, *MBPI);
+ BranchFolder BF(true, false, MBFI, *MBPI, PSI);
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
BFChange = BF.OptimizeFunction(
MF, TII, ST.getRegisterInfo(),
@@ -596,7 +603,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
BBAnalysis.clear();
if (MadeChange && IfCvtBranchFold) {
- BranchFolder BF(false, false, MBFI, *MBPI);
+ BranchFolder BF(false, false, MBFI, *MBPI, PSI);
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
BF.OptimizeFunction(
MF, TII, MF.getSubtarget().getRegisterInfo(),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index b7dcaec90106..0bbedb0a5ea6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -50,6 +50,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -371,7 +372,7 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
// We want the mem access to be issued at a sane offset from PointerReg,
// so that if PointerReg is null then the access reliably page faults.
- if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() &&
+ if (!(MI.mayLoadOrStore() && !MI.isPredicable() &&
-PageSize < Offset && Offset < PageSize))
return SR_Unsuitable;
@@ -697,7 +698,7 @@ void ImplicitNullChecks::rewriteNullChecks(
if (auto *DepMI = NC.getOnlyDependency()) {
for (auto &MO : DepMI->operands()) {
- if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef() || MO.isDead())
continue;
if (!NC.getNotNullSucc()->isLiveIn(MO.getReg()))
NC.getNotNullSucc()->addLiveIn(MO.getReg());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
index 7ac093ba4a71..4473a139d3ad 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index 2408f18678e4..ed3e159ac566 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -534,7 +534,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg,
// may have more remats than physregs, we're guaranteed to fail to assign
// one.
// At the moment, we only handle this for STATEPOINTs since they're the only
- // psuedo op where we've seen this. If we start seeing other instructions
+ // pseudo op where we've seen this. If we start seeing other instructions
// with the same problem, we need to revisit this.
return (MI.getOpcode() != TargetOpcode::STATEPOINT);
}
@@ -543,8 +543,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg,
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Analyze instruction
SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
- MIBundleOperands::VirtRegInfo RI =
- MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+ VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg, &Ops);
if (!RI.Reads)
return false;
@@ -782,7 +781,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
/// foldMemoryOperand - Try folding stack slot references in Ops into their
/// instructions.
///
-/// @param Ops Operand indices from analyzeVirtReg().
+/// @param Ops Operand indices from AnalyzeVirtRegInBundle().
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
/// @return True on success.
bool InlineSpiller::
@@ -851,8 +850,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
// Skip non-Defs, including undef uses and internal reads.
if (MO->isUse())
continue;
- MIBundleOperands::PhysRegInfo RI =
- MIBundleOperands(*FoldMI).analyzePhysReg(Reg, &TRI);
+ PhysRegInfo RI = AnalyzePhysRegInBundle(*FoldMI, Reg, &TRI);
if (RI.FullyDefined)
continue;
// FoldMI does not define this physreg. Remove the LI segment.
@@ -992,8 +990,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Analyze instruction.
SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
- MIBundleOperands::VirtRegInfo RI =
- MIBundleOperands(*MI).analyzeVirtReg(Reg, &Ops);
+ VirtRegInfo RI = AnalyzeVirtRegInBundle(*MI, Reg, &Ops);
// Find the slot index where this instruction reads and writes OldLI.
// This is usually the def slot, except for tied early clobbers.
@@ -1430,7 +1427,7 @@ void HoistSpillHelper::runHoistSpills(
}
// For spills in SpillsToKeep with LiveReg set (i.e., not original spill),
// save them to SpillsToIns.
- for (const auto Ent : SpillsToKeep) {
+ for (const auto &Ent : SpillsToKeep) {
if (Ent.second)
SpillsToIns[Ent.first->getBlock()] = Ent.second;
}
@@ -1489,7 +1486,7 @@ void HoistSpillHelper::hoistAllSpills() {
LLVM_DEBUG({
dbgs() << "Finally inserted spills in BB: ";
- for (const auto Ispill : SpillsToIns)
+ for (const auto &Ispill : SpillsToIns)
dbgs() << Ispill.first->getNumber() << " ";
dbgs() << "\nFinally removed spills in BB: ";
for (const auto Rspill : SpillsToRm)
@@ -1504,7 +1501,7 @@ void HoistSpillHelper::hoistAllSpills() {
StackIntvl.getValNumInfo(0));
// Insert hoisted spills.
- for (auto const Insert : SpillsToIns) {
+ for (auto const &Insert : SpillsToIns) {
MachineBasicBlock *BB = Insert.first;
unsigned LiveReg = Insert.second;
MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 14bc560a561c..1f9b436378d2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -58,6 +58,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 770c4952d169..42691b8a6154 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -34,6 +34,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -1167,7 +1168,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
// If there are users outside the set to be eliminated, we abort the
// transformation. No gain can be expected.
- for (const auto &U : I->users()) {
+ for (auto *U : I->users()) {
if (Is.find(dyn_cast<Instruction>(U)) == Is.end())
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 8cbd8bcaeabb..4461a235d6c1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -50,14 +50,6 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
return NewCI;
}
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
/// Emit the code to lower bswap of V before the specified instruction IP.
static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!");
@@ -254,34 +246,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
- // The setjmp/longjmp intrinsics should only exist in the code if it was
- // never optimized (ie, right out of the CFE), or if it has been hacked on
- // by the lowerinvoke pass. In both cases, the right thing to do is to
- // convert the call to an explicit setjmp or longjmp call.
- case Intrinsic::setjmp: {
- Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(),
- Type::getInt32Ty(Context));
- if (!CI->getType()->isVoidTy())
- CI->replaceAllUsesWith(V);
- break;
- }
- case Intrinsic::sigsetjmp:
- if (!CI->getType()->isVoidTy())
- CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
- break;
-
- case Intrinsic::longjmp: {
- ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(),
- Type::getVoidTy(Context));
- break;
- }
-
- case Intrinsic::siglongjmp: {
- // Insert the call to abort
- ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
- Type::getVoidTy(Context));
- break;
- }
case Intrinsic::ctpop:
CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
break;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 1c362aec6e67..50c178ff7598 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -48,8 +48,8 @@ void LLVMTargetMachine::initAsmInfo() {
STI.reset(TheTarget.createMCSubtargetInfo(
getTargetTriple().str(), getTargetCPU(), getTargetFeatureString()));
- MCAsmInfo *TmpAsmInfo =
- TheTarget.createMCAsmInfo(*MRI, getTargetTriple().str());
+ MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(
+ *MRI, getTargetTriple().str(), Options.MCOptions);
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
// and if the old one gets included then MCAsmInfo will be NULL and
// we'll crash later.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index cef5085ae079..63a0d0c1c43e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -14,6 +14,7 @@
///===---------------------------------------------------------------------===//
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp
index f1b237d83e8c..2226c10b49a4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -57,6 +57,7 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -89,8 +90,28 @@ static Register isDbgValueDescribedByReg(const MachineInstr &MI) {
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register();
}
+/// Return true if \p Op is a register other than the stack pointer or the
+/// frame register; otherwise return false. This is used to avoid basing the
+/// debug entry values on the stack or frame registers, since we do not
+/// support that at the moment.
+static bool isRegOtherThanSPAndFP(const MachineOperand &Op,
+ const MachineInstr &MI,
+ const TargetRegisterInfo *TRI) {
+ if (!Op.isReg())
+ return false;
+
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register FP = TRI->getFrameRegister(*MF);
+ Register Reg = Op.getReg();
+
+ return Reg && Reg != SP && Reg != FP;
+}
+
namespace {
+using DefinedRegsSet = SmallSet<Register, 32>;
+
class LiveDebugValues : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
@@ -123,60 +144,6 @@ private:
using FragmentInfo = DIExpression::FragmentInfo;
using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
- /// Storage for identifying a potentially inlined instance of a variable,
- /// or a fragment thereof.
- class DebugVariable {
- const DILocalVariable *Variable;
- OptFragmentInfo Fragment;
- const DILocation *InlinedAt;
-
- /// Fragment that will overlap all other fragments. Used as default when
- /// caller demands a fragment.
- static const FragmentInfo DefaultFragment;
-
- public:
- DebugVariable(const DILocalVariable *Var, OptFragmentInfo &&FragmentInfo,
- const DILocation *InlinedAt)
- : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {}
-
- DebugVariable(const DILocalVariable *Var, OptFragmentInfo &FragmentInfo,
- const DILocation *InlinedAt)
- : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {}
-
- DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr,
- const DILocation *InlinedAt)
- : DebugVariable(Var, DIExpr->getFragmentInfo(), InlinedAt) {}
-
- DebugVariable(const MachineInstr &MI)
- : DebugVariable(MI.getDebugVariable(),
- MI.getDebugExpression()->getFragmentInfo(),
- MI.getDebugLoc()->getInlinedAt()) {}
-
- const DILocalVariable *getVar() const { return Variable; }
- const OptFragmentInfo &getFragment() const { return Fragment; }
- const DILocation *getInlinedAt() const { return InlinedAt; }
-
- const FragmentInfo getFragmentDefault() const {
- return Fragment.getValueOr(DefaultFragment);
- }
-
- static bool isFragmentDefault(FragmentInfo &F) {
- return F == DefaultFragment;
- }
-
- bool operator==(const DebugVariable &Other) const {
- return std::tie(Variable, Fragment, InlinedAt) ==
- std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
- }
-
- bool operator<(const DebugVariable &Other) const {
- return std::tie(Variable, Fragment, InlinedAt) <
- std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
- }
- };
-
- friend struct llvm::DenseMapInfo<DebugVariable>;
-
/// A pair of debug variable and value location.
struct VarLoc {
// The location at which a spilled variable resides. It consists of a
@@ -205,7 +172,9 @@ private:
RegisterKind,
SpillLocKind,
ImmediateKind,
- EntryValueKind
+ EntryValueKind,
+ EntryValueBackupKind,
+ EntryValueCopyBackupKind
} Kind = InvalidKind;
/// The value location. Stored separately to avoid repeatedly
@@ -220,14 +189,15 @@ private:
} Loc;
VarLoc(const MachineInstr &MI, LexicalScopes &LS)
- : Var(MI), Expr(MI.getDebugExpression()), MI(MI),
- UVS(MI.getDebugLoc(), LS) {
+ : Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt()),
+ Expr(MI.getDebugExpression()), MI(MI), UVS(MI.getDebugLoc(), LS) {
static_assert((sizeof(Loc) == sizeof(uint64_t)),
"hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
if (int RegNo = isDbgValueDescribedByReg(MI)) {
- Kind = MI.isDebugEntryValue() ? EntryValueKind : RegisterKind;
+ Kind = RegisterKind;
Loc.RegNo = RegNo;
} else if (MI.getOperand(0).isImm()) {
Kind = ImmediateKind;
@@ -239,17 +209,50 @@ private:
Kind = ImmediateKind;
Loc.CImm = MI.getOperand(0).getCImm();
}
- assert((Kind != ImmediateKind || !MI.isDebugEntryValue()) &&
- "entry values must be register locations");
+
+ // We create the debug entry values from the factory functions rather than
+ // from this ctor.
+ assert(Kind != EntryValueKind && !isEntryBackupLoc());
}
/// Take the variable and machine-location in DBG_VALUE MI, and build an
/// entry location using the given expression.
static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS,
- const DIExpression *EntryExpr) {
+ const DIExpression *EntryExpr, unsigned Reg) {
VarLoc VL(MI, LS);
+ assert(VL.Kind == RegisterKind);
VL.Kind = EntryValueKind;
VL.Expr = EntryExpr;
+ VL.Loc.RegNo = Reg;
+ return VL;
+ }
+
+ /// Take the variable and machine-location from the DBG_VALUE (from the
+ /// function entry), and build an entry value backup location. The backup
+ /// location will turn into the normal location if it is still valid when
+ /// the primary location is clobbered.
+ static VarLoc CreateEntryBackupLoc(const MachineInstr &MI,
+ LexicalScopes &LS,
+ const DIExpression *EntryExpr) {
+ VarLoc VL(MI, LS);
+ assert(VL.Kind == RegisterKind);
+ VL.Kind = EntryValueBackupKind;
+ VL.Expr = EntryExpr;
+ return VL;
+ }
+
+ /// Take the variable and machine-location from the DBG_VALUE (from the
+ /// function entry), and build a copy of an entry value backup location by
+ /// setting the register location to NewReg.
+ static VarLoc CreateEntryCopyBackupLoc(const MachineInstr &MI,
+ LexicalScopes &LS,
+ const DIExpression *EntryExpr,
+ unsigned NewReg) {
+ VarLoc VL(MI, LS);
+ assert(VL.Kind == RegisterKind);
+ VL.Kind = EntryValueCopyBackupKind;
+ VL.Expr = EntryExpr;
+ VL.Loc.RegNo = NewReg;
return VL;
}
@@ -288,8 +291,11 @@ private:
switch (Kind) {
case EntryValueKind:
// An entry value is a register location -- but with an updated
- // expression.
- return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, Expr);
+ // expression. The register location of such a DBG_VALUE is always the one
+ // from the entry DBG_VALUE; it does not matter if the entry value was
+ // copied into another register by some optimization.
+ return BuildMI(MF, DbgLoc, IID, Indirect, MI.getOperand(0).getReg(),
+ Var, Expr);
case RegisterKind:
// Register locations are like the source DBG_VALUE, but with the
// register number from this VarLoc.
@@ -308,8 +314,11 @@ private:
MachineOperand MO = MI.getOperand(0);
return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr);
}
+ case EntryValueBackupKind:
+ case EntryValueCopyBackupKind:
case InvalidKind:
- llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc");
+ llvm_unreachable(
+ "Tried to produce DBG_VALUE for invalid or backup VarLoc");
}
llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum");
}
@@ -317,6 +326,27 @@ private:
/// Is the Loc field a constant or constant object?
bool isConstant() const { return Kind == ImmediateKind; }
+ /// Check if the Loc field is an entry backup location.
+ bool isEntryBackupLoc() const {
+ return Kind == EntryValueBackupKind || Kind == EntryValueCopyBackupKind;
+ }
+
+ /// If this variable is described by a register holding the entry value,
+ /// return it, otherwise return 0.
+ unsigned getEntryValueBackupReg() const {
+ if (Kind == EntryValueBackupKind)
+ return Loc.RegNo;
+ return 0;
+ }
+
+ /// If this variable is described by a register holding the copy of the
+ /// entry value, return it, otherwise return 0.
+ unsigned getEntryValueCopyBackupReg() const {
+ if (Kind == EntryValueCopyBackupKind)
+ return Loc.RegNo;
+ return 0;
+ }
+
/// If this variable is described by a register, return it,
/// otherwise return 0.
unsigned isDescribedByReg() const {
@@ -336,6 +366,8 @@ private:
switch (Kind) {
case RegisterKind:
case EntryValueKind:
+ case EntryValueBackupKind:
+ case EntryValueCopyBackupKind:
dbgs() << printReg(Loc.RegNo, TRI);
break;
case SpillLocKind:
@@ -349,11 +381,17 @@ private:
llvm_unreachable("Invalid VarLoc in dump method");
}
- dbgs() << ", \"" << Var.getVar()->getName() << "\", " << *Expr << ", ";
+ dbgs() << ", \"" << Var.getVariable()->getName() << "\", " << *Expr
+ << ", ";
if (Var.getInlinedAt())
dbgs() << "!" << Var.getInlinedAt()->getMetadataID() << ")\n";
else
- dbgs() << "(null))\n";
+ dbgs() << "(null))";
+
+ if (isEntryBackupLoc())
+ dbgs() << " (backup loc)\n";
+ else
+ dbgs() << "\n";
}
#endif
@@ -369,7 +407,6 @@ private:
}
};
- using DebugParamMap = SmallDenseMap<const DILocalVariable *, MachineInstr *>;
using VarLocMap = UniqueVector<VarLoc>;
using VarLocSet = SparseBitVector<>;
using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
@@ -395,10 +432,18 @@ private:
/// This holds the working set of currently open ranges. For fast
/// access, this is done both as a set of VarLocIDs, and a map of
/// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all
- /// previous open ranges for the same variable.
+ /// previous open ranges for the same variable. In addition, we keep
+ /// two different maps (Vars/EntryValuesBackupVars), so the erase/insert
+ /// methods act differently depending on whether a VarLoc is a primary
+ /// location or a backup one. If the VarLoc is a backup location, we
+ /// erase/insert from the EntryValuesBackupVars map; otherwise we perform
+ /// the operation on Vars.
class OpenRangesSet {
VarLocSet VarLocs;
+ // Map the DebugVariable to recent primary location ID.
SmallDenseMap<DebugVariable, unsigned, 8> Vars;
+ // Map the DebugVariable to recent backup location ID.
+ SmallDenseMap<DebugVariable, unsigned, 8> EntryValuesBackupVars;
OverlapMap &OverlappingFragments;
public:
@@ -406,40 +451,38 @@ private:
const VarLocSet &getVarLocs() const { return VarLocs; }
- /// Terminate all open ranges for Var by removing it from the set.
- void erase(DebugVariable Var);
+ /// Terminate all open ranges for VL.Var by removing it from the set.
+ void erase(const VarLoc &VL);
/// Terminate all open ranges listed in \c KillSet by removing
/// them from the set.
- void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs) {
- VarLocs.intersectWithComplement(KillSet);
- for (unsigned ID : KillSet)
- Vars.erase(VarLocIDs[ID].Var);
- }
+ void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs);
/// Insert a new range into the set.
- void insert(unsigned VarLocID, DebugVariable Var) {
- VarLocs.set(VarLocID);
- Vars.insert({Var, VarLocID});
- }
+ void insert(unsigned VarLocID, const VarLoc &VL);
/// Insert a set of ranges.
void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) {
for (unsigned Id : ToLoad) {
- const VarLoc &Var = Map[Id];
- insert(Id, Var.Var);
+ const VarLoc &VarL = Map[Id];
+ insert(Id, VarL);
}
}
+ llvm::Optional<unsigned> getEntryValueBackup(DebugVariable Var);
+
/// Empty the set.
void clear() {
VarLocs.clear();
Vars.clear();
+ EntryValuesBackupVars.clear();
}
/// Return whether the set is empty or not.
bool empty() const {
- assert(Vars.empty() == VarLocs.empty() && "open ranges are inconsistent");
+ assert(Vars.empty() == EntryValuesBackupVars.empty() &&
+ Vars.empty() == VarLocs.empty() &&
+ "open ranges are inconsistent");
return VarLocs.empty();
}
};
@@ -456,6 +499,14 @@ private:
bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
unsigned &Reg);
+ /// Returns true if the given machine instruction is a debug value which we
+ /// can emit entry values for.
+ ///
+ /// Currently, we generate debug entry values only for parameters that are
+ /// unmodified throughout the function and located in a register.
+ bool isEntryValueCandidate(const MachineInstr &MI,
+ const DefinedRegsSet &Regs) const;
+
/// If a given instruction is identified as a spill, return the spill location
/// and set \p Reg to the spilled register.
Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI,
@@ -473,23 +524,23 @@ private:
VarLocMap &VarLocIDs);
void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
+ bool removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, const VarLoc &EntryVL);
void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers,
- DebugParamMap &DebugEntryVals,
SparseBitVector<> &KillSet);
+ void recordEntryValue(const MachineInstr &MI,
+ const DefinedRegsSet &DefinedRegs,
+ OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs);
void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers,
- DebugParamMap &DebugEntryVals);
+ VarLocMap &VarLocIDs, TransferMap &Transfers);
bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
void process(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
- TransferMap &Transfers, DebugParamMap &DebugEntryVals,
- OverlapMap &OverlapFragments,
- VarToFragments &SeenFragments);
+ VarLocMap &VarLocIDs, TransferMap &Transfers);
void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,
OverlapMap &OLapMap);
@@ -532,46 +583,10 @@ public:
} // end anonymous namespace
-namespace llvm {
-
-template <> struct DenseMapInfo<LiveDebugValues::DebugVariable> {
- using DV = LiveDebugValues::DebugVariable;
- using OptFragmentInfo = LiveDebugValues::OptFragmentInfo;
- using FragmentInfo = LiveDebugValues::FragmentInfo;
-
- // Empty key: no key should be generated that has no DILocalVariable.
- static inline DV getEmptyKey() {
- return DV(nullptr, OptFragmentInfo(), nullptr);
- }
-
- // Difference in tombstone is that the Optional is meaningful
- static inline DV getTombstoneKey() {
- return DV(nullptr, OptFragmentInfo({0, 0}), nullptr);
- }
-
- static unsigned getHashValue(const DV &D) {
- unsigned HV = 0;
- const OptFragmentInfo &Fragment = D.getFragment();
- if (Fragment)
- HV = DenseMapInfo<FragmentInfo>::getHashValue(*Fragment);
-
- return hash_combine(D.getVar(), HV, D.getInlinedAt());
- }
-
- static bool isEqual(const DV &A, const DV &B) { return A == B; }
-};
-
-} // namespace llvm
-
//===----------------------------------------------------------------------===//
// Implementation
//===----------------------------------------------------------------------===//
-const DIExpression::FragmentInfo
- LiveDebugValues::DebugVariable::DefaultFragment = {
- std::numeric_limits<uint64_t>::max(),
- std::numeric_limits<uint64_t>::min()};
-
char LiveDebugValues::ID = 0;
char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
@@ -592,38 +607,72 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
}
/// Erase a variable from the set of open ranges, and additionally erase any
-/// fragments that may overlap it.
-void LiveDebugValues::OpenRangesSet::erase(DebugVariable Var) {
+/// fragments that may overlap it. If the VarLoc is a backup location, erase
+/// the variable from the EntryValuesBackupVars set, indicating we should stop
+/// tracking its backup entry location. Otherwise, if the VarLoc is a primary
+/// location, erase the variable from the Vars set.
+void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) {
// Erasure helper.
- auto DoErase = [this](DebugVariable VarToErase) {
- auto It = Vars.find(VarToErase);
- if (It != Vars.end()) {
+ auto DoErase = [VL, this](DebugVariable VarToErase) {
+ auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
+ auto It = EraseFrom->find(VarToErase);
+ if (It != EraseFrom->end()) {
unsigned ID = It->second;
VarLocs.reset(ID);
- Vars.erase(It);
+ EraseFrom->erase(It);
}
};
+ DebugVariable Var = VL.Var;
+
// Erase the variable/fragment that ends here.
DoErase(Var);
// Extract the fragment. Interpret an empty fragment as one that covers all
// possible bits.
- FragmentInfo ThisFragment = Var.getFragmentDefault();
+ FragmentInfo ThisFragment = Var.getFragmentOrDefault();
// There may be fragments that overlap the designated fragment. Look them up
// in the pre-computed overlap map, and erase them too.
- auto MapIt = OverlappingFragments.find({Var.getVar(), ThisFragment});
+ auto MapIt = OverlappingFragments.find({Var.getVariable(), ThisFragment});
if (MapIt != OverlappingFragments.end()) {
for (auto Fragment : MapIt->second) {
LiveDebugValues::OptFragmentInfo FragmentHolder;
- if (!DebugVariable::isFragmentDefault(Fragment))
+ if (!DebugVariable::isDefaultFragment(Fragment))
FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment);
- DoErase({Var.getVar(), FragmentHolder, Var.getInlinedAt()});
+ DoErase({Var.getVariable(), FragmentHolder, Var.getInlinedAt()});
}
}
}
+void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet,
+ const VarLocMap &VarLocIDs) {
+ VarLocs.intersectWithComplement(KillSet);
+ for (unsigned ID : KillSet) {
+ const VarLoc *VL = &VarLocIDs[ID];
+ auto *EraseFrom = VL->isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
+ EraseFrom->erase(VL->Var);
+ }
+}
+
+void LiveDebugValues::OpenRangesSet::insert(unsigned VarLocID,
+ const VarLoc &VL) {
+ auto *InsertInto = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
+ VarLocs.set(VarLocID);
+ InsertInto->insert({VL.Var, VarLocID});
+}
+
+/// Return the Loc ID of an entry value backup location, if it exists for the
+/// variable.
+llvm::Optional<unsigned>
+LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
+ auto It = EntryValuesBackupVars.find(Var);
+ if (It != EntryValuesBackupVars.end())
+ return It->second;
+
+ return llvm::None;
+}
+
//===----------------------------------------------------------------------===//
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
@@ -642,7 +691,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
Out << "MBB: " << BB.getNumber() << ":\n";
for (unsigned VLL : L) {
const VarLoc &VL = VarLocIDs[VLL];
- Out << " Var: " << VL.Var.getVar()->getName();
+ Out << " Var: " << VL.Var.getVariable()->getName();
Out << " MI: ";
VL.dump(TRI, Out);
}
@@ -666,6 +715,62 @@ LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
return {Reg, Offset};
}
+/// Try to salvage the debug entry value when we encounter a new debug value
+/// describing the same parameter. Return true if the entry value can no
+/// longer be tracked and should be dropped, otherwise return false.
+bool LiveDebugValues::removeEntryValue(const MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ const VarLoc &EntryVL) {
+ // Skip the DBG_VALUE which is the debug entry value itself.
+ if (MI.isIdenticalTo(EntryVL.MI))
+ return false;
+
+ // If the parameter's location is not a register location, we can no longer
+ // track the entry value. In addition, if the debug expression from the
+ // DBG_VALUE is not empty, we can assume the parameter's value has changed,
+ // indicating that we should stop tracking its entry value as well.
+ if (!MI.getOperand(0).isReg() ||
+ MI.getDebugExpression()->getNumElements() != 0)
+ return true;
+
+ // If the DBG_VALUE comes from a copy instruction that copies the entry value,
+ // it means the parameter's value has not changed and we should be able to use
+ // its entry value.
+ bool TrySalvageEntryValue = false;
+ Register Reg = MI.getOperand(0).getReg();
+ auto I = std::next(MI.getReverseIterator());
+ const MachineOperand *SrcRegOp, *DestRegOp;
+ if (I != MI.getParent()->rend()) {
+    // TODO: Try to keep tracking an entry value if we encounter a propagated
+    // DBG_VALUE describing a copy of the entry value. (A propagated entry
+    // value does not indicate a modification of the parameter.)
+ auto DestSrc = TII->isCopyInstr(*I);
+ if (!DestSrc)
+ return true;
+
+ SrcRegOp = DestSrc->Source;
+ DestRegOp = DestSrc->Destination;
+ if (Reg != DestRegOp->getReg())
+ return true;
+ TrySalvageEntryValue = true;
+ }
+
+ if (TrySalvageEntryValue) {
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ const VarLoc &VL = VarLocIDs[ID];
+ if (!VL.isEntryBackupLoc())
+ continue;
+
+ if (VL.getEntryValueCopyBackupReg() == Reg &&
+ VL.MI.getOperand(0).getReg() == SrcRegOp->getReg())
+ return false;
+ }
+ }
+
+ return true;
+}
+
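The decision above reduces to a small predicate: keep the entry value only when the incoming DBG_VALUE is the entry value itself, or when it merely reads a register known to hold a copy of it. A simplified standalone sketch, with boolean stand-ins for the MachineInstr queries (the real code additionally matches the copy's source register against the backup's DBG_VALUE):

#include <cassert>
#include <optional>

struct NewDebugValue {
  bool IsEntryValueItself; // MI.isIdenticalTo(EntryVL.MI)
  bool DescribesRegister;  // MI.getOperand(0).isReg()
  bool HasEmptyExpression; // debug expression has no elements
  std::optional<unsigned> BackupCopyReg; // reg holding a known entry-value copy
  unsigned Reg;            // register the new DBG_VALUE describes
};

// Returns true when tracking of the entry value should stop.
bool removeEntryValue(const NewDebugValue &V) {
  if (V.IsEntryValueItself)
    return false; // the entry value itself: keep tracking
  if (!V.DescribesRegister || !V.HasEmptyExpression)
    return true;  // the value changed: stop tracking
  // Salvage: the DBG_VALUE reads a register known to hold the entry value.
  return !(V.BackupCopyReg && *V.BackupCopyReg == V.Reg);
}

int main() {
  assert(!removeEntryValue({true, true, true, std::nullopt, 1}));
  assert(removeEntryValue({false, false, true, std::nullopt, 1}));
  assert(!removeEntryValue({false, true, true, 1u, 1}));
  assert(removeEntryValue({false, true, true, 2u, 1}));
}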
/// End all previous ranges related to @MI and start a new range from @MI
/// if it is a DBG_VALUE instr.
void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
@@ -680,18 +785,33 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
"Expected inlined-at fields to agree");
- // End all previous ranges of Var.
DebugVariable V(Var, Expr, InlinedAt);
- OpenRanges.erase(V);
- // Add the VarLoc to OpenRanges from this DBG_VALUE.
+ // Check if this DBG_VALUE indicates a parameter's value changing.
+ // If that is the case, we should stop tracking its entry value.
+ auto EntryValBackupID = OpenRanges.getEntryValueBackup(V);
+ if (Var->isParameter() && EntryValBackupID) {
+ const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID];
+ if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) {
+ LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
+ MI.print(dbgs(), /*IsStandalone*/ false,
+ /*SkipOpers*/ false, /*SkipDebugLoc*/ false,
+ /*AddNewLine*/ true, TII));
+ OpenRanges.erase(EntryVL);
+ }
+ }
+
unsigned ID;
if (isDbgValueDescribedByReg(MI) || MI.getOperand(0).isImm() ||
MI.getOperand(0).isFPImm() || MI.getOperand(0).isCImm()) {
// Use normal VarLoc constructor for registers and immediates.
VarLoc VL(MI, LS);
+ // End all previous ranges of VL.Var.
+ OpenRanges.erase(VL);
+
ID = VarLocIDs.insert(VL);
- OpenRanges.insert(ID, VL.Var);
+ // Add the VarLoc to OpenRanges from this DBG_VALUE.
+ OpenRanges.insert(ID, VL);
} else if (MI.hasOneMemOperand()) {
llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?");
} else {
@@ -701,32 +821,30 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
}
}
+/// Turn the entry value backup locations into primary locations.
void LiveDebugValues::emitEntryValues(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers,
- DebugParamMap &DebugEntryVals,
SparseBitVector<> &KillSet) {
for (unsigned ID : KillSet) {
- if (!VarLocIDs[ID].Var.getVar()->isParameter())
+ if (!VarLocIDs[ID].Var.getVariable()->isParameter())
continue;
- const MachineInstr *CurrDebugInstr = &VarLocIDs[ID].MI;
+ auto DebugVar = VarLocIDs[ID].Var;
+ auto EntryValBackupID = OpenRanges.getEntryValueBackup(DebugVar);
- // If parameter's DBG_VALUE is not in the map that means we can't
- // generate parameter's entry value.
- if (!DebugEntryVals.count(CurrDebugInstr->getDebugVariable()))
+    // If the parameter has an entry value backup, it means we should
+    // be able to use its entry value.
+ if (!EntryValBackupID)
continue;
- auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()];
- DIExpression *NewExpr = DIExpression::prepend(
- ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue);
-
- VarLoc EntryLoc = VarLoc::CreateEntryLoc(*ParamDebugInstr, LS, NewExpr);
-
- unsigned EntryValLocID = VarLocIDs.insert(EntryLoc);
- Transfers.push_back({&MI, EntryValLocID});
- OpenRanges.insert(EntryValLocID, EntryLoc.Var);
+ const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID];
+ VarLoc EntryLoc =
+ VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr, EntryVL.Loc.RegNo);
+ unsigned EntryValueID = VarLocIDs.insert(EntryLoc);
+ Transfers.push_back({&MI, EntryValueID});
+ OpenRanges.insert(EntryValueID, EntryLoc);
}
}
@@ -741,23 +859,21 @@ void LiveDebugValues::insertTransferDebugPair(
unsigned NewReg) {
const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI;
- auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr,
- &VarLocIDs](VarLoc &VL) {
+ auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &VarLocIDs](VarLoc &VL) {
unsigned LocId = VarLocIDs.insert(VL);
// Close this variable's previous location range.
- DebugVariable V(*DebugInstr);
- OpenRanges.erase(V);
+ OpenRanges.erase(VL);
// Record the new location as an open range, and a postponed transfer
// inserting a DBG_VALUE for this location.
- OpenRanges.insert(LocId, VL.Var);
+ OpenRanges.insert(LocId, VL);
TransferDebugPair MIP = {&MI, LocId};
Transfers.push_back(MIP);
};
- // End all previous ranges of Var.
- OpenRanges.erase(VarLocIDs[OldVarID].Var);
+ // End all previous ranges of VL.Var.
+ OpenRanges.erase(VarLocIDs[OldVarID]);
switch (Kind) {
case TransferKind::TransferCopy: {
assert(NewReg &&
@@ -788,8 +904,6 @@ void LiveDebugValues::insertTransferDebugPair(
case TransferKind::TransferRestore: {
assert(NewReg &&
"No register supplied when handling a restore of a debug value");
- MachineFunction *MF = MI.getMF();
- DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
// DebugInstr refers to the pre-spill location, therefore we can reuse
// its expression.
VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg);
@@ -807,7 +921,7 @@ void LiveDebugValues::insertTransferDebugPair(
/// A definition of a register may mark the end of a range.
void LiveDebugValues::transferRegisterDef(
MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
- TransferMap &Transfers, DebugParamMap &DebugEntryVals) {
+ TransferMap &Transfers) {
MachineFunction *MF = MI.getMF();
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
@@ -841,8 +955,7 @@ void LiveDebugValues::transferRegisterDef(
if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
auto &TM = TPC->getTM<TargetMachine>();
if (TM.Options.EnableDebugEntryValues)
- emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals,
- KillSet);
+ emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet);
}
}
@@ -980,12 +1093,12 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
if (TKind == TransferKind::TransferSpill &&
VarLocIDs[ID].isDescribedByReg() == Reg) {
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
- << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+ << VarLocIDs[ID].Var.getVariable()->getName() << ")\n");
} else if (TKind == TransferKind::TransferRestore &&
VarLocIDs[ID].Kind == VarLoc::SpillLocKind &&
VarLocIDs[ID].Loc.SpillLocation == *Loc) {
LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
- << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+ << VarLocIDs[ID].Var.getVariable()->getName() << ")\n");
} else
continue;
insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, TKind,
@@ -1001,13 +1114,17 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers) {
- const MachineOperand *SrcRegOp, *DestRegOp;
+ auto DestSrc = TII->isCopyInstr(MI);
+ if (!DestSrc)
+ return;
+
+ const MachineOperand *DestRegOp = DestSrc->Destination;
+ const MachineOperand *SrcRegOp = DestSrc->Source;
- if (!TII->isCopyInstr(MI, SrcRegOp, DestRegOp) || !SrcRegOp->isKill() ||
- !DestRegOp->isDef())
+ if (!DestRegOp->isDef())
return;
- auto isCalleSavedReg = [&](unsigned Reg) {
+ auto isCalleeSavedReg = [&](unsigned Reg) {
for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
if (CalleeSavedRegs.test(*RAI))
return true;
@@ -1022,7 +1139,31 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
// included, there would be a great chance that it is going to be clobbered
// soon. It is more likely that previous register location, which is callee
// saved, is going to stay unclobbered longer, even if it is killed.
- if (!isCalleSavedReg(DestReg))
+ if (!isCalleeSavedReg(DestReg))
+ return;
+
+  // Remember an entry value movement. If we encounter a new debug value of
+  // a parameter that only describes moving the value around, rather than
+  // modifying it, we are still able to use the entry value if needed.
+ if (isRegOtherThanSPAndFP(*DestRegOp, MI, TRI)) {
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ if (VarLocIDs[ID].getEntryValueBackupReg() == SrcReg) {
+ LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump(););
+ VarLoc EntryValLocCopyBackup = VarLoc::CreateEntryCopyBackupLoc(
+ VarLocIDs[ID].MI, LS, VarLocIDs[ID].Expr, DestReg);
+
+ // Stop tracking the original entry value.
+ OpenRanges.erase(VarLocIDs[ID]);
+
+ // Start tracking the entry value copy.
+ unsigned EntryValCopyLocID = VarLocIDs.insert(EntryValLocCopyBackup);
+ OpenRanges.insert(EntryValCopyLocID, EntryValLocCopyBackup);
+ break;
+ }
+ }
+ }
+
+ if (!SrcRegOp->isKill())
return;
for (unsigned ID : OpenRanges.getVarLocs()) {
@@ -1070,26 +1211,27 @@ bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB,
void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
VarToFragments &SeenFragments,
OverlapMap &OverlappingFragments) {
- DebugVariable MIVar(MI);
- FragmentInfo ThisFragment = MIVar.getFragmentDefault();
+ DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ FragmentInfo ThisFragment = MIVar.getFragmentOrDefault();
// If this is the first sighting of this variable, then we are guaranteed
// there are currently no overlapping fragments either. Initialize the set
// of seen fragments, record no overlaps for the current one, and return.
- auto SeenIt = SeenFragments.find(MIVar.getVar());
+ auto SeenIt = SeenFragments.find(MIVar.getVariable());
if (SeenIt == SeenFragments.end()) {
SmallSet<FragmentInfo, 4> OneFragment;
OneFragment.insert(ThisFragment);
- SeenFragments.insert({MIVar.getVar(), OneFragment});
+ SeenFragments.insert({MIVar.getVariable(), OneFragment});
- OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}});
+ OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
return;
}
// If this particular Variable/Fragment pair already exists in the overlap
// map, it has already been accounted for.
auto IsInOLapMap =
- OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}});
+ OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
if (!IsInOLapMap.second)
return;
@@ -1107,7 +1249,7 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
// Mark the previously seen fragment as being overlapped by the current
// one.
auto ASeenFragmentsOverlaps =
- OverlappingFragments.find({MIVar.getVar(), ASeenFragment});
+ OverlappingFragments.find({MIVar.getVariable(), ASeenFragment});
assert(ASeenFragmentsOverlaps != OverlappingFragments.end() &&
"Previously seen var fragment has no vector of overlaps");
ASeenFragmentsOverlaps->second.push_back(ThisFragment);
@@ -1117,16 +1259,11 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
AllSeenFragments.insert(ThisFragment);
}
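The overlap bookkeeping above can be sketched standalone: fragments are bit ranges, SeenFragments records every fragment seen per variable, and OverlappingFragments maps each (variable, fragment) pair to the fragments overlapping it. A toy version, assuming half-open bit ranges in place of DIExpression fragment info:

#include <cassert>
#include <map>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

// Toy FragmentInfo: a half-open bit range within a variable.
struct Fragment {
  unsigned OffsetInBits, SizeInBits;
  bool operator<(const Fragment &O) const {
    return std::tie(OffsetInBits, SizeInBits) <
           std::tie(O.OffsetInBits, O.SizeInBits);
  }
  bool overlaps(const Fragment &O) const {
    return OffsetInBits < O.OffsetInBits + O.SizeInBits &&
           O.OffsetInBits < OffsetInBits + SizeInBits;
  }
};

using VarFrag = std::pair<std::string, Fragment>;

void accumulate(const std::string &Var, Fragment This,
                std::map<std::string, std::set<Fragment>> &Seen,
                std::map<VarFrag, std::vector<Fragment>> &Overlaps) {
  auto SeenIt = Seen.find(Var);
  if (SeenIt == Seen.end()) {
    // First sighting of Var: no overlaps are possible yet.
    Seen[Var].insert(This);
    Overlaps[{Var, This}];
    return;
  }
  auto Ins = Overlaps.insert({{Var, This}, {}});
  if (!Ins.second)
    return; // this (Var, Fragment) pair was already accounted for
  for (const Fragment &Other : SeenIt->second)
    if (Other.overlaps(This)) {
      Ins.first->second.push_back(Other);     // This overlaps Other...
      Overlaps[{Var, Other}].push_back(This); // ...and vice versa.
    }
  SeenIt->second.insert(This);
}

int main() {
  std::map<std::string, std::set<Fragment>> Seen;
  std::map<VarFrag, std::vector<Fragment>> Overlaps;
  accumulate("x", {0, 32}, Seen, Overlaps);
  accumulate("x", {16, 32}, Seen, Overlaps); // overlaps bits [16, 32)
  assert(Overlaps[{"x", {0, 32}}].size() == 1);
  assert(Overlaps[{"x", {16, 32}}].size() == 1);
}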
-/// This routine creates OpenRanges and OutLocs.
+/// This routine creates OpenRanges.
void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
- TransferMap &Transfers,
- DebugParamMap &DebugEntryVals,
- OverlapMap &OverlapFragments,
- VarToFragments &SeenFragments) {
+ VarLocMap &VarLocIDs, TransferMap &Transfers) {
transferDebugValue(MI, OpenRanges, VarLocIDs);
- transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers,
- DebugEntryVals);
+ transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers);
transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
}
@@ -1175,7 +1312,7 @@ bool LiveDebugValues::join(
if (!InLocsT.empty()) {
for (auto ID : InLocsT)
dbgs() << " gathered candidate incoming var: "
- << VarLocIDs[ID].Var.getVar()->getName() << "\n";
+ << VarLocIDs[ID].Var.getVariable()->getName() << "\n";
}
});
@@ -1190,7 +1327,7 @@ bool LiveDebugValues::join(
if (!VarLocIDs[ID].dominates(MBB)) {
KillSet.set(ID);
LLVM_DEBUG({
- auto Name = VarLocIDs[ID].Var.getVar()->getName();
+ auto Name = VarLocIDs[ID].Var.getVariable()->getName();
dbgs() << " killing " << Name << ", it doesn't dominate MBB\n";
});
}
@@ -1247,6 +1384,8 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs,
// The ID location is live-in to MBB -- work out what kind of machine
// location it is and create a DBG_VALUE.
const VarLoc &DiffIt = VarLocIDs[ID];
+ if (DiffIt.isEntryBackupLoc())
+ continue;
MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent());
MBB.insert(MBB.instr_begin(), MI);
@@ -1256,6 +1395,87 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs,
}
}
+bool LiveDebugValues::isEntryValueCandidate(
+ const MachineInstr &MI, const DefinedRegsSet &DefinedRegs) const {
+ assert(MI.isDebugValue() && "This must be DBG_VALUE.");
+
+  // TODO: Add support for local variables that are expressed in terms of
+  // parameters' entry values.
+  // TODO: Add support for modified arguments that can be expressed
+  // by using their entry values.
+ auto *DIVar = MI.getDebugVariable();
+ if (!DIVar->isParameter())
+ return false;
+
+ // Do not consider parameters that belong to an inlined function.
+ if (MI.getDebugLoc()->getInlinedAt())
+ return false;
+
+ // Do not consider indirect debug values (TODO: explain why).
+ if (MI.isIndirectDebugValue())
+ return false;
+
+ // Only consider parameters that are described using registers. Parameters
+ // that are passed on the stack are not yet supported, so ignore debug
+ // values that are described by the frame or stack pointer.
+ if (!isRegOtherThanSPAndFP(MI.getOperand(0), MI, TRI))
+ return false;
+
+ // If a parameter's value has been propagated from the caller, then the
+ // parameter's DBG_VALUE may be described using a register defined by some
+ // instruction in the entry block, in which case we shouldn't create an
+ // entry value.
+ if (DefinedRegs.count(MI.getOperand(0).getReg()))
+ return false;
+
+  // TODO: Add support for parameters that have a pre-existing debug
+  // expression (e.g. fragments, or indirect parameters using DW_OP_deref).
+ if (MI.getDebugExpression()->getNumElements() > 0)
+ return false;
+
+ return true;
+}
+
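Collapsed into a pure predicate over toy inputs, the candidate filter above looks roughly like this (the field names are illustrative stand-ins for the MachineInstr and DILocalVariable queries, not the real API):

#include <cassert>
#include <set>

// Boolean stand-ins for the DBG_VALUE queries used above.
struct DbgValue {
  bool IsParameter;        // DIVar->isParameter()
  bool IsInlined;          // getDebugLoc()->getInlinedAt() != nullptr
  bool IsIndirect;         // MI.isIndirectDebugValue()
  bool InRegOtherThanSPFP; // described by a register that is not SP/FP
  unsigned Reg;            // that register
  unsigned ExprElements;   // number of debug expression elements
};

bool isEntryValueCandidate(const DbgValue &DV,
                           const std::set<unsigned> &DefinedRegs) {
  if (!DV.IsParameter || DV.IsInlined || DV.IsIndirect)
    return false;
  if (!DV.InRegOtherThanSPFP)
    return false;
  if (DefinedRegs.count(DV.Reg))
    return false; // the register was redefined in the entry block
  return DV.ExprElements == 0; // no pre-existing debug expression
}

int main() {
  std::set<unsigned> Defs{5};
  assert(isEntryValueCandidate({true, false, false, true, 1, 0}, Defs));
  assert(!isEntryValueCandidate({true, false, false, true, 5, 0}, Defs));
  assert(!isEntryValueCandidate({false, false, false, true, 1, 0}, Defs));
}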
+/// Collect all register defines (including aliases) for the given instruction.
+static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs,
+ const TargetRegisterInfo *TRI) {
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isReg() && MO.isDef() && MO.getReg())
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Regs.insert(*AI);
+}
+
+/// This routine records the entry values of function parameters. The values
+/// can be used as backup values. If we lose track of some unmodified
+/// parameters, the backup values will be used as primary locations.
+void LiveDebugValues::recordEntryValue(const MachineInstr &MI,
+ const DefinedRegsSet &DefinedRegs,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs) {
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ auto &TM = TPC->getTM<TargetMachine>();
+ if (!TM.Options.EnableDebugEntryValues)
+ return;
+ }
+
+ DebugVariable V(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+
+ if (!isEntryValueCandidate(MI, DefinedRegs) ||
+ OpenRanges.getEntryValueBackup(V))
+ return;
+
+ LLVM_DEBUG(dbgs() << "Creating the backup entry location: "; MI.dump(););
+
+  // Create the entry value and use it as a backup location. The backup
+  // location remains valid until the parameter is modified.
+ DIExpression *NewExpr =
+ DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue);
+ VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, LS, NewExpr);
+ unsigned EntryValLocID = VarLocIDs.insert(EntryValLocAsBackup);
+ OpenRanges.insert(EntryValLocID, EntryValLocAsBackup);
+}
+
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
@@ -1266,12 +1486,13 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
bool MBBJoined = false;
VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
- OverlapMap OverlapFragments; // Map of overlapping variable fragments
+ OverlapMap OverlapFragments; // Map of overlapping variable fragments.
OpenRangesSet OpenRanges(OverlapFragments);
// Ranges that are open until end of bb.
VarLocInMBB OutLocs; // Ranges that exist beyond bb.
VarLocInMBB InLocs; // Ranges that are incoming after joining.
- TransferMap Transfers; // DBG_VALUEs associated with spills.
+ TransferMap Transfers; // DBG_VALUEs associated with transfers (such as
+ // spills, copies and restores).
VarLocInMBB PendingInLocs; // Ranges that are incoming after joining, but
// that we have deferred creating DBG_VALUE insts
// for immediately.
@@ -1291,42 +1512,18 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
std::greater<unsigned int>>
Pending;
- // Besides parameter's modification, check whether a DBG_VALUE is inlined
- // in order to deduce whether the variable that it tracks comes from
- // a different function. If that is the case we can't track its entry value.
- auto IsUnmodifiedFuncParam = [&](const MachineInstr &MI) {
- auto *DIVar = MI.getDebugVariable();
- return DIVar->isParameter() && DIVar->isNotModified() &&
- !MI.getDebugLoc()->getInlinedAt();
- };
-
- const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
- unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
- Register FP = TRI->getFrameRegister(MF);
- auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool {
- return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP;
- };
-
- // Working set of currently collected debug variables mapped to DBG_VALUEs
- // representing candidates for production of debug entry values.
- DebugParamMap DebugEntryVals;
+ // Set of register defines that are seen when traversing the entry block
+ // looking for debug entry value candidates.
+ DefinedRegsSet DefinedRegs;
- MachineBasicBlock &First_MBB = *(MF.begin());
// Only in the case of entry MBB collect DBG_VALUEs representing
// function parameters in order to generate debug entry values for them.
- // Currently, we generate debug entry values only for parameters that are
- // unmodified throughout the function and located in a register.
- // TODO: Add support for parameters that are described as fragments.
- // TODO: Add support for modified arguments that can be expressed
- // by using its entry value.
- // TODO: Add support for local variables that are expressed in terms of
- // parameters entry values.
- for (auto &MI : First_MBB)
- if (MI.isDebugValue() && IsUnmodifiedFuncParam(MI) &&
- !MI.isIndirectDebugValue() && IsRegOtherThanSPAndFP(MI.getOperand(0)) &&
- !DebugEntryVals.count(MI.getDebugVariable()) &&
- !MI.getDebugExpression()->isFragment())
- DebugEntryVals[MI.getDebugVariable()] = &MI;
+ MachineBasicBlock &First_MBB = *(MF.begin());
+ for (auto &MI : First_MBB) {
+ collectRegDefs(MI, DefinedRegs, TRI);
+ if (MI.isDebugValue())
+ recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs);
+ }
// Initialize per-block structures and scan for fragment overlaps.
for (auto &MBB : MF) {
@@ -1379,13 +1576,12 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
MBBJoined = false;
Changed = true;
// Now that we have started to extend ranges across BBs we need to
- // examine spill instructions to see whether they spill registers that
- // correspond to user variables.
+ // examine spill, copy and restore instructions to see whether they
+ // operate with registers that correspond to user variables.
// First load any pending inlocs.
OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs);
for (auto &MI : *MBB)
- process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
- DebugEntryVals, OverlapFragments, SeenFragments);
+ process(MI, OpenRanges, VarLocIDs, Transfers);
OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs);
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
@@ -1439,8 +1635,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
TFI = MF.getSubtarget().getFrameLowering();
- TFI->determineCalleeSaves(MF, CalleeSavedRegs,
- std::make_unique<RegScavenger>().get());
+ TFI->getCalleeSaves(MF, CalleeSavedRegs);
LS.initialize(MF);
bool Changed = ExtendRanges(MF);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 2dd462fc72b3..2cc547a6b741 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -49,6 +49,7 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -142,51 +143,22 @@ namespace {
class LDVImpl;
-/// A UserValue is uniquely identified by the source variable it refers to
-/// (Variable), the expression describing how to get the value (Expression) and
-/// the specific usage (InlinedAt). InlinedAt differentiates both between
-/// inline and non-inline functions, and multiple inlined instances in the same
-/// scope. FIXME: The only part of the Expression which matters for UserValue
-/// identification is the fragment part.
-class UserValueIdentity {
-private:
- /// The debug info variable we are part of.
- const DILocalVariable *Variable;
- /// Any complex address expression.
- const DIExpression *Expression;
- /// Function usage identification.
- const DILocation *InlinedAt;
-
-public:
- UserValueIdentity(const DILocalVariable *Var, const DIExpression *Expr,
- const DILocation *IA)
- : Variable(Var), Expression(Expr), InlinedAt(IA) {}
-
- bool match(const DILocalVariable *Var, const DIExpression *Expr,
- const DILocation *IA) const {
- // FIXME: The fragment should be part of the identity, but not
- // other things in the expression like stack values.
- return Var == Variable && Expr == Expression && IA == InlinedAt;
- }
-
- bool match(const UserValueIdentity &Other) const {
- return match(Other.Variable, Other.Expression, Other.InlinedAt);
- }
-
- unsigned hash_value() const {
- return hash_combine(Variable, Expression, InlinedAt);
- }
-};
-
/// A user value is a part of a debug info user variable.
///
/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register. The equivalence class is the transitive
+/// closure of that relation.
class UserValue {
const DILocalVariable *Variable; ///< The debug info variable we are part of.
const DIExpression *Expression; ///< Any complex address expression.
DebugLoc dl; ///< The debug location for the variable. This is
///< used by dwarf writer to find lexical scope.
+ UserValue *leader; ///< Equivalence class leader.
+ UserValue *next = nullptr; ///< Next value in equivalence class, or null.
/// Numbered locations referenced by locmap.
SmallVector<MachineOperand, 4> locations;
@@ -207,15 +179,49 @@ class UserValue {
LiveIntervals &LIS);
public:
- UserValue(const UserValue &) = delete;
-
/// Create a new UserValue.
UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L,
LocMap::Allocator &alloc)
- : Variable(var), Expression(expr), dl(std::move(L)), locInts(alloc) {}
+ : Variable(var), Expression(expr), dl(std::move(L)), leader(this),
+ locInts(alloc) {}
+
+ /// Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// Does this UserValue match the parameters?
+ bool match(const DILocalVariable *Var, const DIExpression *Expr,
+ const DILocation *IA) const {
+ // FIXME: The fragment should be part of the equivalence class, but not
+ // other things in the expression like stack values.
+ return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA;
+ }
- UserValueIdentity getId() {
- return UserValueIdentity(Variable, Expression, dl->getInlinedAt());
+ /// Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next) {
+ End->leader = L1;
+ End = End->next;
+ }
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
}
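The leader/next scheme above is a union-find with an intrusive linked list of class members: getLeader() path-compresses, and merge() relabels one class while splicing its member list into the other. A standalone sketch of the same structure:

#include <cassert>

// Toy equivalence-class node mirroring UserValue's leader/next fields.
struct Node {
  Node *leader = this;
  Node *next = nullptr;

  Node *getLeader() {
    Node *L = leader;
    while (L != L->leader)
      L = L->leader;
    return leader = L; // path compression
  }

  static Node *merge(Node *L1, Node *L2) {
    L2 = L2->getLeader();
    if (!L1)
      return L2;
    L1 = L1->getLeader();
    if (L1 == L2)
      return L1;
    // Splice L2's members in front of L1's members.
    Node *End = L2;
    while (End->next) {
      End->leader = L1;
      End = End->next;
    }
    End->leader = L1;
    End->next = L1->next;
    L1->next = L2;
    return L1;
  }
};

int main() {
  Node A, B, C;
  Node::merge(&A, &B);
  Node::merge(&A, &C);
  assert(B.getLeader() == &A && C.getLeader() == &A);
  // Every member is reachable by walking next from the leader.
  int N = 0;
  for (Node *P = A.getLeader(); P; P = P->next)
    ++N;
  assert(N == 3);
}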
/// Return the location number that matches Loc.
@@ -250,6 +256,25 @@ public:
return locations.size() - 1;
}
+  /// Remove (recycle) a location number. If \p LocNo is still used by
+  /// locInts, nothing is done.
+ void removeLocationIfUnused(unsigned LocNo) {
+    // Bail out if LocNo is still used.
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ DbgValueLocation Loc = I.value();
+ if (Loc.locNo() == LocNo)
+ return;
+ }
+ // Remove the entry in the locations vector, and adjust all references to
+ // location numbers above the removed entry.
+ locations.erase(locations.begin() + LocNo);
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ DbgValueLocation Loc = I.value();
+ if (!Loc.isUndef() && Loc.locNo() > LocNo)
+ I.setValueUnchecked(Loc.changeLocNo(Loc.locNo() - 1));
+ }
+ }
+
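The recycle-and-renumber step can be sketched with plain containers: bail out if any range still refers to the location number, otherwise erase the entry and shift every higher location number down by one. A toy version, with a std::map standing in for the locInts interval map:

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Toy removeLocationIfUnused: drop slot LocNo from Locations only if no
// live-range entry still refers to it, then renumber higher references.
void removeLocationIfUnused(unsigned LocNo,
                            std::vector<std::string> &Locations,
                            std::map<int, unsigned> &LocInts) {
  for (auto &KV : LocInts)
    if (KV.second == LocNo)
      return; // still in use (e.g. a spilled reg awaiting rewriteLocations)
  Locations.erase(Locations.begin() + LocNo);
  for (auto &KV : LocInts)
    if (KV.second > LocNo)
      --KV.second; // shift references above the removed slot
}

int main() {
  std::vector<std::string> Locs{"r0", "r1", "r2"};
  std::map<int, unsigned> Ranges{{0, 0}, {10, 2}};
  removeLocationIfUnused(1, Locs, Ranges); // unused -> removed
  assert(Locs.size() == 2 && Ranges[10] == 1);
  removeLocationIfUnused(0, Locs, Ranges); // still used -> kept
  assert(Locs.size() == 2);
}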
/// Ensure that all virtual register locations are mapped.
void mapVirtRegs(LDVImpl *LDV);
@@ -327,29 +352,7 @@ public:
void print(raw_ostream &, const TargetRegisterInfo *);
};
-} // namespace
-namespace llvm {
-template <> struct DenseMapInfo<UserValueIdentity> {
- static UserValueIdentity getEmptyKey() {
- auto Key = DenseMapInfo<DILocalVariable *>::getEmptyKey();
- return UserValueIdentity(Key, nullptr, nullptr);
- }
- static UserValueIdentity getTombstoneKey() {
- auto Key = DenseMapInfo<DILocalVariable *>::getTombstoneKey();
- return UserValueIdentity(Key, nullptr, nullptr);
- }
- static unsigned getHashValue(const UserValueIdentity &Val) {
- return Val.hash_value();
- }
- static bool isEqual(const UserValueIdentity &LHS,
- const UserValueIdentity &RHS) {
- return LHS.match(RHS);
- }
-};
-} // namespace llvm
-
-namespace {
/// A user label is a part of a debug info user label.
class UserLabel {
const DILabel *Label; ///< The debug info label we are part of.
@@ -401,20 +404,20 @@ class LDVImpl {
/// All allocated UserLabel instances.
SmallVector<std::unique_ptr<UserLabel>, 2> userLabels;
- /// Map virtual register to UserValues which use it.
- using VRMap = DenseMap<unsigned, SmallVector<UserValue *, 4>>;
- VRMap VirtRegToUserVals;
+ /// Map virtual register to eq class leader.
+ using VRMap = DenseMap<unsigned, UserValue *>;
+ VRMap virtRegToEqClass;
- /// Map unique UserValue identity to UserValue.
- using UVMap = DenseMap<UserValueIdentity, UserValue *>;
- UVMap UserVarMap;
+ /// Map user variable to eq class leader.
+ using UVMap = DenseMap<const DILocalVariable *, UserValue *>;
+ UVMap userVarMap;
/// Find or create a UserValue.
UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr,
const DebugLoc &DL);
- /// Find the UserValues for VirtReg or null.
- SmallVectorImpl<UserValue *> *lookupVirtReg(unsigned VirtReg);
+ /// Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
/// Add DBG_VALUE instruction to our maps.
///
@@ -454,8 +457,8 @@ public:
MF = nullptr;
userValues.clear();
userLabels.clear();
- VirtRegToUserVals.clear();
- UserVarMap.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
// Make sure we call emitDebugValues if the machine function was modified.
assert((!ModifiedMF || EmitDone) &&
"Dbg values are not emitted in LDV");
@@ -463,8 +466,8 @@ public:
ModifiedMF = false;
}
- /// Map virtual register to a UserValue.
- void mapVirtReg(unsigned VirtReg, UserValue *UV);
+ /// Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
/// Replace all references to OldReg with NewRegs.
void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
@@ -503,7 +506,7 @@ static void printExtendedName(raw_ostream &OS, const DINode *Node,
const DILocation *DL) {
const LLVMContext &Ctx = Node->getContext();
StringRef Res;
- unsigned Line;
+ unsigned Line = 0;
if (const auto *V = dyn_cast<const DILocalVariable>(Node)) {
Res = V->getName();
Line = V->getLine();
@@ -572,27 +575,31 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) {
UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
const DIExpression *Expr, const DebugLoc &DL) {
- auto Ident = UserValueIdentity(Var, Expr, DL->getInlinedAt());
- UserValue *&UVEntry = UserVarMap[Ident];
-
- if (UVEntry)
- return UVEntry;
+ UserValue *&Leader = userVarMap[Var];
+ if (Leader) {
+ UserValue *UV = Leader->getLeader();
+ Leader = UV;
+ for (; UV; UV = UV->getNext())
+ if (UV->match(Var, Expr, DL->getInlinedAt()))
+ return UV;
+ }
- userValues.push_back(std::make_unique<UserValue>(Var, Expr, DL, allocator));
- return UVEntry = userValues.back().get();
+ userValues.push_back(
+ std::make_unique<UserValue>(Var, Expr, DL, allocator));
+ UserValue *UV = userValues.back().get();
+ Leader = UserValue::merge(Leader, UV);
+ return UV;
}
-void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *UV) {
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs");
- assert(UserVarMap.find(UV->getId()) != UserVarMap.end() &&
- "UserValue should exist in UserVarMap");
- VirtRegToUserVals[VirtReg].push_back(UV);
+ UserValue *&Leader = virtRegToEqClass[VirtReg];
+ Leader = UserValue::merge(Leader, EC);
}
-SmallVectorImpl<UserValue *> *LDVImpl::lookupVirtReg(unsigned VirtReg) {
- VRMap::iterator Itr = VirtRegToUserVals.find(VirtReg);
- if (Itr != VirtRegToUserVals.end())
- return &Itr->getSecond();
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+ return UV->getLeader();
return nullptr;
}
@@ -1086,23 +1093,14 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
}
}
- // Finally, remove any remaining OldLocNo intervals and OldLocNo itself.
- locations.erase(locations.begin() + OldLocNo);
- LocMapI.goToBegin();
- while (LocMapI.valid()) {
- DbgValueLocation v = LocMapI.value();
- if (v.locNo() == OldLocNo) {
- LLVM_DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
- << LocMapI.stop() << ")\n");
- LocMapI.erase();
- } else {
- // Undef values always have location number UndefLocNo, so don't change
- // locNo in that case. See getLocationNo().
- if (!v.isUndef() && v.locNo() > OldLocNo)
- LocMapI.setValueUnchecked(v.changeLocNo(v.locNo() - 1));
- ++LocMapI;
- }
- }
+  // Finally, remove OldLocNo unless it is still used by some interval in the
+  // locInts map. One case where OldLocNo is still in use is when the register
+  // has been spilled. In such situations the spilled register is kept as a
+  // location until rewriteLocations is called (VirtRegMap maps the old
+  // register to the spill slot). So for a while we can have locations that
+  // map to virtual registers that have been removed from both the
+  // MachineFunction and from LiveIntervals.
+ removeLocationIfUnused(OldLocNo);
LLVM_DEBUG({
dbgs() << "Split result: \t";
@@ -1129,18 +1127,16 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) {
bool DidChange = false;
- if (auto *UserVals = lookupVirtReg(OldReg))
- for (auto *UV : *UserVals)
- DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
+ for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
+ DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
if (!DidChange)
return;
// Map all of the new virtual registers.
- if (auto *UserVals = lookupVirtReg(OldReg))
- for (auto *UV : *UserVals)
- for (unsigned i = 0; i != NewRegs.size(); ++i)
- mapVirtReg(NewRegs[i], UV);
+ UserValue *UV = lookupVirtReg(OldReg);
+ for (unsigned i = 0; i != NewRegs.size(); ++i)
+ mapVirtReg(NewRegs[i], UV);
}
void LiveDebugVariables::
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
index 54ac46f2e7ce..930dc116205a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
@@ -883,7 +883,8 @@ void LiveInterval::clearSubRanges() {
static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
LaneBitmask LaneMask,
const SlotIndexes &Indexes,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ unsigned ComposeSubRegIdx) {
// Phys reg should not be tracked at subreg level.
// Same for noreg (Reg == 0).
if (!Register::isVirtualRegister(Reg) || !Reg)
@@ -905,7 +906,12 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
continue;
if (MOI->getReg() != Reg)
continue;
- if ((TRI.getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none())
+ LaneBitmask OrigMask = TRI.getSubRegIndexLaneMask(MOI->getSubReg());
+ LaneBitmask ExpectedDefMask =
+ ComposeSubRegIdx
+ ? TRI.composeSubRegIndexLaneMask(ComposeSubRegIdx, OrigMask)
+ : OrigMask;
+ if ((ExpectedDefMask & LaneMask).none())
continue;
hasDef = true;
break;
@@ -924,7 +930,8 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
void LiveInterval::refineSubRanges(
BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
std::function<void(LiveInterval::SubRange &)> Apply,
- const SlotIndexes &Indexes, const TargetRegisterInfo &TRI) {
+ const SlotIndexes &Indexes, const TargetRegisterInfo &TRI,
+ unsigned ComposeSubRegIdx) {
LaneBitmask ToApply = LaneMask;
for (SubRange &SR : subranges()) {
LaneBitmask SRMask = SR.LaneMask;
@@ -944,8 +951,10 @@ void LiveInterval::refineSubRanges(
MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
// Now that the subrange is split in half, make sure we
// only keep in the subranges the VNIs that touch the related half.
- stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI);
- stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI);
+ stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI,
+ ComposeSubRegIdx);
+ stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI,
+ ComposeSubRegIdx);
}
Apply(*MatchingRange);
ToApply &= ~Matching;
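The matching/non-matching split that refineSubRanges performs can be illustrated with lane masks as plain bitmasks (a rough sketch only; it omits the VNI stripping and the new ComposeSubRegIdx handling):

#include <cassert>
#include <cstdint>
#include <vector>

struct SubRange { uint64_t Lanes; };

// Split existing subranges so that some subrange exactly covers LaneMask.
void refine(std::vector<SubRange> &Subs, uint64_t LaneMask) {
  uint64_t ToApply = LaneMask;
  size_t N = Subs.size(); // do not revisit subranges created below
  for (size_t I = 0; I != N; ++I) {
    uint64_t Common = Subs[I].Lanes & LaneMask;
    if (!Common)
      continue;
    if (Common != Subs[I].Lanes) {
      // Split: keep the non-matching half, spawn the matching half.
      Subs[I].Lanes &= ~Common;
      Subs.push_back({Common});
    }
    ToApply &= ~Common;
  }
  if (ToApply) // lanes not covered by any existing subrange
    Subs.push_back({ToApply});
}

int main() {
  std::vector<SubRange> Subs{{0b1100}, {0b0011}};
  refine(Subs, 0b0110); // overlaps both existing subranges
  // Expect {0b1000}, {0b0001}, {0b0100}, {0b0010}.
  assert(Subs.size() == 4);
}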
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index 2989930ad093..9c80282bc59e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -191,12 +191,12 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
}
/// Compute the live interval of a virtual register, based on defs and uses.
-void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
+bool LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
- computeDeadValues(LI, nullptr);
+ return computeDeadValues(LI, nullptr);
}
void LiveIntervals::computeVirtRegs() {
@@ -204,7 +204,12 @@ void LiveIntervals::computeVirtRegs() {
unsigned Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
- createAndComputeVirtRegInterval(Reg);
+ LiveInterval &LI = createEmptyInterval(Reg);
+ bool NeedSplit = computeVirtRegInterval(LI);
+ if (NeedSplit) {
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ splitSeparateComponents(LI, SplitLIs);
+ }
}
}
@@ -500,6 +505,8 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
bool LiveIntervals::computeDeadValues(LiveInterval &LI,
SmallVectorImpl<MachineInstr*> *dead) {
bool MayHaveSplitComponents = false;
+ bool HaveDeadDef = false;
+
for (VNInfo *VNI : LI.valnos) {
if (VNI->isUnused())
continue;
@@ -530,6 +537,10 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
MachineInstr *MI = getInstructionFromIndex(Def);
assert(MI && "No instruction defining live value");
MI->addRegisterDead(LI.reg, TRI);
+ if (HaveDeadDef)
+ MayHaveSplitComponents = true;
+ HaveDeadDef = true;
+
if (dead && MI->allDefsAreDead()) {
LLVM_DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
dead->push_back(MI);
@@ -1061,9 +1072,9 @@ private:
// Kill flags shouldn't be used while live intervals exist, they will be
// reinserted by VirtRegRewriter.
if (MachineInstr *KillMI = LIS.getInstructionFromIndex(OldIdxIn->end))
- for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO)
- if (MO->isReg() && MO->isUse())
- MO->setIsKill(false);
+ for (MachineOperand &MOP : mi_bundle_ops(*KillMI))
+ if (MOP.isReg() && MOP.isUse())
+ MOP.setIsKill(false);
// Is there a def before NewIdx which is not OldIdx?
LiveRange::iterator Next = std::next(OldIdxIn);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp
index c2a1cc7c6490..7a5cffca3470 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
@@ -42,28 +43,23 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
/// Remove defined registers and regmask kills from the set.
void LivePhysRegs::removeDefs(const MachineInstr &MI) {
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (O->isReg()) {
- if (!O->isDef() || O->isDebug())
- continue;
- Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
- continue;
- removeReg(Reg);
- } else if (O->isRegMask())
- removeRegsInMask(*O);
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (MOP.isRegMask()) {
+ removeRegsInMask(MOP);
+ continue;
+ }
+
+ if (MOP.isDef())
+ removeReg(MOP.getReg());
}
}
/// Add uses to the set.
void LivePhysRegs::addUses(const MachineInstr &MI) {
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (!O->isReg() || !O->readsReg() || O->isDebug())
- continue;
- Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (!MOP.isReg() || !MOP.readsReg())
continue;
- addReg(Reg);
+ addReg(MOP.getReg());
}
}
@@ -116,7 +112,7 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,
}
}
-/// Prin the currently live registers to OS.
+/// Print the currently live registers to OS.
void LivePhysRegs::print(raw_ostream &OS) const {
OS << "Live Registers:";
if (!TRI) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
index cbf112ee2bd5..2ebc8d7576d1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 72c79e5f8a75..08f046420fa1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
index 97763def1f40..b2731aa0e7db 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
@@ -43,41 +43,34 @@ void LiveRegUnits::addRegsInMask(const uint32_t *RegMask) {
void LiveRegUnits::stepBackward(const MachineInstr &MI) {
// Remove defined registers and regmask kills from the set.
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (O->isReg()) {
- if (!O->isDef() || O->isDebug())
- continue;
- Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
- continue;
- removeReg(Reg);
- } else if (O->isRegMask())
- removeRegsNotPreserved(O->getRegMask());
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (MOP.isRegMask()) {
+ removeRegsNotPreserved(MOP.getRegMask());
+ continue;
+ }
+
+ if (MOP.isDef())
+ removeReg(MOP.getReg());
}
// Add uses to the set.
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (!O->isReg() || !O->readsReg() || O->isDebug())
- continue;
- Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (!MOP.isReg() || !MOP.readsReg())
continue;
- addReg(Reg);
+ addReg(MOP.getReg());
}
}
void LiveRegUnits::accumulate(const MachineInstr &MI) {
// Add defs, uses and regmask clobbers to the set.
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (O->isReg()) {
- Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
- continue;
- if (!O->isDef() && !O->readsReg())
- continue;
- addReg(Reg);
- } else if (O->isRegMask())
- addRegsInMask(O->getRegMask());
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (MOP.isRegMask()) {
+ addRegsInMask(MOP.getRegMask());
+ continue;
+ }
+ if (!MOP.isDef() && !MOP.readsReg())
+ continue;
+ addReg(MOP.getReg());
}
}
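Both loops above follow the usual backward-liveness step: defs kill registers, then uses revive them. A standalone sketch over toy operands (it ignores regmasks, bundles and register aliasing, which the real phys_regs_and_masks iteration handles):

#include <cassert>
#include <set>
#include <vector>

struct Operand { unsigned Reg; bool IsDef; };
using Instr = std::vector<Operand>;

// Step the live set backward across one instruction.
void stepBackward(const Instr &MI, std::set<unsigned> &Live) {
  for (const Operand &Op : MI) // remove defined registers
    if (Op.IsDef)
      Live.erase(Op.Reg);
  for (const Operand &Op : MI) // then add used registers
    if (!Op.IsDef)
      Live.insert(Op.Reg);
}

int main() {
  std::set<unsigned> Live{1};                  // r1 live below this point
  stepBackward({{1, true}, {2, false}}, Live); // r1 = use r2
  assert(!Live.count(1) && Live.count(2));
}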
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 2392d4d00b56..5022726dc70a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
index ca0daa14fedf..40dfa696a2b9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
@@ -24,14 +24,37 @@ LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
if (NumElements == 1)
return ScalarTy;
return LLT::vector(NumElements, ScalarTy);
- } else if (auto PTy = dyn_cast<PointerType>(&Ty)) {
- return LLT::pointer(PTy->getAddressSpace(), DL.getTypeSizeInBits(&Ty));
- } else if (Ty.isSized()) {
+ }
+
+ if (auto PTy = dyn_cast<PointerType>(&Ty)) {
+ unsigned AddrSpace = PTy->getAddressSpace();
+ return LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+ }
+
+ if (Ty.isSized()) {
// Aggregates are no different from real scalars as far as GlobalISel is
// concerned.
auto SizeInBits = DL.getTypeSizeInBits(&Ty);
assert(SizeInBits != 0 && "invalid zero-sized type");
return LLT::scalar(SizeInBits);
}
+
return LLT();
}
+
+MVT llvm::getMVTForLLT(LLT Ty) {
+ if (!Ty.isVector())
+ return MVT::getIntegerVT(Ty.getSizeInBits());
+
+ return MVT::getVectorVT(
+ MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
+ Ty.getNumElements());
+}
+
+LLT llvm::getLLTForMVT(MVT Ty) {
+ if (!Ty.isVector())
+ return LLT::scalar(Ty.getSizeInBits());
+
+ return LLT::vector(Ty.getVectorNumElements(),
+ Ty.getVectorElementType().getSizeInBits());
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
index ed48365b0102..529d478756d4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index c9bb5461aa3c..5ef907b88315 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -48,10 +49,6 @@ static cl::opt<unsigned>
cl::value_desc("N"),
cl::desc("Function number to canonicalize."));
-static cl::opt<unsigned> CanonicalizeBasicBlockNumber(
- "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"),
- cl::desc("BasicBlock number to canonicalize."));
-
namespace {
class MIRCanonicalizer : public MachineFunctionPass {
@@ -373,34 +370,14 @@ static bool doDefKillClear(MachineBasicBlock *MBB) {
}
static bool runOnBasicBlock(MachineBasicBlock *MBB,
- std::vector<StringRef> &bbNames,
- unsigned &basicBlockNum, NamedVRegCursor &NVC) {
-
- if (CanonicalizeBasicBlockNumber != ~0U) {
- if (CanonicalizeBasicBlockNumber != basicBlockNum++)
- return false;
- LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName()
- << "\n";);
- }
-
- if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
- LLVM_DEBUG({
- dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
- << "\n";
- });
- return false;
- }
-
+ unsigned BasicBlockNum, VRegRenamer &Renamer) {
LLVM_DEBUG({
dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
dbgs() << "\n\n================================================\n\n";
});
bool Changed = false;
- MachineFunction &MF = *MBB->getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- bbNames.push_back(MBB->getName());
LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
@@ -413,32 +390,10 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
- Changed |= NVC.renameVRegs(MBB);
-
- // Here we renumber the def vregs for the idempotent instructions from the top
- // of the MachineBasicBlock so that they are named in the order that we sorted
- // them alphabetically. Eventually we wont need SkipVRegs because we will use
- // named vregs instead.
- if (IdempotentInstCount)
- NVC.skipVRegs();
-
- auto MII = MBB->begin();
- for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
- MachineInstr &MI = *MII++;
- Changed = true;
- Register vRegToRename = MI.getOperand(0).getReg();
- auto Rename = NVC.createVirtualRegister(vRegToRename);
-
- std::vector<MachineOperand *> RenameMOs;
- for (auto &MO : MRI.reg_operands(vRegToRename)) {
- RenameMOs.push_back(&MO);
- }
-
- for (auto *MO : RenameMOs) {
- MO->setReg(Rename);
- }
- }
+ Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
+ // TODO: Consider dropping this. Dropping kill defs is probably not
+ // semantically sound.
Changed |= doDefKillClear(MBB);
LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
@@ -470,16 +425,12 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
: RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
<< "\n\n================================================\n\n";);
- std::vector<StringRef> BBNames;
-
unsigned BBNum = 0;
-
bool Changed = false;
-
MachineRegisterInfo &MRI = MF.getRegInfo();
- NamedVRegCursor NVC(MRI);
+ VRegRenamer Renamer(MRI);
for (auto MBB : RPOList)
- Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC);
+ Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp
index 9d719f3917ce..9f61dd9ef243 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -54,11 +55,12 @@ public:
if (MF.empty())
return Changed;
- NamedVRegCursor NVC(MF.getRegInfo());
+ VRegRenamer Renamer(MF.getRegInfo());
+ unsigned BBIndex = 0;
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
for (auto &MBB : RPOT)
- Changed |= NVC.renameVRegs(MBB);
+ Changed |= Renamer.renameVRegs(MBB, BBIndex++);
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index ad5c617623f2..5976f5da1569 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -204,7 +204,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("nuw" , MIToken::kw_nuw)
.Case("nsw" , MIToken::kw_nsw)
.Case("exact" , MIToken::kw_exact)
- .Case("fpexcept", MIToken::kw_fpexcept)
+ .Case("nofpexcept", MIToken::kw_nofpexcept)
.Case("debug-location", MIToken::kw_debug_location)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
@@ -242,6 +242,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("jump-table", MIToken::kw_jump_table)
.Case("constant-pool", MIToken::kw_constant_pool)
.Case("call-entry", MIToken::kw_call_entry)
+ .Case("custom", MIToken::kw_custom)
.Case("liveout", MIToken::kw_liveout)
.Case("address-taken", MIToken::kw_address_taken)
.Case("landing-pad", MIToken::kw_landing_pad)
@@ -252,6 +253,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("shufflemask", MIToken::kw_shufflemask)
.Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
.Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
+ .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker)
.Case("unknown-size", MIToken::kw_unknown_size)
.Default(MIToken::Identifier);
}
@@ -582,8 +584,8 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
.Default(MIToken::Error);
}
-static Cursor maybeLexExlaim(Cursor C, MIToken &Token,
- ErrorCallbackType ErrorCallback) {
+static Cursor maybeLexExclaim(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
if (C.peek() != '!')
return None;
auto Range = C;
@@ -719,7 +721,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return R.remaining();
if (Cursor R = maybeLexNumericalLiteral(C, Token))
return R.remaining();
- if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback))
+ if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback))
return R.remaining();
if (Cursor R = maybeLexSymbol(C, Token))
return R.remaining();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
index 200f9d026cc8..aaffe4a4c91b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -73,7 +73,7 @@ struct MIToken {
kw_nuw,
kw_nsw,
kw_exact,
- kw_fpexcept,
+ kw_nofpexcept,
kw_debug_location,
kw_cfi_same_value,
kw_cfi_offset,
@@ -110,6 +110,7 @@ struct MIToken {
kw_jump_table,
kw_constant_pool,
kw_call_entry,
+ kw_custom,
kw_liveout,
kw_address_taken,
kw_landing_pad,
@@ -120,6 +121,7 @@ struct MIToken {
kw_shufflemask,
kw_pre_instr_symbol,
kw_post_instr_symbol,
+ kw_heap_alloc_marker,
kw_unknown_size,
// Named metadata keywords
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 6498acc9fa51..076ca943788b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -28,6 +28,7 @@
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -343,6 +344,37 @@ VRegInfo &PerFunctionMIParsingState::getVRegInfoNamed(StringRef RegName) {
return *I.first->second;
}
+static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ int Slot = MST.getLocalSlot(V);
+ if (Slot == -1)
+ return;
+ Slots2Values.insert(std::make_pair(unsigned(Slot), V));
+}
+
+/// Creates the mapping from slot numbers to the function's unnamed IR values.
+static void initSlots2Values(const Function &F,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+ MST.incorporateFunction(F);
+ for (const auto &Arg : F.args())
+ mapValueToSlot(&Arg, MST, Slots2Values);
+ for (const auto &BB : F) {
+ mapValueToSlot(&BB, MST, Slots2Values);
+ for (const auto &I : BB)
+ mapValueToSlot(&I, MST, Slots2Values);
+ }
+}
+
+const Value* PerFunctionMIParsingState::getIRValue(unsigned Slot) {
+ if (Slots2Values.empty())
+ initSlots2Values(MF.getFunction(), Slots2Values);
+ auto ValueInfo = Slots2Values.find(Slot);
+ if (ValueInfo == Slots2Values.end())
+ return nullptr;
+ return ValueInfo->second;
+}
+
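The lazy-initialization pattern used by getIRValue is worth noting: the slot table is built on the first query and reused for all later lookups. A standalone sketch with toy types in place of Function and Value:

#include <cassert>
#include <map>
#include <vector>

struct Fn { std::vector<const char *> UnnamedValues; };

class SlotMap {
  const Fn &F;
  std::map<unsigned, const char *> Slots2Values;

  void init() {
    unsigned Slot = 0;
    for (const char *V : F.UnnamedValues)
      Slots2Values.emplace(Slot++, V);
  }

public:
  explicit SlotMap(const Fn &F) : F(F) {}

  const char *getIRValue(unsigned Slot) {
    if (Slots2Values.empty())
      init(); // built lazily, exactly once
    auto It = Slots2Values.find(Slot);
    return It == Slots2Values.end() ? nullptr : It->second;
  }
};

int main() {
  Fn F{{"%0", "%1"}};
  SlotMap M(F);
  assert(M.getIRValue(1)[1] == '1');
  assert(M.getIRValue(7) == nullptr);
}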
namespace {
/// A wrapper struct around the 'MachineOperand' struct that includes a source
@@ -370,8 +402,6 @@ class MIParser {
PerFunctionMIParsingState &PFS;
/// Maps from slot numbers to function's unnamed basic blocks.
DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks;
- /// Maps from slot numbers to function's unnamed values.
- DenseMap<unsigned, const Value *> Slots2Values;
public:
MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
@@ -455,9 +485,12 @@ public:
bool parseTargetIndexOperand(MachineOperand &Dest);
bool parseCustomRegisterMaskOperand(MachineOperand &Dest);
bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
- bool parseMachineOperand(MachineOperand &Dest,
+ bool parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
+ MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx);
- bool parseMachineOperandAndTargetFlags(MachineOperand &Dest,
+ bool parseMachineOperandAndTargetFlags(const unsigned OpCode,
+ const unsigned OpIdx,
+ MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx);
bool parseOffset(int64_t &Offset);
bool parseAlignment(unsigned &Alignment);
@@ -471,6 +504,10 @@ public:
bool parseOptionalAtomicOrdering(AtomicOrdering &Order);
bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol);
+ bool parseHeapAllocMarker(MDNode *&Node);
+
+ bool parseTargetImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
+ MachineOperand &Dest, const MIRFormatter &MF);
private:
/// Convert the integer literal in the current token into an unsigned integer.
@@ -508,8 +545,6 @@ private:
const BasicBlock *getIRBlock(unsigned Slot);
const BasicBlock *getIRBlock(unsigned Slot, const Function &F);
- const Value *getIRValue(unsigned Slot);
-
/// Get or create an MCSymbol for a given name.
MCSymbol *getOrCreateMCSymbol(StringRef Name);
@@ -550,6 +585,9 @@ bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
return true;
}
+typedef function_ref<bool(StringRef::iterator Loc, const Twine &)>
+ ErrorCallbackType;
+
static const char *toString(MIToken::TokenKind TokenKind) {
switch (TokenKind) {
case MIToken::comma:
@@ -906,11 +944,12 @@ bool MIParser::parse(MachineInstr *&MI) {
// Parse the remaining machine operands.
while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) &&
Token.isNot(MIToken::kw_post_instr_symbol) &&
+ Token.isNot(MIToken::kw_heap_alloc_marker) &&
Token.isNot(MIToken::kw_debug_location) &&
Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
auto Loc = Token.location();
Optional<unsigned> TiedDefIdx;
- if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx))
+ if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx))
return true;
if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg())
MO.setIsDebug();
@@ -932,6 +971,10 @@ bool MIParser::parse(MachineInstr *&MI) {
if (Token.is(MIToken::kw_post_instr_symbol))
if (parsePreOrPostInstrSymbol(PostInstrSymbol))
return true;
+ MDNode *HeapAllocMarker = nullptr;
+ if (Token.is(MIToken::kw_heap_alloc_marker))
+ if (parseHeapAllocMarker(HeapAllocMarker))
+ return true;
DebugLoc DebugLocation;
if (Token.is(MIToken::kw_debug_location)) {
@@ -985,6 +1028,8 @@ bool MIParser::parse(MachineInstr *&MI) {
MI->setPreInstrSymbol(MF, PreInstrSymbol);
if (PostInstrSymbol)
MI->setPostInstrSymbol(MF, PostInstrSymbol);
+ if (HeapAllocMarker)
+ MI->setHeapAllocMarker(MF, HeapAllocMarker);
if (!MemOperands.empty())
MI->setMemRefs(MF, MemOperands);
return false;
@@ -1138,7 +1183,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_nuw) ||
Token.is(MIToken::kw_nsw) ||
Token.is(MIToken::kw_exact) ||
- Token.is(MIToken::kw_fpexcept)) {
+ Token.is(MIToken::kw_nofpexcept)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
@@ -1164,8 +1209,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::NoSWrap;
if (Token.is(MIToken::kw_exact))
Flags |= MachineInstr::IsExact;
- if (Token.is(MIToken::kw_fpexcept))
- Flags |= MachineInstr::FPExcept;
+ if (Token.is(MIToken::kw_nofpexcept))
+ Flags |= MachineInstr::NoFPExcept;
lex();
}
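The renamed flag reads like this in MIR text (an invented line for illustration; the opcode and register class are X86-flavored placeholders):

    // %2:fr64 = nofpexcept VMULSDrr %0:fr64, %1:fr64
    // 'nofpexcept' asserts that a strict-FP instruction cannot raise FP
    // exceptions, replacing the old 'fpexcept' flag, which marked
    // instructions that may raise them.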
@@ -1485,17 +1530,61 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
return false;
}
-bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
- const Constant *&C) {
+bool MIParser::parseTargetImmMnemonic(const unsigned OpCode,
+ const unsigned OpIdx,
+ MachineOperand &Dest,
+ const MIRFormatter &MF) {
+ assert(Token.is(MIToken::dot));
+ auto Loc = Token.location(); // record start position
+ size_t Len = 1; // for "."
+ lex();
+
+  // Handle the case where the mnemonic starts with a number.
PLACEHOLDER
+ if (Token.is(MIToken::IntegerLiteral)) {
+ Len += Token.range().size();
+ lex();
+ }
+
+ StringRef Src;
+ if (Token.is(MIToken::comma))
+ Src = StringRef(Loc, Len);
+ else {
+ assert(Token.is(MIToken::Identifier));
+ Src = StringRef(Loc, Len + Token.stringValue().size());
+ }
+ int64_t Val;
+ if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val,
+ [this](StringRef::iterator Loc, const Twine &Msg)
+ -> bool { return error(Loc, Msg); }))
+ return true;
+
+ Dest = MachineOperand::CreateImm(Val);
+ if (!Token.is(MIToken::comma))
+ lex();
+ return false;
+}
+
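For a sense of the target side of this hook (entirely hypothetical names; only the override signature comes from the MIRFormatter interface this commit introduces), a formatter that turns ".low"/".high" mnemonics back into immediates might look like:

    struct MyTargetMIRFormatter : public llvm::MIRFormatter {
      bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
                            llvm::StringRef Src, int64_t &Imm,
                            ErrorCallbackType ErrorCallback) const override {
        // Src arrives with the leading dot, e.g. ".low".
        if (Src == ".low")  { Imm = 0; return false; } // false == success
        if (Src == ".high") { Imm = 1; return false; }
        return ErrorCallback(Src.begin(), "unknown target immediate mnemonic");
      }
    };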
+static bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+ PerFunctionMIParsingState &PFS, const Constant *&C,
+ ErrorCallbackType ErrCB) {
auto Source = StringValue.str(); // The source has to be null terminated.
SMDiagnostic Err;
- C = parseConstantValue(Source, Err, *MF.getFunction().getParent(),
+ C = parseConstantValue(Source, Err, *PFS.MF.getFunction().getParent(),
&PFS.IRSlots);
if (!C)
- return error(Loc + Err.getColumnNo(), Err.getMessage());
+ return ErrCB(Loc + Err.getColumnNo(), Err.getMessage());
return false;
}
+bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+ const Constant *&C) {
+ return ::parseIRConstant(
+ Loc, StringValue, PFS, C,
+ [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C))
return true;
@@ -1628,27 +1717,52 @@ bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
return false;
}
-bool MIParser::getUnsigned(unsigned &Result) {
+static bool getHexUint(const MIToken &Token, APInt &Result) {
+ assert(Token.is(MIToken::HexLiteral));
+ StringRef S = Token.range();
+ assert(S[0] == '0' && tolower(S[1]) == 'x');
+ // This could be a floating point literal with a special prefix.
+ if (!isxdigit(S[2]))
+ return true;
+ StringRef V = S.substr(2);
+ APInt A(V.size()*4, V, 16);
+
+  // If A is 0, then A.getActiveBits() is 0, which isn't a valid bitwidth.
+  // Make sure that isn't the case before constructing the result.
+ unsigned NumBits = (A == 0) ? 32 : A.getActiveBits();
+ Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
+ return false;
+}
+
+static bool getUnsigned(const MIToken &Token, unsigned &Result,
+ ErrorCallbackType ErrCB) {
if (Token.hasIntegerValue()) {
const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
uint64_t Val64 = Token.integerValue().getLimitedValue(Limit);
if (Val64 == Limit)
- return error("expected 32-bit integer (too large)");
+ return ErrCB(Token.location(), "expected 32-bit integer (too large)");
Result = Val64;
return false;
}
if (Token.is(MIToken::HexLiteral)) {
APInt A;
- if (getHexUint(A))
+ if (getHexUint(Token, A))
return true;
if (A.getBitWidth() > 32)
- return error("expected 32-bit integer (too large)");
+ return ErrCB(Token.location(), "expected 32-bit integer (too large)");
Result = A.getZExtValue();
return false;
}
return true;
}
+bool MIParser::getUnsigned(unsigned &Result) {
+ return ::getUnsigned(
+ Token, Result, [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
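The recurring move in this hunk — getUnsigned, getHexUint, parseIRConstant and friends become file-static with an error-callback parameter, each keeping a thin member wrapper — exists so that MIRFormatter hooks can reuse the parsing core without a MIParser instance. A minimal, dependency-light miniature of the pattern (names invented):

    #include <string>
    #include "llvm/ADT/STLExtras.h" // llvm::function_ref

    using ErrCB = llvm::function_ref<bool(const std::string &)>;

    // Reusable core: no parser object required, diagnostics go through ErrCB.
    static bool parsePositive(int In, int &Out, ErrCB Err) {
      if (In <= 0)
        return Err("expected a positive value"); // true == error, as in MIParser
      Out = In;
      return false;
    }

    struct Parser {
      std::string LastError;
      bool error(const std::string &Msg) { LastError = Msg; return true; }
      bool parsePositive(int In, int &Out) { // thin wrapper, same shape as above
        return ::parsePositive(
            In, Out, [this](const std::string &M) { return error(M); });
      }
    };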
bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
assert(Token.is(MIToken::MachineBasicBlock) ||
Token.is(MIToken::MachineBasicBlockLabel));
@@ -1728,23 +1842,25 @@ bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) {
return false;
}
-bool MIParser::parseGlobalValue(GlobalValue *&GV) {
+static bool parseGlobalValue(const MIToken &Token,
+ PerFunctionMIParsingState &PFS, GlobalValue *&GV,
+ ErrorCallbackType ErrCB) {
switch (Token.kind()) {
case MIToken::NamedGlobalValue: {
- const Module *M = MF.getFunction().getParent();
+ const Module *M = PFS.MF.getFunction().getParent();
GV = M->getNamedValue(Token.stringValue());
if (!GV)
- return error(Twine("use of undefined global value '") + Token.range() +
- "'");
+ return ErrCB(Token.location(), Twine("use of undefined global value '") +
+ Token.range() + "'");
break;
}
case MIToken::GlobalValue: {
unsigned GVIdx;
- if (getUnsigned(GVIdx))
+ if (getUnsigned(Token, GVIdx, ErrCB))
return true;
if (GVIdx >= PFS.IRSlots.GlobalValues.size())
- return error(Twine("use of undefined global value '@") + Twine(GVIdx) +
- "'");
+ return ErrCB(Token.location(), Twine("use of undefined global value '@") +
+ Twine(GVIdx) + "'");
GV = PFS.IRSlots.GlobalValues[GVIdx];
break;
}
@@ -1754,6 +1870,14 @@ bool MIParser::parseGlobalValue(GlobalValue *&GV) {
return false;
}
+bool MIParser::parseGlobalValue(GlobalValue *&GV) {
+ return ::parseGlobalValue(
+ Token, PFS, GV,
+ [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
GlobalValue *GV = nullptr;
if (parseGlobalValue(GV))
@@ -2295,23 +2419,13 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) {
if (expectAndConsume(MIToken::lparen))
return error("expected syntax shufflemask(<integer or undef>, ...)");
- SmallVector<Constant *, 32> ShufMask;
- LLVMContext &Ctx = MF.getFunction().getContext();
- Type *I32Ty = Type::getInt32Ty(Ctx);
-
- bool AllZero = true;
- bool AllUndef = true;
-
+ SmallVector<int, 32> ShufMask;
do {
if (Token.is(MIToken::kw_undef)) {
- ShufMask.push_back(UndefValue::get(I32Ty));
- AllZero = false;
+ ShufMask.push_back(-1);
} else if (Token.is(MIToken::IntegerLiteral)) {
- AllUndef = false;
const APSInt &Int = Token.integerValue();
- if (!Int.isNullValue())
- AllZero = false;
- ShufMask.push_back(ConstantInt::get(I32Ty, Int.getExtValue()));
+ ShufMask.push_back(Int.getExtValue());
} else
return error("expected integer constant");
@@ -2321,13 +2435,8 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) {
if (expectAndConsume(MIToken::rparen))
return error("shufflemask should be terminated by ')'.");
- if (AllZero || AllUndef) {
- VectorType *VT = VectorType::get(I32Ty, ShufMask.size());
- Constant *C = AllZero ? Constant::getNullValue(VT) : UndefValue::get(VT);
- Dest = MachineOperand::CreateShuffleMask(C);
- } else
- Dest = MachineOperand::CreateShuffleMask(ConstantVector::get(ShufMask));
-
+ ArrayRef<int> MaskAlloc = MF.allocateShuffleMask(ShufMask);
+ Dest = MachineOperand::CreateShuffleMask(MaskAlloc);
return false;
}
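With this change a mask such as shufflemask(0, undef, 3) is stored as the plain int array {0, -1, 3}, interned on the MachineFunction instead of materialized as an IR ConstantVector. A sketch of the new representation (MF assumed to be the current MachineFunction, Dest a MachineOperand):

    llvm::SmallVector<int, 4> Mask = {0, -1, 3};            // -1 encodes 'undef'
    llvm::ArrayRef<int> Interned = MF.allocateShuffleMask(Mask); // MF-owned copy
    Dest = llvm::MachineOperand::CreateShuffleMask(Interned);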
@@ -2402,7 +2511,8 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
return false;
}
-bool MIParser::parseMachineOperand(MachineOperand &Dest,
+bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
+ MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx) {
switch (Token.kind()) {
case MIToken::kw_implicit:
@@ -2491,6 +2601,13 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
return parseCustomRegisterMaskOperand(Dest);
} else
return parseTypedImmediateOperand(Dest);
+ case MIToken::dot: {
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ if (const auto *Formatter = TII->getMIRFormatter()) {
+ return parseTargetImmMnemonic(OpCode, OpIdx, Dest, *Formatter);
+ }
+ LLVM_FALLTHROUGH;
+ }
default:
// FIXME: Parse the MCSymbol machine operand.
return error("expected a machine operand");
@@ -2499,7 +2616,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
}
bool MIParser::parseMachineOperandAndTargetFlags(
- MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) {
+ const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx) {
unsigned TF = 0;
bool HasTargetFlags = false;
if (Token.is(MIToken::kw_target_flags)) {
@@ -2531,7 +2649,7 @@ bool MIParser::parseMachineOperandAndTargetFlags(
return true;
}
auto Loc = Token.location();
- if (parseMachineOperand(Dest, TiedDefIdx))
+ if (parseMachineOperand(OpCode, OpIdx, Dest, TiedDefIdx))
return true;
if (!HasTargetFlags)
return false;
@@ -2592,30 +2710,31 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) {
return false;
}
-bool MIParser::parseIRValue(const Value *&V) {
+static bool parseIRValue(const MIToken &Token, PerFunctionMIParsingState &PFS,
+ const Value *&V, ErrorCallbackType ErrCB) {
switch (Token.kind()) {
case MIToken::NamedIRValue: {
- V = MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue());
+ V = PFS.MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue());
break;
}
case MIToken::IRValue: {
unsigned SlotNumber = 0;
- if (getUnsigned(SlotNumber))
+ if (getUnsigned(Token, SlotNumber, ErrCB))
return true;
- V = getIRValue(SlotNumber);
+ V = PFS.getIRValue(SlotNumber);
break;
}
case MIToken::NamedGlobalValue:
case MIToken::GlobalValue: {
GlobalValue *GV = nullptr;
- if (parseGlobalValue(GV))
+ if (parseGlobalValue(Token, PFS, GV, ErrCB))
return true;
V = GV;
break;
}
case MIToken::QuotedIRValue: {
const Constant *C = nullptr;
- if (parseIRConstant(Token.location(), Token.stringValue(), C))
+ if (parseIRConstant(Token.location(), Token.stringValue(), PFS, C, ErrCB))
return true;
V = C;
break;
@@ -2624,10 +2743,17 @@ bool MIParser::parseIRValue(const Value *&V) {
llvm_unreachable("The current token should be an IR block reference");
}
if (!V)
- return error(Twine("use of undefined IR value '") + Token.range() + "'");
+ return ErrCB(Token.location(), Twine("use of undefined IR value '") + Token.range() + "'");
return false;
}
+bool MIParser::parseIRValue(const Value *&V) {
+ return ::parseIRValue(
+ Token, PFS, V, [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
bool MIParser::getUint64(uint64_t &Result) {
if (Token.hasIntegerValue()) {
if (Token.integerValue().getActiveBits() > 64)
@@ -2648,20 +2774,7 @@ bool MIParser::getUint64(uint64_t &Result) {
}
bool MIParser::getHexUint(APInt &Result) {
- assert(Token.is(MIToken::HexLiteral));
- StringRef S = Token.range();
- assert(S[0] == '0' && tolower(S[1]) == 'x');
- // This could be a floating point literal with a special prefix.
- if (!isxdigit(S[2]))
- return true;
- StringRef V = S.substr(2);
- APInt A(V.size()*4, V, 16);
-
- // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make
- // sure it isn't the case before constructing result.
- unsigned NumBits = (A == 0) ? 32 : A.getActiveBits();
- Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
- return false;
+ return ::getHexUint(Token, Result);
}
bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
@@ -2748,6 +2861,20 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
"expected a global value or an external symbol after 'call-entry'");
}
break;
+ case MIToken::kw_custom: {
+ lex();
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ if (const auto *Formatter = TII->getMIRFormatter()) {
+ if (Formatter->parseCustomPseudoSourceValue(
+ Token.stringValue(), MF, PFS, PSV,
+ [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ }))
+ return true;
+ } else
+ return error("unable to parse target custom pseudo source value");
+ break;
+ }
default:
llvm_unreachable("The current token should be pseudo source value");
}
@@ -2759,7 +2886,7 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) ||
Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) ||
Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) ||
- Token.is(MIToken::kw_call_entry)) {
+ Token.is(MIToken::kw_call_entry) || Token.is(MIToken::kw_custom)) {
const PseudoSourceValue *PSV = nullptr;
if (parseMemoryPseudoSourceValue(PSV))
return true;
@@ -2956,6 +3083,22 @@ bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) {
return false;
}
+bool MIParser::parseHeapAllocMarker(MDNode *&Node) {
+ assert(Token.is(MIToken::kw_heap_alloc_marker) &&
+ "Invalid token for a heap alloc marker!");
+ lex();
+ parseMDNode(Node);
+ if (!Node)
+ return error("expected a MDNode after 'heap-alloc-marker'");
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
+ return false;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine operand");
+ lex();
+ return false;
+}
+
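The parser half above pairs with the MIRPrinter change later in this diff; round-tripped, the attribute reads like "..., heap-alloc-marker !5", after any post-instruction symbol and before the debug location. A sketch of the underlying accessors (MF, MI, Marker, OS and MST assumed in scope):

    MI->setHeapAllocMarker(MF, Marker);        // what the code above ends up doing
    if (llvm::MDNode *N = MI->getHeapAllocMarker()) // what the printer queries
      N->printAsOperand(OS, MST);              // emits e.g. "!5"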
static void initSlots2BasicBlocks(
const Function &F,
DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
@@ -2994,37 +3137,6 @@ const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) {
return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks);
}
-static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST,
- DenseMap<unsigned, const Value *> &Slots2Values) {
- int Slot = MST.getLocalSlot(V);
- if (Slot == -1)
- return;
- Slots2Values.insert(std::make_pair(unsigned(Slot), V));
-}
-
-/// Creates the mapping from slot numbers to function's unnamed IR values.
-static void initSlots2Values(const Function &F,
- DenseMap<unsigned, const Value *> &Slots2Values) {
- ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
- MST.incorporateFunction(F);
- for (const auto &Arg : F.args())
- mapValueToSlot(&Arg, MST, Slots2Values);
- for (const auto &BB : F) {
- mapValueToSlot(&BB, MST, Slots2Values);
- for (const auto &I : BB)
- mapValueToSlot(&I, MST, Slots2Values);
- }
-}
-
-const Value *MIParser::getIRValue(unsigned Slot) {
- if (Slots2Values.empty())
- initSlots2Values(MF.getFunction(), Slots2Values);
- auto ValueInfo = Slots2Values.find(Slot);
- if (ValueInfo == Slots2Values.end())
- return nullptr;
- return ValueInfo->second;
-}
-
MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
// FIXME: Currently we can't recognize temporary or local symbols and call all
// of the appropriate forms to create them. However, this handles basic cases
@@ -3087,3 +3199,15 @@ bool llvm::parseMDNode(PerFunctionMIParsingState &PFS,
MDNode *&Node, StringRef Src, SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node);
}
+
+bool MIRFormatter::parseIRValue(StringRef Src, MachineFunction &MF,
+ PerFunctionMIParsingState &PFS, const Value *&V,
+ ErrorCallbackType ErrorCallback) {
+ MIToken Token;
+ Src = lexMIToken(Src, Token, [&](StringRef::iterator Loc, const Twine &Msg) {
+ ErrorCallback(Loc, Msg);
+ });
+ V = nullptr;
+
+ return ::parseIRValue(Token, PFS, V, ErrorCallback);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 55fac93d8991..10157c746b46 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -64,9 +64,12 @@ class MIRParserImpl {
/// parts.
bool NoMIRDocuments = false;
+ std::function<void(Function &)> ProcessIRFunction;
+
public:
- MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
- StringRef Filename, LLVMContext &Context);
+ MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
+ LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction);
void reportDiagnostic(const SMDiagnostic &Diag);
@@ -92,6 +95,9 @@ public:
/// Return null if an error occurred.
std::unique_ptr<Module> parseIRModule();
+ /// Create an empty function with the given name.
+ Function *createDummyFunction(StringRef Name, Module &M);
+
bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI);
/// Parse the machine function in the current YAML document.
@@ -163,13 +169,13 @@ static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
}
MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
- StringRef Filename, LLVMContext &Context)
+ StringRef Filename, LLVMContext &Context,
+ std::function<void(Function &)> Callback)
: SM(),
- In(SM.getMemoryBuffer(
- SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))->getBuffer(),
- nullptr, handleYAMLDiag, this),
- Filename(Filename),
- Context(Context) {
+ In(SM.getMemoryBuffer(SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))
+ ->getBuffer(),
+ nullptr, handleYAMLDiag, this),
+ Filename(Filename), Context(Context), ProcessIRFunction(Callback) {
In.setContext(&In);
}
@@ -256,14 +262,17 @@ bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
return false;
}
-/// Create an empty function with the given name.
-static Function *createDummyFunction(StringRef Name, Module &M) {
+Function *MIRParserImpl::createDummyFunction(StringRef Name, Module &M) {
auto &Context = M.getContext();
Function *F =
Function::Create(FunctionType::get(Type::getVoidTy(Context), false),
Function::ExternalLinkage, Name, M);
BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
new UnreachableInst(Context, BB);
+
+ if (ProcessIRFunction)
+ ProcessIRFunction(*F);
+
return F;
}
@@ -925,21 +934,23 @@ bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
return Impl->parseMachineFunctions(M, MMI);
}
-std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(StringRef Filename,
- SMDiagnostic &Error,
- LLVMContext &Context) {
+std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(
+ StringRef Filename, SMDiagnostic &Error, LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction) {
auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = FileOrErr.getError()) {
Error = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + EC.message());
return nullptr;
}
- return createMIRParser(std::move(FileOrErr.get()), Context);
+ return createMIRParser(std::move(FileOrErr.get()), Context,
+ ProcessIRFunction);
}
std::unique_ptr<MIRParser>
llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents,
- LLVMContext &Context) {
+ LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction) {
auto Filename = Contents->getBufferIdentifier();
if (Context.shouldDiscardValueNames()) {
Context.diagnose(DiagnosticInfoMIRParser(
@@ -949,6 +960,6 @@ llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents,
"Can't read MIR with a Context that discards named Values")));
return nullptr;
}
- return std::make_unique<MIRParser>(
- std::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context));
+ return std::make_unique<MIRParser>(std::make_unique<MIRParserImpl>(
+ std::move(Contents), Filename, Context, ProcessIRFunction));
}
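A minimal usage sketch of the extended entry point (file name and attribute are placeholders; the statements are assumed to live inside some setup function):

    #include "llvm/CodeGen/MIRParser/MIRParser.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/SourceMgr.h"

    llvm::LLVMContext Ctx;
    llvm::SMDiagnostic Err;
    auto Parser = llvm::createMIRParserFromFile(
        "input.mir", Err, Ctx, [](llvm::Function &F) {
          // Invoked for each dummy IR function the parser synthesizes when a
          // MIR document carries no embedded IR (see createDummyFunction).
          F.addFnAttr("frame-pointer", "all"); // placeholder processing
        });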
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
index 1a4e21ac06a9..e8cd3d60ccb1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -752,8 +752,8 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "nsw ";
if (MI.getFlag(MachineInstr::IsExact))
OS << "exact ";
- if (MI.getFlag(MachineInstr::FPExcept))
- OS << "fpexcept ";
+ if (MI.getFlag(MachineInstr::NoFPExcept))
+ OS << "nofpexcept ";
OS << TII->getName(MI.getOpcode());
if (I < E)
@@ -784,6 +784,13 @@ void MIPrinter::print(const MachineInstr &MI) {
MachineOperand::printSymbol(OS, *PostInstrSymbol);
NeedComma = true;
}
+ if (MDNode *HeapAllocMarker = MI.getHeapAllocMarker()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " heap-alloc-marker ";
+ HeapAllocMarker->printAsOperand(OS, MST);
+ NeedComma = true;
+ }
if (const DebugLoc &DL = MI.getDebugLoc()) {
if (NeedComma)
@@ -849,7 +856,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo();
- Op.print(OS, MST, TypeToPrint, PrintDef, /*IsStandalone=*/false,
+ Op.print(OS, MST, TypeToPrint, OpIdx, PrintDef, /*IsStandalone=*/false,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII);
break;
}
@@ -867,6 +874,28 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
}
}
+void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V,
+ ModuleSlotTracker &MST) {
+ if (isa<GlobalValue>(V)) {
+ V.printAsOperand(OS, /*PrintType=*/false, MST);
+ return;
+ }
+ if (isa<Constant>(V)) {
+ // Machine memory operands can load/store to/from constant value pointers.
+ OS << '`';
+ V.printAsOperand(OS, /*PrintType=*/true, MST);
+ OS << '`';
+ return;
+ }
+ OS << "%ir.";
+ if (V.hasName()) {
+ printLLVMNameWithoutPrefix(OS, V.getName());
+ return;
+ }
+ int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1;
+ MachineOperand::printIRSlotNumber(OS, Slot);
+}
+
void llvm::printMIR(raw_ostream &OS, const Module &M) {
yaml::Output Out(OS);
Out << const_cast<Module &>(M);
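A sketch of the output shapes the new helper produces, assuming V is a Value reference visible to a ModuleSlotTracker MST that has incorporated the current function:

    std::string Buf;
    llvm::raw_string_ostream OS(Buf);
    llvm::MIRFormatter::printIRValue(OS, V, MST);
    // Globals print as "@g"; constants print backquoted with their type,
    // e.g. "`i32 0`"; other IR values print as "%ir.<name>" or, when
    // unnamed, "%ir.<slot>" via MachineOperand::printIRSlotNumber.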
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp
index e032fffd658c..1b5a9ade0871 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MIRPrinter.h"
-
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index 6629000f468f..fcc40b26c527 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -13,336 +13,144 @@ using namespace llvm;
#define DEBUG_TYPE "mir-vregnamer-utils"
-namespace {
+using VRegRenameMap = std::map<unsigned, unsigned>;
-// TypedVReg and VRType are used to tell the renamer what to do at points in a
-// sequence of values to be renamed. A TypedVReg can either contain
-// an actual VReg, a FrameIndex, or it could just be a barrier for the next
-// candidate (side-effecting instruction). This tells the renamer to increment
-// to the next vreg name, or to skip modulo some skip-gap value.
-enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate };
-class TypedVReg {
- VRType Type;
- Register Reg;
-
-public:
- TypedVReg(Register Reg) : Type(RSE_Reg), Reg(Reg) {}
- TypedVReg(VRType Type) : Type(Type), Reg(~0U) {
- assert(Type != RSE_Reg && "Expected a non-Register Type.");
- }
-
- bool isReg() const { return Type == RSE_Reg; }
- bool isFrameIndex() const { return Type == RSE_FrameIndex; }
- bool isCandidate() const { return Type == RSE_NewCandidate; }
-
- VRType getType() const { return Type; }
- Register getReg() const {
- assert(this->isReg() && "Expected a virtual or physical Register.");
- return Reg;
- }
-};
-
-/// Here we find our candidates. What makes an interesting candidate?
-/// A candidate for a canonicalization tree root is normally any kind of
-/// instruction that causes side effects such as a store to memory or a copy to
-/// a physical register or a return instruction. We use these as an expression
-/// tree root that we walk in order to build a canonical walk which should
-/// result in canonical vreg renaming.
-std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
- std::vector<MachineInstr *> Candidates;
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-
- for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
- MachineInstr *MI = &*II;
-
- bool DoesMISideEffect = false;
-
- if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) {
- const Register Dst = MI->getOperand(0).getReg();
- DoesMISideEffect |= !Register::isVirtualRegister(Dst);
-
- for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
- if (DoesMISideEffect)
- break;
- DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());
- }
- }
-
- if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect)
- continue;
+bool VRegRenamer::doVRegRenaming(const VRegRenameMap &VRM) {
+ bool Changed = false;
- LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump(););
- Candidates.push_back(MI);
+ for (const auto &E : VRM) {
+ Changed = Changed || !MRI.reg_empty(E.first);
+ MRI.replaceRegWith(E.first, E.second);
}
- return Candidates;
-}
-
-void doCandidateWalk(std::vector<TypedVReg> &VRegs,
- std::queue<TypedVReg> &RegQueue,
- std::vector<MachineInstr *> &VisitedMIs,
- const MachineBasicBlock *MBB) {
-
- const MachineFunction &MF = *MBB->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
- while (!RegQueue.empty()) {
-
- auto TReg = RegQueue.front();
- RegQueue.pop();
-
- if (TReg.isFrameIndex()) {
- LLVM_DEBUG(dbgs() << "Popping frame index.\n";);
- VRegs.push_back(TypedVReg(RSE_FrameIndex));
- continue;
- }
-
- assert(TReg.isReg() && "Expected vreg or physreg.");
- Register Reg = TReg.getReg();
-
- if (Register::isVirtualRegister(Reg)) {
- LLVM_DEBUG({
- dbgs() << "Popping vreg ";
- MRI.def_begin(Reg)->dump();
- dbgs() << "\n";
- });
-
- if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) {
- return TR.isReg() && TR.getReg() == Reg;
- })) {
- VRegs.push_back(TypedVReg(Reg));
- }
- } else {
- LLVM_DEBUG(dbgs() << "Popping physreg.\n";);
- VRegs.push_back(TypedVReg(Reg));
- continue;
- }
-
- for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) {
- MachineInstr *Def = RI->getParent();
-
- if (Def->getParent() != MBB)
- continue;
-
- if (llvm::any_of(VisitedMIs,
- [&](const MachineInstr *VMI) { return Def == VMI; })) {
- break;
- }
-
- LLVM_DEBUG({
- dbgs() << "\n========================\n";
- dbgs() << "Visited MI: ";
- Def->dump();
- dbgs() << "BB Name: " << Def->getParent()->getName() << "\n";
- dbgs() << "\n========================\n";
- });
- VisitedMIs.push_back(Def);
- for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
-
- MachineOperand &MO = Def->getOperand(I);
- if (MO.isFI()) {
- LLVM_DEBUG(dbgs() << "Pushing frame index.\n";);
- RegQueue.push(TypedVReg(RSE_FrameIndex));
- }
-
- if (!MO.isReg())
- continue;
- RegQueue.push(TypedVReg(MO.getReg()));
- }
- }
- }
+ return Changed;
}
-std::map<unsigned, unsigned>
-getVRegRenameMap(const std::vector<TypedVReg> &VRegs,
- const std::vector<Register> &renamedInOtherBB,
- MachineRegisterInfo &MRI, NamedVRegCursor &NVC) {
- std::map<unsigned, unsigned> VRegRenameMap;
- bool FirstCandidate = true;
-
- for (auto &vreg : VRegs) {
- if (vreg.isFrameIndex()) {
- // We skip one vreg for any frame index because there is a good chance
- // (especially when comparing SelectionDAG to GlobalISel generated MIR)
- // that in the other file we are just getting an incoming vreg that comes
- // from a copy from a frame index. So it's safe to skip by one.
- unsigned LastRenameReg = NVC.incrementVirtualVReg();
- (void)LastRenameReg;
- LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
- continue;
- } else if (vreg.isCandidate()) {
-
- // After the first candidate, for every subsequent candidate, we skip mod
- // 10 registers so that the candidates are more likely to start at the
- // same vreg number making it more likely that the canonical walk from the
-  // candidate instruction. We don't need to skip from the first candidate of
- // the BasicBlock because we already skip ahead several vregs for each BB.
- unsigned LastRenameReg = NVC.getVirtualVReg();
- if (FirstCandidate)
- NVC.incrementVirtualVReg(LastRenameReg % 10);
- FirstCandidate = false;
- continue;
- } else if (!Register::isVirtualRegister(vreg.getReg())) {
- unsigned LastRenameReg = NVC.incrementVirtualVReg();
- (void)LastRenameReg;
- LLVM_DEBUG({
- dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";
- });
- continue;
- }
-
- auto Reg = vreg.getReg();
- if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) {
- LLVM_DEBUG(dbgs() << "Vreg " << Reg
- << " already renamed in other BB.\n";);
- continue;
- }
+VRegRenameMap
+VRegRenamer::getVRegRenameMap(const std::vector<NamedVReg> &VRegs) {
- auto Rename = NVC.createVirtualRegister(Reg);
+ StringMap<unsigned> VRegNameCollisionMap;
- if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
- LLVM_DEBUG(dbgs() << "Mapping vreg ";);
- if (MRI.reg_begin(Reg) != MRI.reg_end()) {
- LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
- } else {
- LLVM_DEBUG(dbgs() << Reg;);
- }
- LLVM_DEBUG(dbgs() << " to ";);
- if (MRI.reg_begin(Rename) != MRI.reg_end()) {
- LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
- } else {
- LLVM_DEBUG(dbgs() << Rename;);
- }
- LLVM_DEBUG(dbgs() << "\n";);
+ auto GetUniqueVRegName = [&VRegNameCollisionMap](const NamedVReg &Reg) {
+ if (VRegNameCollisionMap.find(Reg.getName()) == VRegNameCollisionMap.end())
+ VRegNameCollisionMap[Reg.getName()] = 0;
+ const unsigned Counter = ++VRegNameCollisionMap[Reg.getName()];
+ return Reg.getName() + "__" + std::to_string(Counter);
+ };
- VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
- }
+ VRegRenameMap VRM;
+ for (const auto &VReg : VRegs) {
+ const unsigned Reg = VReg.getReg();
+ VRM[Reg] = createVirtualRegisterWithLowerName(Reg, GetUniqueVRegName(VReg));
}
-
- return VRegRenameMap;
+ return VRM;
}
-bool doVRegRenaming(std::vector<Register> &renamedInOtherBB,
- const std::map<unsigned, unsigned> &VRegRenameMap,
- MachineRegisterInfo &MRI) {
- bool Changed = false;
- for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) {
-
- auto VReg = I->first;
- auto Rename = I->second;
-
- renamedInOtherBB.push_back(Rename);
-
- std::vector<MachineOperand *> RenameMOs;
- for (auto &MO : MRI.reg_operands(VReg)) {
- RenameMOs.push_back(&MO);
- }
-
- for (auto *MO : RenameMOs) {
- Changed = true;
- MO->setReg(Rename);
+std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
+ std::string S;
+ raw_string_ostream OS(S);
- if (!MO->isDef())
- MO->setIsKill(false);
+  // Gets a hashable artifact from a given MachineOperand (i.e. an unsigned).
+ auto GetHashableMO = [this](const MachineOperand &MO) -> unsigned {
+ switch (MO.getType()) {
+ case MachineOperand::MO_CImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getCImm()->getZExtValue());
+ case MachineOperand::MO_FPImmediate:
+ return hash_combine(
+ MO.getType(), MO.getTargetFlags(),
+ MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
+ case MachineOperand::MO_Register:
+ if (Register::isVirtualRegister(MO.getReg()))
+ return MRI.getVRegDef(MO.getReg())->getOpcode();
+ return MO.getReg();
+ case MachineOperand::MO_Immediate:
+ return MO.getImm();
+ case MachineOperand::MO_TargetIndex:
+ return MO.getOffset() | (MO.getTargetFlags() << 16);
+ case MachineOperand::MO_FrameIndex:
+ return llvm::hash_value(MO);
+
+    // We could explicitly handle all the types of the MachineOperand here,
+    // but we can just return a common number until we find a compelling
+    // test case where this is bad. The only side effect here is
+    // contributing to a hash collision, but there's enough information
+    // (opcodes, other registers, etc.) that this will likely not be a problem.
+
+    // TODO: Handle the following Index/ID/Predicate cases. They can
+    // be hashed in a stable manner.
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_CFIIndex:
+ case MachineOperand::MO_IntrinsicID:
+ case MachineOperand::MO_Predicate:
+
+    // In the cases below we haven't found a way to produce an artifact that
+    // will result in a stable hash, in most cases because they are pointers.
+    // We want stable hashes because we want the hash to be the same from
+    // run to run.
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut:
+ case MachineOperand::MO_Metadata:
+ case MachineOperand::MO_MCSymbol:
+ case MachineOperand::MO_ShuffleMask:
+ return 0;
}
+ llvm_unreachable("Unexpected MachineOperandType.");
+ };
+
+ SmallVector<unsigned, 16> MIOperands = {MI.getOpcode(), MI.getFlags()};
+ llvm::transform(MI.uses(), std::back_inserter(MIOperands), GetHashableMO);
+
+ for (const auto *Op : MI.memoperands()) {
+ MIOperands.push_back((unsigned)Op->getSize());
+ MIOperands.push_back((unsigned)Op->getFlags());
+ MIOperands.push_back((unsigned)Op->getOffset());
+ MIOperands.push_back((unsigned)Op->getOrdering());
+ MIOperands.push_back((unsigned)Op->getAddrSpace());
+ MIOperands.push_back((unsigned)Op->getSyncScopeID());
+ MIOperands.push_back((unsigned)Op->getBaseAlignment());
+ MIOperands.push_back((unsigned)Op->getFailureOrdering());
}
- return Changed;
+ auto HashMI = hash_combine_range(MIOperands.begin(), MIOperands.end());
+ return std::to_string(HashMI).substr(0, 5);
}
-bool renameVRegs(MachineBasicBlock *MBB,
- std::vector<Register> &renamedInOtherBB,
- NamedVRegCursor &NVC) {
- bool Changed = false;
- MachineFunction &MF = *MBB->getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
- std::vector<MachineInstr *> VisitedMIs;
- llvm::copy(Candidates, std::back_inserter(VisitedMIs));
-
- std::vector<TypedVReg> VRegs;
- for (auto candidate : Candidates) {
- VRegs.push_back(TypedVReg(RSE_NewCandidate));
-
- std::queue<TypedVReg> RegQueue;
-
- // Here we walk the vreg operands of a non-root node along our walk.
- // The root nodes are the original candidates (stores normally).
- // These are normally not the root nodes (except for the case of copies to
- // physical registers).
- for (unsigned i = 1; i < candidate->getNumOperands(); i++) {
- if (candidate->mayStore() || candidate->isBranch())
- break;
-
- MachineOperand &MO = candidate->getOperand(i);
- if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
- continue;
-
- LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
- RegQueue.push(TypedVReg(MO.getReg()));
- }
-
- // Here we walk the root candidates. We start from the 0th operand because
- // the root is normally a store to a vreg.
- for (unsigned i = 0; i < candidate->getNumOperands(); i++) {
-
- if (!candidate->mayStore() && !candidate->isBranch())
- break;
-
- MachineOperand &MO = candidate->getOperand(i);
-
- // TODO: Do we want to only add vregs here?
- if (!MO.isReg() && !MO.isFI())
- continue;
-
- LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
-
- RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg())
- : TypedVReg(RSE_FrameIndex));
- }
-
- doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB);
- }
-
- // If we have populated no vregs to rename then bail.
- // The rest of this function does the vreg remaping.
- if (VRegs.size() == 0)
- return Changed;
-
- auto VRegRenameMap = getVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC);
- Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
- return Changed;
+unsigned VRegRenamer::createVirtualRegister(unsigned VReg) {
+ assert(Register::isVirtualRegister(VReg) && "Expected Virtual Registers");
+ std::string Name = getInstructionOpcodeHash(*MRI.getVRegDef(VReg));
+ return createVirtualRegisterWithLowerName(VReg, Name);
}
-} // anonymous namespace
-void NamedVRegCursor::skipVRegs() {
- unsigned VRegGapIndex = 1;
- if (!virtualVRegNumber) {
- VRegGapIndex = 0;
- virtualVRegNumber = MRI.createIncompleteVirtualRegister();
+bool VRegRenamer::renameInstsInMBB(MachineBasicBlock *MBB) {
+ std::vector<NamedVReg> VRegs;
+ std::string Prefix = "bb" + std::to_string(CurrentBBNumber) + "_";
+ for (MachineInstr &Candidate : *MBB) {
+ // Don't rename stores/branches.
+ if (Candidate.mayStore() || Candidate.isBranch())
+ continue;
+ if (!Candidate.getNumOperands())
+ continue;
+ // Look for instructions that define VRegs in operand 0.
+ MachineOperand &MO = Candidate.getOperand(0);
+    // Skip non-register operands and instructions that define physical regs.
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ continue;
+ VRegs.push_back(
+ NamedVReg(MO.getReg(), Prefix + getInstructionOpcodeHash(Candidate)));
}
- const unsigned VR_GAP = (++VRegGapIndex * SkipGapSize);
- unsigned I = virtualVRegNumber;
- const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
-
- virtualVRegNumber = E;
-}
-
-unsigned NamedVRegCursor::createVirtualRegister(unsigned VReg) {
- if (!virtualVRegNumber)
- skipVRegs();
- std::string S;
- raw_string_ostream OS(S);
- OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
- OS.flush();
- virtualVRegNumber++;
- if (auto RC = MRI.getRegClassOrNull(VReg))
- return MRI.createVirtualRegister(RC, OS.str());
- return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str());
+ return VRegs.size() ? doVRegRenaming(getVRegRenameMap(VRegs)) : false;
}
-bool NamedVRegCursor::renameVRegs(MachineBasicBlock *MBB) {
- return ::renameVRegs(MBB, RenamedInOtherBB, *this);
+unsigned VRegRenamer::createVirtualRegisterWithLowerName(unsigned VReg,
+ StringRef Name) {
+ std::string LowerName = Name.lower();
+ const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg);
+ return RC ? MRI.createVirtualRegister(RC, LowerName)
+ : MRI.createGenericVirtualRegister(MRI.getType(VReg), LowerName);
}
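Putting the pieces together (hash digits invented for illustration): renameInstsInMBB builds "bb<N>_" plus the five-digit operand hash, getVRegRenameMap disambiguates collisions with "__<count>", and createVirtualRegisterWithLowerName lowercases the result:

    // Assumed: CurrentBBNumber == 0, getInstructionOpcodeHash(MI) == "73462".
    std::string Prefix = "bb" + std::to_string(0) + "_"; // "bb0_"
    std::string Name   = Prefix + "73462";               // "bb0_73462"
    // Two identically hashing defs become %bb0_73462__1 and %bb0_73462__2.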
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h
index c5b52a968538..0c0a71a13248 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h
@@ -25,65 +25,67 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/raw_ostream.h"
-#include <queue>
-
namespace llvm {
+/// VRegRenamer - This class is used for renaming vregs in a machine basic
+/// block according to the semantics of the instruction.
+class VRegRenamer {
+ class NamedVReg {
+ Register Reg;
+ std::string Name;
+
+ public:
+ NamedVReg(Register Reg, std::string Name = "") : Reg(Reg), Name(Name) {}
+ NamedVReg(std::string Name = "") : Reg(~0U), Name(Name) {}
+
+ const std::string &getName() const { return Name; }
+
+ Register getReg() const { return Reg; }
+ };
-/// NamedVRegCursor - The cursor is an object that keeps track of what the next
-/// vreg name should be. It does book keeping to determine when to skip the
-/// index value and by how much, or if the next vreg name should be an increment
-/// from the previous.
-class NamedVRegCursor {
MachineRegisterInfo &MRI;
- /// virtualVRegNumber - Book keeping of the last vreg position.
- unsigned virtualVRegNumber;
+ unsigned CurrentBBNumber = 0;
- /// SkipGapSize - Used to calculate a modulo amount to skip by after every
- /// sequence of instructions starting from a given side-effecting
- /// MachineInstruction for a given MachineBasicBlock. The general idea is that
- /// for a given program compiled with two different opt pipelines, there
- /// shouldn't be greater than SkipGapSize difference in how many vregs are in
- /// play between the two and for every def-use graph of vregs we rename we
- /// will round up to the next SkipGapSize'th number so that we have a high
-  /// chance of landing on the same name for two given matching side-effects
- /// for the two compilation outcomes.
- const unsigned SkipGapSize;
+  /// Given an instruction, construct a hash of the operands
+  /// of the instruction along with its opcode.
+  /// When dealing with virtual registers, just hash the opcode of
+  /// the instruction defining that vreg.
+  /// Handle immediates, registers (physical and virtual) explicitly,
+  /// and return a common value for the other cases.
+  /// Instructions will be named using the following scheme:
+  /// bb<block_no>_hash_<collision_count>.
+ std::string getInstructionOpcodeHash(MachineInstr &MI);
- /// RenamedInOtherBB - VRegs that we already renamed: ie breadcrumbs.
- std::vector<Register> RenamedInOtherBB;
+ /// For all the VRegs that are candidates for renaming,
+ /// return a mapping from old vregs to new vregs with names.
+ std::map<unsigned, unsigned>
+ getVRegRenameMap(const std::vector<NamedVReg> &VRegs);
-public:
- NamedVRegCursor() = delete;
- /// 1000 for the SkipGapSize was a good heuristic at the time of the writing
- /// of the MIRCanonicalizerPass. Adjust as needed.
- NamedVRegCursor(MachineRegisterInfo &MRI, unsigned SkipGapSize = 1000)
- : MRI(MRI), virtualVRegNumber(0), SkipGapSize(SkipGapSize) {}
-
- /// SkipGapSize - Skips modulo a gap value of indices. Indices are used to
- /// produce the next vreg name.
- void skipVRegs();
-
- unsigned getVirtualVReg() const { return virtualVRegNumber; }
-
-  /// incrementVirtualVReg - This increments an index value that is used to
- /// create a new vreg name. This is not a Register.
- unsigned incrementVirtualVReg(unsigned incr = 1) {
- virtualVRegNumber += incr;
- return virtualVRegNumber;
- }
+ /// Perform replacing of registers based on the <old,new> vreg map.
+ bool doVRegRenaming(const std::map<unsigned, unsigned> &VRegRenameMap);
/// createVirtualRegister - Given an existing vreg, create a named vreg to
- /// take its place.
+ /// take its place. The name is determined by calling
+ /// getInstructionOpcodeHash.
unsigned createVirtualRegister(unsigned VReg);
- /// renameVRegs - For a given MachineBasicBlock, scan for side-effecting
- /// instructions, walk the def-use from each side-effecting root (in sorted
- /// root order) and rename the encountered vregs in the def-use graph in a
- /// canonical ordering. This method maintains book keeping for which vregs
- /// were already renamed in RenamedInOtherBB.
- // @return changed
- bool renameVRegs(MachineBasicBlock *MBB);
+ /// Create a vreg with name and return it.
+ unsigned createVirtualRegisterWithLowerName(unsigned VReg, StringRef Name);
+ /// Linearly traverse the MachineBasicBlock and rename each instruction's
+ /// vreg definition based on the semantics of the instruction.
+  /// Names are of the form bb<BBNum>_hash_[0-9]+.
+ bool renameInstsInMBB(MachineBasicBlock *MBB);
+
+public:
+ VRegRenamer() = delete;
+ VRegRenamer(MachineRegisterInfo &MRI) : MRI(MRI) {}
+
+  /// Same as renameInstsInMBB, but first records the BB number from the
+  /// caller's traversal; it is used as the prefix for the vreg names.
+ bool renameVRegs(MachineBasicBlock *MBB, unsigned BBNum) {
+ CurrentBBNumber = BBNum;
+ return renameInstsInMBB(MBB);
+ }
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 854bef3aab05..f433c4b6c90b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1395,8 +1395,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
--N;
- MachineOperandIteratorBase::PhysRegInfo Info =
- ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
+ PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI);
// Register is live when we read it here.
if (Info.Read)
@@ -1434,8 +1433,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
--N;
- MachineOperandIteratorBase::PhysRegInfo Info =
- ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
+ PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI);
// Defs happen after uses so they take precedence if both are present.
@@ -1462,6 +1460,11 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
} while (I != begin() && N > 0);
}
+ // If all the instructions before this in the block are debug instructions,
+ // skip over them.
+ while (I != begin() && std::prev(I)->isDebugInstr())
+ --I;
+
// Did we get to the start of the block?
if (I == begin()) {
// If so, the register's state is definitely defined by the live-in state.
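The intent of the added loop, as an illustration (not patch code): the backward scan should land on begin() for a block whose only remaining leading instructions are DBG_VALUEs, so liveness answers don't differ between builds with and without debug info:

    //   bb.0:                          bb.0:
    //     liveins: $eax                  liveins: $eax
    //     DBG_VALUE $eax, ...            (no debug instruction)
    //     <query>                        <query>
    // With the skip, both columns fall through to the same live-in check.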
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 53a35b7e89c2..d8ea3e0b9cf6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
@@ -172,6 +173,13 @@ MachineBlockFrequencyInfo::MachineBlockFrequencyInfo()
initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
}
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo(
+ MachineFunction &F,
+ MachineBranchProbabilityInfo &MBPI,
+ MachineLoopInfo &MLI) : MachineFunctionPass(ID) {
+ calculate(F, MBPI, MLI);
+}
+
MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default;
void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index ac19bc0bd8ea..30b98ec88c24 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -41,6 +42,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -48,6 +50,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BlockFrequency.h"
@@ -362,6 +365,8 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// A handle to the post dominator tree.
MachinePostDominatorTree *MPDT;
+ ProfileSummaryInfo *PSI;
+
/// Duplicator used to duplicate tails during placement.
///
/// Placement decisions can open up new tail duplication opportunities, but
@@ -537,6 +542,7 @@ public:
if (TailDupPlacement)
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -554,6 +560,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
@@ -1073,6 +1080,11 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
if (!shouldTailDuplicate(Succ))
return false;
+ // The result of canTailDuplicate.
+ bool Duplicate = true;
+ // Number of possible duplication.
+ unsigned int NumDup = 0;
+
// For CFG checking.
SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
BB->succ_end());
@@ -1119,9 +1131,50 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
// to trellises created by tail-duplication, so we just look for the
// CFG.
continue;
- return false;
+ Duplicate = false;
+ continue;
}
+ NumDup++;
}
+
+ // No possible duplication in current filter set.
+ if (NumDup == 0)
+ return false;
+
+  // This is mainly for the function exit BB.
+  // The integrated tail duplication is really designed to increase
+  // fallthrough from Succ's predecessors to Succ's successors. We may need
+  // other mechanisms to handle different cases.
+ if (Succ->succ_size() == 0)
+ return true;
+
+ // Plus the already placed predecessor.
+ NumDup++;
+
+ // If the duplication candidate has more unplaced predecessors than
+ // successors, the extra duplication can't bring more fallthrough.
+ //
+ // Pred1 Pred2 Pred3
+ // \ | /
+ // \ | /
+ // \ | /
+ // Dup
+ // / \
+ // / \
+ // Succ1 Succ2
+ //
+ // In this example Dup has 2 successors and 3 predecessors, duplication of Dup
+ // can increase the fallthrough from Pred1 to Succ1 and from Pred2 to Succ2,
+ // but the duplication into Pred3 can't increase fallthrough.
+ //
+  // A small number of extra duplications may not hurt too much. We need a
+  // better heuristic to handle it.
+ //
+ // FIXME: we should selectively tail duplicate a BB into part of its
+ // predecessors.
+ if ((NumDup > Succ->succ_size()) || !Duplicate)
+ return false;
+
return true;
}
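Plugging the diagram into the final check: duplication into Pred1 and Pred2 pays off, but Pred3's copy buys nothing, so the candidate is rejected:

    unsigned NumDup = 2 /*unplaced preds*/ + 1 /*already placed pred*/; // == 3
    unsigned NumSucc = 2;                       // Succ1, Succ2
    bool Rejected = NumDup > NumSucc;           // 3 > 2: not worth duplicating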
@@ -1417,9 +1470,10 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
bool BadCFGConflict = false;
for (MachineBasicBlock *Pred : Succ->predecessors()) {
- if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (Pred == Succ || PredChain == &SuccChain ||
(BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain ||
+ PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||
// This check is redundant except for look ahead. This function is
// called for lookahead by isProfitableToTailDup when BB hasn't been
// placed yet.
@@ -1721,7 +1775,9 @@ void MachineBlockPlacement::buildChain(
MachineBasicBlock* BestSucc = Result.BB;
bool ShouldTailDup = Result.ShouldTailDup;
if (allowTailDupPlacement())
- ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
+ ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc,
+ Chain,
+ BlockFilter));
// If an immediate successor isn't available, look for the best viable
// block among those we've identified as not violating the loop's CFG at
@@ -2025,7 +2081,10 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
// i.e. when the layout predecessor does not fallthrough to the loop header.
// In practice this never happens though: there always seems to be a preheader
// that can fallthrough and that is also placed before the header.
- if (F->getFunction().hasOptSize())
+ bool OptForSize = F->getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(L.getHeader(), PSI,
+ &MBFI->getMBFI());
+ if (OptForSize)
return L.getHeader();
MachineBasicBlock *OldTop = nullptr;
@@ -2781,6 +2840,11 @@ void MachineBlockPlacement::alignBlocks() {
if (Freq < (LoopHeaderFreq * ColdProb))
continue;
+    // If the global profile indicates so, don't align it.
+ if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) &&
+ !TLI->alignLoopsWithOptSize())
+ continue;
+
// Check for the existence of a non-layout predecessor which would benefit
// from aligning this block.
MachineBasicBlock *LayoutPred =
@@ -2988,6 +3052,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
MPDT = nullptr;
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
// Initialize PreferredLoopExit to nullptr here since it may never be set if
// there are no MachineLoops.
@@ -3018,10 +3083,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (allowTailDupPlacement()) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
- if (MF.getFunction().hasOptSize())
+ bool OptForSize = MF.getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI());
+ if (OptForSize)
TailDupSize = 1;
bool PreRegAlloc = false;
- TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize);
+ TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI,
+ /* LayoutMode */ true, TailDupSize);
precomputeTriangleChains();
}
@@ -3037,7 +3105,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (MF.size() > 3 && EnableTailMerge) {
unsigned TailMergeSize = TailDupSize + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
- *MBPI, TailMergeSize);
+ *MBPI, PSI, TailMergeSize);
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
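The commit threads one recurring guard through placement, alignment and tail duplication; distilled (MBB, PSI and MBFI assumed in scope, the latter two possibly null):

    #include "llvm/CodeGen/MachineSizeOpts.h"

    bool OptForSize = MF.getFunction().hasOptSize() ||
                      llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
    // Without a profile summary, shouldOptimizeForSize returns false and
    // only the function's optsize attribute decides.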
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index d2277ce51746..f1d68c79a212 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -13,6 +13,8 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,6 +39,12 @@ cl::opt<unsigned> ProfileLikelyProb(
char MachineBranchProbabilityInfo::ID = 0;
+MachineBranchProbabilityInfo::MachineBranchProbabilityInfo()
+ : ImmutablePass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeMachineBranchProbabilityInfoPass(Registry);
+}
+
void MachineBranchProbabilityInfo::anchor() {}
BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
index d9bd32b2fbab..9561a06ce8df 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
index e9f462fd1b37..73895bdf834f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -12,17 +12,21 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -66,6 +70,8 @@ class MachineCombiner : public MachineFunctionPass {
MachineLoopInfo *MLI; // Current MachineLoopInfo
MachineTraceMetrics *Traces;
MachineTraceMetrics::Ensemble *MinInstr;
+ MachineBlockFrequencyInfo *MBFI;
+ ProfileSummaryInfo *PSI;
TargetSchedModel TSchedModel;
@@ -82,7 +88,7 @@ public:
StringRef getPassName() const override { return "Machine InstCombiner"; }
private:
- bool doSubstitute(unsigned NewSize, unsigned OldSize);
+ bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize);
bool combineInstructions(MachineBasicBlock *);
MachineInstr *getOperandDef(const MachineOperand &MO);
unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -131,6 +137,8 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<MachineTraceMetrics>();
AU.addPreserved<MachineTraceMetrics>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -408,8 +416,9 @@ bool MachineCombiner::preservesResourceLen(
/// \returns true when new instruction sequence should be generated
/// independent if it lengthens critical path or not
-bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
- if (OptSize && (NewSize < OldSize))
+bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize,
+ bool OptForSize) {
+ if (OptForSize && (NewSize < OldSize))
return true;
if (!TSchedModel.hasInstrSchedModelOrItineraries())
return true;
@@ -507,6 +516,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
SparseSet<LiveRegUnit> RegUnits;
RegUnits.setUniverse(TRI->getNumRegUnits());
+ bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
+
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
SmallVector<MachineCombinerPattern, 16> Patterns;
@@ -583,7 +594,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
- if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
+ if (SubstituteAlways ||
+ doSubstitute(NewInstCount, OldInstCount, OptForSize)) {
insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
RegUnits, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
@@ -638,6 +650,10 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
MLI = &getAnalysis<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ MBFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
MinInstr = nullptr;
OptSize = MF.getFunction().hasOptSize();
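MachineCombiner now materializes block frequency info lazily — the analysis is fetched only when a profile summary is present — and doSubstitute() consults the per-block OptForSize bit rather than the function-wide flag alone. A sketch of that conditional-materialization shape (all names hypothetical):

  #include <cstdio>

  struct Summary { bool HasProfile; };
  struct FreqInfo { int Dummy; };

  // Stand-in for the lazy wrapper: block frequencies are only computed when
  // the check above says a profile exists.
  static FreqInfo *computeExpensiveFreqInfo() {
    static FreqInfo FI{42};
    std::printf("computed block frequencies\n");
    return &FI;
  }

  int main() {
    Summary PSI{false};
    FreqInfo *MBFI = PSI.HasProfile ? computeExpensiveFreqInfo() : nullptr;
    std::printf("MBFI %s\n", MBFI ? "available" : "skipped");
    return 0;
  }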
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index ebe76e31dca9..c316b167059b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -37,6 +37,15 @@
// ... // No clobber of %R0
// %R1 = COPY %R0 <<< Removed
//
+// or
+//
+// $R0 = OP ...
+// ... // No read/clobber of $R0 and $R1
+// $R1 = COPY $R0 // $R0 is killed
+// Replace $R0 with $R1 and remove the COPY
+// $R1 = OP ...
+// ...
+//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
@@ -54,6 +63,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -68,6 +78,7 @@ using namespace llvm;
STATISTIC(NumDeletes, "Number of dead copies deleted");
STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
+STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated");
DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
"Controls which register COPYs are forwarded");
@@ -97,6 +108,28 @@ public:
}
}
+ /// Remove register from copy maps.
+ void invalidateRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
+ // Since Reg might be a subreg of some registers, invalidating Reg alone is
+ // not enough. We have to find the COPY that defines Reg, or the registers
+ // defined by Reg, and invalidate all of them.
+ DenseSet<unsigned> RegsToInvalidate{Reg};
+ for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
+ auto I = Copies.find(*RUI);
+ if (I != Copies.end()) {
+ if (MachineInstr *MI = I->second.MI) {
+ RegsToInvalidate.insert(MI->getOperand(0).getReg());
+ RegsToInvalidate.insert(MI->getOperand(1).getReg());
+ }
+ RegsToInvalidate.insert(I->second.DefRegs.begin(),
+ I->second.DefRegs.end());
+ }
+ }
+ for (unsigned InvalidReg : RegsToInvalidate)
+ for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI)
+ Copies.erase(*RUI);
+ }
+
/// Clobber a single register, removing it from the tracker's copy maps.
void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
@@ -150,6 +183,38 @@ public:
return CI->second.MI;
}
+ MachineInstr *findCopyDefViaUnit(unsigned RegUnit,
+ const TargetRegisterInfo &TRI) {
+ auto CI = Copies.find(RegUnit);
+ if (CI == Copies.end())
+ return nullptr;
+ if (CI->second.DefRegs.size() != 1)
+ return nullptr;
+ MCRegUnitIterator RUI(CI->second.DefRegs[0], &TRI);
+ return findCopyForUnit(*RUI, TRI, true);
+ }
+
+ MachineInstr *findAvailBackwardCopy(MachineInstr &I, unsigned Reg,
+ const TargetRegisterInfo &TRI) {
+ MCRegUnitIterator RUI(Reg, &TRI);
+ MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI);
+ if (!AvailCopy ||
+ !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg))
+ return nullptr;
+
+ Register AvailSrc = AvailCopy->getOperand(1).getReg();
+ Register AvailDef = AvailCopy->getOperand(0).getReg();
+ for (const MachineInstr &MI :
+ make_range(AvailCopy->getReverseIterator(), I.getReverseIterator()))
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isRegMask())
+ // FIXME: Shall we simultaneously invalidate AvailSrc or AvailDef?
+ if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef))
+ return nullptr;
+
+ return AvailCopy;
+ }
+
MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg,
const TargetRegisterInfo &TRI) {
// We check the first RegUnit here, since we'll only be interested in the
@@ -210,11 +275,16 @@ private:
void ClobberRegister(unsigned Reg);
void ReadRegister(unsigned Reg, MachineInstr &Reader,
DebugType DT);
- void CopyPropagateBlock(MachineBasicBlock &MBB);
+ void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
+ void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
void forwardUses(MachineInstr &MI);
+ void propagateDefs(MachineInstr &MI);
bool isForwardableRegClassCopy(const MachineInstr &Copy,
const MachineInstr &UseI, unsigned UseIdx);
+ bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy,
+ const MachineInstr &UseI,
+ unsigned UseIdx);
bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
/// Candidates for deletion.
@@ -312,6 +382,19 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
return true;
}
+bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy(
+ const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) {
+ Register Def = Copy.getOperand(0).getReg();
+
+ if (const TargetRegisterClass *URC =
+ UseI.getRegClassConstraint(UseIdx, TII, TRI))
+ return URC->contains(Def);
+
+ // We don't process further if UseI is a COPY, since forward copy propagation
+ // should handle that.
+ return false;
+}
+
/// Decide whether we should forward the source of \param Copy to its use in
/// \param UseI based on the physical register class constraints of the opcode
/// and avoiding introducing more cross-class COPYs.
@@ -432,6 +515,15 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (hasImplicitOverlap(MI, MOUse))
continue;
+ // Check that the instruction is not a copy that partially overwrites the
+ // original copy source that we are about to use. The tracker mechanism
+ // cannot cope with that.
+ if (MI.isCopy() && MI.modifiesRegister(CopySrcReg, TRI) &&
+ !MI.definesRegister(CopySrcReg)) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI);
+ continue;
+ }
+
if (!DebugCounter::shouldExecute(FwdCounter)) {
LLVM_DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n "
<< MI);
@@ -458,8 +550,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
}
}
-void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
- LLVM_DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
+void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName()
+ << "\n");
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
MachineInstr *MI = &*I;
@@ -637,6 +730,137 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
Tracker.clear();
}
+static bool isBackwardPropagatableCopy(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ assert(MI.isCopy() && "MI is expected to be a COPY");
+ Register Def = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+
+ if (!Def || !Src)
+ return false;
+
+ if (MRI.isReserved(Def) || MRI.isReserved(Src))
+ return false;
+
+ return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill();
+}
+
+void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
+ if (!Tracker.hasAnyCopies())
+ return;
+
+ for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd;
+ ++OpIdx) {
+ MachineOperand &MODef = MI.getOperand(OpIdx);
+
+ if (!MODef.isReg() || MODef.isUse())
+ continue;
+
+ // Ignore non-trivial cases.
+ if (MODef.isTied() || MODef.isUndef() || MODef.isImplicit())
+ continue;
+
+ if (!MODef.getReg())
+ continue;
+
+ // We only handle the case where the register comes from a vreg.
+ if (!MODef.isRenamable())
+ continue;
+
+ MachineInstr *Copy =
+ Tracker.findAvailBackwardCopy(MI, MODef.getReg(), *TRI);
+ if (!Copy)
+ continue;
+
+ Register Def = Copy->getOperand(0).getReg();
+ Register Src = Copy->getOperand(1).getReg();
+
+ if (MODef.getReg() != Src)
+ continue;
+
+ if (!isBackwardPropagatableRegClassCopy(*Copy, MI, OpIdx))
+ continue;
+
+ if (hasImplicitOverlap(MI, MODef))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI)
+ << "\n with " << printReg(Def, TRI) << "\n in "
+ << MI << " from " << *Copy);
+
+ MODef.setReg(Def);
+ MODef.setIsRenamable(Copy->getOperand(0).isRenamable());
+
+ LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
+ MaybeDeadCopies.insert(Copy);
+ Changed = true;
+ ++NumCopyBackwardPropagated;
+ }
+}
+
+void MachineCopyPropagation::BackwardCopyPropagateBlock(
+ MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName()
+ << "\n");
+
+ for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
+ I != E;) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ // Ignore non-trivial COPYs.
+ if (MI->isCopy() && MI->getNumOperands() == 2 &&
+ !TRI->regsOverlap(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg())) {
+
+ Register Def = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
+
+ // Unlike forward cp, we don't invoke propagateDefs here;
+ // we just let forward cp do COPY-to-COPY propagation.
+ if (isBackwardPropagatableCopy(*MI, *MRI)) {
+ Tracker.invalidateRegister(Src, *TRI);
+ Tracker.invalidateRegister(Def, *TRI);
+ Tracker.trackCopy(MI, *TRI);
+ continue;
+ }
+ }
+
+ // Invalidate any earlyclobber regs first.
+ for (const MachineOperand &MO : MI->operands())
+ if (MO.isReg() && MO.isEarlyClobber()) {
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ Tracker.invalidateRegister(Reg, *TRI);
+ }
+
+ propagateDefs(*MI);
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ if (!MO.getReg())
+ continue;
+
+ if (MO.isDef())
+ Tracker.invalidateRegister(MO.getReg(), *TRI);
+
+ if (MO.readsReg())
+ Tracker.invalidateRegister(MO.getReg(), *TRI);
+ }
+ }
+
+ for (auto *Copy : MaybeDeadCopies) {
+ Copy->eraseFromParent();
+ ++NumDeletes;
+ }
+
+ MaybeDeadCopies.clear();
+ CopyDbgUsers.clear();
+ Tracker.clear();
+}
+
bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -647,8 +871,10 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
MRI = &MF.getRegInfo();
- for (MachineBasicBlock &MBB : MF)
- CopyPropagateBlock(MBB);
+ for (MachineBasicBlock &MBB : MF) {
+ BackwardCopyPropagateBlock(MBB);
+ ForwardCopyPropagateBlock(MBB);
+ }
return Changed;
}
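Taken together, these hunks add a backward walk over each block: when a COPY kills its source, the source's defining instruction is retargeted to write the COPY's destination and the COPY becomes dead, exactly as the new file-header comment illustrates. A toy, self-contained simulation of that rewrite — not the pass's real data structures, and with the intervening read/clobber scan elided:

  #include <cstdio>
  #include <string>
  #include <vector>

  struct Inst {
    std::string Op, Def, Src;
    bool KillsSrc = false;
  };

  int main() {
    // $r0 = OP ; $r1 = COPY $r0 (r0 killed)  -->  $r1 = OP
    std::vector<Inst> Block = {{"OP", "r0", "", false},
                               {"COPY", "r1", "r0", true}};
    for (size_t I = Block.size(); I-- > 0;) {
      const Inst C = Block[I]; // copy before any erase below
      if (C.Op != "COPY" || !C.KillsSrc)
        continue;
      for (size_t J = I; J-- > 0;) {
        // A real pass must stop at any intervening read or clobber of
        // either register; this toy block has none.
        if (Block[J].Def == C.Src) {
          Block[J].Def = C.Def;           // retarget the def upstream
          Block.erase(Block.begin() + I); // the COPY is now dead
          break;
        }
      }
      break;
    }
    for (const Inst &X : Block)
      std::printf("%s = %s %s\n", X.Def.c_str(), X.Op.c_str(), X.Src.c_str());
    return 0;
  }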
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
index 6704298c17d6..6ddb1758719b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -10,6 +10,7 @@
#include "llvm/Analysis/DominanceFrontierImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
index 706c706d7527..c8845d838282 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -49,11 +50,15 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ calculate(F);
+ return false;
+}
+
+void MachineDominatorTree::calculate(MachineFunction &F) {
CriticalEdgesToSplit.clear();
NewBBs.clear();
DT.reset(new DomTreeBase<MachineBasicBlock>());
DT->recalculate(F);
- return false;
}
MachineDominatorTree::MachineDominatorTree()
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
index 604f5145b1a0..22ab2c7a6d77 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -133,11 +133,11 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
return BV;
}
-unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
unsigned MaxAlign = getMaxAlignment();
- int Offset = 0;
+ int64_t Offset = 0;
// This code is very, very similar to PEI::calculateFrameObjectOffsets().
// It really should be refactored to share code. Until then, changes
@@ -147,7 +147,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
// Only estimate stack size of default stack.
if (getStackID(i) != TargetStackID::Default)
continue;
- int FixedOff = -getObjectOffset(i);
+ int64_t FixedOff = -getObjectOffset(i);
if (FixedOff > Offset) Offset = FixedOff;
}
for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
@@ -183,7 +183,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
unsigned AlignMask = StackAlign - 1;
Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
- return (unsigned)Offset;
+ return (uint64_t)Offset;
}
void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) {
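estimateStackSize() now does its offset arithmetic in 64 bits; the final align-up it performs is the standard power-of-two mask trick. A standalone check of that arithmetic, including a value that would have wrapped in the old 32-bit int:

  #include <cassert>
  #include <cstdint>

  static uint64_t alignUp(uint64_t Offset, uint64_t Align) {
    uint64_t Mask = Align - 1; // Align must be a power of two
    return (Offset + Mask) & ~Mask;
  }

  int main() {
    assert(alignUp(13, 8) == 16);
    assert(alignUp(16, 8) == 16);
    // 32-bit int math would have overflowed on a ~3 GiB frame estimate:
    assert(alignUp(3'221'225'473ULL, 16) == 3'221'225'488ULL);
    return 0;
  }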
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 0fc8cc16455d..4612690644fe 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -270,6 +270,21 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
return JumpTableInfo;
}
+DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const {
+ // TODO: Should probably avoid the connection to the IR and store directly
+ // in the MachineFunction.
+ Attribute Attr = F.getFnAttribute("denormal-fp-math");
+
+ // FIXME: This should assume IEEE behavior on an unspecified
+ // attribute. However, the one current user incorrectly assumes a non-IEEE
+ // target by default.
+ StringRef Val = Attr.getValueAsString();
+ if (Val.empty())
+ return DenormalMode::Invalid;
+
+ return parseDenormalFPAttribute(Val);
+}
+
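getDenormalMode() reads the string-valued "denormal-fp-math" function attribute and defers to parseDenormalFPAttribute(), returning Invalid for a missing value (the FIXME notes IEEE would be the better default). A sketch of that lookup-and-parse shape; the enum and map here are illustrative, though "ieee", "preserve-sign", and "positive-zero" are the attribute's documented values:

  #include <cstdio>
  #include <map>
  #include <string>

  enum class DenormalMode { Invalid, IEEE, PreserveSign, PositiveZero };

  static DenormalMode parseDenormalAttr(const std::string &Val) {
    static const std::map<std::string, DenormalMode> Modes = {
        {"ieee", DenormalMode::IEEE},
        {"preserve-sign", DenormalMode::PreserveSign},
        {"positive-zero", DenormalMode::PositiveZero}};
    if (Val.empty())
      return DenormalMode::Invalid; // unspecified attribute
    auto It = Modes.find(Val);
    return It == Modes.end() ? DenormalMode::Invalid : It->second;
  }

  int main() {
    std::printf("%d\n", (int)parseDenormalAttr("preserve-sign")); // 2
    std::printf("%d\n", (int)parseDenormalAttr(""));              // 0 (Invalid)
    return 0;
  }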
/// Should we be emitting segmented stack stuff for the function
bool MachineFunction::shouldSplitStack() const {
return getFunction().hasFnAttribute("split-stack");
@@ -447,15 +462,7 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MMO->getOrdering(), MMO->getFailureOrdering());
}
-MachineInstr::ExtraInfo *
-MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
- MCSymbol *PreInstrSymbol,
- MCSymbol *PostInstrSymbol) {
- return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol,
- PostInstrSymbol, nullptr);
-}
-
-MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfoWithMarker(
+MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfo(
ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol,
MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker) {
return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol,
@@ -477,6 +484,12 @@ uint32_t *MachineFunction::allocateRegMask() {
return Mask;
}
+ArrayRef<int> MachineFunction::allocateShuffleMask(ArrayRef<int> Mask) {
+ int* AllocMask = Allocator.Allocate<int>(Mask.size());
+ copy(Mask, AllocMask);
+ return {AllocMask, Mask.size()};
+}
+
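allocateShuffleMask() copies the incoming mask into the MachineFunction's allocator and returns an ArrayRef over that stable storage, so the caller's temporary can die. The same shape with a standard-library arena (names illustrative):

  #include <cstdio>
  #include <cstring>
  #include <memory_resource>
  #include <vector>

  // Copy a mask into arena-owned storage and hand back a stable pointer,
  // mirroring allocateShuffleMask above.
  static const int *copyMaskIntoArena(std::pmr::monotonic_buffer_resource &Arena,
                                      const std::vector<int> &Mask) {
    int *Stable = static_cast<int *>(
        Arena.allocate(Mask.size() * sizeof(int), alignof(int)));
    std::memcpy(Stable, Mask.data(), Mask.size() * sizeof(int));
    return Stable;
  }

  int main() {
    std::pmr::monotonic_buffer_resource Arena;
    const int *Stable;
    {
      std::vector<int> Mask = {0, -1, 2}; // caller's temporary
      Stable = copyMaskIntoArena(Arena, Mask);
    } // the temporary dies; the arena copy survives
    std::printf("%d %d %d\n", Stable[0], Stable[1], Stable[2]);
    return 0;
  }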
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineFunction::dump() const {
print(dbgs());
@@ -528,6 +541,13 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
OS << "\n# End machine code for function " << getName() << ".\n\n";
}
+/// True if this function needs frame moves for debug or exceptions.
+bool MachineFunction::needsFrameMoves() const {
+ return getMMI().hasDebugInfo() ||
+ getTarget().Options.ForceDwarfFrameSection ||
+ F.needsUnwindTableEntry();
+}
+
namespace llvm {
template<>
@@ -831,15 +851,13 @@ try_next:;
return FilterID;
}
-void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I,
- const MDNode *MD) {
- MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true);
- MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true);
- I->setPreInstrSymbol(*this, BeginLabel);
- I->setPostInstrSymbol(*this, EndLabel);
+MachineFunction::CallSiteInfoMap::iterator
+MachineFunction::getCallSiteInfo(const MachineInstr *MI) {
+ assert(MI->isCall() && "Call site info refers only to call instructions!");
- const DIType *DI = dyn_cast<DIType>(MD);
- CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI));
+ if (!Target.Options.EnableDebugEntryValues)
+ return CallSitesInfo.end();
+ return CallSitesInfo.find(MI);
}
void MachineFunction::moveCallSiteInfo(const MachineInstr *Old,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 0ea8975cc74c..3645a4e3466b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index 83976060f124..08d786f8f12c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -187,8 +187,8 @@ static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
unsigned NumOps, MachineRegisterInfo *MRI) {
if (MRI)
return MRI->moveOperands(Dst, Src, NumOps);
-
// MachineOperand is a trivially copyable type so we can just use memmove.
+ assert(Dst && Src && "Unknown operands");
std::memmove(Dst, Src, NumOps * sizeof(MachineOperand));
}
@@ -338,7 +338,7 @@ void MachineInstr::setExtraInfo(MachineFunction &MF,
// 32-bit pointers.
// FIXME: Maybe we should make the symbols in the extra info mutable?
else if (NumPointers > 1 || HasHeapAllocMarker) {
- Info.set<EIIK_OutOfLine>(MF.createMIExtraInfoWithMarker(
+ Info.set<EIIK_OutOfLine>(MF.createMIExtraInfo(
MMOs, PreInstrSymbol, PostInstrSymbol, HeapAllocMarker));
return;
}
@@ -1506,7 +1506,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(StartOp);
- MO.print(OS, MST, TypeToPrint, /*PrintDef=*/false, IsStandalone,
+ MO.print(OS, MST, TypeToPrint, StartOp, /*PrintDef=*/false, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
++StartOp;
}
@@ -1538,8 +1538,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "nsw ";
if (getFlag(MachineInstr::IsExact))
OS << "exact ";
- if (getFlag(MachineInstr::FPExcept))
- OS << "fpexcept ";
+ if (getFlag(MachineInstr::NoFPExcept))
+ OS << "nofpexcept ";
// Print the opcode name.
if (TII)
@@ -1561,7 +1561,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
const unsigned OpIdx = InlineAsm::MIOp_AsmString;
LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx);
- getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
+ getOperand(OpIdx).print(OS, MST, TypeToPrint, OpIdx, /*PrintDef=*/true, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI,
IntrinsicInfo);
@@ -1600,7 +1600,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
else {
LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(i);
- MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
+ MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
}
} else if (isDebugLabel() && MO.isMetadata()) {
@@ -1611,7 +1611,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
else {
LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(i);
- MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
+ MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
}
} else if (i == AsmDescOp && MO.isImm()) {
@@ -1678,7 +1678,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (MO.isImm() && isOperandSubregIdx(i))
MachineOperand::printSubRegIdx(OS, MO.getImm(), TRI);
else
- MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
+ MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
}
}
@@ -1706,7 +1706,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
FirstOp = false;
OS << ',';
}
- OS << " heap-alloc-marker";
+ OS << " heap-alloc-marker ";
+ HeapAllocMarker->printAsOperand(OS, MST);
}
if (!SkipDebugLoc) {
@@ -1976,7 +1977,7 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs,
unsigned
MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
// Build up a buffer of hash code components.
- SmallVector<size_t, 8> HashComponents;
+ SmallVector<size_t, 16> HashComponents;
HashComponents.reserve(MI->getNumOperands() + 1);
HashComponents.push_back(MI->getOpcode());
for (const MachineOperand &MO : MI->operands()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
index feb849ced353..94865b0e9031 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include <utility>
using namespace llvm;
@@ -277,22 +278,18 @@ bool llvm::finalizeBundles(MachineFunction &MF) {
return Changed;
}
-//===----------------------------------------------------------------------===//
-// MachineOperand iterator
-//===----------------------------------------------------------------------===//
-
-MachineOperandIteratorBase::VirtRegInfo
-MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
- SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) {
- VirtRegInfo RI = { false, false, false };
- for(; isValid(); ++*this) {
- MachineOperand &MO = deref();
+VirtRegInfo llvm::AnalyzeVirtRegInBundle(
+ MachineInstr &MI, unsigned Reg,
+ SmallVectorImpl<std::pair<MachineInstr *, unsigned>> *Ops) {
+ VirtRegInfo RI = {false, false, false};
+ for (MIBundleOperands O(MI); O.isValid(); ++O) {
+ MachineOperand &MO = *O;
if (!MO.isReg() || MO.getReg() != Reg)
continue;
// Remember each (MI, OpNo) that refers to Reg.
if (Ops)
- Ops->push_back(std::make_pair(MO.getParent(), getOperandNo()));
+ Ops->push_back(std::make_pair(MO.getParent(), O.getOperandNo()));
// Both defs and uses can read virtual registers.
if (MO.readsReg()) {
@@ -304,22 +301,22 @@ MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
// Only defs can write.
if (MO.isDef())
RI.Writes = true;
- else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo()))
+ else if (!RI.Tied &&
+ MO.getParent()->isRegTiedToDefOperand(O.getOperandNo()))
RI.Tied = true;
}
return RI;
}
-MachineOperandIteratorBase::PhysRegInfo
-MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
- const TargetRegisterInfo *TRI) {
+PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, unsigned Reg,
+ const TargetRegisterInfo *TRI) {
bool AllDefsDead = true;
PhysRegInfo PRI = {false, false, false, false, false, false, false, false};
assert(Register::isPhysicalRegister(Reg) &&
"analyzePhysReg not given a physical register!");
- for (; isValid(); ++*this) {
- MachineOperand &MO = deref();
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ const MachineOperand &MO = *O;
if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) {
PRI.Clobbered = true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
index 6a898ff6ef88..462d4d3b3726 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -39,6 +40,7 @@
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -74,6 +76,27 @@ static cl::opt<bool>
HoistConstStores("hoist-const-stores",
cl::desc("Hoist invariant stores"),
cl::init(true), cl::Hidden);
+// The default threshold of 100 (i.e. if target block is 100 times hotter)
+// is based on empirical data on a single target and is subject to tuning.
+static cl::opt<unsigned>
+BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
+ cl::desc("Do not hoist instructions if target"
+ "block is N times hotter than the source."),
+ cl::init(100), cl::Hidden);
+
+enum class UseBFI { None, PGO, All };
+
+static cl::opt<UseBFI>
+DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
+ cl::desc("Disable hoisting instructions to"
+ " hotter blocks"),
+ cl::init(UseBFI::None), cl::Hidden,
+ cl::values(clEnumValN(UseBFI::None, "none",
+ "disable the feature"),
+ clEnumValN(UseBFI::PGO, "pgo",
+ "enable the feature when using profile data"),
+ clEnumValN(UseBFI::All, "all",
+ "enable the feature with/wo profile data")));
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
@@ -87,6 +110,8 @@ STATISTIC(NumPostRAHoisted,
"Number of machine instructions hoisted out of loops post regalloc");
STATISTIC(NumStoreConst,
"Number of stores of const phys reg hoisted out of loops");
+STATISTIC(NumNotHoistedDueToHotness,
+ "Number of instructions not hoisted due to block frequency");
namespace {
@@ -98,9 +123,11 @@ namespace {
MachineRegisterInfo *MRI;
TargetSchedModel SchedModel;
bool PreRegAlloc;
+ bool HasProfileData;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
+ MachineBlockFrequencyInfo *MBFI; // Machine block frequency info
MachineLoopInfo *MLI; // Current MachineLoopInfo
MachineDominatorTree *DT; // Machine dominator tree for the cur loop
@@ -150,6 +177,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>();
+ if (DisableHoistingToHotterBlocks != UseBFI::None)
+ AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<MachineLoopInfo>();
@@ -245,6 +274,8 @@ namespace {
void InitCSEMap(MachineBasicBlock *BB);
+ bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
+ MachineBasicBlock *TgtBlock);
MachineBasicBlock *getCurPreheader();
};
@@ -275,6 +306,7 @@ char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
"Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
@@ -283,6 +315,7 @@ INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
"Early Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
@@ -315,6 +348,7 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
SchedModel.init(&ST);
PreRegAlloc = MRI->isSSA();
+ HasProfileData = MF.getFunction().hasProfileData();
if (PreRegAlloc)
LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
@@ -333,6 +367,8 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
}
// Get our Loop information...
+ if (DisableHoistingToHotterBlocks != UseBFI::None)
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MLI = &getAnalysis<MachineLoopInfo>();
DT = &getAnalysis<MachineDominatorTree>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -1433,6 +1469,15 @@ bool MachineLICMBase::MayCSE(MachineInstr *MI) {
/// that are safe to hoist, this instruction is called to do the dirty work.
/// It returns true if the instruction is hoisted.
bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+ MachineBasicBlock *SrcBlock = MI->getParent();
+
+ // Disable the instruction hoisting due to block hotness
+ if ((DisableHoistingToHotterBlocks == UseBFI::All ||
+ (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
+ isTgtHotterThanSrc(SrcBlock, Preheader)) {
+ ++NumNotHoistedDueToHotness;
+ return false;
+ }
// First check whether we should hoist this instruction.
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
// If not, try unfolding a hoistable load.
@@ -1526,3 +1571,21 @@ MachineBasicBlock *MachineLICMBase::getCurPreheader() {
}
return CurPreheader;
}
+
+/// Is the target basic block at least "BlockFrequencyRatioThreshold"
+/// times hotter than the source basic block?
+bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
+ MachineBasicBlock *TgtBlock) {
+ // Parse source and target basic block frequency from MBFI
+ uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency();
+ uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency();
+
+ // Disable the hoisting if source block frequency is zero
+ if (!SrcBF)
+ return true;
+
+ double Ratio = (double)DstBF / SrcBF;
+
+ // Compare the block frequency ratio with the threshold
+ return Ratio > BlockFrequencyRatioThreshold;
+}
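The new gate refuses to hoist when the preheader is more than BlockFrequencyRatioThreshold (default 100) times hotter than the source block, and a zero source frequency also blocks the hoist; the -disable-hoisting-to-hotter-blocks option above selects when the check runs (none, pgo, or all). The ratio test in miniature:

  #include <cstdint>
  #include <cstdio>

  static bool tgtHotterThanSrc(uint64_t SrcFreq, uint64_t TgtFreq,
                               unsigned Threshold) {
    if (SrcFreq == 0)
      return true; // unknown/zero source frequency: do not hoist
    return (double)TgtFreq / (double)SrcFreq > Threshold;
  }

  int main() {
    std::printf("%d\n", tgtHotterThanSrc(10, 2000, 100)); // 200 > 100 -> 1
    std::printf("%d\n", tgtHotterThanSrc(10, 500, 100));  // 50 <= 100 -> 0
    return 0;
  }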
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 3b8b430d1b0f..0c1439da9b29 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -27,6 +28,9 @@ template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
char MachineLoopInfo::ID = 0;
+MachineLoopInfo::MachineLoopInfo() : MachineFunctionPass(ID) {
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+}
INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
"Machine Natural Loop Construction", true, true)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -36,11 +40,15 @@ INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
- releaseMemory();
- LI.analyze(getAnalysis<MachineDominatorTree>().getBase());
+ calculate(getAnalysis<MachineDominatorTree>());
return false;
}
+void MachineLoopInfo::calculate(MachineDominatorTree &MDT) {
+ releaseMemory();
+ LI.analyze(MDT.getBase());
+}
+
void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineDominatorTree>();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
index e074b76082f0..cf30e28449cd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -130,3 +131,14 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
return NewBB;
}
+
+bool llvm::isRegLiveInExitBlocks(MachineLoop *Loop, int PhysReg) {
+ SmallVector<MachineBasicBlock *, 4> ExitBlocks;
+ Loop->getExitBlocks(ExitBlocks);
+
+ for (auto *MBB : ExitBlocks)
+ if (MBB->isLiveIn(PhysReg))
+ return true;
+
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
index e0b4e9cac229..0094a923e039 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -20,8 +20,10 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
@@ -116,7 +118,17 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
BBCallbacks.back().setMap(this);
Entry.Index = BBCallbacks.size() - 1;
Entry.Fn = BB->getParent();
- Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken()));
+ MCSymbol *Sym = Context.createTempSymbol(!BB->hasAddressTaken());
+ if (Context.getObjectFileInfo()->getTargetTriple().isOSBinFormatXCOFF()) {
+ MCSymbol *FnEntryPointSym =
+ Context.lookupSymbol("." + Entry.Fn->getName());
+ assert(FnEntryPointSym && "The function entry point symbol should have"
+ " already been initialized.");
+ MCSectionXCOFF *Csect =
+ cast<MCSymbolXCOFF>(FnEntryPointSym)->getContainingCsect();
+ cast<MCSymbolXCOFF>(Sym)->setContainingCsect(Csect);
+ }
+ Entry.Symbols.push_back(Sym);
return Entry.Symbols;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
index 8b19501ec3cf..7b8f01100929 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -458,28 +459,6 @@ static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB,
OS << "<unknown>";
}
-static void printIRValueReference(raw_ostream &OS, const Value &V,
- ModuleSlotTracker &MST) {
- if (isa<GlobalValue>(V)) {
- V.printAsOperand(OS, /*PrintType=*/false, MST);
- return;
- }
- if (isa<Constant>(V)) {
- // Machine memory operands can load/store to/from constant value pointers.
- OS << '`';
- V.printAsOperand(OS, /*PrintType=*/true, MST);
- OS << '`';
- return;
- }
- OS << "%ir.";
- if (V.hasName()) {
- printLLVMNameWithoutPrefix(OS, V.getName());
- return;
- }
- int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1;
- MachineOperand::printIRSlotNumber(OS, Slot);
-}
-
static void printSyncScope(raw_ostream &OS, const LLVMContext &Context,
SyncScope::ID SSID,
SmallVectorImpl<StringRef> &SSNs) {
@@ -734,14 +713,15 @@ void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint,
const TargetIntrinsicInfo *IntrinsicInfo) const {
tryToGetTargetInfo(*this, TRI, IntrinsicInfo);
ModuleSlotTracker DummyMST(nullptr);
- print(OS, DummyMST, TypeToPrint, /*PrintDef=*/false, /*IsStandalone=*/true,
+ print(OS, DummyMST, TypeToPrint, None, /*PrintDef=*/false,
+ /*IsStandalone=*/true,
/*ShouldPrintRegisterTies=*/true,
/*TiedOperandIdx=*/0, TRI, IntrinsicInfo);
}
void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
- LLT TypeToPrint, bool PrintDef, bool IsStandalone,
- bool ShouldPrintRegisterTies,
+ LLT TypeToPrint, Optional<unsigned> OpIdx, bool PrintDef,
+ bool IsStandalone, bool ShouldPrintRegisterTies,
unsigned TiedOperandIdx,
const TargetRegisterInfo *TRI,
const TargetIntrinsicInfo *IntrinsicInfo) const {
@@ -802,9 +782,19 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << '(' << TypeToPrint << ')';
break;
}
- case MachineOperand::MO_Immediate:
- OS << getImm();
+ case MachineOperand::MO_Immediate: {
+ const MIRFormatter *Formatter = nullptr;
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ const auto *TII = MF->getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ Formatter = TII->getMIRFormatter();
+ }
+ if (Formatter)
+ Formatter->printImm(OS, *getParent(), OpIdx, getImm());
+ else
+ OS << getImm();
break;
+ }
case MachineOperand::MO_CImmediate:
getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
break;
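Immediate printing now goes through an optional target hook with a generic fallback: if the subtarget installs a MIRFormatter, it renders the value; otherwise the raw integer is printed. The hook-with-fallback shape, with toy types:

  #include <cstdint>
  #include <cstdio>

  // Toy stand-in for MIRFormatter.
  struct ToyFormatter {
    virtual ~ToyFormatter() = default;
    virtual void printImm(int64_t Imm) const {
      std::printf("%lld", (long long)Imm);
    }
  };

  struct HexFormatter : ToyFormatter {
    void printImm(int64_t Imm) const override {
      std::printf("0x%llx", (unsigned long long)Imm);
    }
  };

  static void printOperand(int64_t Imm, const ToyFormatter *Formatter) {
    if (Formatter)
      Formatter->printImm(Imm);            // target-specific rendering
    else
      std::printf("%lld", (long long)Imm); // default fallback
    std::printf("\n");
  }

  int main() {
    HexFormatter HF;
    printOperand(255, &HF);     // 0xff
    printOperand(255, nullptr); // 255
    return 0;
  }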
@@ -940,13 +930,13 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
case MachineOperand::MO_ShuffleMask:
OS << "shufflemask(";
- const Constant* C = getShuffleMask();
- const int NumElts = C->getType()->getVectorNumElements();
-
+ ArrayRef<int> Mask = getShuffleMask();
StringRef Separator;
- for (int I = 0; I != NumElts; ++I) {
- OS << Separator;
- C->getAggregateElement(I)->printAsOperand(OS, false, MST);
+ for (int Elt : Mask) {
+ if (Elt == -1)
+ OS << Separator << "undef";
+ else
+ OS << Separator << Elt;
Separator = ", ";
}
@@ -1111,7 +1101,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (const Value *Val = getValue()) {
OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
- printIRValueReference(OS, *Val, MST);
+ MIRFormatter::printIRValue(OS, *Val, MST);
} else if (const PseudoSourceValue *PVal = getPseudoValue()) {
OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
assert(PVal && "Expected a pseudo source value");
@@ -1144,15 +1134,18 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
printLLVMNameWithoutPrefix(
OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
break;
- default:
+ default: {
+ const MIRFormatter *Formatter = TII->getMIRFormatter();
// FIXME: This is not necessarily the correct MIR serialization format for
// a custom pseudo source value, but at least it allows
// -print-machineinstrs to work on a target with custom pseudo source
// values.
- OS << "custom ";
- PVal->printCustom(OS);
+ OS << "custom \"";
+ Formatter->printCustomPseudoSourceValue(OS, MST, *PVal);
+ OS << '\"';
break;
}
+ }
}
MachineOperand::printOperandOffset(OS, getOffset());
if (getBaseAlignment() != getSize())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index b82403ae1b85..d656953f9115 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
index 8cd66825a58a..3a9104bda0d1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -68,6 +68,7 @@
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -91,8 +92,7 @@ STATISTIC(FunctionsCreated, "Number of functions created");
// this is off by default. It should, however, be the default behaviour in
// LTO.
static cl::opt<bool> EnableLinkOnceODROutlining(
- "enable-linkonceodr-outlining",
- cl::Hidden,
+ "enable-linkonceodr-outlining", cl::Hidden,
cl::desc("Enable the machine outliner on linkonceodr functions"),
cl::init(false));
@@ -253,7 +253,7 @@ private:
/// Ukkonen's algorithm.
struct ActiveState {
/// The next node to insert at.
- SuffixTreeNode *Node;
+ SuffixTreeNode *Node = nullptr;
/// The index of the first character in the substring currently being added.
unsigned Idx = EmptyIdx;
@@ -301,8 +301,8 @@ private:
"Non-root internal nodes must have parents!");
unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
- SuffixTreeNode *N = new (NodeAllocator.Allocate())
- SuffixTreeNode(StartIdx, E, Root);
+ SuffixTreeNode *N =
+ new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, E, Root);
if (Parent)
Parent->Children[Edge] = N;
@@ -311,26 +311,31 @@ private:
/// Set the suffix indices of the leaves to the start indices of their
/// respective suffixes.
- ///
- /// \param[in] CurrNode The node currently being visited.
- /// \param CurrNodeLen The concatenation of all node sizes from the root to
- /// this node. Used to produce suffix indices.
- void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) {
-
- bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
-
- // Store the concatenation of lengths down from the root.
- CurrNode.ConcatLen = CurrNodeLen;
- // Traverse the tree depth-first.
- for (auto &ChildPair : CurrNode.Children) {
- assert(ChildPair.second && "Node had a null child!");
- setSuffixIndices(*ChildPair.second,
- CurrNodeLen + ChildPair.second->size());
- }
+ void setSuffixIndices() {
+ // List of nodes we need to visit along with the current length of the
+ // string.
+ std::vector<std::pair<SuffixTreeNode *, unsigned>> ToVisit;
+
+ // Current node being visited.
+ SuffixTreeNode *CurrNode = Root;
+
+ // Sum of the lengths of the nodes down the path to the current one.
+ unsigned CurrNodeLen = 0;
+ ToVisit.push_back({CurrNode, CurrNodeLen});
+ while (!ToVisit.empty()) {
+ std::tie(CurrNode, CurrNodeLen) = ToVisit.back();
+ ToVisit.pop_back();
+ CurrNode->ConcatLen = CurrNodeLen;
+ for (auto &ChildPair : CurrNode->Children) {
+ assert(ChildPair.second && "Node had a null child!");
+ ToVisit.push_back(
+ {ChildPair.second, CurrNodeLen + ChildPair.second->size()});
+ }
- // Is this node a leaf? If it is, give it a suffix index.
- if (IsLeaf)
- CurrNode.SuffixIdx = Str.size() - CurrNodeLen;
+ // No children, so we are at the end of the string.
+ if (CurrNode->Children.size() == 0 && !CurrNode->isRoot())
+ CurrNode->SuffixIdx = Str.size() - CurrNodeLen;
+ }
}
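setSuffixIndices() is rewritten from recursion to an explicit work list, so the traversal no longer consumes call stack proportional to suffix-tree depth. The same pattern on a plain n-ary tree:

  #include <cstdio>
  #include <utility>
  #include <vector>

  struct Node {
    std::vector<Node *> Kids;
    unsigned Depth = 0;
  };

  // Explicit-stack DFS replacing recursion, as in the hunk above.
  static void setDepths(Node *Root) {
    std::vector<std::pair<Node *, unsigned>> ToVisit{{Root, 0u}};
    while (!ToVisit.empty()) {
      auto [N, D] = ToVisit.back();
      ToVisit.pop_back();
      N->Depth = D;
      for (Node *K : N->Kids)
        ToVisit.push_back({K, D + 1});
    }
  }

  int main() {
    Node Leaf, Mid, Root;
    Mid.Kids.push_back(&Leaf);
    Root.Kids.push_back(&Mid);
    setDepths(&Root);
    std::printf("%u %u %u\n", Root.Depth, Mid.Depth, Leaf.Depth); // 0 1 2
    return 0;
  }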
/// Construct the suffix tree for the prefix of the input ending at
@@ -473,7 +478,6 @@ public:
// Keep track of the number of suffixes we have to add of the current
// prefix.
unsigned SuffixesToAdd = 0;
- Active.Node = Root;
// Construct the suffix tree iteratively on each prefix of the string.
// PfxEndIdx is the end index of the current prefix.
@@ -487,13 +491,12 @@ public:
// Set the suffix indices of each leaf.
assert(Root && "Root node can't be nullptr!");
- setSuffixIndices(*Root, 0);
+ setSuffixIndices();
}
-
/// Iterator for finding all repeated substrings in the suffix tree.
struct RepeatedSubstringIterator {
- private:
+ private:
/// The current node we're visiting.
SuffixTreeNode *N = nullptr;
@@ -595,7 +598,7 @@ public:
advance();
}
}
-};
+ };
typedef RepeatedSubstringIterator iterator;
iterator begin() { return iterator(Root); }
@@ -694,9 +697,10 @@ struct InstructionMapper {
/// IllegalInstrNumber.
///
/// \returns The integer that \p *It was mapped to.
- unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It,
- bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB,
- std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ unsigned mapToIllegalUnsigned(
+ MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
+ std::vector<unsigned> &UnsignedVecForMBB,
+ std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
// Can't outline an illegal instruction. Set the flag.
CanOutlineWithPrevInstr = false;
@@ -764,12 +768,12 @@ struct InstructionMapper {
std::vector<unsigned> UnsignedVecForMBB;
std::vector<MachineBasicBlock::iterator> InstrListForMBB;
- for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; It++) {
+ for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; ++It) {
// Keep track of where this instruction is in the module.
switch (TII.getOutliningType(It, Flags)) {
case InstrType::Illegal:
- mapToIllegalUnsigned(It, CanOutlineWithPrevInstr,
- UnsignedVecForMBB, InstrListForMBB);
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
break;
case InstrType::Legal:
@@ -783,7 +787,7 @@ struct InstructionMapper {
// The instruction also acts as a terminator, so we have to record that
// in the string.
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
- InstrListForMBB);
+ InstrListForMBB);
break;
case InstrType::Invisible:
@@ -802,7 +806,7 @@ struct InstructionMapper {
// boundaries since the "end" is encoded uniquely and thus appears in no
// repeated substring.
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
- InstrListForMBB);
+ InstrListForMBB);
InstrList.insert(InstrList.end(), InstrListForMBB.begin(),
InstrListForMBB.end());
UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(),
@@ -888,24 +892,27 @@ struct MachineOutliner : public ModulePass {
/// \param FunctionList A list of functions to be inserted into the module.
/// \param Mapper Contains the instruction mappings for the module.
bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper);
+ InstructionMapper &Mapper, unsigned &OutlinedFunctionNum);
/// Creates a function for \p OF and inserts it into the module.
MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
InstructionMapper &Mapper,
unsigned Name);
+ /// Calls 'doOutline()'.
+ bool runOnModule(Module &M) override;
+
/// Construct a suffix tree on the instructions in \p M and outline repeated
/// strings from that tree.
- bool runOnModule(Module &M) override;
+ bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
/// Return a DISubprogram for OF if one exists, and null otherwise. Helper
/// function for remark emission.
DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
- DISubprogram *SP;
for (const Candidate &C : OF.Candidates)
- if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram()))
- return SP;
+ if (MachineFunction *MF = C.getMF())
+ if (DISubprogram *SP = MF->getFunction().getSubprogram())
+ return SP;
return nullptr;
}
@@ -918,15 +925,14 @@ struct MachineOutliner : public ModulePass {
/// FIXME: This should be handled by the pass manager, not the outliner.
/// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
/// pass manager.
- void initSizeRemarkInfo(
- const Module &M, const MachineModuleInfo &MMI,
- StringMap<unsigned> &FunctionToInstrCount);
+ void initSizeRemarkInfo(const Module &M, const MachineModuleInfo &MMI,
+ StringMap<unsigned> &FunctionToInstrCount);
/// Emit the remark.
// FIXME: This should be handled by the pass manager, not the outliner.
- void emitInstrCountChangedRemark(
- const Module &M, const MachineModuleInfo &MMI,
- const StringMap<unsigned> &FunctionToInstrCount);
+ void
+ emitInstrCountChangedRemark(const Module &M, const MachineModuleInfo &MMI,
+ const StringMap<unsigned> &FunctionToInstrCount);
};
} // Anonymous namespace.
@@ -1003,13 +1009,12 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
MORE.emit(R);
}
-void
-MachineOutliner::findCandidates(InstructionMapper &Mapper,
- std::vector<OutlinedFunction> &FunctionList) {
+void MachineOutliner::findCandidates(
+ InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
FunctionList.clear();
SuffixTree ST(Mapper.UnsignedVec);
- // First, find dall of the repeated substrings in the tree of minimum length
+ // First, find all of the repeated substrings in the tree of minimum length
// 2.
std::vector<Candidate> CandidatesForRepeatedSeq;
for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) {
@@ -1087,10 +1092,8 @@ MachineOutliner::findCandidates(InstructionMapper &Mapper,
}
}
-MachineFunction *
-MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
- InstructionMapper &Mapper,
- unsigned Name) {
+MachineFunction *MachineOutliner::createOutlinedFunction(
+ Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) {
// Create the function name. This should be unique.
// FIXME: We should have a better naming scheme. This should be stable,
@@ -1190,13 +1193,11 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
bool MachineOutliner::outline(Module &M,
std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper) {
+ InstructionMapper &Mapper,
+ unsigned &OutlinedFunctionNum) {
bool OutlinedSomething = false;
- // Number to append to the current outlined function.
- unsigned OutlinedFunctionNum = 0;
-
// Sort by benefit. The most beneficial functions should be outlined first.
llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS,
const OutlinedFunction &RHS) {
@@ -1303,12 +1304,6 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
if (F.empty())
continue;
- // Disable outlining from noreturn functions right now. Noreturn requires
- // special handling for the case where what we are outlining could be a
- // tail call.
- if (F.hasFnAttribute(Attribute::NoReturn))
- continue;
-
// There's something in F. Check if it has a MachineFunction associated with
// it.
MachineFunction *MF = MMI.getMachineFunction(F);
@@ -1403,8 +1398,7 @@ void MachineOutliner::emitInstrCountChangedRemark(
MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
MORE.emit([&]() {
MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
- DiagnosticLocation(),
- &MF->front());
+ DiagnosticLocation(), &MF->front());
R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
<< ": Function: "
<< DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
@@ -1427,6 +1421,15 @@ bool MachineOutliner::runOnModule(Module &M) {
if (M.empty())
return false;
+ // Number to append to the current outlined function.
+ unsigned OutlinedFunctionNum = 0;
+
+ if (!doOutline(M, OutlinedFunctionNum))
+ return false;
+ return true;
+}
+
+bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
// If the user passed -enable-machine-outliner=always or
@@ -1434,14 +1437,14 @@ bool MachineOutliner::runOnModule(Module &M) {
// Otherwise, if the target supports default outlining, it will run on all
// functions deemed by the target to be worth outlining from by default. Tell
// the user how the outliner is running.
- LLVM_DEBUG(
+ LLVM_DEBUG({
dbgs() << "Machine Outliner: Running on ";
if (RunOnAllFunctions)
dbgs() << "all functions";
else
dbgs() << "target-default functions";
- dbgs() << "\n"
- );
+ dbgs() << "\n";
+ });
// If the user specifies that they want to outline from linkonceodrs, set
// it here.
@@ -1470,7 +1473,8 @@ bool MachineOutliner::runOnModule(Module &M) {
initSizeRemarkInfo(M, MMI, FunctionToInstrCount);
// Outline each of the candidates and return true if something was outlined.
- bool OutlinedSomething = outline(M, FunctionList, Mapper);
+ bool OutlinedSomething =
+ outline(M, FunctionList, Mapper, OutlinedFunctionNum);
// If we outlined something, we definitely changed the MI count of the
// module. If we've asked for size remarks, then output them.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index 89c9f6093a97..ef22caa877c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1314,8 +1314,9 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
// Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
// SUnit to the container.
SmallVector<SUnit *, 8> UseSUs;
- for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
- for (auto &Dep : (*I)->Succs) {
+ // Do not use an iterator-based loop here, as we are updating the container.
+ for (size_t Index = 0; Index < PHISUs.size(); ++Index) {
+ for (auto &Dep : PHISUs[Index]->Succs) {
if (Dep.getKind() != SDep::Data)
continue;
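The pipeliner loop switches from iterators to indices because the body appends to PHISUs while iterating: a vector-style container may reallocate on growth, invalidating any held iterator, while an index re-checked against size() stays valid. The same hazard and fix with a std::vector:

  #include <cstdio>
  #include <vector>

  int main() {
    std::vector<int> Work = {1, 2, 3};
    // size() is re-read each iteration, so appended elements are visited too;
    // no iterator is held across push_back, so reallocation is harmless.
    for (size_t I = 0; I < Work.size(); ++I)
      if (Work[I] % 2)
        Work.push_back(Work[I] * 10);
    for (int V : Work)
      std::printf("%d ", V); // 1 2 3 10 30
    std::printf("\n");
    return 0;
  }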
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp
index f4daff667e86..fb96d0efa4d4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp
index 2961d456be0d..45cdcbfeab9f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp
@@ -11,6 +11,7 @@
#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
index f0721ea3b76d..e42701b9c6ca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -48,6 +48,7 @@
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -238,6 +239,7 @@ void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -402,7 +404,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
if (EnablePostRAMachineSched.getNumOccurrences()) {
if (!EnablePostRAMachineSched)
return false;
- } else if (!mf.getSubtarget().enablePostRAScheduler()) {
+ } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
return false;
}
@@ -412,6 +414,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
PassConfig = &getAnalysis<TargetPassConfig>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
if (VerifyScheduling)
MF->verify(this, "Before post machine scheduling.");
@@ -1495,7 +1498,7 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
: BaseOp->getIndex() < RHS.BaseOp->getIndex();
if (Offset != RHS.Offset)
- return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
+ return Offset < RHS.Offset;
return SU->NodeNum < RHS.SU->NodeNum;
}
@@ -1570,6 +1573,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
SUnit *SUa = MemOpRecords[Idx].SU;
SUnit *SUb = MemOpRecords[Idx+1].SU;
+ if (SUa->NodeNum > SUb->NodeNum)
+ std::swap(SUa, SUb);
if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
*MemOpRecords[Idx + 1].BaseOp,
ClusterLength) &&
@@ -1595,10 +1600,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
/// Callback from DAG postProcessing to create cluster edges for loads.
void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
- // Map DAG NodeNum to store chain ID.
- DenseMap<unsigned, unsigned> StoreChainIDs;
- // Map each store chain to a set of dependent MemOps.
- SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
+ // Map DAG NodeNum to a set of dependent MemOps in store chain.
+ DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains;
for (SUnit &SU : DAG->SUnits) {
if ((IsLoad && !SU.getInstr()->mayLoad()) ||
(!IsLoad && !SU.getInstr()->mayStore()))
@@ -1611,19 +1614,14 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
break;
}
}
- // Check if this chain-like pred has been seen
- // before. ChainPredID==MaxNodeID at the top of the schedule.
- unsigned NumChains = StoreChainDependents.size();
- std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
- StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
- if (Result.second)
- StoreChainDependents.resize(NumChains + 1);
- StoreChainDependents[Result.first->second].push_back(&SU);
+ // Insert the SU to corresponding store chain.
+ auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second;
+ Chain.push_back(&SU);
}
// Iterate over the store chains.
- for (auto &SCD : StoreChainDependents)
- clusterNeighboringMemOps(SCD, DAG);
+ for (auto &SCD : StoreChains)
+ clusterNeighboringMemOps(SCD.second, DAG);
}
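The hunk above folds two parallel containers (an ID-to-index map plus a vector of chains) into a single map from chain ID to its member SUnits. A rough standalone equivalent, with std::unordered_map::operator[] playing the role of DenseMap::FindAndConstruct; the Node type is a hypothetical stand-in for SUnit:

#include <cstdio>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for SUnit: a DAG node number plus the ID of the
// chain-like predecessor found for it.
struct Node {
  unsigned NodeNum;
  unsigned ChainPredID;
};

int main() {
  std::vector<Node> Nodes{{0, 7}, {1, 7}, {2, 3}, {3, 7}};
  // operator[] default-constructs the vector on first use, mirroring
  // DenseMap::FindAndConstruct: one lookup both creates and returns the
  // chain, so no separate ID-to-index bookkeeping is needed.
  std::unordered_map<unsigned, std::vector<const Node *>> StoreChains;
  for (const Node &N : Nodes)
    StoreChains[N.ChainPredID].push_back(&N);
  for (const auto &Chain : StoreChains)
    std::printf("chain %u has %zu members\n", Chain.first,
                Chain.second.size());
  return 0;
}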
//===----------------------------------------------------------------------===//
@@ -2085,7 +2083,8 @@ getOtherResourceCount(unsigned &OtherCritIdx) {
return OtherCritCount;
}
-void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
+void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
+ unsigned Idx) {
assert(SU->getInstr() && "Scheduled SUnit must have instr");
#ifndef NDEBUG
@@ -2102,11 +2101,19 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
// Check for interlocks first. For the purpose of other heuristics, an
// instruction that cannot issue appears as if it's not in the ReadyQueue.
bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
- if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
- Available.size() >= ReadyListLimit)
- Pending.push(SU);
- else
+ bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
+ checkHazard(SU) || (Available.size() >= ReadyListLimit);
+
+ if (!HazardDetected) {
Available.push(SU);
+
+ if (InPQueue)
+ Pending.remove(Pending.begin() + Idx);
+ return;
+ }
+
+ if (!InPQueue)
+ Pending.push(SU);
}
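The restructured releaseNode above performs one combined hazard check, then either publishes the unit to Available (removing it from Pending when it was re-released from there) or parks it in Pending. A toy model of that control flow, with hazard detection stubbed out by a hypothetical rule:

#include <cstdio>
#include <vector>

// Toy model of the two scheduler queues; hazard detection is stubbed out.
struct Boundary {
  std::vector<int> Available, Pending;
  bool hasHazard(int SU) const { return SU % 2 != 0; } // Hypothetical rule.

  // One hazard check decides whether the unit moves to Available (deleting
  // it from Pending if it came from there) or is parked in Pending (only
  // if it is not already there).
  void releaseNode(int SU, bool InPQueue, size_t Idx = 0) {
    if (!hasHazard(SU)) {
      Available.push_back(SU);
      if (InPQueue)
        Pending.erase(Pending.begin() + Idx);
      return;
    }
    if (!InPQueue)
      Pending.push_back(SU);
  }
};

int main() {
  Boundary B;
  B.releaseNode(4, /*InPQueue=*/false); // No hazard: straight to Available.
  B.releaseNode(5, /*InPQueue=*/false); // Hazard: parked in Pending.
  B.releaseNode(B.Pending[0], /*InPQueue=*/true, 0); // Re-checked later.
  std::printf("available=%zu pending=%zu\n", B.Available.size(),
              B.Pending.size());
  return 0;
}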
/// Move the boundary of scheduled code by one cycle.
@@ -2346,26 +2353,21 @@ void SchedBoundary::releasePending() {
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
- bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
- for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
- SUnit *SU = *(Pending.begin()+i);
+ for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
+ SUnit *SU = *(Pending.begin() + I);
unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
- if (!IsBuffered && ReadyCycle > CurrCycle)
- continue;
-
- if (checkHazard(SU))
- continue;
-
if (Available.size() >= ReadyListLimit)
break;
- Available.push(SU);
- Pending.remove(Pending.begin()+i);
- --i; --e;
+ releaseNode(SU, ReadyCycle, true, I);
+ if (E != Pending.size()) {
+ --I;
+ --E;
+ }
}
CheckPending = false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index 27a2e7023f22..a4ba197b7a1d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -15,6 +15,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -38,6 +40,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
@@ -105,6 +108,25 @@ namespace {
using AllSuccsCache =
std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
+ /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
+ /// post-dominated by another DBG_VALUE of the same variable location.
+ /// This is necessary to detect sequences such as:
+ /// %0 = someinst
+ /// DBG_VALUE %0, !123, !DIExpression()
+ /// %1 = anotherinst
+ /// DBG_VALUE %1, !123, !DIExpression()
+  /// Where if %0 were to sink, the DBG_VALUE should not sink with it, as that
+ /// would re-order assignments.
+ using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;
+
+ /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
+ /// debug instructions to sink.
+ SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;
+
+ /// Record of debug variables that have had their locations set in the
+ /// current block.
+ DenseSet<DebugVariable> SeenDbgVars;
+
public:
static char ID; // Pass identification
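PointerIntPair packs the flag into the low bits of the pointer, which alignment guarantees would otherwise leave zero. A portable approximation of the same one-pointer-plus-one-bit record, assuming only that the pointee's alignment keeps bit 0 free (the types here are illustrative stand-ins):

#include <cassert>
#include <cstdint>
#include <cstdio>

struct Instr { int Dummy; }; // Stand-in for MachineInstr.

// One machine word holding both the pointer and a one-bit flag, like
// PointerIntPair<MachineInstr *, 1>. The alignment of Instr keeps the low
// bit of every valid pointer zero, so it is free to carry the flag.
class PtrFlag {
  uintptr_t Value = 0;

public:
  PtrFlag(Instr *P, bool Flag) {
    assert((reinterpret_cast<uintptr_t>(P) & 1u) == 0 && "unaligned pointer");
    Value = reinterpret_cast<uintptr_t>(P) | uintptr_t(Flag);
  }
  Instr *getPointer() const {
    return reinterpret_cast<Instr *>(Value & ~uintptr_t(1));
  }
  bool getInt() const { return Value & 1u; }
};

int main() {
  Instr MI{42};
  PtrFlag User(&MI, /*PostDominated=*/true);
  std::printf("%d %d\n", User.getPointer()->Dummy, int(User.getInt()));
  return 0;
}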
@@ -132,6 +154,7 @@ namespace {
private:
bool ProcessBlock(MachineBasicBlock &MBB);
+ void ProcessDbgInst(MachineInstr &MI);
bool isWorthBreakingCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To);
@@ -153,8 +176,14 @@ namespace {
MachineBasicBlock *To,
bool BreakPHIEdge);
bool SinkInstruction(MachineInstr &MI, bool &SawStore,
-
AllSuccsCache &AllSuccessors);
+
+  /// If we sink a COPY inst, some debug users of its destination may no
+ /// longer be dominated by the COPY, and will eventually be dropped.
+ /// This is easily rectified by forwarding the non-dominated debug uses
+ /// to the copy source.
+ void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
+ MachineBasicBlock *TargetBlock);
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge, bool &LocalUse) const;
@@ -367,8 +396,11 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
if (!ProcessedBegin)
--I;
- if (MI.isDebugInstr())
+ if (MI.isDebugInstr()) {
+ if (MI.isDebugValue())
+ ProcessDbgInst(MI);
continue;
+ }
bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
if (Joined) {
@@ -384,9 +416,29 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
// If we just processed the first instruction in the block, we're done.
} while (!ProcessedBegin);
+ SeenDbgUsers.clear();
+ SeenDbgVars.clear();
+
return MadeChange;
}
+void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
+  // When we see DBG_VALUEs for registers, record any vregs they read, so that
+ // we know what to sink if the vreg def sinks.
+ assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");
+
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ bool SeenBefore = SeenDbgVars.count(Var) != 0;
+
+ MachineOperand &MO = MI.getOperand(0);
+ if (MO.isReg() && MO.getReg().isVirtual())
+ SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
+
+ // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
+ SeenDbgVars.insert(Var);
+}
+
bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To) {
@@ -731,18 +783,60 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
MBP.LHS.getReg() == BaseOp->getReg();
}
-/// Sink an instruction and its associated debug instructions. If the debug
-/// instructions to be sunk are already known, they can be provided in DbgVals.
+/// If the sunk instruction is a copy, try to forward the copy instead of
+/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
+/// there's any subregister weirdness involved. Returns true if copy
+/// propagation occurred.
+static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) {
+ const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
+ const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();
+
+ // Copy DBG_VALUE operand and set the original to undef. We then check to
+ // see whether this is something that can be copy-forwarded. If it isn't,
+ // continue around the loop.
+ MachineOperand DbgMO = DbgMI.getOperand(0);
+
+ const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
+ auto CopyOperands = TII.isCopyInstr(SinkInst);
+ if (!CopyOperands)
+ return false;
+ SrcMO = CopyOperands->Source;
+ DstMO = CopyOperands->Destination;
+
+ // Check validity of forwarding this copy.
+ bool PostRA = MRI.getNumVirtRegs() == 0;
+
+ // Trying to forward between physical and virtual registers is too hard.
+ if (DbgMO.getReg().isVirtual() != SrcMO->getReg().isVirtual())
+ return false;
+
+ // Only try virtual register copy-forwarding before regalloc, and physical
+ // register copy-forwarding after regalloc.
+ bool arePhysRegs = !DbgMO.getReg().isVirtual();
+ if (arePhysRegs != PostRA)
+ return false;
+
+ // Pre-regalloc, only forward if all subregisters agree (or there are no
+ // subregs at all). More analysis might recover some forwardable copies.
+ if (!PostRA && (DbgMO.getSubReg() != SrcMO->getSubReg() ||
+ DbgMO.getSubReg() != DstMO->getSubReg()))
+ return false;
+
+ // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
+ // of this copy. Only forward the copy if the DBG_VALUE operand exactly
+ // matches the copy destination.
+ if (PostRA && DbgMO.getReg() != DstMO->getReg())
+ return false;
+
+ DbgMI.getOperand(0).setReg(SrcMO->getReg());
+ DbgMI.getOperand(0).setSubReg(SrcMO->getSubReg());
+ return true;
+}
+
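Stripped of the subregister and physical/virtual checks, the rule above is: retarget the DBG_VALUE from the copy's destination to its source when they match exactly, and otherwise report failure so the caller can set the location undef. A minimal sketch over plain register numbers (both record types are hypothetical):

#include <cstdio>

struct Copy { unsigned Dst, Src; };  // Hypothetical copy record.
struct DbgValue { unsigned Reg; };   // Hypothetical variable-location record.

// Forwarding rule: if the DBG_VALUE tracks exactly the copy's destination,
// retarget it at the source; otherwise the caller sets the location undef.
bool forwardThroughCopy(const Copy &C, DbgValue &DV) {
  if (DV.Reg != C.Dst)
    return false;
  DV.Reg = C.Src;
  return true;
}

int main() {
  Copy C{/*Dst=*/5, /*Src=*/3};
  DbgValue DV{5};
  if (forwardThroughCopy(C, DV))
    std::printf("DBG_VALUE now reads reg %u\n", DV.Reg); // Prints reg 3.
  return 0;
}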
+/// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
MachineBasicBlock::iterator InsertPos,
- SmallVectorImpl<MachineInstr *> *DbgVals = nullptr) {
- // If debug values are provided use those, otherwise call collectDebugValues.
- SmallVector<MachineInstr *, 2> DbgValuesToSink;
- if (DbgVals)
- DbgValuesToSink.insert(DbgValuesToSink.begin(),
- DbgVals->begin(), DbgVals->end());
- else
- MI.collectDebugValues(DbgValuesToSink);
+ SmallVectorImpl<MachineInstr *> &DbgValuesToSink) {
// If we cannot find a location to use (merge with), then we erase the debug
// location to prevent debug-info driven tools from potentially reporting
@@ -758,13 +852,19 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
++MachineBasicBlock::iterator(MI));
- // Move previously adjacent debug value instructions to the insert position.
+ // Sink a copy of debug users to the insert position. Mark the original
+ // DBG_VALUE location as 'undef', indicating that any earlier variable
+ // location should be terminated as we've optimised away the value at this
+ // point.
for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
DBE = DbgValuesToSink.end();
DBI != DBE; ++DBI) {
MachineInstr *DbgMI = *DBI;
- SuccToSinkTo.splice(InsertPos, ParentBlock, DbgMI,
- ++MachineBasicBlock::iterator(DbgMI));
+ MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI);
+ SuccToSinkTo.insert(InsertPos, NewDbgMI);
+
+ if (!attemptDebugCopyProp(MI, *DbgMI))
+ DbgMI->getOperand(0).setReg(0);
}
}
@@ -882,7 +982,36 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
++InsertPos;
- performSink(MI, *SuccToSinkTo, InsertPos);
+ // Collect debug users of any vreg that this inst defines.
+ SmallVector<MachineInstr *, 4> DbgUsersToSink;
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
+ continue;
+ if (!SeenDbgUsers.count(MO.getReg()))
+ continue;
+
+ // Sink any users that don't pass any other DBG_VALUEs for this variable.
+ auto &Users = SeenDbgUsers[MO.getReg()];
+ for (auto &User : Users) {
+ MachineInstr *DbgMI = User.getPointer();
+ if (User.getInt()) {
+ // This DBG_VALUE would re-order assignments. If we can't copy-propagate
+ // it, it can't be recovered. Set it undef.
+ if (!attemptDebugCopyProp(MI, *DbgMI))
+ DbgMI->getOperand(0).setReg(0);
+ } else {
+ DbgUsersToSink.push_back(DbgMI);
+ }
+ }
+ }
+
+  // After sinking, some debug users may no longer be dominated. If possible,
+ // copy-propagate their operands. As it's expensive, don't do this if there's
+ // no debuginfo in the program.
+ if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
+ SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);
+
+ performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);
// Conservatively, clear any kill flags, since it's possible that they are no
// longer correct.
@@ -897,6 +1026,41 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
return true;
}
+void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
+ MachineInstr &MI, MachineBasicBlock *TargetBlock) {
+ assert(MI.isCopy());
+ assert(MI.getOperand(1).isReg());
+
+ // Enumerate all users of vreg operands that are def'd. Skip those that will
+ // be sunk. For the rest, if they are not dominated by the block we will sink
+ // MI into, propagate the copy source to them.
+ SmallVector<MachineInstr *, 4> DbgDefUsers;
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
+ continue;
+ for (auto &User : MRI.use_instructions(MO.getReg())) {
+ if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
+ continue;
+
+ // If is in same block, will either sink or be use-before-def.
+ if (User.getParent() == MI.getParent())
+ continue;
+
+ assert(User.getOperand(0).isReg() &&
+ "DBG_VALUE user of vreg, but non reg operand?");
+ DbgDefUsers.push_back(&User);
+ }
+ }
+
+ // Point the users of this copy that are no longer dominated, at the source
+ // of the copy.
+ for (auto *User : DbgDefUsers) {
+ User->getOperand(0).setReg(MI.getOperand(1).getReg());
+ User->getOperand(0).setSubReg(MI.getOperand(1).getSubReg());
+ }
+}
+
//===----------------------------------------------------------------------===//
// This pass is not intended to be a replacement or a complete alternative
// for the pre-ra machine sink pass. It is only designed to sink COPY
@@ -1051,10 +1215,14 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
SuccBB->removeLiveIn(*S);
for (auto U : UsedOpsInCopy) {
- Register Reg = MI->getOperand(U).getReg();
- if (!SuccBB->isLiveIn(Reg))
- SuccBB->addLiveIn(Reg);
+ Register SrcReg = MI->getOperand(U).getReg();
+ LaneBitmask Mask;
+ for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) {
+ Mask |= (*S).second;
+ }
+ SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll());
}
+ SuccBB->sortUniqueLiveIns();
}
static bool hasRegisterDependency(MachineInstr *MI,
@@ -1206,7 +1374,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// block.
clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
- performSink(*MI, *SuccBB, InsertPos, &DbgValsToSink);
+ performSink(*MI, *SuccBB, InsertPos, DbgValsToSink);
updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
Changed = true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp
new file mode 100644
index 000000000000..aff67f9cfd55
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -0,0 +1,122 @@
+//===- MachineSizeOpts.cpp - code size optimization related code ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared machine IR code size optimization related
+// code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+
+using namespace llvm;
+
+extern cl::opt<bool> EnablePGSO;
+extern cl::opt<bool> PGSOLargeWorkingSetSizeOnly;
+extern cl::opt<bool> ForcePGSO;
+extern cl::opt<int> PgsoCutoffInstrProf;
+extern cl::opt<int> PgsoCutoffSampleProf;
+
+namespace machine_size_opts_detail {
+
+/// Like ProfileSummaryInfo::isColdBlock but for MachineBasicBlock.
+bool isColdBlock(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ auto Count = MBFI->getBlockProfileCount(MBB);
+ return Count && PSI->isColdCount(*Count);
+}
+
+/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock.
+static bool isHotBlockNthPercentile(int PercentileCutoff,
+ const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ auto Count = MBFI->getBlockProfileCount(MBB);
+ return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
+}
+
+/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for
+/// MachineFunction.
+bool isFunctionColdInCallGraph(
+ const MachineFunction *MF,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo &MBFI) {
+ if (auto FunctionCount = MF->getFunction().getEntryCount())
+ if (!PSI->isColdCount(FunctionCount.getCount()))
+ return false;
+ for (const auto &MBB : *MF)
+ if (!isColdBlock(&MBB, PSI, &MBFI))
+ return false;
+ return true;
+}
+
+/// Like ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile but for
+/// MachineFunction.
+bool isFunctionHotInCallGraphNthPercentile(
+ int PercentileCutoff,
+ const MachineFunction *MF,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo &MBFI) {
+ if (auto FunctionCount = MF->getFunction().getEntryCount())
+ if (PSI->isHotCountNthPercentile(PercentileCutoff,
+ FunctionCount.getCount()))
+ return true;
+ for (const auto &MBB : *MF)
+ if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
+ return true;
+ return false;
+}
+} // namespace machine_size_opts_detail
+
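Both helpers above reduce to the same question: does the block's profile count clear a cutoff derived from the profile summary? A toy version, with the Nth-percentile cutoff collapsed to a single hypothetical threshold value:

#include <cstdint>
#include <cstdio>
#include <optional>

// Hypothetical stand-in for a block plus its MBFI data: a block either has
// a profile count or it does not.
struct Block {
  std::optional<uint64_t> ProfileCount;
};

// Mirrors isHotBlockNthPercentile: a block with no count is never "hot";
// otherwise compare against the cutoff the profile summary supplies.
bool isHotBlock(const Block &B, uint64_t HotCountThreshold) {
  return B.ProfileCount && *B.ProfileCount >= HotCountThreshold;
}

int main() {
  Block Cold{std::nullopt}, Warm{uint64_t{10}}, Hot{uint64_t{100000}};
  const uint64_t Threshold = 5000; // Pretend this is the percentile cutoff.
  std::printf("%d %d %d\n", isHotBlock(Cold, Threshold),
              isHotBlock(Warm, Threshold), isHotBlock(Hot, Threshold));
  return 0;
}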
+namespace {
+struct MachineBasicBlockBFIAdapter {
+ static bool isFunctionColdInCallGraph(const MachineFunction *MF,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo &MBFI) {
+ return machine_size_opts_detail::isFunctionColdInCallGraph(MF, PSI, MBFI);
+ }
+ static bool isFunctionHotInCallGraphNthPercentile(
+ int CutOff,
+ const MachineFunction *MF,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo &MBFI) {
+ return machine_size_opts_detail::isFunctionHotInCallGraphNthPercentile(
+ CutOff, MF, PSI, MBFI);
+ }
+ static bool isColdBlock(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI);
+ }
+ static bool isHotBlockNthPercentile(int CutOff,
+ const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return machine_size_opts_detail::isHotBlockNthPercentile(
+ CutOff, MBB, PSI, MBFI);
+ }
+};
+} // end anonymous namespace
+
+bool llvm::shouldOptimizeForSize(const MachineFunction *MF,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI,
+ PGSOQueryType QueryType) {
+ return shouldFuncOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
+ MF, PSI, MBFI, QueryType);
+}
+
+bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI,
+ PGSOQueryType QueryType) {
+ return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
+ MBB, PSI, MBFI, QueryType);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 66a3bc2f8cc4..e6b51b7e1e56 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index 969743edca52..6c0402df8489 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -59,6 +59,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -123,8 +124,8 @@ namespace {
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
RV.push_back(Reg);
if (Register::isPhysicalRegister(Reg))
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- RV.push_back(*SubRegs);
+ for (const MCPhysReg &SubReg : TRI->subregs(Reg))
+ RV.push_back(SubReg);
}
struct BBInfo {
@@ -801,18 +802,16 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB live-in list contains non-physical register", MBB);
continue;
}
- for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- regsLive.insert(*SubRegs);
+ for (const MCPhysReg &SubReg : TRI->subregs_inclusive(LI.PhysReg))
+ regsLive.insert(SubReg);
}
}
const MachineFrameInfo &MFI = MF->getFrameInfo();
BitVector PR = MFI.getPristineRegs(*MF);
for (unsigned I : PR.set_bits()) {
- for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- regsLive.insert(*SubRegs);
+ for (const MCPhysReg &SubReg : TRI->subregs_inclusive(I))
+ regsLive.insert(SubReg);
}
regsKilled.clear();
@@ -1100,7 +1099,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
- case TargetOpcode::G_GEP: {
+ case TargetOpcode::G_PTR_ADD: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
LLT PtrTy = MRI->getType(MI->getOperand(1).getReg());
LLT OffsetTy = MRI->getType(MI->getOperand(2).getReg());
@@ -1408,18 +1407,6 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
- const Constant *Mask = MaskOp.getShuffleMask();
- auto *MaskVT = dyn_cast<VectorType>(Mask->getType());
- if (!MaskVT || !MaskVT->getElementType()->isIntegerTy(32)) {
- report("Invalid shufflemask constant type", MI);
- break;
- }
-
- if (!Mask->getAggregateElement(0u)) {
- report("Invalid shufflemask constant type", MI);
- break;
- }
-
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
LLT Src0Ty = MRI->getType(MI->getOperand(1).getReg());
LLT Src1Ty = MRI->getType(MI->getOperand(2).getReg());
@@ -1435,8 +1422,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1;
int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
- SmallVector<int, 32> MaskIdxes;
- ShuffleVectorInst::getShuffleMask(Mask, MaskIdxes);
+ ArrayRef<int> MaskIdxes = MaskOp.getShuffleMask();
if (static_cast<int>(MaskIdxes.size()) != DstNumElts)
report("Wrong result type for shufflemask", MI);
@@ -1609,13 +1595,23 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
} else if (MONum < MCID.getNumOperands()) {
const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// Don't check if it's the last operand in a variadic instruction. See,
- // e.g., LDM_RET in the arm back end.
- if (MO->isReg() &&
- !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
- if (MO->isDef() && !MCOI.isOptionalDef())
- report("Explicit operand marked as def", MO, MONum);
- if (MO->isImplicit())
- report("Explicit operand marked as implicit", MO, MONum);
+ // e.g., LDM_RET in the arm back end. Check non-variadic operands only.
+ bool IsOptional = MI->isVariadic() && MONum == MCID.getNumOperands() - 1;
+ if (!IsOptional) {
+ if (MO->isReg()) {
+ if (MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit operand marked as def", MO, MONum);
+ if (MO->isImplicit())
+ report("Explicit operand marked as implicit", MO, MONum);
+ }
+
+ // Check that an instruction has register operands only as expected.
+ if (MCOI.OperandType == MCOI::OPERAND_REGISTER &&
+ !MO->isReg() && !MO->isFI())
+ report("Expected a register operand.", MO, MONum);
+ if ((MCOI.OperandType == MCOI::OPERAND_IMMEDIATE ||
+ MCOI.OperandType == MCOI::OPERAND_PCREL) && MO->isReg())
+ report("Expected a non-register operand.", MO, MONum);
}
int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO);
@@ -2005,9 +2001,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
bool Bad = !isReserved(Reg);
// We are fine if just any subregister has a defined value.
if (Bad) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid();
- ++SubRegs) {
- if (regsLive.count(*SubRegs)) {
+
+ for (const MCPhysReg &SubReg : TRI->subregs(Reg)) {
+ if (regsLive.count(SubReg)) {
Bad = false;
break;
}
@@ -2025,9 +2021,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (!Register::isPhysicalRegister(MOP.getReg()))
continue;
- for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid();
- ++SubRegs) {
- if (*SubRegs == Reg) {
+ for (const MCPhysReg &SubReg : TRI->subregs(MOP.getReg())) {
+ if (SubReg == Reg) {
Bad = false;
break;
}
@@ -2304,6 +2299,32 @@ void MachineVerifier::visitMachineFunctionAfter() {
if (LiveInts)
verifyLiveIntervals();
+ // Check live-in list of each MBB. If a register is live into MBB, check
+ // that the register is in regsLiveOut of each predecessor block. Since
+  // this must come from a definition in the predecessor or its live-in
+ // list, this will catch a live-through case where the predecessor does not
+ // have the register in its live-in list. This currently only checks
+ // registers that have no aliases, are not allocatable and are not
+ // reserved, which could mean a condition code register for instance.
+ if (MRI->tracksLiveness())
+ for (const auto &MBB : *MF)
+ for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) {
+ MCPhysReg LiveInReg = P.PhysReg;
+ bool hasAliases = MCRegAliasIterator(LiveInReg, TRI, false).isValid();
+ if (hasAliases || isAllocatable(LiveInReg) || isReserved(LiveInReg))
+ continue;
+ for (const MachineBasicBlock *Pred : MBB.predecessors()) {
+ BBInfo &PInfo = MBBInfoMap[Pred];
+ if (!PInfo.regsLiveOut.count(LiveInReg)) {
+ report("Live in register not found to be live out from predecessor.",
+ &MBB);
+ errs() << TRI->getName(LiveInReg)
+ << " not found to be live out from "
+ << printMBBReference(*Pred) << "\n";
+ }
+ }
+ }
+
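The new check above is a dataflow consistency assertion: every register on a block's live-in list must appear in the computed live-out set of each predecessor. In miniature, over plain integer register IDs (the block type is a stand-in):

#include <cstdio>
#include <set>
#include <vector>

// Toy CFG block: its live-in list plus the live-out set computed for it
// earlier in the pass.
struct Block {
  std::vector<unsigned> LiveIns;
  std::set<unsigned> RegsLiveOut;
  std::vector<const Block *> Preds;
};

// Mirrors the verifier check: report any live-in register that some
// predecessor does not provide as live-out.
void verifyLiveIns(const Block &MBB) {
  for (unsigned Reg : MBB.LiveIns)
    for (const Block *Pred : MBB.Preds)
      if (!Pred->RegsLiveOut.count(Reg))
        std::printf("reg %u live in but not live out of predecessor\n", Reg);
}

int main() {
  Block Pred, Succ;
  Pred.RegsLiveOut = {1};
  Succ.LiveIns = {1, 2}; // Reg 2 is the inconsistency.
  Succ.Preds = {&Pred};
  verifyLiveIns(Succ);
  return 0;
}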
for (auto CSInfo : MF->getCallSitesInfo())
if (!CSInfo.first->isCall())
report("Call site info referencing instruction that is not call", MF);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
index d21eae222af0..d2ee21c8720f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
@@ -36,6 +36,21 @@ static bool isHazard(const SDep &Dep) {
return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
}
+static SUnit *getPredClusterSU(const SUnit &SU) {
+ for (const SDep &SI : SU.Preds)
+ if (SI.isCluster())
+ return SI.getSUnit();
+
+ return nullptr;
+}
+
+static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
+ unsigned Num = 1;
+ const SUnit *CurrentSU = &SU;
+  while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num++;
+ return Num < FuseLimit;
+}
+
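hasLessThanNumFused above walks the chain of cluster predecessors, counting how many instructions are already fused together. A standalone sketch of the same walk over a minimal node type:

#include <cstdio>

// Each node has at most one cluster predecessor, so a fused group forms a
// chain that can be walked and counted, as hasLessThanNumFused does.
struct SU { const SU *ClusterPred = nullptr; };

bool hasLessThanNumFused(const SU &Start, unsigned FuseLimit) {
  unsigned Num = 1;
  const SU *Cur = &Start;
  while ((Cur = Cur->ClusterPred) && Num < FuseLimit)
    ++Num;
  return Num < FuseLimit;
}

int main() {
  SU A, B, C;
  B.ClusterPred = &A;
  C.ClusterPred = &B; // Chain of three: A <- B <- C.
  std::printf("%d %d\n", hasLessThanNumFused(B, 2), // 0: already a pair.
              hasLessThanNumFused(A, 2));           // 1: room for one more.
  return 0;
}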
static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
SUnit &SecondSU) {
// Check that neither instr is already paired with another along the edge
@@ -56,6 +71,14 @@ static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
if (!DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)))
return false;
+ // TODO - If we want to chain more than two instructions, we need to create
+  // artificial edges to make dependencies from the FirstSU also dependent
+ // on other chained instructions, and other chained instructions also
+ // dependent on the dependencies of the SecondSU, to prevent them from being
+ // scheduled into these chained instructions.
+ assert(hasLessThanNumFused(FirstSU, 2) &&
+ "Currently we only support chaining together two instructions");
+
// Adjust the latency between both instrs.
for (SDep &SI : FirstSU.Succs)
if (SI.getSUnit() == &SecondSU)
@@ -161,8 +184,10 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU)
if (DepSU.isBoundaryNode())
continue;
+ // Only chain two instructions together at most.
const MachineInstr *DepMI = DepSU.getInstr();
- if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
+ if (!hasLessThanNumFused(DepSU, 2) ||
+ !shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
continue;
if (fuseInstructionPair(DAG, DepSU, AnchorSU))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index 7ce3c5861801..163e52d9199d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -1189,7 +1190,7 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
if (!Phi.isPHI())
return false;
- unsigned DefCycle = Schedule.getCycle(&Phi);
+ int DefCycle = Schedule.getCycle(&Phi);
int DefStage = Schedule.getStage(&Phi);
unsigned InitVal = 0;
@@ -1198,7 +1199,7 @@ bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
MachineInstr *Use = MRI.getVRegDef(LoopVal);
if (!Use || Use->isPHI())
return true;
- unsigned LoopCycle = Schedule.getCycle(Use);
+ int LoopCycle = Schedule.getCycle(Use);
int LoopStage = Schedule.getStage(Use);
return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
}
@@ -1214,7 +1215,7 @@ namespace {
// Remove any dead phis in MBB. Dead phis either have only one block as input
// (in which case they are the identity) or have no uses.
void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
- LiveIntervals *LIS) {
+ LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
bool Changed = true;
while (Changed) {
Changed = false;
@@ -1226,7 +1227,7 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();
Changed = true;
- } else if (MI.getNumExplicitOperands() == 3) {
+ } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
MRI.constrainRegClass(MI.getOperand(1).getReg(),
MRI.getRegClass(MI.getOperand(0).getReg()));
MRI.replaceRegWith(MI.getOperand(0).getReg(),
@@ -1582,6 +1583,133 @@ PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
return NewBB;
}
+void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
+ int MinStage) {
+ for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
+ I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
+ MachineInstr *MI = &*I++;
+ int Stage = getStage(MI);
+ if (Stage == -1 || Stage >= MinStage)
+ continue;
+
+ for (MachineOperand &DefMO : MI->defs()) {
+ SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
+ for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
+ // Only PHIs can use values from this block by construction.
+ // Match with the equivalent PHI in B.
+ assert(UseMI.isPHI());
+ Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
+ MI->getParent());
+ Subs.emplace_back(&UseMI, Reg);
+ }
+ for (auto &Sub : Subs)
+ Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
+ *MRI.getTargetRegisterInfo());
+ }
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+}
+
+void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
+ MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
+ auto InsertPt = DestBB->getFirstNonPHI();
+ DenseMap<Register, Register> Remaps;
+ for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) {
+ MachineInstr *MI = &*I++;
+ if (MI->isPHI()) {
+ // This is an illegal PHI. If we move any instructions using an illegal
+      // PHI, we need to create a legal PHI.
+ Register PhiR = MI->getOperand(0).getReg();
+ auto RC = MRI.getRegClass(PhiR);
+ Register NR = MRI.createVirtualRegister(RC);
+ MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NR)
+ .addReg(PhiR)
+ .addMBB(SourceBB);
+ BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI;
+ CanonicalMIs[NI] = CanonicalMIs[MI];
+ Remaps[PhiR] = NR;
+ continue;
+ }
+ if (getStage(MI) != Stage)
+ continue;
+ MI->removeFromParent();
+ DestBB->insert(InsertPt, MI);
+ auto *KernelMI = CanonicalMIs[MI];
+ BlockMIs[{DestBB, KernelMI}] = MI;
+ BlockMIs.erase({SourceBB, KernelMI});
+ }
+ SmallVector<MachineInstr *, 4> PhiToDelete;
+ for (MachineInstr &MI : DestBB->phis()) {
+ assert(MI.getNumOperands() == 3);
+ MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
+ // If the instruction referenced by the phi is moved inside the block
+ // we don't need the phi anymore.
+ if (getStage(Def) == Stage) {
+ Register PhiReg = MI.getOperand(0).getReg();
+ MRI.replaceRegWith(MI.getOperand(0).getReg(),
+ Def->getOperand(0).getReg());
+ MI.getOperand(0).setReg(PhiReg);
+ PhiToDelete.push_back(&MI);
+ }
+ }
+ for (auto *P : PhiToDelete)
+ P->eraseFromParent();
+ InsertPt = DestBB->getFirstNonPHI();
+ // Helper to clone Phi instructions into the destination block. We clone Phi
+ // greedily to avoid combinatorial explosion of Phi instructions.
+ auto clonePhi = [&](MachineInstr *Phi) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
+ DestBB->insert(InsertPt, NewMI);
+ Register OrigR = Phi->getOperand(0).getReg();
+ Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
+ NewMI->getOperand(0).setReg(R);
+ NewMI->getOperand(1).setReg(OrigR);
+ NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
+ Remaps[OrigR] = R;
+ CanonicalMIs[NewMI] = CanonicalMIs[Phi];
+ BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
+ PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
+ return R;
+ };
+ for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
+ for (MachineOperand &MO : I->uses()) {
+ if (!MO.isReg())
+ continue;
+ if (Remaps.count(MO.getReg()))
+ MO.setReg(Remaps[MO.getReg()]);
+ else {
+ // If we are using a phi from the source block we need to add a new phi
+ // pointing to the old one.
+ MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
+ if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
+ Register R = clonePhi(Use);
+ MO.setReg(R);
+ }
+ }
+ }
+ }
+}
+
+Register
+PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
+ MachineInstr *Phi) {
+ unsigned distance = PhiNodeLoopIteration[Phi];
+ MachineInstr *CanonicalUse = CanonicalPhi;
+ for (unsigned I = 0; I < distance; ++I) {
+ assert(CanonicalUse->isPHI());
+ assert(CanonicalUse->getNumOperands() == 5);
+ unsigned LoopRegIdx = 3, InitRegIdx = 1;
+ if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
+ std::swap(LoopRegIdx, InitRegIdx);
+ CanonicalUse =
+ MRI.getVRegDef(CanonicalUse->getOperand(LoopRegIdx).getReg());
+ }
+ return CanonicalUse->getOperand(0).getReg();
+}
+
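getPhiCanonicalReg above hops backwards through the loop-carried PHI operands `distance` times, so an epilogue N iterations away from the kernel reads the value defined N iterations earlier. A linked-chain sketch of that walk (the Phi record is hypothetical):

#include <cstdio>

// Toy loop-carried chain: each "phi" records the register it defines and
// the phi defining the value one iteration earlier (nullptr at the start).
struct Phi {
  unsigned Reg;
  const Phi *LoopInput;
};

// Mirrors getPhiCanonicalReg: walk back Distance iterations through the
// chain, then return the register defined at that point.
unsigned regAtDistance(const Phi *P, unsigned Distance) {
  for (unsigned I = 0; I < Distance && P->LoopInput; ++I)
    P = P->LoopInput;
  return P->Reg;
}

int main() {
  Phi It0{10, nullptr}, It1{11, &It0}, It2{12, &It1};
  std::printf("%u %u\n", regAtDistance(&It2, 0), // 12: current iteration.
              regAtDistance(&It2, 2));           // 10: two iterations back.
  return 0;
}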
void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
BitVector LS(Schedule.getNumStages(), true);
BitVector AS(Schedule.getNumStages(), true);
@@ -1604,26 +1732,45 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
// property that any value deffed in BB but used outside of BB is used by a
// PHI in the exiting block.
MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
-
+ EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
// Push out the epilogs, again in reverse order.
  // We can't assume anything about the minimum loop trip count at this point,
- // so emit a fairly complex epilog:
- // K[0, 1, 2] // Kernel runs stages 0, 1, 2
- // E0[2] <- P1 // Epilog runs stage 2 only, so the state after is [0].
- // E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2.
- //
- // This creates a single-successor single-predecessor sequence of blocks for
- // each epilog, which are kept this way for simplicity at this stage and
- // cleaned up by the optimizer later.
+ // so emit a fairly complex epilog.
+
+  // We first peel off (number of stages - 1) epilogue blocks. Then we remove dead
+ // stages and reorder instructions based on their stage. If we have 3 stages
+ // we generate first:
+ // E0[3, 2, 1]
+ // E1[3', 2']
+ // E2[3'']
+ // And then we move instructions based on their stages to have:
+ // E0[3]
+ // E1[2, 3']
+ // E2[1, 2', 3'']
+ // The transformation is legal because we only move instructions past
+ // instructions of a previous loop iteration.
for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
- Epilogs.push_back(nullptr);
- for (int J = Schedule.getNumStages() - 1; J >= I; --J) {
- LS.reset();
- LS[J] = 1;
- Epilogs.back() = peelKernel(LPD_Back);
- LiveStages[Epilogs.back()] = LS;
- AvailableStages[Epilogs.back()] = AS;
+ Epilogs.push_back(peelKernel(LPD_Back));
+ MachineBasicBlock *B = Epilogs.back();
+ filterInstructions(B, Schedule.getNumStages() - I);
+ // Keep track at which iteration each phi belongs to. We need it to know
+ // what version of the variable to use during prologue/epilogue stitching.
+ EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
+ for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi)
+ PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I;
+ }
+ for (size_t I = 0; I < Epilogs.size(); I++) {
+ LS.reset();
+ for (size_t J = I; J < Epilogs.size(); J++) {
+ int Iteration = J;
+ unsigned Stage = Schedule.getNumStages() - 1 + I - J;
+ // Move stage one block at a time so that Phi nodes are updated correctly.
+ for (size_t K = Iteration; K > I; K--)
+ moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
+ LS[Stage] = 1;
}
+ LiveStages[Epilogs[I]] = LS;
+ AvailableStages[Epilogs[I]] = AS;
}
// Now we've defined all the prolog and epilog blocks as a fallthrough
@@ -1638,8 +1785,16 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
for (MachineInstr &MI : (*EI)->phis()) {
Register Reg = MI.getOperand(1).getReg();
MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
- if (Use && Use->getParent() == Pred)
+ if (Use && Use->getParent() == Pred) {
+ MachineInstr *CanonicalUse = CanonicalMIs[Use];
+ if (CanonicalUse->isPHI()) {
+          // If the use comes from a phi, we need to skip as many phis as the
+ // distance between the epilogue and the kernel. Trace through the phi
+ // chain to find the right value.
+ Reg = getPhiCanonicalReg(CanonicalUse, Use);
+ }
Reg = getEquivalentRegisterIn(Reg, *PI);
+ }
MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
MI.addOperand(MachineOperand::CreateMBB(*PI));
}
@@ -1659,6 +1814,13 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
rewriteUsesOf(MI);
}
}
+ for (auto *MI : IllegalPhisToDelete) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+ IllegalPhisToDelete.clear();
+
// Now all remapping has been done, we're free to optimize the generated code.
for (MachineBasicBlock *B : reverse(Blocks))
EliminateDeadPhis(B, MRI, LIS);
@@ -1727,9 +1889,10 @@ void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
R = MI->getOperand(1).getReg();
MRI.setRegClass(R, MRI.getRegClass(PhiR));
MRI.replaceRegWith(PhiR, R);
- if (LIS)
- LIS->RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
+ // Postpone deleting the Phi as it may be referenced by BlockMIs and used
+ // later to figure out how to remap registers.
+ MI->getOperand(0).setReg(PhiR);
+ IllegalPhisToDelete.push_back(MI);
return;
}
@@ -1759,10 +1922,6 @@ void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
}
void PeelingModuloScheduleExpander::fixupBranches() {
- std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info =
- TII->analyzeLoopForPipelining(BB);
- assert(Info);
-
// Work outwards from the kernel.
bool KernelDisposed = false;
int TC = Schedule.getNumStages() - 1;
@@ -1818,6 +1977,8 @@ void PeelingModuloScheduleExpander::expand() {
BB = Schedule.getLoop()->getTopBlock();
Preheader = Schedule.getLoop()->getLoopPreheader();
LLVM_DEBUG(Schedule.dump());
+ Info = TII->analyzeLoopForPipelining(BB);
+ assert(Info);
rewriteKernel();
peelPrologAndEpilogs();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
new file mode 100644
index 000000000000..9ed3471c0fc9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
@@ -0,0 +1,54 @@
+//===-- NonRelocatableStringpool.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/NonRelocatableStringpool.h"
+
+namespace llvm {
+
+DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) {
+ if (S.empty() && !Strings.empty())
+ return EmptyString;
+
+ if (Translator)
+ S = Translator(S);
+ auto I = Strings.insert({S, DwarfStringPoolEntry()});
+ auto &Entry = I.first->second;
+ if (I.second || !Entry.isIndexed()) {
+ Entry.Index = NumEntries++;
+ Entry.Offset = CurrentEndOffset;
+ Entry.Symbol = nullptr;
+ CurrentEndOffset += S.size() + 1;
+ }
+ return DwarfStringPoolEntryRef(*I.first, true);
+}
+
+StringRef NonRelocatableStringpool::internString(StringRef S) {
+ DwarfStringPoolEntry Entry{nullptr, 0, DwarfStringPoolEntry::NotIndexed};
+
+ if (Translator)
+ S = Translator(S);
+
+ auto InsertResult = Strings.insert({S, Entry});
+ return InsertResult.first->getKey();
+}
+
+std::vector<DwarfStringPoolEntryRef>
+NonRelocatableStringpool::getEntriesForEmission() const {
+ std::vector<DwarfStringPoolEntryRef> Result;
+ Result.reserve(Strings.size());
+ for (const auto &E : Strings)
+ if (E.getValue().isIndexed())
+ Result.emplace_back(E, true);
+ llvm::sort(Result, [](const DwarfStringPoolEntryRef A,
+ const DwarfStringPoolEntryRef B) {
+ return A.getIndex() < B.getIndex();
+ });
+ return Result;
+}
+
+} // namespace llvm
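The core bookkeeping of the pool above, reduced to the standard library: the first insertion of a string fixes its index and byte offset, and the running end offset grows by the string length plus one for the NUL terminator. A minimal sketch under those assumptions, not the real API:

#include <cstdio>
#include <map>
#include <string>

// Minimal uniquing string pool: repeated strings share one entry, and each
// new entry gets the next index and the current end offset, exactly as
// NumEntries and CurrentEndOffset do above.
struct StringPool {
  struct Entry {
    unsigned Index;
    unsigned Offset;
  };
  std::map<std::string, Entry> Strings;
  unsigned NumEntries = 0, CurrentEndOffset = 0;

  Entry getEntry(const std::string &S) {
    auto I = Strings.insert({S, Entry{0, 0}});
    if (I.second) {
      I.first->second = {NumEntries++, CurrentEndOffset};
      CurrentEndOffset += unsigned(S.size()) + 1; // +1 for the NUL byte.
    }
    return I.first->second;
  }
};

int main() {
  StringPool Pool;
  Pool.getEntry("main");                        // Index 0, offset 0.
  Pool.getEntry("foo");                         // Index 1, offset 5.
  StringPool::Entry E = Pool.getEntry("main");  // Uniqued: same entry.
  std::printf("index=%u offset=%u\n", E.Index, E.Offset);
  return 0;
}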
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp
index 1a493964e678..02a70ab801e9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include <cassert>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
index e4c73658cb4f..7dbd830666fb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
function_ref<std::unique_ptr<TargetMachine>()> TMFactory,
- TargetMachine::CodeGenFileType FileType) {
+ CodeGenFileType FileType) {
std::unique_ptr<TargetMachine> TM = TMFactory();
legacy::PassManager CodeGenPasses;
if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType))
@@ -38,7 +38,7 @@ std::unique_ptr<Module> llvm::splitCodeGen(
std::unique_ptr<Module> M, ArrayRef<llvm::raw_pwrite_stream *> OSs,
ArrayRef<llvm::raw_pwrite_stream *> BCOSs,
const std::function<std::unique_ptr<TargetMachine>()> &TMFactory,
- TargetMachine::CodeGenFileType FileType, bool PreserveLocals) {
+ CodeGenFileType FileType, bool PreserveLocals) {
assert(BCOSs.empty() || BCOSs.size() == OSs.size());
if (OSs.size() == 1) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
index 529fde84e39a..1d6069c50554 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -54,6 +55,15 @@ static bool doesNotGeneratecode(const MachineInstr &MI) {
}
bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getFunction().hasFnAttribute("patchable-function-entry")) {
+ MachineBasicBlock &FirstMBB = *MF.begin();
+ MachineInstr &FirstMI = *FirstMBB.begin();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
+ return true;
+ }
+
if (!MF.getFunction().hasFnAttribute("patchable-function"))
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 54f1d38ed106..c9c279cf0ddf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -84,6 +84,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
index 0a3838617bc5..4f88f4d3dd6a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 5bea9f2893c9..d68959935cec 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -77,7 +78,7 @@ AntiDepBreaker::~AntiDepBreaker() { }
namespace {
class PostRAScheduler : public MachineFunctionPass {
- const TargetInstrInfo *TII;
+ const TargetInstrInfo *TII = nullptr;
RegisterClassInfo RegClassInfo;
public:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 0d2f6f99ca96..1ff4e7cbd8fb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -12,14 +12,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
+#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -56,6 +58,17 @@ static bool lowerLoadRelative(Function &F) {
return Changed;
}
+// ObjCARC knows whether an Objective-C runtime function must always be
+// tail-called or must never be tail-called.
+static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) {
+ objcarc::ARCInstKind Kind = objcarc::GetFunctionClass(&F);
+ if (objcarc::IsAlwaysTail(Kind))
+ return CallInst::TCK_Tail;
+ else if (objcarc::IsNeverTail(Kind))
+ return CallInst::TCK_NoTail;
+ return CallInst::TCK_None;
+}
+
static bool lowerObjCCall(Function &F, const char *NewFn,
bool setNonLazyBind = false) {
if (F.use_empty())
@@ -75,6 +88,8 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
}
}
+ CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F);
+
for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
auto *CI = cast<CallInst>(I->getUser());
assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
@@ -84,7 +99,17 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
CallInst *NewCI = Builder.CreateCall(FCache, Args);
NewCI->setName(CI->getName());
- NewCI->setTailCallKind(CI->getTailCallKind());
+
+ // Try to set the most appropriate TailCallKind based on both the current
+ // attributes and the ones that we could get from ObjCARC's special
+ // knowledge of the runtime functions.
+ //
+ // std::max respects both requirements of notail and tail here:
+ // * notail on either the call or from ObjCARC becomes notail
+ // * tail on either side is stronger than none, but not notail
+ CallInst::TailCallKind TCK = CI->getTailCallKind();
+ NewCI->setTailCallKind(std::max(TCK, OverridingTCK));
+
if (!CI->use_empty())
CI->replaceAllUsesWith(NewCI);
CI->eraseFromParent();
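The std::max trick in the hunk above relies on the numeric ordering of CallInst::TailCallKind, in which "none" is weakest and "notail" is strongest, so the maximum of the two sides keeps the stronger requirement. A self-contained illustration with a mirror enum (the values are illustrative; the real enum lives in llvm/IR/Instructions.h):

#include <algorithm>
#include <cstdio>

// Illustrative mirror of the TailCallKind ordering (None < Tail < MustTail
// < NoTail): notail from either side wins, and tail beats none.
enum TailCallKind {
  TCK_None = 0,
  TCK_Tail = 1,
  TCK_MustTail = 2,
  TCK_NoTail = 3
};

TailCallKind combine(TailCallKind FromCall, TailCallKind FromARC) {
  return std::max(FromCall, FromARC);
}

int main() {
  std::printf("%d\n", combine(TCK_Tail, TCK_None));   // 1: tail survives.
  std::printf("%d\n", combine(TCK_Tail, TCK_NoTail)); // 3: notail wins.
  return 0;
}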
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 11bff45f9ad5..ed19f7448151 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 729f06dda62b..3909b5717281 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -51,6 +51,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 2850033e6419..3c1f9905afd0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -132,8 +133,6 @@ void ReachingDefAnalysis::processBasicBlock(
}
bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) {
- if (skipFunction(mf.getFunction()))
- return false;
MF = &mf;
TRI = MF->getSubtarget().getRegisterInfo();
@@ -189,7 +188,145 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) {
return LatestDef;
}
+MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, int PhysReg) {
+ return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg));
+}
+
+bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B,
+ int PhysReg) {
+ MachineBasicBlock *ParentA = A->getParent();
+ MachineBasicBlock *ParentB = B->getParent();
+ if (ParentA != ParentB)
+ return false;
+
+ return getReachingDef(A, PhysReg) == getReachingDef(B, PhysReg);
+}
+
+MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB,
+ int InstId) {
+ assert(static_cast<size_t>(MBB->getNumber()) < MBBReachingDefs.size() &&
+ "Unexpected basic block number.");
+ assert(InstId < static_cast<int>(MBB->size()) &&
+ "Unexpected instruction id.");
+
+ if (InstId < 0)
+ return nullptr;
+
+ for (auto &MI : *MBB) {
+ if (InstIds.count(&MI) && InstIds[&MI] == InstId)
+ return &MI;
+ }
+ return nullptr;
+}
+
int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) {
assert(InstIds.count(MI) && "Unexpected machine instruction.");
return InstIds[MI] - getReachingDef(MI, PhysReg);
}
+
+void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg,
+ SmallVectorImpl<MachineInstr*> &Uses) {
+ MachineBasicBlock *MBB = Def->getParent();
+ MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def);
+ while (++MI != MBB->end()) {
+    // If/when we find a new reaching def, we know that there are no more
+    // uses of 'Def'.
+ if (getReachingMIDef(&*MI, PhysReg) != Def)
+ return;
+
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != PhysReg)
+ continue;
+
+ Uses.push_back(&*MI);
+ if (MO.isKill())
+ return;
+ }
+ }
+}
+
+unsigned ReachingDefAnalysis::getNumUses(MachineInstr *Def, int PhysReg) {
+ SmallVector<MachineInstr*, 4> Uses;
+ getReachingLocalUses(Def, PhysReg, Uses);
+ return Uses.size();
+}
+
+bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) {
+ MachineBasicBlock *MBB = MI->getParent();
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+
+ // Yes if the register is live out of the basic block.
+ if (LiveRegs.contains(PhysReg))
+ return true;
+
+ // Walk backwards through the block to see if the register is live at some
+ // point.
+ for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) {
+ LiveRegs.stepBackward(*Last);
+ if (LiveRegs.contains(PhysReg))
+ return InstIds[&*Last] > InstIds[MI];
+ }
+ return false;
+}
+
+bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) {
+ MachineBasicBlock *MBB = MI->getParent();
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+ if (!LiveRegs.contains(PhysReg))
+ return false;
+
+ MachineInstr *Last = &MBB->back();
+ int Def = getReachingDef(MI, PhysReg);
+ if (getReachingDef(Last, PhysReg) != Def)
+ return false;
+
+ // Finally check that the last instruction doesn't redefine the register.
+ for (auto &MO : Last->operands())
+ if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg)
+ return false;
+
+ return true;
+}
+
+MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
+ int PhysReg) {
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+ if (!LiveRegs.contains(PhysReg))
+ return nullptr;
+
+ MachineInstr *Last = &MBB->back();
+ int Def = getReachingDef(Last, PhysReg);
+ for (auto &MO : Last->operands())
+ if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg)
+ return Last;
+
+ return Def < 0 ? nullptr : getInstFromId(MBB, Def);
+}
+
+MachineInstr *ReachingDefAnalysis::getInstWithUseBefore(MachineInstr *MI,
+ int PhysReg) {
+ auto I = MachineBasicBlock::reverse_iterator(MI);
+ auto E = MI->getParent()->rend();
+  ++I;
+
+  for (; I != E; ++I)
+ for (auto &MO : I->operands())
+ if (MO.isReg() && MO.isUse() && MO.getReg() == PhysReg)
+ return &*I;
+
+ return nullptr;
+}
+
+void ReachingDefAnalysis::getAllInstWithUseBefore(MachineInstr *MI,
+ int PhysReg, SmallVectorImpl<MachineInstr*> &Uses) {
+ MachineInstr *Use = nullptr;
+ MachineInstr *Pos = MI;
+
+ while ((Use = getInstWithUseBefore(Pos, PhysReg))) {
+ Uses.push_back(Use);
+ Pos = Use;
+ }
+}
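
A hedged sketch of how a client pass might drive the queries added above; the helper and its policy are hypothetical, only the ReachingDefAnalysis calls mirror the new interface:

    #include "llvm/CodeGen/ReachingDefAnalysis.h"
    using namespace llvm;

    // Hypothetical helper: the value Def writes to PhysReg has no local
    // readers and is not consumed later in the block.
    static bool hasNoReaders(ReachingDefAnalysis &RDA, MachineInstr *Def,
                             int PhysReg) {
      return RDA.getNumUses(Def, PhysReg) == 0 && // uses reached by Def
             !RDA.isRegUsedAfter(Def, PhysReg);   // liveness past Def
    }
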
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index 44d0233604e7..89b5bcebd61c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -82,12 +83,12 @@ namespace {
/// Everything we know about a live virtual register.
struct LiveReg {
MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
- unsigned VirtReg; ///< Virtual register number.
+ Register VirtReg; ///< Virtual register number.
MCPhysReg PhysReg = 0; ///< Currently held here.
unsigned short LastOpNum = 0; ///< OpNum on LastUse.
bool Dirty = false; ///< Register needs spill.
- explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {}
+ explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {}
unsigned getSparseSetIndex() const {
return Register::virtReg2Index(VirtReg);
@@ -128,7 +129,7 @@ namespace {
/// Maps each physical register to a RegState enum or a virtual register.
std::vector<unsigned> PhysRegState;
- SmallVector<unsigned, 16> VirtDead;
+ SmallVector<Register, 16> VirtDead;
SmallVector<MachineInstr *, 32> Coalesced;
using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
@@ -184,14 +185,14 @@ namespace {
void allocateInstruction(MachineInstr &MI);
void handleDebugValue(MachineInstr &MI);
void handleThroughOperands(MachineInstr &MI,
- SmallVectorImpl<unsigned> &VirtDead);
+ SmallVectorImpl<Register> &VirtDead);
bool isLastUseOfLocalReg(const MachineOperand &MO) const;
void addKillFlag(const LiveReg &LRI);
void killVirtReg(LiveReg &LR);
- void killVirtReg(unsigned VirtReg);
+ void killVirtReg(Register VirtReg);
void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
- void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+ void spillVirtReg(MachineBasicBlock::iterator MI, Register VirtReg);
void usePhysReg(MachineOperand &MO);
void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
@@ -199,34 +200,34 @@ namespace {
unsigned calcSpillCost(MCPhysReg PhysReg) const;
void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
- LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
+ LiveRegMap::iterator findLiveVirtReg(Register VirtReg) {
return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
}
- LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
+ LiveRegMap::const_iterator findLiveVirtReg(Register VirtReg) const {
return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
}
- void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
+ void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint);
void allocVirtRegUndef(MachineOperand &MO);
- MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
- unsigned Hint);
- LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
- unsigned Hint);
+ MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ Register Hint);
+ LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ Register Hint);
void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut);
bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
- unsigned traceCopies(unsigned VirtReg) const;
- unsigned traceCopyChain(unsigned Reg) const;
+ Register traceCopies(Register VirtReg) const;
+ Register traceCopyChain(Register Reg) const;
- int getStackSpaceFor(unsigned VirtReg);
- void spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ int getStackSpaceFor(Register VirtReg);
+ void spill(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg AssignedReg, bool Kill);
- void reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ void reload(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg PhysReg);
- bool mayLiveOut(unsigned VirtReg);
- bool mayLiveIn(unsigned VirtReg);
+ bool mayLiveOut(Register VirtReg);
+ bool mayLiveIn(Register VirtReg);
void dumpState();
};
@@ -244,7 +245,7 @@ void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
/// This allocates space for the specified virtual register to be held on the
/// stack.
-int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
+int RegAllocFast::getStackSpaceFor(Register VirtReg) {
// Find the location Reg would belong...
int SS = StackSlotForVirtReg[VirtReg];
// Already has space allocated?
@@ -263,7 +264,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
}
/// Returns false if \p VirtReg is known to not live out of the current block.
-bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
+bool RegAllocFast::mayLiveOut(Register VirtReg) {
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
// Cannot be live-out if there are no successors.
return !MBB->succ_empty();
@@ -292,7 +293,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
}
/// Returns false if \p VirtReg is known to not be live into the current block.
-bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
+bool RegAllocFast::mayLiveIn(Register VirtReg) {
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
return !MBB->pred_empty();
@@ -311,7 +312,7 @@ bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
/// Insert spill instruction for \p AssignedReg before \p Before. Update
/// DBG_VALUEs with \p VirtReg operands with the stack slot.
-void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
+void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg AssignedReg, bool Kill) {
LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI)
<< " in " << printReg(AssignedReg, TRI));
@@ -339,7 +340,7 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
}
/// Insert reload instruction for \p PhysReg before \p Before.
-void RegAllocFast::reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
+void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg PhysReg) {
LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
<< printReg(PhysReg, TRI) << '\n');
@@ -393,7 +394,7 @@ void RegAllocFast::killVirtReg(LiveReg &LR) {
}
/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(unsigned VirtReg) {
+void RegAllocFast::killVirtReg(Register VirtReg) {
assert(Register::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
@@ -404,7 +405,7 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) {
/// This method spills the value specified by VirtReg into the corresponding
/// stack slot if needed.
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
- unsigned VirtReg) {
+ Register VirtReg) {
assert(Register::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
@@ -456,7 +457,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
return;
Register PhysReg = MO.getReg();
- assert(Register::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand");
+ assert(PhysReg.isPhysical() && "Bad usePhysReg operand");
markRegUsedInInstr(PhysReg);
switch (PhysRegState[PhysReg]) {
@@ -520,7 +521,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
MCPhysReg PhysReg, RegState NewState) {
markRegUsedInInstr(PhysReg);
- switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ switch (Register VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
break;
default:
@@ -536,7 +537,7 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
setPhysRegState(PhysReg, NewState);
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
MCPhysReg Alias = *AI;
- switch (unsigned VirtReg = PhysRegState[Alias]) {
+ switch (Register VirtReg = PhysRegState[Alias]) {
case regDisabled:
break;
default:
@@ -562,7 +563,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
<< " is already used in instr.\n");
return spillImpossible;
}
- switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ switch (Register VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
break;
case regFree:
@@ -584,7 +585,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
unsigned Cost = 0;
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
MCPhysReg Alias = *AI;
- switch (unsigned VirtReg = PhysRegState[Alias]) {
+ switch (Register VirtReg = PhysRegState[Alias]) {
case regDisabled:
break;
case regFree:
@@ -608,7 +609,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
/// proper container for VirtReg now. The physical register must not be used
/// for anything else when this is called.
void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
- unsigned VirtReg = LR.VirtReg;
+ Register VirtReg = LR.VirtReg;
LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to "
<< printReg(PhysReg, TRI) << '\n');
assert(LR.PhysReg == 0 && "Already assigned a physreg");
@@ -621,13 +622,13 @@ static bool isCoalescable(const MachineInstr &MI) {
return MI.isFullCopy();
}
-unsigned RegAllocFast::traceCopyChain(unsigned Reg) const {
+Register RegAllocFast::traceCopyChain(Register Reg) const {
static const unsigned ChainLengthLimit = 3;
unsigned C = 0;
do {
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
return Reg;
- assert(Register::isVirtualRegister(Reg));
+ assert(Reg.isVirtual());
MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg);
if (!VRegDef || !isCoalescable(*VRegDef))
@@ -640,26 +641,26 @@ unsigned RegAllocFast::traceCopyChain(unsigned Reg) const {
/// Check if any of \p VirtReg's definitions is a copy. If it is follow the
/// chain of copies to check whether we reach a physical register we can
/// coalesce with.
-unsigned RegAllocFast::traceCopies(unsigned VirtReg) const {
+Register RegAllocFast::traceCopies(Register VirtReg) const {
static const unsigned DefLimit = 3;
unsigned C = 0;
for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) {
if (isCoalescable(MI)) {
Register Reg = MI.getOperand(1).getReg();
Reg = traceCopyChain(Reg);
- if (Reg != 0)
+ if (Reg.isValid())
return Reg;
}
if (++C >= DefLimit)
break;
}
- return 0;
+ return Register();
}
/// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
- const unsigned VirtReg = LR.VirtReg;
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
+ const Register VirtReg = LR.VirtReg;
assert(Register::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
@@ -670,7 +671,7 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
<< " with hint " << printReg(Hint0, TRI) << '\n');
// Take hint when possible.
- if (Register::isPhysicalRegister(Hint0) && MRI->isAllocatable(Hint0) &&
+ if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) &&
RC.contains(Hint0)) {
// Ignore the hint if we would have to spill a dirty register.
unsigned Cost = calcSpillCost(Hint0);
@@ -686,12 +687,12 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
<< "occupied\n");
}
} else {
- Hint0 = 0;
+ Hint0 = Register();
}
// Try other hint.
- unsigned Hint1 = traceCopies(VirtReg);
- if (Register::isPhysicalRegister(Hint1) && MRI->isAllocatable(Hint1) &&
+ Register Hint1 = traceCopies(VirtReg);
+ if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) &&
RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) {
// Ignore the hint if we would have to spill a dirty register.
unsigned Cost = calcSpillCost(Hint1);
@@ -707,7 +708,7 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
<< "occupied\n");
}
} else {
- Hint1 = 0;
+ Hint1 = Register();
}
MCPhysReg BestReg = 0;
@@ -775,14 +776,14 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
/// Allocates a register for VirtReg and mark it as dirty.
MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint) {
+ Register VirtReg, Register Hint) {
assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
if (!LRI->PhysReg) {
// If there is no hint, peek at the only use of this register.
- if ((!Hint || !Register::isPhysicalRegister(Hint)) &&
+ if ((!Hint || !Hint.isPhysical()) &&
MRI->hasOneNonDBGUse(VirtReg)) {
const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg);
// It's a copy, use the destination register as a hint.
@@ -807,8 +808,8 @@ MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
/// Make sure VirtReg is available in a physreg and return it.
RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+ Register VirtReg,
+ Register Hint) {
assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
@@ -884,13 +885,13 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
// Handles special instruction operand like early clobbers and tied ops when
// there are additional physreg defines.
void RegAllocFast::handleThroughOperands(MachineInstr &MI,
- SmallVectorImpl<unsigned> &VirtDead) {
+ SmallVectorImpl<Register> &VirtDead) {
LLVM_DEBUG(dbgs() << "Scanning for through registers:");
- SmallSet<unsigned, 8> ThroughRegs;
+ SmallSet<Register, 8> ThroughRegs;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||
(MO.getSubReg() && MI.readsVirtualRegister(Reg))) {
@@ -905,7 +906,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
Register Reg = MO.getReg();
- if (!Reg || !Register::isPhysicalRegister(Reg))
+ if (!Reg || !Reg.isPhysical())
continue;
markRegUsedInInstr(Reg);
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
@@ -914,7 +915,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
}
}
- SmallVector<unsigned, 8> PartialDefs;
+ SmallVector<Register, 8> PartialDefs;
LLVM_DEBUG(dbgs() << "Allocating tied uses.\n");
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
MachineOperand &MO = MI.getOperand(I);
@@ -961,7 +962,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
Register Reg = MO.getReg();
- if (!Reg || !Register::isPhysicalRegister(Reg))
+ if (!Reg || !Reg.isPhysical())
continue;
LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
<< " as used in instr\n");
@@ -969,7 +970,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
}
// Also mark PartialDefs as used to avoid reallocation.
- for (unsigned PartialDef : PartialDefs)
+ for (Register PartialDef : PartialDefs)
markRegUsedInInstr(PartialDef);
}
@@ -1002,7 +1003,7 @@ void RegAllocFast::dumpState() {
e = LiveVirtRegs.end(); i != e; ++i) {
if (!i->PhysReg)
continue;
- assert(Register::isVirtualRegister(i->VirtReg) && "Bad map key");
+ assert(i->VirtReg.isVirtual() && "Bad map key");
assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value");
assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
}
@@ -1013,8 +1014,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
const MCInstrDesc &MCID = MI.getDesc();
// If this is a copy, we may be able to coalesce.
- unsigned CopySrcReg = 0;
- unsigned CopyDstReg = 0;
+ Register CopySrcReg;
+ Register CopyDstReg;
unsigned CopySrcSub = 0;
unsigned CopyDstSub = 0;
if (MI.isCopy()) {
@@ -1082,7 +1083,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
(hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
handleThroughOperands(MI, VirtDead);
// Don't attempt coalescing when we have funny stuff going on.
- CopyDstReg = 0;
+ CopyDstReg = Register();
// Pretend we have early clobbers so the use operands get marked below.
// This is not necessary for the common case of a single tied use.
hasEarlyClobbers = true;
@@ -1095,7 +1096,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
if (MO.isUse()) {
if (MO.isUndef()) {
@@ -1124,7 +1125,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
assert(MO.isUndef() && "Should only have undef virtreg uses left");
@@ -1139,7 +1140,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
- if (!Reg || !Register::isPhysicalRegister(Reg))
+ if (!Reg || !Reg.isPhysical())
continue;
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MO.isTied()) continue;
@@ -1168,7 +1169,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
continue;
Register Reg = MO.getReg();
- if (!Reg || !Register::isPhysicalRegister(Reg) || !MRI->isAllocatable(Reg))
+ if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg))
continue;
definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
}
@@ -1182,12 +1183,12 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
Register Reg = MO.getReg();
// We have already dealt with phys regs in the previous scan.
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
continue;
MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
VirtDead.push_back(Reg);
- CopyDstReg = 0; // cancel coalescing;
+ CopyDstReg = Register(); // cancel coalescing;
} else
CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
}
@@ -1196,7 +1197,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// register are allocated identically. We didn't need to do this for uses
// because we are creating our own kill flags, and they are always at the
// last use.
- for (unsigned VirtReg : VirtDead)
+ for (Register VirtReg : VirtDead)
killVirtReg(VirtReg);
VirtDead.clear();
@@ -1234,7 +1235,7 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
// We can't allocate a physreg for a DebugValue, sorry!
LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
- MO.setReg(0);
+ MO.setReg(Register());
}
// If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
@@ -1252,7 +1253,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MII = MBB.begin();
// Add live-in registers as live.
- for (const MachineBasicBlock::RegisterMaskPair LI : MBB.liveins())
+ for (const MachineBasicBlock::RegisterMaskPair &LI : MBB.liveins())
if (MRI->isAllocatable(LI.PhysReg))
definePhysReg(MII, LI.PhysReg, regReserved);
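
The unsigned-to-Register migration above is almost purely mechanical because Register converts implicitly to and from the raw register number; a simplified sketch of the assumed surface of llvm/CodeGen/Register.h (the bit encoding shown is an illustration, not the authoritative layout):

    class Register {
      unsigned Reg = 0;
      static const unsigned VirtualBit = 1u << 31; // assumed encoding
    public:
      constexpr Register(unsigned R = 0) : Reg(R) {}
      constexpr operator unsigned() const { return Reg; } // old code compiles
      bool isValid() const { return Reg != 0; }           // replaces Reg != 0
      bool isVirtual() const { return (Reg & VirtualBit) != 0; }
      bool isPhysical() const { return Reg != 0 && !(Reg & VirtualBit); }
    };
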
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index d27db678f02a..27de7fe45887 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -3126,6 +3126,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+  // Tell LiveDebugVariables about the new ranges. Ranges not covered by
+ // the new regs are kept in LDV (still mapping to the old register), until
+ // we rewrite spilled locations in LDV at a later stage.
+ DebugVars->splitRegister(VirtReg.reg, LRE.regs(), *LIS);
+
if (VerifyEnabled)
MF->verify(this, "After spilling");
}
@@ -3220,8 +3225,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
MF->getSubtarget().enableRALocalReassignment(
MF->getTarget().getOptLevel());
- EnableAdvancedRASplitCost = ConsiderLocalIntervalCost ||
- MF->getSubtarget().enableAdvancedRASplitCost();
+ EnableAdvancedRASplitCost =
+ ConsiderLocalIntervalCost.getNumOccurrences()
+ ? ConsiderLocalIntervalCost
+ : MF->getSubtarget().enableAdvancedRASplitCost();
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
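
The EnableAdvancedRASplitCost change is the standard cl::opt override idiom: honor the flag only when the user actually passed it, otherwise defer to the subtarget hook. A standalone sketch with a hypothetical flag name:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    static cl::opt<bool> SomeFeature("some-feature", cl::Hidden,
                                     cl::init(false)); // name hypothetical

    static bool effectiveSetting(bool TargetDefault) {
      // getNumOccurrences() is nonzero only if the option appeared on the
      // command line, so an explicit -some-feature=0/1 always overrides.
      return SomeFeature.getNumOccurrences() ? SomeFeature : TargetDefault;
    }
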
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index 757ff0e44953..5a79ac44dcf4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -56,7 +56,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- // Call determineCalleeSaves and then also set the bits for subregs and
+ // Call getCalleeSaves and then also set the bits for subregs and
// fully saved superregs.
static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF);
@@ -199,7 +199,7 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
// Target will return the set of registers that it saves/restores as needed.
SavedRegs.clear();
- TFI.determineCalleeSaves(MF, SavedRegs);
+ TFI.getCalleeSaves(MF, SavedRegs);
if (SavedRegs.none())
return;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 530e0cccf1d4..1523bd4d1649 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -59,7 +59,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
if (Update || CSR != CalleeSavedRegs) {
// Build a CSRAlias map. Every CSR alias saves the last
// overlapping CSR.
- CalleeSavedAliases.resize(TRI->getNumRegs(), 0);
+ CalleeSavedAliases.assign(TRI->getNumRegs(), 0);
for (const MCPhysReg *I = CSR; *I; ++I)
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
CalleeSavedAliases[*AI] = *I;
@@ -186,6 +186,7 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
NumRCUnits = NUnits;
}
}
+ assert(RC && "Failed to find register class");
compute(RC);
unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC);
return TRI->getRegPressureSetLimit(*MF, Idx) -
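
The resize-to-assign switch matters because RegisterClassInfo persists across functions: when the vector already has TRI->getNumRegs() elements, resize() leaves every element untouched, so aliases recorded for the previous function's CSR set would survive, while assign() rewrites all of them. A minimal illustration (shown with std::vector; SmallVector behaves the same way here):

    #include <vector>
    void demo() {
      std::vector<unsigned> V{7, 8, 9};
      V.resize(3, 0); // size is already 3: nothing changes, stale data stays
      V.assign(3, 0); // every element is overwritten with 0
    }
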
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 6ff5ddbc023d..a3f75d82d0ec 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -119,32 +120,46 @@ static cl::opt<unsigned> LargeIntervalFreqThreshold(
namespace {
+ class JoinVals;
+
class RegisterCoalescer : public MachineFunctionPass,
private LiveRangeEdit::Delegate {
- MachineFunction* MF;
- MachineRegisterInfo* MRI;
- const TargetRegisterInfo* TRI;
- const TargetInstrInfo* TII;
- LiveIntervals *LIS;
- const MachineLoopInfo* Loops;
- AliasAnalysis *AA;
+ MachineFunction* MF = nullptr;
+ MachineRegisterInfo* MRI = nullptr;
+ const TargetRegisterInfo* TRI = nullptr;
+ const TargetInstrInfo* TII = nullptr;
+ LiveIntervals *LIS = nullptr;
+ const MachineLoopInfo* Loops = nullptr;
+ AliasAnalysis *AA = nullptr;
RegisterClassInfo RegClassInfo;
+ /// Debug variable location tracking -- for each VReg, maintain an
+    /// ordered-by-slot-index set of DBG_VALUEs, to allow quick
+    /// identification of whether coalescing may change location validity.
+ using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
+ DenseMap<unsigned, std::vector<DbgValueLoc>> DbgVRegToValues;
+
+ /// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached.
+ /// To avoid repeatedly merging sets of DbgValueLocs, instead record
+ /// which vregs have been coalesced, and where to. This map is from
+ /// vreg => {set of vregs merged in}.
+ DenseMap<unsigned, SmallVector<unsigned, 4>> DbgMergedVRegNums;
+
/// A LaneMask to remember on which subregister live ranges we need to call
/// shrinkToUses() later.
LaneBitmask ShrinkMask;
/// True if the main range of the currently coalesced intervals should be
/// checked for smaller live intervals.
- bool ShrinkMainRange;
+ bool ShrinkMainRange = false;
/// True if the coalescer should aggressively coalesce global copies
/// in favor of keeping local copies.
- bool JoinGlobalCopies;
+ bool JoinGlobalCopies = false;
/// True if the coalescer should aggressively coalesce fall-thru
/// blocks exclusively containing copies.
- bool JoinSplitEdges;
+ bool JoinSplitEdges = false;
/// Copy instructions yet to be coalesced.
SmallVector<MachineInstr*, 8> WorkList;
@@ -225,7 +240,8 @@ namespace {
/// @p ToMerge will occupy in the coalesced register. @p LI has its subrange
/// lanemasks already adjusted to the coalesced register.
void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
- LaneBitmask LaneMask, CoalescerPair &CP);
+ LaneBitmask LaneMask, CoalescerPair &CP,
+ unsigned DstIdx);
/// Join the liveranges of two subregisters. Joins @p RRange into
/// @p LRange, @p RRange may be invalid afterwards.
@@ -325,6 +341,19 @@ namespace {
MI->eraseFromParent();
}
+ /// Walk over function and initialize the DbgVRegToValues map.
+ void buildVRegToDbgValueMap(MachineFunction &MF);
+
+ /// Test whether, after merging, any DBG_VALUEs would refer to a
+ /// different value number than before merging, and whether this can
+ /// be resolved. If not, mark the DBG_VALUE as being undef.
+ void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS,
+ JoinVals &LHSVals, LiveRange &RHS,
+ JoinVals &RHSVals);
+
+ void checkMergingChangesDbgValuesImpl(unsigned Reg, LiveRange &OtherRange,
+ LiveRange &RegRange, JoinVals &Vals2);
+
public:
static char ID; ///< Class identification, replacement for typeinfo
@@ -1648,8 +1677,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
}
-void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
- unsigned DstReg,
+void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg,
unsigned SubIdx) {
bool DstIsPhys = Register::isPhysicalRegister(DstReg);
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
@@ -1705,8 +1733,15 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
if (!DstInt->hasSubRanges()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
- DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
+ LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
+ DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
+ // The unused lanes are just empty live-ranges at this point.
+      // It is the caller's responsibility to set the proper
+ // dead segments if there is an actual dead def of the
+ // unused lanes. This may happen with rematerialization.
+ DstInt->createSubRange(Allocator, UnusedLanes);
}
SlotIndex MIIdx = UseMI->isDebugValue()
? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
@@ -2195,6 +2230,7 @@ class JoinVals {
/// NewVNInfo. This is suitable for passing to LiveInterval::join().
SmallVector<int, 8> Assignments;
+ public:
/// Conflict resolution for overlapping values.
enum ConflictResolution {
/// No overlap, simply keep this value.
@@ -2223,6 +2259,7 @@ class JoinVals {
CR_Impossible
};
+ private:
/// Per-value info for LI. The lane bit masks are all relative to the final
/// joined register, so they can be compared directly between SrcReg and
/// DstReg.
@@ -2383,6 +2420,11 @@ public:
/// Get the value assignments suitable for passing to LiveInterval::join.
const int *getAssignments() const { return Assignments.data(); }
+
+ /// Get the conflict resolution for a value number.
+ ConflictResolution getResolution(unsigned Num) const {
+ return Vals[Num].Resolution;
+ }
};
} // end anonymous namespace
@@ -3115,7 +3157,8 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
LiveInterval *LI) {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
// Get the def location before markUnused() below invalidates it.
- SlotIndex Def = LR.getValNumInfo(i)->def;
+ VNInfo *VNI = LR.getValNumInfo(i);
+ SlotIndex Def = VNI->def;
switch (Vals[i].Resolution) {
case CR_Keep: {
// If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
@@ -3131,8 +3174,6 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
// In such cases, removing this def from the main range must be
// complemented by extending the main range to account for the liveness
// of the other subrange.
- VNInfo *VNI = LR.getValNumInfo(i);
- SlotIndex Def = VNI->def;
// The new end point of the main range segment to be extended.
SlotIndex NewEnd;
if (LI != nullptr) {
@@ -3272,7 +3313,8 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
const LiveRange &ToMerge,
LaneBitmask LaneMask,
- CoalescerPair &CP) {
+ CoalescerPair &CP,
+ unsigned ComposeSubRegIdx) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
LI.refineSubRanges(
Allocator, LaneMask,
@@ -3285,7 +3327,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
}
},
- *LIS->getSlotIndexes(), *TRI);
+ *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx);
}
bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
@@ -3351,12 +3393,12 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
if (!RHS.hasSubRanges()) {
LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
: TRI->getSubRegIndexLaneMask(SrcIdx);
- mergeSubRangeInto(LHS, RHS, Mask, CP);
+ mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx);
} else {
// Pair up subranges and merge.
for (LiveInterval::SubRange &R : RHS.subranges()) {
LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
- mergeSubRangeInto(LHS, R, Mask, CP);
+ mergeSubRangeInto(LHS, R, Mask, CP, DstIdx);
}
}
LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
@@ -3385,6 +3427,9 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
while (!ShrinkRegs.empty())
shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+ // Scan and mark undef any DBG_VALUEs that would refer to a different value.
+ checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);
+
// Join RHS into LHS.
LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
@@ -3416,6 +3461,140 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
}
+void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF) {
+ const SlotIndexes &Slots = *LIS->getSlotIndexes();
+ SmallVector<MachineInstr *, 8> ToInsert;
+
+ // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
+ // vreg => DbgValueLoc map.
+ auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
+ for (auto *X : ToInsert)
+ DbgVRegToValues[X->getOperand(0).getReg()].push_back({Slot, X});
+
+ ToInsert.clear();
+ };
+
+ // Iterate over all instructions, collecting them into the ToInsert vector.
+ // Once a non-debug instruction is found, record the slot index of the
+ // collected DBG_VALUEs.
+ for (auto &MBB : MF) {
+ SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
+
+ for (auto &MI : MBB) {
+ if (MI.isDebugValue() && MI.getOperand(0).isReg() &&
+ MI.getOperand(0).getReg().isVirtual()) {
+ ToInsert.push_back(&MI);
+ } else if (!MI.isDebugInstr()) {
+ CurrentSlot = Slots.getInstructionIndex(MI);
+ CloseNewDVRange(CurrentSlot);
+ }
+ }
+
+ // Close range of DBG_VALUEs at the end of blocks.
+ CloseNewDVRange(Slots.getMBBEndIdx(&MBB));
+ }
+
+ // Sort all DBG_VALUEs we've seen by slot number.
+ for (auto &Pair : DbgVRegToValues)
+ llvm::sort(Pair.second);
+}
+
+void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
+ LiveRange &LHS,
+ JoinVals &LHSVals,
+ LiveRange &RHS,
+ JoinVals &RHSVals) {
+ auto ScanForDstReg = [&](unsigned Reg) {
+ checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
+ };
+
+ auto ScanForSrcReg = [&](unsigned Reg) {
+ checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
+ };
+
+ // Scan for potentially unsound DBG_VALUEs: examine first the register number
+ // Reg, and then any other vregs that may have been merged into it.
+ auto PerformScan = [this](unsigned Reg, std::function<void(unsigned)> Func) {
+ Func(Reg);
+ if (DbgMergedVRegNums.count(Reg))
+ for (unsigned X : DbgMergedVRegNums[Reg])
+ Func(X);
+ };
+
+ // Scan for unsound updates of both the source and destination register.
+ PerformScan(CP.getSrcReg(), ScanForSrcReg);
+ PerformScan(CP.getDstReg(), ScanForDstReg);
+}
+
+void RegisterCoalescer::checkMergingChangesDbgValuesImpl(unsigned Reg,
+ LiveRange &OtherLR,
+ LiveRange &RegLR,
+ JoinVals &RegVals) {
+ // Are there any DBG_VALUEs to examine?
+ auto VRegMapIt = DbgVRegToValues.find(Reg);
+ if (VRegMapIt == DbgVRegToValues.end())
+ return;
+
+ auto &DbgValueSet = VRegMapIt->second;
+ auto DbgValueSetIt = DbgValueSet.begin();
+ auto SegmentIt = OtherLR.begin();
+
+ bool LastUndefResult = false;
+ SlotIndex LastUndefIdx;
+
+ // If the "Other" register is live at a slot Idx, test whether Reg can
+ // safely be merged with it, or should be marked undef.
+ auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult,
+ &LastUndefIdx](SlotIndex Idx) -> bool {
+    // Our worst-case performance typically arises with ASan, which produces
+    // very many DBG_VALUEs of the same location. Cache a copy of the most
+    // recent result for this edge case.
+ if (LastUndefIdx == Idx)
+ return LastUndefResult;
+
+ // If the other range was live, and Reg's was not, the register coalescer
+ // will not have tried to resolve any conflicts. We don't know whether
+ // the DBG_VALUE will refer to the same value number, so it must be made
+ // undef.
+ auto OtherIt = RegLR.find(Idx);
+ if (OtherIt == RegLR.end())
+ return true;
+
+ // Both the registers were live: examine the conflict resolution record for
+ // the value number Reg refers to. CR_Keep meant that this value number
+ // "won" and the merged register definitely refers to that value. CR_Erase
+ // means the value number was a redundant copy of the other value, which
+ // was coalesced and Reg deleted. It's safe to refer to the other register
+ // (which will be the source of the copy).
+ auto Resolution = RegVals.getResolution(OtherIt->valno->id);
+ LastUndefResult = Resolution != JoinVals::CR_Keep &&
+ Resolution != JoinVals::CR_Erase;
+ LastUndefIdx = Idx;
+ return LastUndefResult;
+ };
+
+ // Iterate over both the live-range of the "Other" register, and the set of
+ // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest
+ // slot index. This relies on the DbgValueSet being ordered.
+ while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) {
+ if (DbgValueSetIt->first < SegmentIt->end) {
+ // "Other" is live and there is a DBG_VALUE of Reg: test if we should
+ // set it undef.
+ if (DbgValueSetIt->first >= SegmentIt->start &&
+ DbgValueSetIt->second->getOperand(0).getReg() != 0 &&
+ ShouldUndef(DbgValueSetIt->first)) {
+ // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
+ DbgValueSetIt->second->getOperand(0).setReg(0);
+ continue;
+ }
+ ++DbgValueSetIt;
+ } else {
+ ++SegmentIt;
+ }
+ }
+}
+
namespace {
/// Information concerning MBB coalescing priority.
@@ -3698,6 +3877,10 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
if (VerifyCoalescing)
MF->verify(this, "Before register coalescing");
+ DbgVRegToValues.clear();
+ DbgMergedVRegNums.clear();
+ buildVRegToDbgValueMap(fn);
+
RegClassInfo.runOnMachineFunction(fn);
// Join (coalesce) intervals if requested.
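
The core of checkMergingChangesDbgValuesImpl is a classic merge-walk over two sorted sequences; a generic, self-contained sketch of the pattern (all names hypothetical), with events as points and segments as half-open [Start, End) ranges:

    #include <vector>
    struct Event { int Idx; };
    struct Segment { int Start, End; };

    // Visit each event that falls inside some segment, in one linear pass.
    template <typename Visit>
    void sweep(const std::vector<Event> &Events,
               const std::vector<Segment> &Segments, Visit VisitFn) {
      auto EvtIt = Events.begin();
      auto SegIt = Segments.begin();
      while (EvtIt != Events.end() && SegIt != Segments.end()) {
        if (EvtIt->Idx < SegIt->End) {
          if (EvtIt->Idx >= SegIt->Start)
            VisitFn(*EvtIt, *SegIt); // event lies inside the current segment
          ++EvtIt;                   // consume the event either way
        } else {
          ++SegIt;                   // this segment ends before the event
        }
      }
    }
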
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
index ec0868acab38..a5bea1463468 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -220,8 +221,8 @@ void RegScavenger::forward() {
// Ideally we would like a way to model this, but leaving the
// insert_subreg around causes both correctness and performance issues.
bool SubUsed = false;
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- if (isRegUsed(*SubRegs)) {
+ for (const MCPhysReg &SubReg : TRI->subregs(Reg))
+ if (isRegUsed(SubReg)) {
SubUsed = true;
break;
}
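
With the new range interface the scan above could also collapse into a standard algorithm; a hedged alternative, written as if inside the same member function so isRegUsed() resolves via 'this':

    bool SubUsed = llvm::any_of(TRI->subregs(Reg), [&](MCPhysReg SubReg) {
      return isRegUsed(SubReg); // any live subregister keeps Reg occupied
    });
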
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index e3f5abb6301f..4ee28d6bbb46 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
index 019de6554d2a..0f73973c8a51 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -17,9 +17,10 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
index ddbbd0f8d6e9..8aa488e63913 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
@@ -28,7 +28,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -54,6 +53,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
@@ -63,6 +63,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -562,7 +563,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
for (Argument *Arg : ByValArguments) {
unsigned Offset = SSL.getObjectOffset(Arg);
- unsigned Align = SSL.getObjectAlignment(Arg);
+ MaybeAlign Align(SSL.getObjectAlignment(Arg));
Type *Ty = Arg->getType()->getPointerElementType();
uint64_t Size = DL.getTypeStoreSize(Ty);
@@ -579,7 +580,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
DIExpression::ApplyOffset, -Offset);
Arg->replaceAllUsesWith(NewArg);
IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
- IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size);
+ IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlign(), Size);
}
// Allocate space for every unsafe static AllocaInst on the unsafe stack.
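
MaybeAlign is an Optional-like wrapper that distinguishes "no alignment specified" from a concrete power-of-two alignment, which is why the raw unsigned disappears above; a minimal sketch of the assumed behavior of llvm/Support/Alignment.h:

    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    void demo() {
      MaybeAlign A(0);  // zero means "unspecified": A holds no value
      MaybeAlign B(16); // a concrete 16-byte alignment
      if (B) {
        Align Known = *B; // unwrap only when an alignment is present
        (void)Known;
      }
    }
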
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index b4037499d7d1..ee72de67d875 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <algorithm>
@@ -848,21 +849,20 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
bool &ModifiedDT) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
+ unsigned Alignment;
switch (II->getIntrinsicID()) {
default:
break;
case Intrinsic::masked_load: {
// Scalarize unsupported vector masked load
- unsigned Alignment =
- cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment)))
return false;
scalarizeMaskedLoad(CI, ModifiedDT);
return true;
}
case Intrinsic::masked_store: {
- unsigned Alignment =
- cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(),
MaybeAlign(Alignment)))
return false;
@@ -870,12 +870,15 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
return true;
}
case Intrinsic::masked_gather:
- if (TTI->isLegalMaskedGather(CI->getType()))
+ Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ if (TTI->isLegalMaskedGather(CI->getType(), MaybeAlign(Alignment)))
return false;
scalarizeMaskedGather(CI, ModifiedDT);
return true;
case Intrinsic::masked_scatter:
- if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType()))
+ Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType(),
+ MaybeAlign(Alignment)))
return false;
scalarizeMaskedScatter(CI, ModifiedDT);
return true;
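
The argument indices used above follow the masked intrinsics' operand order, where the alignment is an immediate operand; the assumed layouts, per the LangRef of this era:

    // llvm.masked.load(ptr, align, mask, passthru)     -> align is arg 1
    // llvm.masked.store(value, ptr, align, mask)       -> align is arg 2
    // llvm.masked.gather(ptrs, align, mask, passthru)  -> align is arg 1
    // llvm.masked.scatter(value, ptrs, align, mask)    -> align is arg 2
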
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 96a1f86c3e04..d11406cc330f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -270,8 +270,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp));
ST.adjustSchedDependency(SU, UseSU, Dep);
- } else
+ } else {
Dep.setLatency(0);
+      // FIXME: We could always let the target call adjustSchedDependency()
+      // and remove this condition, but that currently asserts in Hexagon BE.
+ if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle()))
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ }
UseSU->addPred(Dep);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e8950b58d42d..e5bc08b9280a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -131,6 +131,7 @@ namespace {
const TargetLowering &TLI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
+ bool LegalDAG = false;
bool LegalOperations = false;
bool LegalTypes = false;
bool ForCodeSize;
@@ -179,6 +180,12 @@ namespace {
AddToWorklist(Node);
}
+    /// Convenient shorthand to add a node and all of its users to the worklist.
+ void AddToWorklistWithUsers(SDNode *N) {
+ AddUsersToWorklist(N);
+ AddToWorklist(N);
+ }
+
// Prune potentially dangling nodes. This is called after
// any visit to a node, but should also be called during a visit after any
// failed combine which may have created a DAG node.
@@ -217,14 +224,16 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), AA(AA) {
- ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ ForCodeSize = DAG.shouldOptForSize();
MaximumLegalStoreInBits = 0;
+ // We use the minimum store size here, since that's all we can guarantee
+ // for the scalable vector types.
for (MVT VT : MVT::all_valuetypes())
if (EVT(VT).isSimple() && VT != MVT::Other &&
TLI.isTypeLegal(EVT(VT)) &&
- VT.getSizeInBits() >= MaximumLegalStoreInBits)
- MaximumLegalStoreInBits = VT.getSizeInBits();
+ VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
}
void ConsiderForPruning(SDNode *N) {
@@ -622,7 +631,7 @@ namespace {
ConstantSDNode *Mask, SDNode *&NodeToMask);
/// Attempt to propagate a given AND node back to load leaves so that they
/// can be combined into narrow loads.
- bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
+ bool BackwardsPropagateMask(SDNode *N);
/// Helper function for MergeConsecutiveStores which merges the
/// component store chains.
@@ -1026,8 +1035,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
- AddToWorklist(TLO.New.getNode());
- AddUsersToWorklist(TLO.New.getNode());
+ AddToWorklistWithUsers(TLO.New.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
@@ -1393,6 +1401,7 @@ bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
void DAGCombiner::Run(CombineLevel AtLevel) {
// set the instance variables, so that the various visit routines may use it.
Level = AtLevel;
+ LegalDAG = Level >= AfterLegalizeDAG;
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
@@ -1419,14 +1428,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// If this combine is running after legalizing the DAG, re-legalize any
// nodes pulled off the worklist.
- if (Level == AfterLegalizeDAG) {
+ if (LegalDAG) {
SmallSetVector<SDNode *, 16> UpdatedNodes;
bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
- for (SDNode *LN : UpdatedNodes) {
- AddUsersToWorklist(LN);
- AddToWorklist(LN);
- }
+ for (SDNode *LN : UpdatedNodes)
+ AddToWorklistWithUsers(LN);
+
if (!NIsValid)
continue;
}
@@ -2800,6 +2808,96 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
return SDValue();
}
+// If we are facing some sort of diamond carry/borrow in/out pattern, try to
+// match patterns like:
+//
+// (uaddo A, B) CarryIn
+// | \ |
+// | \ |
+// PartialSum PartialCarryOutX /
+// | | /
+// | ____|____________/
+// | / |
+// (uaddo *, *) \________
+// | \ \
+// | \ |
+// | PartialCarryOutY |
+// | \ |
+// | \ /
+// AddCarrySum | ______/
+// | /
+// CarryOut = (or *, *)
+//
+// And generate ADDCARRY (or SUBCARRY) with two result values:
+//
+// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
+//
+// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
+// a single path for carry/borrow out propagation:
+static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDValue Carry0,
+ SDValue Carry1, SDNode *N) {
+ if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
+ return SDValue();
+ unsigned Opcode = Carry0.getOpcode();
+ if (Opcode != Carry1.getOpcode())
+ return SDValue();
+ if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
+ return SDValue();
+
+ // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
+ // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
+ // the above ASCII art.)
+ if (Carry1.getOperand(0) != Carry0.getValue(0) &&
+ Carry1.getOperand(1) != Carry0.getValue(0))
+ std::swap(Carry0, Carry1);
+ if (Carry1.getOperand(0) != Carry0.getValue(0) &&
+ Carry1.getOperand(1) != Carry0.getValue(0))
+ return SDValue();
+
+ // The carry in value must be on the righthand side for subtraction.
+ unsigned CarryInOperandNum =
+ Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
+ if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
+ return SDValue();
+ SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
+
+ unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
+ if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
+ return SDValue();
+
+ // Verify that the carry/borrow in is plausibly a carry/borrow bit.
+ // TODO: make getAsCarry() aware of how partial carries are merged.
+ if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ CarryIn = CarryIn.getOperand(0);
+ if (CarryIn.getValueType() != MVT::i1)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Merged =
+ DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
+ Carry0.getOperand(1), CarryIn);
+
+  // Because we have proven that the result of the UADDO/USUBO of A and B
+  // feeds into the UADDO/USUBO that consumes the carry/borrow in, if the
+  // first UADDO/USUBO overflows then the second one cannot. For example,
+  // consider 8-bit numbers where 0xFF is the maximum value.
+ //
+ // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
+ // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
+ //
+ // This is important because it means that OR and XOR can be used to merge
+ // carry flags; and that AND can return a constant zero.
+ //
+ // TODO: match other operations that can merge flags (ADD, etc)
+ DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
+ if (N->getOpcode() == ISD::AND)
+ return DAG.getConstant(0, DL, MVT::i1);
+ return Merged.getValue(1);
+}
+
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N) {
// fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
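
The at-most-one-overflow claim in combineCarryDiamond checks out with plain 8-bit arithmetic; a self-contained worked example:

    #include <cassert>
    #include <cstdint>
    int main() {
      uint8_t A = 0xFF, B = 0xFF, CIn = 1; // worst case for 8 bits
      uint8_t Partial = A + B;             // 0xFE, with a carry out
      uint8_t Sum = Partial + CIn;         // 0xFF: cannot carry again
      assert(Partial == 0xFE && Sum == 0xFF);
      // If the first add carries, Partial <= 0xFE, so adding CIn (<= 1) never
      // carries; OR/XOR therefore merge the partial carry-outs correctly, and
      // their AND is the constant zero returned for ISD::AND above.
      return 0;
    }
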
@@ -3006,6 +3104,20 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
N1.getOperand(0)));
+ // A - (A & B) -> A & (~B)
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue A = N1.getOperand(0);
+ SDValue B = N1.getOperand(1);
+ if (A != N0)
+ std::swap(A, B);
+ if (A == N0 &&
+ (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
+ SDValue InvB =
+ DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::AND, DL, VT, A, InvB);
+ }
+ }
+
// fold (X - (-Y * Z)) -> (X + (Y * Z))
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
if (N1.getOperand(0).getOpcode() == ISD::SUB &&
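
The new fold works because A & B is a submask of A, so the subtraction never borrows and behaves bitwise; a quick self-contained check of the identity:

    #include <cassert>
    int main() {
      const unsigned Vals[] = {0x00u, 0x0Fu, 0xABu, 0xF0u, 0xFFu};
      for (unsigned A : Vals)
        for (unsigned B : Vals)
          assert(A - (A & B) == (A & ~B)); // the fold: A - (A & B) -> A & ~B
      return 0;
    }
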
@@ -4225,7 +4337,6 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
// If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
unsigned Opcode = N->getOpcode();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegal(Opcode, VT) &&
(N0.isUndef() || DAG.SignBitIsZero(N0)) &&
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
@@ -4543,8 +4654,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
// (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
// (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
if (LL == RL && LR == RR) {
- ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
- : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
+ ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
+ : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
if (NewCC != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
@@ -4856,7 +4967,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
return true;
}
-bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
+bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!Mask)
return false;
@@ -5092,6 +5203,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Shuffle = XformToShuffleWithZero(N))
return Shuffle;
+ if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ return Combined;
+
// fold (and (or x, C), D) -> D if (C & D) == D
auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
@@ -5238,14 +5352,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- if (Level >= AfterLegalizeTypes) {
+ if (LegalTypes) {
// Attempt to propagate the AND back up to the leaves which, if they're
// loads, can be combined to narrow loads and the AND node can be removed.
// Perform after legalization so that extend nodes will already be
// combined into the loads.
- if (BackwardsPropagateMask(N, DAG)) {
+ if (BackwardsPropagateMask(N))
return SDValue(N, 0);
- }
}
if (SDValue Combined = visitANDLike(N0, N1, N))
@@ -5787,6 +5900,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue Combined = visitORLike(N0, N1, N))
return Combined;
+ if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ return Combined;
+
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
return BSwap;
@@ -6418,7 +6534,7 @@ static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
// Check if the byte offsets we are looking at match either a big or little
// endian value load. Return true for big endian, false for little endian,
// and None if the match failed.
-static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
+static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
int64_t FirstOffset) {
// The endianness can be decided only when there are at least 2 bytes.
unsigned Width = ByteOffsets.size();
@@ -6491,7 +6607,6 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
return SDValue();
@@ -6499,7 +6614,7 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
// to the same base address. Collect byte offsets from the Base address into
// ByteOffsets.
SDValue CombinedValue;
- SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
+ SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
StoreSDNode *FirstStore = nullptr;
Optional<BaseIndexOffset> Base;
@@ -6655,13 +6770,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
unsigned ByteWidth = VT.getSizeInBits() / 8;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // Before legalize we can introduce too wide illegal loads which will be later
- // split into legal sized loads. This enables us to combine i64 load by i8
- // patterns to a couple of i32 loads on 32 bit targets.
- if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
- return SDValue();
-
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto MemoryByteOffset = [&] (ByteProvider P) {
assert(P.isMemory() && "Must be a memory byte provider");
@@ -6683,12 +6791,22 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check if all the bytes of the OR we are looking at are loaded from the same
// base address. Collect byte offsets from the Base address in ByteOffsets.
- SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
- for (unsigned i = 0; i < ByteWidth; i++) {
+ SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
+ unsigned ZeroExtendedBytes = 0;
+ for (int i = ByteWidth - 1; i >= 0; --i) {
auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
- if (!P || !P->isMemory()) // All the bytes must be loaded from memory
+ if (!P)
return SDValue();
+ if (P->isConstantZero()) {
+      // It's OK for the N most significant bytes to be 0; we can just
+ // zero-extend the load.
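+      // (Illustrative: an i32 whose two most significant bytes are known
+      // zero can still be formed as (zext (i16 load)).)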
+ if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
+ return SDValue();
+ continue;
+ }
+ assert(P->isMemory() && "provenance should either be memory or zero");
+
LoadSDNode *L = P->Load;
assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
!L->isIndexed() &&
@@ -6727,9 +6845,26 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
assert(Base && "Base address of the accessed memory location must be set");
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+ bool NeedsZext = ZeroExtendedBytes > 0;
+
+ EVT MemVT =
+ EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
+
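+  // (Illustrative: three zero bytes on an i32 leave an i8 payload, which is
+  // fine, but a single zero byte would ask for i24, which is not a simple
+  // MVT, so the combine bails out below.)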
+ if (!MemVT.isSimple())
+ return SDValue();
+
+  // Before legalization we can introduce too-wide illegal loads, which will
+  // later be split into legal-sized loads. This enables us to combine
+  // i64-by-i8 load patterns into a couple of i32 loads on 32-bit targets.
+ if (LegalOperations &&
+ !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
+ MemVT))
+ return SDValue();
+
// Check if the bytes of the OR we are looking at match either a big or
// little endian value load.
- Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ Optional<bool> IsBigEndian = isBigEndian(
+ makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
if (!IsBigEndian.hasValue())
return SDValue();
@@ -6742,7 +6877,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
LoadSDNode *FirstLoad = FirstByteProvider->Load;
// The node we are looking at matches the pattern; check if we can
- // replace it with a single load and bswap if needed.
+ // replace it with a single (possibly zero-extended) load and bswap + shift if
+ // needed.
// If the load needs byte swap check if the target supports it
bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
@@ -6750,25 +6886,45 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Before legalize we can introduce illegal bswaps which will be later
// converted to an explicit bswap sequence. This way we end up with a single
// load and byte shuffling instead of several loads and byte shuffling.
- if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ // We do not introduce illegal bswaps when zero-extending as this tends to
+ // introduce too many arithmetic instructions.
+ if (NeedsBswap && (LegalOperations || NeedsZext) &&
+ !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // If we need to bswap and zero extend, we have to insert a shift. Check that
+ // it is legal.
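+  // (Illustrative: for an i64 built from a byte-swapped i32 load, the
+  // zextload leaves the payload in the low 32 bits; shifting it left by
+  // ZeroExtendedBytes * 8 = 32 first lets the full-width BSWAP deposit the
+  // swapped bytes back into the low half, i.e. zext(bswap32(x)).)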
+ if (NeedsBswap && NeedsZext && LegalOperations &&
+ !TLI.isOperationLegal(ISD::SHL, VT))
return SDValue();
// Check that a load of the wide type is both allowed and fast on the target
bool Fast = false;
- bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- VT, *FirstLoad->getMemOperand(), &Fast);
+ bool Allowed =
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ *FirstLoad->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
- SDValue NewLoad =
- DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
+ SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
+ SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), MemVT,
+ FirstLoad->getAlignment());
// Transfer chain users from old loads to the new load.
for (LoadSDNode *L : Loads)
DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
- return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
+ if (!NeedsBswap)
+ return NewLoad;
+
+ SDValue ShiftedLoad =
+ NeedsZext
+ ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
+ DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
+ SDLoc(N), LegalOperations))
+ : NewLoad;
+ return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
}
// If the target has andn, bsl, or a similar bit-select instruction,
@@ -6904,7 +7060,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
- LHS.getValueType().isInteger());
+ LHS.getValueType());
if (!LegalOperations ||
TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
switch (N0Opcode) {
@@ -6964,6 +7120,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
DAG.getAllOnesConstant(DL, VT));
}
+ // fold (not (add X, -1)) -> (neg X)
+ if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
+ isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+ }
+
// fold (xor (and x, y), y) -> (and (not x), y)
if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
SDValue X = N0.getOperand(0);
@@ -7051,6 +7214,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ return Combined;
+
return SDValue();
}
@@ -7567,8 +7733,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (VT.isVector())
ExtVT = EVT::getVectorVT(*DAG.getContext(),
ExtVT, VT.getVectorNumElements());
- if ((!LegalOperations ||
- TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ if (!LegalOperations ||
+ TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
+ TargetLowering::Legal)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
N0.getOperand(0), DAG.getValueType(ExtVT));
}
@@ -7776,26 +7943,40 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
}
- // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
- // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
- if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
+ SDValue InnerShift = N0.getOperand(0);
+ // TODO - support non-uniform vector shift amounts.
+ if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
uint64_t c1 = N001C->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
- EVT InnerShiftVT = N0.getOperand(0).getValueType();
- EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
+ EVT InnerShiftVT = InnerShift.getValueType();
+ EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+ // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
// This is only valid if the OpSizeInBits + c1 = size of inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
- SDLoc DL(N0);
+ SDLoc DL(N);
if (c1 + c2 >= InnerShiftSize)
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(ISD::SRL, DL, InnerShiftVT,
- N0.getOperand(0).getOperand(0),
- DAG.getConstant(c1 + c2, DL,
- ShiftCountVT)));
+ SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
+ SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ InnerShift.getOperand(0), NewShiftAmt);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
+ }
+ // In the more general case, we can clear the high bits after the shift:
+ // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
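+      // (Illustrative: i32 x, c1 = 8, truncated to i16, c2 = 4 becomes
+      // trunc (and (srl x, 12), 0xFFF); the mask keeps only the
+      // OpSizeInBits - c2 = 12 bits that the original shift pair exposed.)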
+ if (N0.hasOneUse() && InnerShift.hasOneUse() &&
+ c1 + c2 < InnerShiftSize) {
+ SDLoc DL(N);
+ SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
+ SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ InnerShift.getOperand(0), NewShiftAmt);
+ SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
+ OpSizeInBits - c2),
+ DL, InnerShiftVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
}
}
}
@@ -8585,6 +8766,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -8609,6 +8794,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -9108,6 +9297,8 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
return SDValue();
+ assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
+
SDLoc DL(N);
const unsigned NumSplits =
DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
@@ -9125,8 +9316,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Stride, DL, BasePtr.getValueType()));
+ BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
@@ -9365,11 +9555,10 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
SDLoc dl(Ld);
SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
- SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(),
- Ld->getBasePtr(), Ld->getMask(),
- PassThru, Ld->getMemoryVT(),
- Ld->getMemOperand(), ExtLoadType,
- Ld->isExpandingLoad());
+ SDValue NewLoad = DAG.getMaskedLoad(
+ VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
+ PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
+ ExtLoadType, Ld->isExpandingLoad());
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
return NewLoad;
}
@@ -9397,10 +9586,15 @@ static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
// sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
// zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
SDLoc DL(N);
- SDValue NotX = DAG.getNOT(DL, X, VT);
- SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
- auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
- return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
+ unsigned ShCt = VT.getSizeInBits() - 1;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
+ auto ShiftOpcode =
+ N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
+ }
}
return SDValue();
}
@@ -9671,6 +9865,29 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
return (Known.Zero | 1).isAllOnesValue();
}
+/// Given an extending node with a pop-count operand, if the target does not
+/// support a pop-count in the narrow source type but does support it in the
+/// destination type, widen the pop-count to the destination type.
+static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
+ assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
+ Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
+
+ SDValue CtPop = Extend->getOperand(0);
+ if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
+ return SDValue();
+
+ EVT VT = Extend->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
+ !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
+ return SDValue();
+
+ // zext (ctpop X) --> ctpop (zext X)
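+  // Zero extension only introduces zero bits, so the population count is
+  // unchanged. This also covers the any_extend caller, which is free to
+  // produce the zero-extended value.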
+ SDLoc DL(Extend);
+ SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
+ return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
+}
+
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -9921,6 +10138,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
+ if (SDValue NewCtPop = widenCtPop(N, DAG))
+ return NewCtPop;
+
return SDValue();
}
@@ -10067,6 +10287,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SCC;
}
+ if (SDValue NewCtPop = widenCtPop(N, DAG))
+ return NewCtPop;
+
return SDValue();
}
@@ -10273,17 +10496,14 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (DAG.getDataLayout().isBigEndian())
ShAmt = AdjustBigEndianShift(ShAmt);
- EVT PtrType = N0.getOperand(1).getValueType();
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
- PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, DL, PtrType),
- Flags);
+ SDValue NewPtr =
+ DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
AddToWorklist(NewPtr.getNode());
SDValue Load;
@@ -10735,16 +10955,16 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
SDValue VecSrc = N0.getOperand(0);
- EVT SrcVT = VecSrc.getValueType();
- if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
+ EVT VecSrcVT = VecSrc.getValueType();
+ if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
(!LegalOperations ||
- TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
SDLoc SL(N);
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
- unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
- VecSrc, DAG.getConstant(Idx, SL, IdxVT));
+ unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
+ DAG.getConstant(Idx, SL, IdxVT));
}
}
@@ -11299,11 +11519,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
@@ -11359,7 +11579,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -11373,7 +11594,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(0)),
@@ -11427,7 +11649,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
if (isContractableFMUL(N020) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N020.getValueType())) {
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
N1, Flags);
@@ -11456,7 +11679,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
N1, Flags);
@@ -11471,7 +11695,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
if (isContractableFMUL(N120) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N120.getValueType())) {
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
N0, Flags);
@@ -11489,7 +11714,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N10.getOpcode() == PreferredFusedOpcode) {
SDValue N102 = N10.getOperand(2);
if (isContractableFMUL(N102) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
N0, Flags);
@@ -11510,11 +11736,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
@@ -11579,7 +11805,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -11595,7 +11822,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -11617,7 +11845,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N00.getOpcode() == ISD::FNEG) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -11640,7 +11869,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N00.getOpcode() == ISD::FP_EXTEND) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N000.getValueType())) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -11671,7 +11901,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
- isContractableFMUL(N1.getOperand(2))) {
+ isContractableFMUL(N1.getOperand(2)) &&
+ N1->hasOneUse()) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11686,12 +11917,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode) {
+ if (N0.getOpcode() == PreferredFusedOpcode &&
+ N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
if (isContractableFMUL(N020) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N020.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11716,7 +11949,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -11736,10 +11970,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
if (N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
+ N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableFMUL(N120) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11768,7 +12004,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N101 = CvtSrc.getOperand(1);
SDValue N102 = CvtSrc.getOperand(2);
if (isContractableFMUL(N102) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11812,7 +12049,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// Floating-point multiply-add with intermediate rounding. This can result
@@ -12402,6 +12639,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
}
}
+ // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
+ // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
+ if (!TLI.isFNegFree(VT) &&
+ TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations,
+ ForCodeSize) == 2)
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ TLI.getNegatedExpression(SDValue(N, 0), DAG,
+ LegalOperations, ForCodeSize),
+ Flags);
return SDValue();
}
@@ -12738,7 +12984,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
// Assume that libcalls are the smallest code.
// TODO: This restriction should probably be lifted for vectors.
- if (DAG.getMachineFunction().getFunction().hasOptSize())
+ if (ForCodeSize)
return SDValue();
// pow(X, 0.25) --> sqrt(sqrt(X))
@@ -13135,6 +13381,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+  // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0.
+  // FIXME: This is duplicated in isNegatibleForFree, but isNegatibleForFree
+  // doesn't know it was called from a context with an nsz flag if the input
+  // fsub does not have one.
+ if (N0.getOpcode() == ISD::FSUB &&
+ (DAG.getTarget().Options.NoSignedZerosFPMath ||
+ N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
+ return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
+ N0.getOperand(0), N->getFlags());
+ }
+
// Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
if (!TLI.isFNegFree(VT) &&
@@ -13168,9 +13424,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (CFP1) {
APFloat CVal = CFP1->getValueAPF();
CVal.changeSign();
- if (Level >= AfterLegalizeDAG &&
- (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
- TLI.isOperationLegal(ISD::ConstantFP, VT)))
+ if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT)))
return DAG.getNode(
ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
@@ -13423,12 +13678,22 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
EVT VT;
unsigned AS;
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
VT = LD->getMemoryVT();
AS = LD->getAddressSpace();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
VT = ST->getMemoryVT();
@@ -13462,38 +13727,64 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
VT.getTypeForEVT(*DAG.getContext()), AS);
}
-/// Try turning a load/store into a pre-indexed load/store when the base
-/// pointer is an add or subtract and it has other uses besides the load/store.
-/// After the transformation, the new indexed load/store has effectively folded
-/// the add/subtract in and all of its other uses are redirected to the
-/// new load/store.
-bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
- if (Level < AfterLegalizeDAG)
- return false;
-
- bool isLoad = true;
- SDValue Ptr;
- EVT VT;
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
+ bool &IsLoad, bool &IsMasked, SDValue &Ptr,
+ const TargetLowering &TLI) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
if (LD->isIndexed())
return false;
- VT = LD->getMemoryVT();
- if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
- !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ EVT VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
return false;
Ptr = LD->getBasePtr();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
if (ST->isIndexed())
return false;
- VT = ST->getMemoryVT();
- if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
- !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ EVT VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
return false;
Ptr = ST->getBasePtr();
- isLoad = false;
+ IsLoad = false;
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ EVT VT = LD->getMemoryVT();
+ if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
+ !TLI.isIndexedMaskedLoadLegal(Dec, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ IsMasked = true;
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ EVT VT = ST->getMemoryVT();
+ if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
+ !TLI.isIndexedMaskedStoreLegal(Dec, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ IsLoad = false;
+ IsMasked = true;
} else {
return false;
}
+ return true;
+}
+
+/// Try turning a load/store into a pre-indexed load/store when the base
+/// pointer is an add or subtract and it has other uses besides the load/store.
+/// After the transformation, the new indexed load/store has effectively folded
+/// the add/subtract in and all of its other uses are redirected to the
+/// new load/store.
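+///
+/// For example (editorial sketch): a (store x, (add p, 4)) whose (add p, 4)
+/// has other users can become a pre-indexed store that writes at p + 4 and
+/// also produces the incremented pointer for those users.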
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool IsLoad = true;
+ bool IsMasked = false;
+ SDValue Ptr;
+ if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
+ Ptr, TLI))
+ return false;
// If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
// out. There is no reason to make this a preinc/predec.
@@ -13535,8 +13826,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return false;
// Check #2.
- if (!isLoad) {
- SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (!IsLoad) {
+ SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
+ : cast<StoreSDNode>(N)->getValue();
// Would require a copy.
if (Val == BasePtr)
@@ -13612,18 +13904,26 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return false;
SDValue Result;
- if (isLoad)
- Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM);
- else
- Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM);
+ if (!IsMasked) {
+ if (IsLoad)
+ Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
+ else
+ Result =
+ DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
+ } else {
+ if (IsLoad)
+ Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM);
+ else
+ Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM);
+ }
++PreIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
- if (isLoad) {
+ if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
@@ -13677,7 +13977,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// We can now generate the new expression.
SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
- SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
+ SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
SDValue NewUse = DAG.getNode(Opcode,
DL,
@@ -13687,7 +13987,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
}
// Replace the uses of Ptr with uses of the updated base value.
- DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
deleteAndRecombine(Ptr.getNode());
AddToWorklist(Result.getNode());
@@ -13702,29 +14002,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
- bool isLoad = true;
+ bool IsLoad = true;
+ bool IsMasked = false;
SDValue Ptr;
- EVT VT;
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- if (LD->isIndexed())
- return false;
- VT = LD->getMemoryVT();
- if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
- !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
- return false;
- Ptr = LD->getBasePtr();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- if (ST->isIndexed())
- return false;
- VT = ST->getMemoryVT();
- if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
- !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
- return false;
- Ptr = ST->getBasePtr();
- isLoad = false;
- } else {
+ if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked,
+ Ptr, TLI))
return false;
- }
if (Ptr.getNode()->hasOneUse())
return false;
@@ -13760,7 +14043,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// If all the uses are load / store addresses, then don't do the
// transformation.
- if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
bool RealUse = false;
for (SDNode *UseUse : Use->uses()) {
if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
@@ -13786,18 +14069,24 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
Worklist.push_back(Op);
if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
!SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
- SDValue Result = isLoad
- ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM)
- : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM);
+ SDValue Result;
+ if (!IsMasked)
+ Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM)
+ : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM);
+ else
+ Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorklistRemover DeadNodes(*this);
- if (isLoad) {
+ if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
@@ -13809,7 +14098,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// Replace the uses of Use with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
- Result.getValue(isLoad ? 1 : 0));
+ Result.getValue(IsLoad ? 1 : 0));
deleteAndRecombine(Op);
return true;
}
@@ -13923,8 +14212,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// the stored value). With Offset=n (for n > 0) the loaded value starts at the
// n-th least significant byte of the stored value.
if (DAG.getDataLayout().isBigEndian())
- Offset = (STMemType.getStoreSizeInBits() -
- LDMemType.getStoreSizeInBits()) / 8 - Offset;
+ Offset = ((int64_t)STMemType.getStoreSizeInBits() -
+ (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;
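+  // (Illustrative: storing an i32 and loading an i16 on a big-endian target,
+  // a load at Offset = 0 reads the two most significant stored bytes, so the
+  // adjusted offset is (32 - 16) / 8 - 0 = 2 bytes into the stored value; the
+  // int64_t casts keep the subtraction from wrapping when the load is wider.)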
// Check that the stored value covers all bits that are loaded.
bool STCoversLD =
@@ -14066,7 +14355,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
return V;
// Try to infer better alignment information than the load already has.
- if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
SDValue NewLoad = DAG.getExtLoad(
@@ -14786,8 +15075,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue Ptr = St->getBasePtr();
if (StOffset) {
SDLoc DL(IVal);
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
- Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
NewAlign = MinAlign(NewAlign, StOffset);
}
@@ -14898,10 +15186,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
return SDValue();
- SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
- Ptr.getValueType(), Ptr,
- DAG.getConstant(PtrOff, SDLoc(LD),
- Ptr.getValueType()));
+ SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
SDValue NewLD =
DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
@@ -15081,7 +15366,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// The latest Node in the DAG.
SDLoc DL(StoreNodes[0].MemNode);
- int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
+ TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
@@ -15466,7 +15751,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
Attribute::NoImplicitFloat);
// This function cannot currently deal with non-byte-sized memory sizes.
- if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
+ if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
return false;
if (!MemVT.isSimple())
@@ -16015,6 +16300,9 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
if (Value.getOpcode() == ISD::TargetConstantFP)
return SDValue();
+ if (!ISD::isNormalStore(ST))
+ return SDValue();
+
SDLoc DL(ST);
SDValue Chain = ST->getChain();
@@ -16075,8 +16363,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getAlignment(), MMOFlags, AAInfo);
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, DL, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
@@ -16111,8 +16398,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
DAG, *ST->getMemOperand())) {
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
- ST->getPointerInfo(), ST->getAlignment(),
- ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ ST->getMemOperand());
}
}
@@ -16121,7 +16407,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return Chain;
// Try to infer better alignment information than the store already has.
- if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
SDValue NewStore =
@@ -16451,9 +16737,7 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
// Lower value store.
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getAlignment(), MMOFlags, AAInfo);
- Ptr =
- DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
// Higher value store.
SDValue St1 =
DAG.getStore(St0, DL, Hi, Ptr,
@@ -16464,11 +16748,15 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
/// Convert a disguised subvector insertion into a shuffle:
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+ assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ "Expected extract_vector_elt");
SDValue InsertVal = N->getOperand(1);
SDValue Vec = N->getOperand(0);
- // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
- // --> (vector_shuffle X, Y)
+ // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
+ // InsIndex)
+ // --> (vector_shuffle X, Y) and variations where shuffle operands may be
+ // CONCAT_VECTORS.
if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(InsertVal.getOperand(1))) {
@@ -16481,18 +16769,47 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
// Vec's operand 0 is using indices from 0 to N-1 and
// operand 1 from N to 2N - 1, where N is the number of
// elements in the vectors.
- int XOffset = -1;
- if (InsertVal.getOperand(0) == X) {
- XOffset = 0;
- } else if (InsertVal.getOperand(0) == Y) {
- XOffset = X.getValueType().getVectorNumElements();
+ SDValue InsertVal0 = InsertVal.getOperand(0);
+ int ElementOffset = -1;
+
+ // We explore the inputs of the shuffle in order to see if we find the
+ // source of the extract_vector_elt. If so, we can use it to modify the
+ // shuffle rather than perform an insert_vector_elt.
+ SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
+ ArgWorkList.emplace_back(Mask.size(), Y);
+ ArgWorkList.emplace_back(0, X);
+
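+    // (Illustrative: if X == concat(A, B) with 4-element halves and the
+    // extract source is B, the walk pops (0, X), pushes (4, B) and (0, A),
+    // and then matches B, giving ElementOffset = 4.)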
+ while (!ArgWorkList.empty()) {
+ int ArgOffset;
+ SDValue ArgVal;
+ std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
+
+ if (ArgVal == InsertVal0) {
+ ElementOffset = ArgOffset;
+ break;
+ }
+
+ // Peek through concat_vector.
+ if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
+ int CurrentArgOffset =
+ ArgOffset + ArgVal.getValueType().getVectorNumElements();
+ int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
+ for (SDValue Op : reverse(ArgVal->ops())) {
+ CurrentArgOffset -= Step;
+ ArgWorkList.emplace_back(CurrentArgOffset, Op);
+ }
+
+ // Make sure we went through all the elements and did not screw up index
+ // computation.
+ assert(CurrentArgOffset == ArgOffset);
+ }
}
- if (XOffset != -1) {
+ if (ElementOffset != -1) {
SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
- NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
+ NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
assert(NewMask[InsIndex] <
(int)(2 * Vec.getValueType().getVectorNumElements()) &&
NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
@@ -16562,13 +16879,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue EltNo = N->getOperand(2);
SDLoc DL(N);
- // If the inserted element is an UNDEF, just use the input vector.
- if (InVal.isUndef())
- return InVec;
-
EVT VT = InVec.getValueType();
unsigned NumElts = VT.getVectorNumElements();
+  // Inserting into an out-of-bounds element is undefined.
+ if (auto *IndexC = dyn_cast<ConstantSDNode>(EltNo))
+ if (IndexC->getZExtValue() >= VT.getVectorNumElements())
+ return DAG.getUNDEF(VT);
+
// Remove redundant insertions:
// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -16683,7 +17001,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
}
- NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
+ NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
@@ -16723,8 +17041,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
AddToWorklist(EVE);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
- AddUsersToWorklist(Load.getNode()); // Add users too
- AddToWorklist(Load.getNode());
+ AddToWorklistWithUsers(Load.getNode());
++OpsNarrowed;
return SDValue(EVE, 0);
}
@@ -18239,22 +18556,61 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getBitcast(NVT, NewExtract);
}
}
- // TODO - handle (DestNumElts % SrcNumElts) == 0
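+  // (Illustrative: v2i32 extract_subvector (v4i32 bitcast (v2i64 X)), 2 can
+  // become bitcast (v1i64 extract_subvector X, 1) when the narrower extract
+  // is legal or custom for the target.)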
+ if ((DestNumElts % SrcNumElts) == 0) {
+ unsigned DestSrcRatio = DestNumElts / SrcNumElts;
+ if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) {
+ unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio;
+ EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getScalarType(), NewExtNumElts);
+ if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
+ SDLoc DL(N);
+ SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+ SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ }
+ }
}
- // Combine:
- // (extract_subvec (concat V1, V2, ...), i)
- // Into:
- // Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same
- // type.
- if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
- V.getOperand(0).getValueType() == NVT) {
- unsigned Idx = N->getConstantOperandVal(1);
- unsigned NumElems = NVT.getVectorNumElements();
- assert((Idx % NumElems) == 0 &&
- "IDX in concat is not a multiple of the result vector length.");
- return V->getOperand(Idx / NumElems);
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) {
+ EVT ConcatSrcVT = V.getOperand(0).getValueType();
+ assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "Concat and extract subvector do not change element type");
+
+ unsigned ExtIdx = N->getConstantOperandVal(1);
+ unsigned ExtNumElts = NVT.getVectorNumElements();
+ assert(ExtIdx % ExtNumElts == 0 &&
+ "Extract index is not a multiple of the input vector length.");
+
+ unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements();
+ unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
+
+ // If the concatenated source types match this extract, it's a direct
+ // simplification:
+ // extract_subvec (concat V1, V2, ...), i --> Vi
+ if (ConcatSrcNumElts == ExtNumElts)
+ return V.getOperand(ConcatOpIdx);
+
+    // If the concatenated source vectors are a whole multiple of the length
+    // of this extract, then extract a fraction of one of those source
+    // vectors directly from a concat operand. Example:
+    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
+ // v2i8 extract_subvec v8i8 Y, 6
+ if (ConcatSrcNumElts % ExtNumElts == 0) {
+ SDLoc DL(N);
+ unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
+ assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
+ "Trying to extract from >1 concat operand?");
+ assert(NewExtIdx % ExtNumElts == 0 &&
+ "Extract index is not a multiple of the input vector length.");
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue NewIndexC = DAG.getConstant(NewExtIdx, DL, IdxTy);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
+ V.getOperand(ConcatOpIdx), NewIndexC);
+ }
}
V = peekThroughBitcasts(V);
@@ -18962,6 +19318,30 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return V;
}
+ // A shuffle of a concat of the same narrow vector can be reduced to use
+ // only low-half elements of a concat with undef:
+ // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
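+  // (Illustrative: with a 2-element X, mask <3,0,1,2> only ever selects
+  // elements of X, so it can be rewritten as <1,0,1,0> over (concat X, undef).)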
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
+ N0.getNumOperands() == 2 &&
+ N0.getOperand(0) == N0.getOperand(1)) {
+ int HalfNumElts = (int)NumElts / 2;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= HalfNumElts) {
+ assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
+ Idx -= HalfNumElts;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (TLI.isShuffleMaskLegal(NewMask, VT)) {
+ SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
+ SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+ N0.getOperand(0), UndefVec);
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
+ }
+ }
+
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
@@ -19446,8 +19826,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
int EltIdx = i / Split;
int SubIdx = i % Split;
SDValue Elt = RHS.getOperand(EltIdx);
+ // X & undef --> 0 (not undef). So this lane must be converted to choose
+ // from the zero constant vector (same as if the element had all 0-bits).
if (Elt.isUndef()) {
- Indices.push_back(-1);
+ Indices.push_back(i + NumSubElts);
continue;
}
@@ -19460,14 +19842,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
return SDValue();
// Extract the sub element from the constant bit mask.
- if (DAG.getDataLayout().isBigEndian()) {
- Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
- } else {
- Bits.lshrInPlace(SubIdx * NumSubBits);
- }
-
- if (Split > 1)
- Bits = Bits.trunc(NumSubBits);
+ if (DAG.getDataLayout().isBigEndian())
+ Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
+ else
+ Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
if (Bits.isAllOnesValue())
Indices.push_back(i);
@@ -19910,22 +20288,28 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
- SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
- SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
- AddToWorklist(Shift.getNode());
-
- if (XType.bitsGT(AType)) {
- Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
AddToWorklist(Shift.getNode());
- }
- if (CC == ISD::SETGT)
- Shift = DAG.getNOT(DL, Shift, AType);
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
- return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
}
- SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
+ unsigned ShCt = XType.getSizeInBits() - 1;
+ if (TLI.shouldAvoidTransformToShift(XType, ShCt))
+ return SDValue();
+
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
AddToWorklist(Shift.getNode());
@@ -20035,31 +20419,29 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// when the condition can be materialized as an all-ones register. Any
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
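// (Illustrative sketch: for i32 (X & 0x08), shl by countLeadingZeros(8) = 28
// parks bit 3 in the sign position and sra by 31 smears it, yielding either
// all-ones or zero before the final AND.)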
- // TODO: The operation legality checks could be loosened to include "custom",
- // but that may cause regressions for targets that do not have shift
- // instructions.
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
- N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) &&
- TLI.isOperationLegal(ISD::SHL, VT) &&
- TLI.isOperationLegal(ISD::SRA, VT)) {
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
- SDValue ShlAmt =
- DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
- getShiftAmountTy(AndLHS.getValueType()));
- SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
-
- // Now arithmetic right shift it all the way over, so the result is either
- // all-ones, or zero.
- SDValue ShrAmt =
- DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
- getShiftAmountTy(Shl.getValueType()));
- SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
-
- return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ unsigned ShCt = AndMask.getBitWidth() - 1;
+ if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
+ getShiftAmountTy(AndLHS.getValueType()));
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is
+ // either all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(ShCt, SDLoc(Shl),
+ getShiftAmountTy(Shl.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
}
}
@@ -20073,7 +20455,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
if (Swap) {
- CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
+ CC = ISD::getSetCCInverse(CC, CmpOpVT);
std::swap(N2C, N3C);
}
@@ -20101,10 +20483,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (N2C->isOne())
return Temp;
+ unsigned ShCt = N2C->getAPIntValue().logBase2();
+ if (TLI.shouldAvoidTransformToShift(VT, ShCt))
+ return SDValue();
+
// shl setcc result by log2 n2c
return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(),
- SDLoc(Temp),
+ DAG.getConstant(ShCt, SDLoc(Temp),
getShiftAmountTy(Temp.getValueType())));
}
@@ -20237,7 +20622,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// Result = N X_i + X_i (N - N A X_i)
SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
SDNodeFlags Flags) {
- if (Level >= AfterLegalizeDAG)
+ if (LegalDAG)
return SDValue();
// TODO: Handle half and/or extended types?
@@ -20376,7 +20761,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
bool Reciprocal) {
- if (Level >= AfterLegalizeDAG)
+ if (LegalDAG)
return SDValue();
// TODO: Handle half and/or extended types?
@@ -20411,9 +20796,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
SDLoc DL(Op);
EVT CCVT = getSetCCResultType(VT);
ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
- const Function &F = DAG.getMachineFunction().getFunction();
- Attribute Denorms = F.getFnAttribute("denormal-fp-math");
- if (Denorms.getValueAsString().equals("ieee")) {
+ DenormalMode DenormMode = DAG.getDenormalMode(VT);
+ if (DenormMode == DenormalMode::IEEE) {
// fabs(X) < SmallestNormal ? 0.0 : Est
const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
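
A scalar sketch of the guard being built here (hedged; FLT_MIN is the smallest normalized float, matching APFloat::getSmallestNormalized): when denormal inputs are honored in IEEE mode, tiny operands must flush the estimate to 0.0 rather than feed a denormal into the rsqrt-based expansion.

    #include <cmath>
    #include <cfloat>

    float guarded_estimate(float x, float est) {
      // fabs(X) < SmallestNormal ? 0.0 : Est
      return std::fabs(x) < FLT_MIN ? 0.0f : est;
    }
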
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 6d7260d7aee5..2bec8613e79c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -410,8 +410,8 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
else if (isa<ConstantPointerNull>(V))
// Translate this as an integer zero so that it can be
// local-CSE'd with actual integer zeros.
- Reg = getRegForValue(
- Constant::getNullValue(DL.getIntPtrType(V->getContext())));
+ Reg =
+ getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getType())));
else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
if (CF->isNullValue())
Reg = fastMaterializeFloatZero(CF);
@@ -1190,6 +1190,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
Flags.setSwiftSelf();
if (Arg.IsSwiftError)
Flags.setSwiftError();
+ if (Arg.IsCFGuardTarget)
+ Flags.setCFGuardTarget();
if (Arg.IsByVal)
Flags.setByVal();
if (Arg.IsInAlloca) {
@@ -1236,10 +1238,9 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
// Set labels for heapallocsite call.
- if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) {
- const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
- MF->addCodeViewHeapAllocSite(CLI.Call, MD);
- }
+ if (CLI.CS)
+ if (MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"))
+ CLI.Call->setHeapAllocMarker(*MF, MD);
return true;
}
@@ -1275,6 +1276,10 @@ bool FastISel::lowerCall(const CallInst *CI) {
bool IsTailCall = CI->isTailCall();
if (IsTailCall && !isInTailCallPosition(CS, TM))
IsTailCall = false;
+ if (IsTailCall && MF->getFunction()
+ .getFnAttribute("disable-tail-calls")
+ .getValueAsString() == "true")
+ IsTailCall = false;
CallLoweringInfo CLI;
CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS)
@@ -1926,7 +1931,8 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
TII(*MF->getSubtarget().getInstrInfo()),
TLI(*MF->getSubtarget().getTargetLowering()),
TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
- SkipTargetIndependentISel(SkipTargetIndependentISel) {}
+ SkipTargetIndependentISel(SkipTargetIndependentISel),
+ LastLocalValue(nullptr), EmitStartPt(nullptr) {}
FastISel::~FastISel() = default;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index cf6711adad48..fa33400cd4b3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -144,7 +144,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (AI->isStaticAlloca() &&
(TFI->isStackRealignable() || (Align <= StackAlign))) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
- uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty);
+ uint64_t TySize =
+ MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize();
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
@@ -159,6 +160,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI);
}
+ // Scalable vectors may need a special StackID to distinguish
+ // them from other (fixed size) stack objects.
+ if (Ty->isVectorTy() && Ty->getVectorIsScalable())
+ MF->getFrameInfo().setStackID(FrameIndex,
+ TFI->getStackIDForScalableVectors());
+
StaticAllocaMap[AI] = FrameIndex;
// Update the catch handler information.
if (Iter != CatchObjects.end()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c5095995ec2e..c613c2540628 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -882,8 +882,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Flags.hasExact())
MI->setFlag(MachineInstr::MIFlag::IsExact);
- if (Flags.hasFPExcept())
- MI->setFlag(MachineInstr::MIFlag::FPExcept);
+ if (Flags.hasNoFPExcept())
+ MI->setFlag(MachineInstr::MIFlag::NoFPExcept);
}
// Emit all of the actual operands of this instruction, adding them to the
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f9fdf525240f..80ac8b95e4ef 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -134,26 +134,27 @@ private:
ArrayRef<int> Mask) const;
bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
- bool &NeedInvert, const SDLoc &dl);
+ bool &NeedInvert, const SDLoc &dl, SDValue &Chain,
+ bool IsSignaling = false);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
- std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
- SDNode *Node, bool isSigned);
- SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
- RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128);
+ void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results);
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
- SDValue ExpandArgFPLibCall(SDNode *Node,
- RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
- RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128);
+ void ExpandArgFPLibCall(SDNode *Node,
+ RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results);
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -172,12 +173,11 @@ private:
SDValue NewIntValue) const;
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
- SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT,
- const SDLoc &dl);
- SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
- const SDLoc &dl);
- SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
- const SDLoc &dl);
+ SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
+ void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
+ SmallVectorImpl<SDValue> &Results);
+ void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
+ SmallVectorImpl<SDValue> &Results);
SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
@@ -421,6 +421,9 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
}
SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ if (!ISD::isNormalStore(ST))
+ return SDValue();
+
LLVM_DEBUG(dbgs() << "Optimizing float store operations\n");
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
// FIXME: We shouldn't do this for TargetConstantFP's.
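
The `0x12345678` in the comment above is a placeholder; a concrete scalar analogue of the optimization (assuming the usual IEEE-754 layout) is that storing the f32 constant 1.0 and storing its i32 bit pattern write identical bytes:

    #include <cstdint>
    #include <cstring>

    void store_one(float *p) {
      const uint32_t bits = 0x3f800000u;   // bit pattern of 1.0f
      std::memcpy(p, &bits, sizeof(bits)); // same bytes as *p = 1.0f
    }
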
@@ -466,8 +469,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment,
MMOFlags, AAInfo);
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, dl, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl);
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
MinAlign(Alignment, 4U), MMOFlags, AAInfo);
@@ -577,9 +579,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl,
- Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getNode(
ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth, dl,
@@ -793,9 +793,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl,
- Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
@@ -824,9 +822,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl,
- Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
@@ -1013,6 +1009,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These pseudo-ops are the same as the other STRICT_ ops except
+ // they are registered with setOperationAction() using the input type
+ // instead of the output type.
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType());
+ break;
case ISD::SIGN_EXTEND_INREG: {
EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
@@ -1023,11 +1031,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOperand(2).getValueType());
break;
case ISD::SELECT_CC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
case ISD::SETCC:
case ISD::BR_CC: {
unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::STRICT_FSETCC ? 3 :
+ Node->getOpcode() == ISD::STRICT_FSETCCS ? 3 :
Node->getOpcode() == ISD::SETCC ? 2 : 1;
- unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 :
+ Node->getOpcode() == ISD::STRICT_FSETCC ? 1 :
+ Node->getOpcode() == ISD::STRICT_FSETCCS ? 1 : 0;
MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
@@ -1105,16 +1119,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
}
break;
- case ISD::STRICT_LRINT:
- case ISD::STRICT_LLRINT:
- case ISD::STRICT_LROUND:
- case ISD::STRICT_LLROUND:
- // These pseudo-ops are the same as the other STRICT_ ops except
- // they are registered with setOperationAction() using the input type
- // instead of the output type.
- Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
- Node->getOperand(1).getValueType());
- break;
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -1125,7 +1129,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
- case ISD::UMULFIXSAT: {
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -1408,7 +1414,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
unsigned Offset = TypeByteSize*i;
SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+ Idx = DAG.getMemBasePlusOffset(FIPtr, Idx, dl);
// If the destination vector element type is narrower than the source
// element type, only store the bits necessary.
@@ -1471,8 +1477,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
} else {
// Advance the pointer so that the loaded byte will contain the sign bit.
unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1;
- IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(ByteOffset, DL, StackPtr.getValueType()));
+ IntPtr = DAG.getMemBasePlusOffset(StackPtr, ByteOffset, DL);
State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
ByteOffset);
}
@@ -1629,10 +1634,9 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
/// of a true/false result.
///
/// \returns true if the SetCC has been legalized, false if it hasn't.
-bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
- SDValue &RHS, SDValue &CC,
- bool &NeedInvert,
- const SDLoc &dl) {
+bool SelectionDAGLegalize::LegalizeSetCCCondCode(
+ EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert,
+ const SDLoc &dl, SDValue &Chain, bool IsSignaling) {
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
@@ -1650,7 +1654,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
}
// Swapping operands didn't work. Try inverting the condition.
bool NeedSwap = false;
- InvCC = getSetCCInverse(CCCode, OpVT.isInteger());
+ InvCC = getSetCCInverse(CCCode, OpVT);
if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
// If inverting the condition is not enough, try swapping operands
// on top of it.
@@ -1715,13 +1719,16 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
// If we aren't the ordered or unordered operation,
// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
- SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
} else {
// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
- SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
- SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
}
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
+ SetCC2.getValue(1));
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
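
As a concrete instance of the (LHS CC1 LHS) Opc (RHS CC2 RHS) pattern mentioned above: SETO (ordered) holds exactly when neither operand is NaN, and a float is not NaN iff it compares ordered-equal to itself (a sketch, not patch code):

    bool set_o(float lhs, float rhs) {
      return (lhs == lhs) && (rhs == rhs); // CC1 = CC2 = SETOEQ, Opc = AND
    }
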
@@ -2077,52 +2084,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
return CallInfo.first;
}
-// Expand a node into a call to a libcall. Similar to
-// ExpandLibCall except that the first operand is the in-chain.
-std::pair<SDValue, SDValue>
-SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
- SDNode *Node,
- bool isSigned) {
- SDValue InChain = Node->getOperand(0);
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
- EVT ArgVT = Node->getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Node->getOperand(i);
- Entry.Ty = ArgTy;
- Entry.IsSExt = isSigned;
- Entry.IsZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy(DAG.getDataLayout()));
-
- Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node))
- .setChain(InChain)
- .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
- std::move(Args))
- .setSExtResult(isSigned)
- .setZExtResult(!isSigned);
-
- std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo;
-}
-
-SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
- RTLIB::Libcall Call_F32,
- RTLIB::Libcall Call_F64,
- RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128) {
- if (Node->isStrictFPOpcode())
- Node = DAG.mutateStrictFPToFP(Node);
-
+void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results) {
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
@@ -2132,7 +2100,22 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
case MVT::f128: LC = Call_F128; break;
case MVT::ppcf128: LC = Call_PPCF128; break;
}
- return ExpandLibCall(LC, Node, false);
+
+ if (Node->isStrictFPOpcode()) {
+ EVT RetVT = Node->getValueType(0);
+ SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
+ TargetLowering::MakeLibCallOptions CallOptions;
+ // FIXME: This doesn't support tail calls.
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
+ Ops, CallOptions,
+ SDLoc(Node),
+ Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ } else {
+ SDValue Tmp = ExpandLibCall(LC, Node, false);
+ Results.push_back(Tmp);
+ }
}
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
@@ -2155,17 +2138,17 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
/// Expand the node to a libcall based on first argument type (for instance
/// lround and its variant).
-SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
- RTLIB::Libcall Call_F32,
- RTLIB::Libcall Call_F64,
- RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128) {
- if (Node->isStrictFPOpcode())
- Node = DAG.mutateStrictFPToFP(Node);
+void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT InVT = Node->getOperand(Node->isStrictFPOpcode() ? 1 : 0).getValueType();
RTLIB::Libcall LC;
- switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) {
+ switch (InVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
@@ -2174,7 +2157,21 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
case MVT::ppcf128: LC = Call_PPCF128; break;
}
- return ExpandLibCall(LC, Node, false);
+ if (Node->isStrictFPOpcode()) {
+ EVT RetVT = Node->getValueType(0);
+ SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
+ TargetLowering::MakeLibCallOptions CallOptions;
+ // FIXME: This doesn't support tail calls.
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
+ Ops, CallOptions,
+ SDLoc(Node),
+ Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ } else {
+ SDValue Tmp = ExpandLibCall(LC, Node, false);
+ Results.push_back(Tmp);
+ }
}
/// Issue libcalls to __{u}divmod to compute div / rem pairs.
@@ -2344,9 +2341,14 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
/// legal for the target.
-SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
- EVT DestVT,
- const SDLoc &dl) {
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
+ SDValue &Chain) {
+ bool isSigned = (Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP);
+ EVT DestVT = Node->getValueType(0);
+ SDLoc dl(Node);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Op0 = Node->getOperand(OpNo);
EVT SrcVT = Op0.getValueType();
// TODO: Should any fast-math-flags be set for the created nodes?
@@ -2393,16 +2395,39 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
BitsToDouble(0x4330000080000000ULL) :
BitsToDouble(0x4330000000000000ULL),
dl, MVT::f64);
- // subtract the bias
- SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
- // final result
- SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
+ // Subtract the bias and get the final result.
+ SDValue Sub;
+ SDValue Result;
+ if (Node->isStrictFPOpcode()) {
+ Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
+ {Node->getOperand(0), Load, Bias});
+ Chain = Sub.getValue(1);
+ if (DestVT != Sub.getValueType()) {
+ std::pair<SDValue, SDValue> ResultPair;
+ ResultPair =
+ DAG.getStrictFPExtendOrRound(Sub, Chain, dl, DestVT);
+ Result = ResultPair.first;
+ Chain = ResultPair.second;
+ }
+ else
+ Result = Sub;
+ } else {
+ Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
+ }
return Result;
}
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
// Code below here assumes !isSigned without checking again.
+ // FIXME: This can produce slightly incorrect results. See details in
+ // FIXME: https://reviews.llvm.org/D69275
- SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+ SDValue Tmp1;
+ if (Node->isStrictFPOpcode()) {
+ Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other },
+ { Node->getOperand(0), Op0 });
+ } else
+ Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
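
The magic constants earlier in this hunk are the classic bias trick: 0x4330000000000000 is the double 2^52, and OR-ing a 32-bit payload into its low word yields exactly 2^52 + x, so subtracting the bias recovers x as a double. A hedged sketch of the unsigned case (the 0x4330000080000000 variant adds 2^31 for the sign-flipped signed path):

    #include <cstdint>
    #include <cstring>

    double u32_to_f64(uint32_t x) {
      uint64_t bits = 0x4330000000000000ULL | x; // double with value 2^52 + x
      double d;
      std::memcpy(&d, &bits, sizeof(d));
      return d - 0x1p52;                         // subtract the bias -> (double)x
    }
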
@@ -2448,6 +2473,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
FudgeInReg = Handle.getValue();
}
+ if (Node->isStrictFPOpcode()) {
+ SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other },
+ { Tmp1.getValue(1), Tmp1, FudgeInReg });
+ Chain = Result.getValue(1);
+ return Result;
+ }
+
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
}
@@ -2456,9 +2488,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
/// operation that takes a larger input.
-SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
- bool isSigned,
- const SDLoc &dl) {
+void SelectionDAGLegalize::PromoteLegalINT_TO_FP(
+ SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) {
+ bool IsStrict = N->isStrictFPOpcode();
+ bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ EVT DestVT = N->getValueType(0);
+ SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
+ unsigned UIntOp = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP;
+ unsigned SIntOp = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP;
+
// First step, figure out the appropriate *INT_TO_FP operation to use.
EVT NewInTy = LegalOp.getValueType();
@@ -2470,15 +2509,16 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
assert(NewInTy.isInteger() && "Ran out of possibilities!");
// If the target supports SINT_TO_FP of this type, use it.
- if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
- OpToUse = ISD::SINT_TO_FP;
+ if (TLI.isOperationLegalOrCustom(SIntOp, NewInTy)) {
+ OpToUse = SIntOp;
break;
}
- if (isSigned) continue;
+ if (IsSigned)
+ continue;
// If the target supports UINT_TO_FP of this type, use it.
- if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
- OpToUse = ISD::UINT_TO_FP;
+ if (TLI.isOperationLegalOrCustom(UIntOp, NewInTy)) {
+ OpToUse = UIntOp;
break;
}
@@ -2487,9 +2527,21 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
// Okay, we found the operation and type to use. Zero extend our input to the
// desired type then run the operation on it.
- return DAG.getNode(OpToUse, dl, DestVT,
- DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
- dl, NewInTy, LegalOp));
+ if (IsStrict) {
+ SDValue Res =
+ DAG.getNode(OpToUse, dl, {DestVT, MVT::Other},
+ {N->getOperand(0),
+ DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp)});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ return;
+ }
+
+ Results.push_back(
+ DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp)));
}
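
A scalar sketch of the promotion just implemented (assumed types): if i16 -> float has no legal *INT_TO_FP but i32 -> float does, extending first is exact, so the promoted operation yields the same value.

    #include <cstdint>

    float i16_to_float(int16_t x) {
      int32_t wide = (int32_t)x; // SIGN_EXTEND (ZERO_EXTEND for unsigned input)
      return (float)wide;        // the legal, wider SINT_TO_FP
    }
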
/// This function is responsible for legalizing a
@@ -2497,9 +2549,13 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
/// operation that returns a larger result.
-SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
- bool isSigned,
- const SDLoc &dl) {
+void SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
+ SmallVectorImpl<SDValue> &Results) {
+ bool IsStrict = N->isStrictFPOpcode();
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
+ EVT DestVT = N->getValueType(0);
+ SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
// First step, figure out the appropriate FP_TO*INT operation to use.
EVT NewOutTy = DestVT;
@@ -2512,26 +2568,32 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
// A larger signed type can hold all unsigned values of the requested type,
// so using FP_TO_SINT is valid
- if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
- OpToUse = ISD::FP_TO_SINT;
+ OpToUse = IsStrict ? ISD::STRICT_FP_TO_SINT : ISD::FP_TO_SINT;
+ if (TLI.isOperationLegalOrCustom(OpToUse, NewOutTy))
break;
- }
// However, if the value may be < 0.0, we *must* use some FP_TO_SINT.
- if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
- OpToUse = ISD::FP_TO_UINT;
+ OpToUse = IsStrict ? ISD::STRICT_FP_TO_UINT : ISD::FP_TO_UINT;
+ if (!IsSigned && TLI.isOperationLegalOrCustom(OpToUse, NewOutTy))
break;
- }
// Otherwise, try a larger type.
}
// Okay, we found the operation and type to use.
- SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+ SDValue Operation;
+ if (IsStrict) {
+ SDVTList VTs = DAG.getVTList(NewOutTy, MVT::Other);
+ Operation = DAG.getNode(OpToUse, dl, VTs, N->getOperand(0), LegalOp);
+ } else
+ Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
// Truncate the result of the extended FP_TO_*INT operation to the desired
// size.
- return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+ Results.push_back(Trunc);
+ if (IsStrict)
+ Results.push_back(Operation.getValue(1));
}
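
And the mirror-image sketch for FP_TO_*INT promotion (assumed types): every u16 value fits in i32, so FP_TO_SINT at the wider width plus a truncate is a valid lowering of FP_TO_UINT, which is what the loop above relies on.

    #include <cstdint>

    uint16_t float_to_u16(float f) {
      int32_t wide = (int32_t)f; // FP_TO_SINT at the promoted width
      return (uint16_t)wide;     // TRUNCATE to the requested type
    }
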
/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts.
@@ -2812,12 +2874,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::STRICT_FP_ROUND:
- // This expansion does not honor the "strict" properties anyway,
- // so prefer falling back to the non-strict operation if legal.
+ // When strict mode is enforced, we can't use expansion because it
+ // does not honor the "strict" properties. Only a libcall is allowed.
+ if (TLI.isStrictFPEnabled())
+ break;
+ // We might as well mutate to FP_ROUND when the FP_ROUND operation is
+ // legal, since it is more efficient than the stack-based fallback.
if (TLI.getStrictFPOperationAction(Node->getOpcode(),
Node->getValueType(0))
== TargetLowering::Legal)
break;
+ // We fall back to the stack-based expansion when the FP_ROUND
+ // operation isn't available.
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getValueType(0),
Node->getValueType(0), dl, Node->getOperand(0));
@@ -2832,12 +2900,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
case ISD::STRICT_FP_EXTEND:
- // This expansion does not honor the "strict" properties anyway,
- // so prefer falling back to the non-strict operation if legal.
+ // When strict mode is enforced, we can't use expansion because it
+ // does not honor the "strict" properties. Only a libcall is allowed.
+ if (TLI.isStrictFPEnabled())
+ break;
+ // We might as well mutate to FP_EXTEND when the FP_EXTEND operation is
+ // legal, since it is more efficient than the stack-based fallback.
if (TLI.getStrictFPOperationAction(Node->getOpcode(),
Node->getValueType(0))
== TargetLowering::Legal)
break;
+ // We fall back to the stack-based expansion when the FP_EXTEND
+ // operation isn't available.
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getOperand(1).getValueType(),
Node->getValueType(0), dl, Node->getOperand(0));
@@ -2883,15 +2957,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::UINT_TO_FP:
- if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
+ case ISD::STRICT_UINT_TO_FP:
+ if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) {
Results.push_back(Tmp1);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Tmp2);
break;
}
LLVM_FALLTHROUGH;
case ISD::SINT_TO_FP:
- Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
- Node->getOperand(0), Node->getValueType(0), dl);
+ case ISD::STRICT_SINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2);
Results.push_back(Tmp1);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Tmp2);
break;
case ISD::FP_TO_SINT:
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
@@ -3340,6 +3419,24 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::UMULFIXSAT:
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX:
+ if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node),
+ Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getConstantOperandVal(2),
+ DAG)) {
+ Results.push_back(V);
+ break;
+ }
+ // FIXME: We might want to retry here with a wider type if we fail, if that
+ // type is legal.
+ // FIXME: Technically, so long as we only have sdivfixes where BW+Scale is
+ // <= 128 (which is the case for all of the default Embedded-C types),
+ // we will only get here with types and scales that we could always expand
+ // if we were allowed to generate libcalls to division functions of illegal
+ // type. But we cannot do that.
+ llvm_unreachable("Cannot expand DIVFIX!");
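
For orientation, a sketch of the semantics expandFixedPointDiv implements under the usual fixed-point definition (hedged reference code, with a widened intermediate so the pre-shift cannot overflow):

    #include <cstdint>

    // Fixed-point division with binary scale S: q = (a << S) / b.
    int32_t sdivfix_ref(int32_t a, int32_t b, unsigned scale) {
      int64_t wide = (int64_t)a << scale; // pre-scale the dividend
      return (int32_t)(wide / b);         // plain integer division
    }
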
case ISD::ADDCARRY:
case ISD::SUBCARRY: {
SDValue LHS = Node->getOperand(0);
@@ -3503,12 +3600,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
Results.push_back(Tmp1);
break;
- case ISD::SETCC: {
- Tmp1 = Node->getOperand(0);
- Tmp2 = Node->getOperand(1);
- Tmp3 = Node->getOperand(2);
- bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2,
- Tmp3, NeedInvert, dl);
+ case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ bool IsStrict = Node->getOpcode() != ISD::SETCC;
+ bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ unsigned Offset = IsStrict ? 1 : 0;
+ Tmp1 = Node->getOperand(0 + Offset);
+ Tmp2 = Node->getOperand(1 + Offset);
+ Tmp3 = Node->getOperand(2 + Offset);
+ bool Legalized =
+ LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3,
+ NeedInvert, dl, Chain, IsSignaling);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
@@ -3523,9 +3627,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0));
Results.push_back(Tmp1);
+ if (IsStrict)
+ Results.push_back(Chain);
+
break;
}
+ // FIXME: It seems Legalized is false iff CCCode is Legal. It is unclear
+ // whether this code is useful for strict nodes.
+ assert(!IsStrict && "Don't know how to expand for strict nodes.");
+
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
@@ -3548,11 +3659,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::SELECT_CC: {
+ // TODO: need to add STRICT_SELECT_CC and STRICT_SELECT_CCS
Tmp1 = Node->getOperand(0); // LHS
Tmp2 = Node->getOperand(1); // RHS
Tmp3 = Node->getOperand(2); // True
Tmp4 = Node->getOperand(3); // False
EVT VT = Node->getValueType(0);
+ SDValue Chain;
SDValue CC = Node->getOperand(4);
ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
@@ -3574,8 +3687,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Try to legalize by inverting the condition. This is for targets that
// might support an ordered version of a condition, but not the unordered
// version (or vice versa).
- ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp,
- Tmp1.getValueType().isInteger());
+ ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType());
if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) {
// Use the new condition code and swap true and false
Legalized = true;
@@ -3595,9 +3707,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
if (!Legalized) {
- Legalized = LegalizeSetCCCondCode(
- getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert,
- dl);
+ Legalized = LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, CC, NeedInvert, dl, Chain);
assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
@@ -3623,13 +3734,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::BR_CC: {
+ // TODO: need to add STRICT_BR_CC and STRICT_BR_CCS
+ SDValue Chain;
Tmp1 = Node->getOperand(0); // Chain
Tmp2 = Node->getOperand(2); // LHS
Tmp3 = Node->getOperand(3); // RHS
Tmp4 = Node->getOperand(1); // CC
- bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
- Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl);
+ bool Legalized =
+ LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), Tmp2,
+ Tmp3, Tmp4, NeedInvert, dl, Chain);
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
@@ -3677,7 +3791,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars);
- ReplaceNode(SDValue(Node, 0), Result);
+ Results.push_back(Result);
break;
}
case ISD::VECREDUCE_FADD:
@@ -3705,10 +3819,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
// FIXME: Custom lowering for these operations shouldn't return null!
- break;
+ // Return true so that we don't call ConvertNodeToLibcall, which wouldn't
+ // do anything for these nodes either.
+ return true;
}
- if (Results.empty() && Node->isStrictFPOpcode()) {
+ if (!TLI.isStrictFPEnabled() && Results.empty() && Node->isStrictFPOpcode()) {
// FIXME: We were asked to expand a strict floating-point operation,
// but there is currently no expansion implemented that would preserve
// the "strict" properties. For now, we just fall back to the non-strict
@@ -3793,7 +3909,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
- std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
+ EVT RetVT = Node->getValueType(0);
+ SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
+ Ops, CallOptions,
+ SDLoc(Node),
+ Node->getOperand(0));
Results.push_back(Tmp.first);
Results.push_back(Tmp.second);
break;
@@ -3815,38 +3937,38 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
}
case ISD::FMINNUM:
case ISD::STRICT_FMINNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
- RTLIB::FMIN_F80, RTLIB::FMIN_F128,
- RTLIB::FMIN_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128, Results);
break;
case ISD::FMAXNUM:
case ISD::STRICT_FMAXNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
- RTLIB::FMAX_F80, RTLIB::FMAX_F128,
- RTLIB::FMAX_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128, Results);
break;
case ISD::FSQRT:
case ISD::STRICT_FSQRT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_F128,
- RTLIB::SQRT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128, Results);
break;
case ISD::FCBRT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
- RTLIB::CBRT_F80, RTLIB::CBRT_F128,
- RTLIB::CBRT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80, RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128, Results);
break;
case ISD::FSIN:
case ISD::STRICT_FSIN:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_F128,
- RTLIB::SIN_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128, Results);
break;
case ISD::FCOS:
case ISD::STRICT_FCOS:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_F128,
- RTLIB::COS_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128, Results);
break;
case ISD::FSINCOS:
// Expand into sincos libcall.
@@ -3855,181 +3977,204 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::FLOG:
case ISD::STRICT_FLOG:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32,
- RTLIB::LOG_FINITE_F64,
- RTLIB::LOG_FINITE_F80,
- RTLIB::LOG_FINITE_F128,
- RTLIB::LOG_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32,
+ RTLIB::LOG_FINITE_F64,
+ RTLIB::LOG_FINITE_F80,
+ RTLIB::LOG_FINITE_F128,
+ RTLIB::LOG_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128, Results);
break;
case ISD::FLOG2:
case ISD::STRICT_FLOG2:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32,
- RTLIB::LOG2_FINITE_F64,
- RTLIB::LOG2_FINITE_F80,
- RTLIB::LOG2_FINITE_F128,
- RTLIB::LOG2_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32,
+ RTLIB::LOG2_FINITE_F64,
+ RTLIB::LOG2_FINITE_F80,
+ RTLIB::LOG2_FINITE_F128,
+ RTLIB::LOG2_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128, Results);
break;
case ISD::FLOG10:
case ISD::STRICT_FLOG10:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32,
- RTLIB::LOG10_FINITE_F64,
- RTLIB::LOG10_FINITE_F80,
- RTLIB::LOG10_FINITE_F128,
- RTLIB::LOG10_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32,
+ RTLIB::LOG10_FINITE_F64,
+ RTLIB::LOG10_FINITE_F80,
+ RTLIB::LOG10_FINITE_F128,
+ RTLIB::LOG10_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128, Results);
break;
case ISD::FEXP:
case ISD::STRICT_FEXP:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32,
- RTLIB::EXP_FINITE_F64,
- RTLIB::EXP_FINITE_F80,
- RTLIB::EXP_FINITE_F128,
- RTLIB::EXP_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32,
+ RTLIB::EXP_FINITE_F64,
+ RTLIB::EXP_FINITE_F80,
+ RTLIB::EXP_FINITE_F128,
+ RTLIB::EXP_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128, Results);
break;
case ISD::FEXP2:
case ISD::STRICT_FEXP2:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32,
- RTLIB::EXP2_FINITE_F64,
- RTLIB::EXP2_FINITE_F80,
- RTLIB::EXP2_FINITE_F128,
- RTLIB::EXP2_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32,
+ RTLIB::EXP2_FINITE_F64,
+ RTLIB::EXP2_FINITE_F80,
+ RTLIB::EXP2_FINITE_F128,
+ RTLIB::EXP2_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128, Results);
break;
case ISD::FTRUNC:
case ISD::STRICT_FTRUNC:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
- RTLIB::TRUNC_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128, Results);
break;
case ISD::FFLOOR:
case ISD::STRICT_FFLOOR:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
- RTLIB::FLOOR_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128, Results);
break;
case ISD::FCEIL:
case ISD::STRICT_FCEIL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_F128,
- RTLIB::CEIL_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128, Results);
break;
case ISD::FRINT:
case ISD::STRICT_FRINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_F128,
- RTLIB::RINT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128, Results);
break;
case ISD::FNEARBYINT:
case ISD::STRICT_FNEARBYINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_F128,
- RTLIB::NEARBYINT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128, Results);
break;
case ISD::FROUND:
case ISD::STRICT_FROUND:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
- RTLIB::ROUND_F64,
- RTLIB::ROUND_F80,
- RTLIB::ROUND_F128,
- RTLIB::ROUND_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128, Results);
break;
case ISD::FPOWI:
- case ISD::STRICT_FPOWI:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128));
+ case ISD::STRICT_FPOWI: {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::POWI_F32; break;
+ case MVT::f64: LC = RTLIB::POWI_F64; break;
+ case MVT::f80: LC = RTLIB::POWI_F80; break;
+ case MVT::f128: LC = RTLIB::POWI_F128; break;
+ case MVT::ppcf128: LC = RTLIB::POWI_PPCF128; break;
+ }
+ if (!TLI.getLibcallName(LC)) {
+ // Some targets don't have a powi libcall; use pow instead.
+ SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node),
+ Node->getValueType(0),
+ Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node),
+ Node->getValueType(0), Node->getOperand(0),
+ Exponent));
+ break;
+ }
+ ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128, Results);
break;
+ }
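
The pow fallback above is, in scalar terms (a sketch; the exponent conversion mirrors the SINT_TO_FP node created there):

    #include <cmath>

    double powi_fallback(double x, int n) {
      return std::pow(x, (double)n); // powi(x, n) lowered as pow(x, (double)n)
    }
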
case ISD::FPOW:
case ISD::STRICT_FPOW:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32,
- RTLIB::POW_FINITE_F64,
- RTLIB::POW_FINITE_F80,
- RTLIB::POW_FINITE_F128,
- RTLIB::POW_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32,
+ RTLIB::POW_FINITE_F64,
+ RTLIB::POW_FINITE_F80,
+ RTLIB::POW_FINITE_F128,
+ RTLIB::POW_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_F128,
- RTLIB::POW_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128, Results);
break;
case ISD::LROUND:
case ISD::STRICT_LROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
- RTLIB::LROUND_F64, RTLIB::LROUND_F80,
- RTLIB::LROUND_F128,
- RTLIB::LROUND_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128, Results);
break;
case ISD::LLROUND:
case ISD::STRICT_LLROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
- RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
- RTLIB::LLROUND_F128,
- RTLIB::LLROUND_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128, Results);
break;
case ISD::LRINT:
case ISD::STRICT_LRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
- RTLIB::LRINT_F64, RTLIB::LRINT_F80,
- RTLIB::LRINT_F128,
- RTLIB::LRINT_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128, Results);
break;
case ISD::LLRINT:
case ISD::STRICT_LLRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
- RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
- RTLIB::LLRINT_F128,
- RTLIB::LLRINT_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128, Results);
break;
case ISD::FDIV:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
- RTLIB::DIV_F80, RTLIB::DIV_F128,
- RTLIB::DIV_PPCF128));
+ case ISD::STRICT_FDIV:
+ ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128, Results);
break;
case ISD::FREM:
case ISD::STRICT_FREM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
- RTLIB::REM_F80, RTLIB::REM_F128,
- RTLIB::REM_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128, Results);
break;
case ISD::FMA:
case ISD::STRICT_FMA:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
- RTLIB::FMA_F80, RTLIB::FMA_F128,
- RTLIB::FMA_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128, Results);
break;
case ISD::FADD:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_F128,
- RTLIB::ADD_PPCF128));
+ case ISD::STRICT_FADD:
+ ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128, Results);
break;
case ISD::FMUL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
- RTLIB::MUL_F80, RTLIB::MUL_F128,
- RTLIB::MUL_PPCF128));
+ case ISD::STRICT_FMUL:
+ ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128, Results);
break;
case ISD::FP16_TO_FP:
if (Node->getValueType(0) == MVT::f32) {
@@ -4044,9 +4189,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
}
case ISD::FSUB:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
- RTLIB::SUB_F80, RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128));
+ case ISD::STRICT_FSUB:
+ ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128, Results);
break;
case ISD::SREM:
Results.push_back(ExpandIntLibCall(Node, true,
@@ -4129,6 +4275,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
OVT = Node->getOperand(0).getSimpleValueType();
}
+ if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_SINT_TO_FP)
+ OVT = Node->getOperand(1).getSimpleValueType();
if (Node->getOpcode() == ISD::BR_CC)
OVT = Node->getOperand(2).getSimpleValueType();
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
@@ -4177,16 +4326,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
}
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
- Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
- Node->getOpcode() == ISD::FP_TO_SINT, dl);
- Results.push_back(Tmp1);
+ case ISD::STRICT_FP_TO_SINT:
+ PromoteLegalFP_TO_INT(Node, dl, Results);
break;
case ISD::UINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
- Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
- Node->getOpcode() == ISD::SINT_TO_FP, dl);
- Results.push_back(Tmp1);
+ case ISD::STRICT_SINT_TO_FP:
+ PromoteLegalINT_TO_FP(Node, dl, Results);
break;
case ISD::VAARG: {
SDValue Chain = Node->getOperand(0); // Get the chain.
@@ -4358,6 +4507,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FPOW:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(2)});
+ Tmp3 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1),
+ Tmp2.getValue(1));
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp3, Tmp1, Tmp2});
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp1.getValue(1), Tmp1, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp1.getValue(1));
+ break;
case ISD::FMA:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
@@ -4404,6 +4568,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp2, DAG.getIntPtrConstant(0, dl)));
break;
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FEXP:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp1.getValue(1), Tmp1});
+ Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp3);
+ Results.push_back(Tmp3.getValue(1));
+ break;
case ISD::BUILD_VECTOR: {
MVT EltVT = OVT.getVectorElementType();
MVT NewEltVT = NVT.getVectorElementType();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 72d052473f11..f191160dee4f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -65,39 +65,68 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_VECTOR_ELT:
R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::STRICT_FMINNUM:
case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
+ case ISD::STRICT_FMAXNUM:
case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
+ case ISD::STRICT_FADD:
case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break;
+ case ISD::STRICT_FCEIL:
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::STRICT_FCOS:
case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::STRICT_FDIV:
case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::STRICT_FEXP:
case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::STRICT_FEXP2:
case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::STRICT_FFLOOR:
case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::STRICT_FLOG:
case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::STRICT_FLOG2:
case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::STRICT_FLOG10:
case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::STRICT_FMA:
case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
+ case ISD::STRICT_FMUL:
case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::STRICT_FNEARBYINT:
case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
+ case ISD::STRICT_FPOW:
case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::STRICT_FPOWI:
case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::STRICT_FREM:
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::STRICT_FRINT:
case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::STRICT_FROUND:
case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break;
+ case ISD::STRICT_FSIN:
case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::STRICT_FSQRT:
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::STRICT_FSUB:
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
@@ -111,6 +140,46 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
}
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC) {
+ bool IsStrict = N->isStrictFPOpcode();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Offset = IsStrict ? 1 : 0;
+ assert(N->getNumOperands() == (1 + Offset) &&
+ "Unexpected number of operands!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset));
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpVT = N->getOperand(0 + Offset).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) {
+ bool IsStrict = N->isStrictFPOpcode();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Offset = IsStrict ? 1 : 0;
+ assert(N->getNumOperands() == (2 + Offset) &&
+ "Unexpected number of operands!");
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+ GetSoftenedFloat(N->getOperand(1 + Offset)) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
+ N->getOperand(1 + Offset).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
+}
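// makeLibCall returns a {result, out-chain} pair; for non-strict nodes the
// Chain passed in is a null SDValue and Tmp.second simply goes unused. With
// these two helpers, the per-opcode wrappers below shrink to just selecting
// the RTLIB entry for the value type.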
+
SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
@@ -174,69 +243,48 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::FMIN_F32,
- RTLIB::FMIN_F64,
- RTLIB::FMIN_F80,
- RTLIB::FMIN_F128,
- RTLIB::FMIN_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32,
+ RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80,
+ RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::FMAX_F32,
- RTLIB::FMAX_F64,
- RTLIB::FMAX_F80,
- RTLIB::FMAX_F128,
- RTLIB::FMAX_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32,
+ RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80,
+ RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::ADD_F32,
- RTLIB::ADD_F64,
- RTLIB::ADD_F80,
- RTLIB::ADD_F128,
- RTLIB::ADD_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::CBRT_F32,
+ RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80,
+ RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::CEIL_F32,
- RTLIB::CEIL_F64,
- RTLIB::CEIL_F80,
- RTLIB::CEIL_F128,
- RTLIB::CEIL_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
@@ -288,231 +336,170 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::COS_F32,
- RTLIB::COS_F64,
- RTLIB::COS_F80,
- RTLIB::COS_F128,
- RTLIB::COS_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::DIV_F32,
- RTLIB::DIV_F64,
- RTLIB::DIV_F80,
- RTLIB::DIV_F128,
- RTLIB::DIV_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::EXP_F32,
- RTLIB::EXP_F64,
- RTLIB::EXP_F80,
- RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::EXP2_F32,
- RTLIB::EXP2_F64,
- RTLIB::EXP2_F80,
- RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::FLOOR_F32,
- RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80,
- RTLIB::FLOOR_F128,
- RTLIB::FLOOR_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::LOG_F32,
- RTLIB::LOG_F64,
- RTLIB::LOG_F80,
- RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::LOG2_F32,
- RTLIB::LOG2_F64,
- RTLIB::LOG2_F80,
- RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::LOG10_F32,
- RTLIB::LOG10_F64,
- RTLIB::LOG10_F80,
- RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)),
- GetSoftenedFloat(N->getOperand(2)) };
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+ GetSoftenedFloat(N->getOperand(1 + Offset)),
+ GetSoftenedFloat(N->getOperand(2 + Offset)) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[3] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType(),
- N->getOperand(2).getValueType() };
+ EVT OpsVT[3] = { N->getOperand(0 + Offset).getValueType(),
+ N->getOperand(1 + Offset).getValueType(),
+ N->getOperand(2 + Offset).getValueType() };
CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::FMA_F32,
- RTLIB::FMA_F64,
- RTLIB::FMA_F80,
- RTLIB::FMA_F128,
- RTLIB::FMA_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG,
+ GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N), Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
}
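// FMA stays open-coded rather than using the helpers above because
// SoftenFloatRes_Unary/Binary only cover one- and two-operand libcalls,
// while fma takes three softened arguments.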
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::MUL_F32,
- RTLIB::MUL_F64,
- RTLIB::MUL_F80,
- RTLIB::MUL_F128,
- RTLIB::MUL_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_F128,
- RTLIB::NEARBYINT_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
- EVT FloatVT = N->getValueType(0);
- if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) {
- // Expand Y = FNEG(X) -> Y = X ^ sign mask
- APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
- return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)),
- DAG.getConstant(SignMask, dl, NVT));
- }
-
- // Expand Y = FNEG(X) -> Y = SUB -0.0, X
- SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)),
- GetSoftenedFloat(N->getOperand(0)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128),
- NVT, Ops, CallOptions, dl).first;
+ // Expand Y = FNEG(X) -> Y = X ^ sign mask
+ APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)),
+ DAG.getConstant(SignMask, dl, NVT));
}
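// A worked instance of the sign-mask trick, assuming f32 softened to i32:
//   bits(1.0f) = 0x3F800000, SignMask = 0x80000000
//   0x3F800000 ^ 0x80000000 = 0xBF800000 = bits(-1.0f)
// i.e. soft-float negation never needs a libcall.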
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = N->getOperand(0);
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
- // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
- // entirely possible for both f16 and f32 to be legal, so use the fully
- // hard-float FP_EXTEND rather than FP16_TO_FP.
- if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
- Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
- if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat)
- AddToWorklist(Op.getNode());
- }
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
Op = GetPromotedFloat(Op);
// If the promotion did the FP_EXTEND to the destination type for us,
// there's nothing left to do here.
- if (Op.getValueType() == N->getValueType(0)) {
+ if (Op.getValueType() == N->getValueType(0))
return BitConvertToInteger(Op);
+ }
+
+ // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
+ // entirely possible for both f16 and f32 to be legal, so use the fully
+ // hard-float FP_EXTEND rather than FP16_TO_FP.
+ if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
+ if (IsStrict) {
+ Op = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N),
+ { MVT::f32, MVT::Other }, { Chain, Op });
+ Chain = Op.getValue(1);
+ } else {
+ Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
}
}
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
+ EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
}
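// Example of the two-stage path, assuming both f16 and f128 are softened:
// there is no direct f16 -> f128 libcall, so the code first builds a
// (STRICT_)FP_EXTEND to f32 and then lets RTLIB::getFPEXT(f32, f128)
// select the remaining conversion libcall.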
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -535,167 +522,127 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = N->getOperand(0);
- if (N->getValueType(0) == MVT::f16) {
- // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a
- // storage-only type get a chance to select things.
- return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op);
- }
-
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
+ EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::POW_F32,
- RTLIB::POW_F64,
- RTLIB::POW_F80,
- RTLIB::POW_F128,
- RTLIB::POW_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
- assert(N->getOperand(1).getValueType() == MVT::i32 &&
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ assert(N->getOperand(1 + Offset).getValueType() == MVT::i32 &&
"Unsupported power type!");
+ RTLIB::Libcall LC = GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128);
+ if (!TLI.getLibcallName(LC)) {
+ // Some targets don't have a powi libcall; use pow instead.
+ // FIXME: Implement this if some target needs it.
+ DAG.getContext()->emitError("Don't know how to soften fpowi to fpow");
+ return DAG.getUNDEF(N->getValueType(0));
+ }
+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+ N->getOperand(1 + Offset) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
+ EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
+ N->getOperand(1 + Offset).getValueType() };
CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::POWI_F32,
- RTLIB::POWI_F64,
- RTLIB::POWI_F80,
- RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
}
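// Note that the i32 exponent (operand 1 past any chain) is passed to the
// libcall as-is; only the floating-point operand goes through
// GetSoftenedFloat.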
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::REM_F32,
- RTLIB::REM_F64,
- RTLIB::REM_F80,
- RTLIB::REM_F128,
- RTLIB::REM_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::RINT_F32,
- RTLIB::RINT_F64,
- RTLIB::RINT_F80,
- RTLIB::RINT_F128,
- RTLIB::RINT_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::ROUND_F32,
- RTLIB::ROUND_F64,
- RTLIB::ROUND_F80,
- RTLIB::ROUND_F128,
- RTLIB::ROUND_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::SIN_F32,
- RTLIB::SIN_F64,
- RTLIB::SIN_F80,
- RTLIB::SIN_F128,
- RTLIB::SIN_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::SQRT_F32,
- RTLIB::SQRT_F64,
- RTLIB::SQRT_F80,
- RTLIB::SQRT_F128,
- RTLIB::SQRT_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- if (N->getValueType(0) == MVT::f16)
- return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
-
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::TRUNC_F32,
- RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80,
- RTLIB::TRUNC_F128,
- RTLIB::TRUNC_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
@@ -715,8 +662,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
- if (N != NewL.getValue(1).getNode())
- ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
return NewL;
}
@@ -771,8 +717,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
- bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
- EVT SVT = N->getOperand(0).getValueType();
+ bool IsStrict = N->isStrictFPOpcode();
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ EVT SVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
EVT RVT = N->getValueType(0);
EVT NVT = EVT();
SDLoc dl(N);
@@ -790,16 +738,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
// Sign/zero extend the argument if the libcall takes a larger type.
SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
- NVT, N->getOperand(0));
+ NVT, N->getOperand(IsStrict ? 1 : 0));
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(Signed);
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, LC,
- TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- Op, CallOptions, dl).first;
+ CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ Op, CallOptions, dl, Chain);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
}
@@ -822,18 +774,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
- case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
+ case ISD::STRICT_LROUND:
case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break;
+ case ISD::STRICT_LLROUND:
case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break;
+ case ISD::STRICT_LRINT:
case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break;
+ case ISD::STRICT_LLRINT:
case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -845,7 +806,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
- "Invalid operand promotion");
+ "Invalid operand softening");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
@@ -857,42 +818,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
- // If we get here, the result must be legal but the source illegal.
- EVT SVT = N->getOperand(0).getValueType();
- EVT RVT = N->getValueType(0);
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
-
- if (SVT == MVT::f16)
- return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op);
-
- RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
-
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
-}
-
-
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
// We actually deal with the partially-softened FP_TO_FP16 node too, which
// returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
- assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16);
+ assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_ROUND);
- EVT SVT = N->getOperand(0).getValueType();
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
EVT RVT = N->getValueType(0);
EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ Op = GetSoftenedFloat(Op);
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
+ CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict) {
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
+ }
+ return Tmp.first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
@@ -920,8 +873,12 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
- bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
- EVT SVT = N->getOperand(0).getValueType();
+ bool IsStrict = N->isStrictFPOpcode();
+ bool Signed = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
+
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
EVT RVT = N->getValueType(0);
EVT NVT = EVT();
SDLoc dl(N);
@@ -937,18 +894,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
NVT = (MVT::SimpleValueType)IntVT;
// The type needs to be big enough to hold the result.
if (NVT.bitsGE(RVT))
- LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT);
+ LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT);
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ Op = GetSoftenedFloat(Op);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first;
+ CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, dl, Chain);
// Truncate the result if the libcall returns a larger type.
- return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
+ SDValue Res = DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first);
+
+ if (!IsStrict)
+ return Res;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
}
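// Worked example, assuming f32 is softened and the result type is i16:
// no f32 -> i16 libcall exists, so the loop above settles on the first
// integer type that can hold the result (i32), and the TRUNCATE narrows
// the i32 return value back to i16.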
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
@@ -976,26 +941,39 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
- SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
- ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Op1 = N->getOperand(IsStrict ? 2 : 1);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
+
+ EVT VT = Op0.getValueType();
+ SDValue NewLHS = GetSoftenedFloat(Op0);
+ SDValue NewRHS = GetSoftenedFloat(Op1);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), Op0, Op1,
+ Chain, N->getOpcode() == ISD::STRICT_FSETCCS);
- EVT VT = NewLHS.getValueType();
- NewLHS = GetSoftenedFloat(NewLHS);
- NewRHS = GetSoftenedFloat(NewRHS);
- TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
- N->getOperand(0), N->getOperand(1));
-
- // If softenSetCCOperands returned a scalar, use it.
- if (!NewRHS.getNode()) {
- assert(NewLHS.getValueType() == N->getValueType(0) &&
- "Unexpected setcc expansion!");
- return NewLHS;
+ // Update N to have the operands specified.
+ if (NewRHS.getNode()) {
+ if (IsStrict)
+ NewLHS = DAG.getNode(ISD::SETCC, SDLoc(N), N->getValueType(0), NewLHS,
+ NewRHS, DAG.getCondCode(CCCode));
+ else
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
}
- // Otherwise, update N to have the operands specified.
- return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
- DAG.getCondCode(CCCode)),
- 0);
+ // Otherwise, softenSetCCOperands returned a scalar, use it.
+ assert((NewRHS.getNode() || NewLHS.getValueType() == N->getValueType(0)) &&
+ "Unexpected setcc expansion!");
+
+ if (IsStrict) {
+ ReplaceValueWith(SDValue(N, 0), NewLHS);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+ return SDValue();
+ }
+ return NewLHS;
}
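// Sketch of the non-strict path, assuming a libgcc-style soft-float
// runtime: (setcc a, b, setolt) on softened f32 typically becomes
//   (setcc (call __ltsf2, a, b), 0, setlt)
// softenSetCCOperands rewrites NewLHS into the comparison libcall result
// and NewRHS into the constant it must be compared against (or into a null
// SDValue when the call result can be used directly).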
SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
@@ -1016,72 +994,99 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
ST->getMemOperand());
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ SDLoc dl(N);
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
- RTLIB::LROUND_F32,
- RTLIB::LROUND_F64,
- RTLIB::LROUND_F80,
- RTLIB::LROUND_F128,
- RTLIB::LROUND_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ EVT LVT = LHS.getValueType();
+ EVT ILVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits());
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+  // Shift right, or any-extend and shift left, to line up the sign bit when
+  // the two operands have different sizes.
+ int SizeDiff = RSize - LSize;
+ if (SizeDiff > 0) {
+ RHS =
+ DAG.getNode(ISD::SRL, dl, RVT, RHS,
+ DAG.getConstant(SizeDiff, dl,
+ TLI.getShiftAmountTy(RHS.getValueType(),
+ DAG.getDataLayout())));
+ RHS = DAG.getNode(ISD::TRUNCATE, dl, ILVT, RHS);
+ } else if (SizeDiff < 0) {
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, RHS);
+ RHS =
+ DAG.getNode(ISD::SHL, dl, ILVT, RHS,
+ DAG.getConstant(-SizeDiff, dl,
+ TLI.getShiftAmountTy(RHS.getValueType(),
+ DAG.getDataLayout())));
+ }
+
+ RHS = DAG.getBitcast(LVT, RHS);
+ return DAG.getNode(ISD::FCOPYSIGN, dl, LVT, LHS, RHS);
}
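// Worked example, assuming LHS is f32 and RHS is f64 (so RHS softens to
// i64): SizeDiff = 64 - 32 = 32, the SRL moves the f64 sign bit from bit
// 63 down to bit 31, the TRUNCATE keeps the low i32, and the bitcast back
// to f32 leaves an operand whose sign bit FCOPYSIGN can use directly.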
-SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset));
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
- RTLIB::LLROUND_F32,
- RTLIB::LLROUND_F64,
- RTLIB::LLROUND_F80,
- RTLIB::LLROUND_F128,
- RTLIB::LLROUND_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ EVT OpVT = N->getOperand(0 + Offset).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict) {
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
+ }
+
+ return Tmp.first;
}
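// Returning a null SDValue in the strict case signals to
// SoftenFloatOperand that both results (value and chain) were already
// replaced via ReplaceValueWith, so the caller must not touch the node
// again.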
-SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64,
+ RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128));
+}
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
- RTLIB::LRINT_F32,
- RTLIB::LRINT_F64,
- RTLIB::LRINT_F80,
- RTLIB::LRINT_F128,
- RTLIB::LRINT_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64,
+ RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128));
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64,
+ RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128));
+}
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
- RTLIB::LLRINT_F32,
- RTLIB::LLRINT_F64,
- RTLIB::LLRINT_F80,
- RTLIB::LLRINT_F128,
- RTLIB::LLRINT_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64,
+ RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128));
}
//===----------------------------------------------------------------------===//
@@ -1122,35 +1127,61 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::STRICT_FMINNUM:
case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
+ case ISD::STRICT_FMAXNUM:
case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break;
+ case ISD::STRICT_FADD:
case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break;
+ case ISD::STRICT_FCEIL:
case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::STRICT_FCOS:
case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::STRICT_FDIV:
case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::STRICT_FEXP:
case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::STRICT_FEXP2:
case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::STRICT_FFLOOR:
case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::STRICT_FLOG:
case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::STRICT_FLOG2:
case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::STRICT_FLOG10:
case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::STRICT_FMA:
case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
+ case ISD::STRICT_FMUL:
case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::STRICT_FNEARBYINT:
case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::STRICT_FPOW:
case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::STRICT_FPOWI:
case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::STRICT_FRINT:
case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::STRICT_FROUND:
case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break;
+ case ISD::STRICT_FSIN:
case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::STRICT_FSQRT:
case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::STRICT_FSUB:
case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ case ISD::STRICT_FREM:
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
}
@@ -1174,6 +1205,36 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
dl, NVT);
}
+void DAGTypeLegalizer::ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi) {
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Op = N->getOperand(0 + Offset);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0),
+ Op, CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ GetPairElements(Tmp.first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi) {
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Ops[] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0),
+ Ops, CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ GetPairElements(Tmp.first, Lo, Hi);
+}
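// In practice the only type that reaches float expansion is ppcf128, so
// Tmp.first is a pair of f64 halves and GetPairElements splits it into
// Lo and Hi.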
+
void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
SDValue &Hi) {
assert(N->getValueType(0) == MVT::ppcf128 &&
@@ -1190,181 +1251,159 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::FMIN_F32, RTLIB::FMIN_F64,
- RTLIB::FMIN_F80, RTLIB::FMIN_F128,
- RTLIB::FMIN_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::FMAX_F32, RTLIB::FMAX_F64,
- RTLIB::FMAX_F80, RTLIB::FMAX_F128,
- RTLIB::FMAX_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_F128,
- RTLIB::ADD_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32,
+ RTLIB::CBRT_F64, RTLIB::CBRT_F80,
+ RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_F128,
- RTLIB::CEIL_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::COPYSIGN_F32,
- RTLIB::COPYSIGN_F64,
- RTLIB::COPYSIGN_F80,
- RTLIB::COPYSIGN_F128,
- RTLIB::COPYSIGN_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::COPYSIGN_F32,
+ RTLIB::COPYSIGN_F64,
+ RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_F128,
+ RTLIB::COPYSIGN_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_F128,
- RTLIB::COS_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- TargetLowering::MakeLibCallOptions CallOptions;
- SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::DIV_F32,
- RTLIB::DIV_F64,
- RTLIB::DIV_F80,
- RTLIB::DIV_F128,
- RTLIB::DIV_PPCF128),
- N->getValueType(0), Ops, CallOptions,
- SDLoc(N)).first;
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
- RTLIB::FLOOR_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Ops[3] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset),
+ N->getOperand(2 + Offset) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
- SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
N->getValueType(0), Ops, CallOptions,
- SDLoc(N)).first;
- GetPairElements(Call, Lo, Hi);
+ SDLoc(N), Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ GetPairElements(Tmp.first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- TargetLowering::MakeLibCallOptions CallOptions;
- SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
RTLIB::MUL_F128,
- RTLIB::MUL_PPCF128),
- N->getValueType(0), Ops, CallOptions,
- SDLoc(N)).first;
- GetPairElements(Call, Lo, Hi);
+ RTLIB::MUL_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_F128,
- RTLIB::NEARBYINT_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
@@ -1379,106 +1418,105 @@ void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
- Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0));
+ bool IsStrict = N->isStrictFPOpcode();
+
+ SDValue Chain;
+ if (IsStrict) {
+ // If the expanded type is the same as the input type, just bypass the node.
+ if (NVT == N->getOperand(1).getValueType()) {
+ Hi = N->getOperand(1);
+ Chain = N->getOperand(0);
+ } else {
+      // Otherwise we need to extend.
+ Hi = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { NVT, MVT::Other },
+ { N->getOperand(0), N->getOperand(1) });
+ Chain = Hi.getValue(1);
+ }
+ } else {
+ Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0));
+ }
+
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
APInt(NVT.getSizeInBits(), 0)), dl, NVT);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Chain);
}
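// Example of the strict bypass, assuming ppcf128 expands to two f64
// halves: extending an f64 operand to ppcf128 needs no conversion at all,
// so the operand itself becomes Hi, the in-chain passes through, and Lo
// is filled with +0.0.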
void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_F128,
- RTLIB::POW_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::REM_F32, RTLIB::REM_F64,
- RTLIB::REM_F80, RTLIB::REM_F128,
- RTLIB::REM_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_F128,
- RTLIB::RINT_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::ROUND_F32,
- RTLIB::ROUND_F64,
- RTLIB::ROUND_F80,
- RTLIB::ROUND_F128,
- RTLIB::ROUND_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_F128,
- RTLIB::SIN_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_F128,
- RTLIB::SQRT_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- TargetLowering::MakeLibCallOptions CallOptions;
- SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128),
- N->getValueType(0), Ops, CallOptions,
- SDLoc(N)).first;
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
- RTLIB::TRUNC_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
@@ -1619,8 +1657,11 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break;
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break;
case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break;
@@ -1709,34 +1750,72 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
- assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ bool IsStrict = N->isStrictFPOpcode();
+ assert(N->getOperand(IsStrict ? 1 : 0).getValueType() == MVT::ppcf128 &&
"Logic only correct for ppcf128!");
SDValue Lo, Hi;
- GetExpandedFloat(N->getOperand(0), Lo, Hi);
- // Round it the rest of the way (e.g. to f32) if needed.
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
- N->getValueType(0), Hi, N->getOperand(1));
+ GetExpandedFloat(N->getOperand(IsStrict ? 1 : 0), Lo, Hi);
+
+ if (!IsStrict)
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+ N->getValueType(0), Hi, N->getOperand(1));
+
+ // Eliminate the node if the input float type is the same as the output float
+ // type.
+ if (Hi.getValueType() == N->getValueType(0)) {
+ // Connect the output chain to the input chain, unlinking the node.
+ ReplaceValueWith(SDValue(N, 1), N->getOperand(0));
+ ReplaceValueWith(SDValue(N, 0), Hi);
+ return SDValue();
+ }
+
+ SDValue Expansion = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+ {N->getValueType(0), MVT::Other},
+ {N->getOperand(0), Hi, N->getOperand(2)});
+ ReplaceValueWith(SDValue(N, 1), Expansion.getValue(1));
+ ReplaceValueWith(SDValue(N, 0), Expansion);
+ return SDValue();
}
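// Convention sketch: for strict nodes both results are rewired through
// ReplaceValueWith (value and chain), and returning an empty SDValue()
// tells the caller that the node has already been replaced.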
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
EVT RVT = N->getValueType(0);
SDLoc dl(N);
- RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
TargetLowering::MakeLibCallOptions CallOptions;
- return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
+ CallOptions, dl, Chain);
+ if (!IsStrict)
+ return Tmp.first;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
EVT RVT = N->getValueType(0);
SDLoc dl(N);
- RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
TargetLowering::MakeLibCallOptions CallOptions;
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
- CallOptions, dl).first;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
+ CallOptions, dl, Chain);
+ if (!IsStrict)
+ return Tmp.first;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
@@ -1800,7 +1879,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ EVT RetVT = N->getOperand(0).getValueType();
TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LROUND_F32,
@@ -1813,7 +1892,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ EVT RetVT = N->getOperand(0).getValueType();
TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLROUND_F32,
@@ -1826,7 +1905,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
EVT RVT = N->getValueType(0);
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ EVT RetVT = N->getOperand(0).getValueType();
TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LRINT_F32,
@@ -1839,7 +1918,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
EVT RVT = N->getValueType(0);
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ EVT RetVT = N->getOperand(0).getValueType();
TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLRINT_F32,
@@ -1963,12 +2042,11 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) {
// code.
SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) {
EVT VT = N->getValueType(0);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Op0 = GetPromotedFloat(N->getOperand(0));
SDValue Op1 = GetPromotedFloat(N->getOperand(1));
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
- return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode);
+ return DAG.getSetCC(SDLoc(N), VT, Op0, Op1, CCCode);
}
@@ -2026,6 +2104,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
// Unary FP Operations
case ISD::FABS:
+ case ISD::FCBRT:
case ISD::FCEIL:
case ISD::FCOS:
case ISD::FEXP:
@@ -2304,7 +2383,6 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
EVT VT = N->getValueType(0);
- EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
AtomicSDNode *AM = cast<AtomicSDNode>(N);
SDLoc SL(N);
@@ -2318,13 +2396,19 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
{ AM->getChain(), AM->getBasePtr(), CastVal },
AM->getMemOperand());
- SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
- NewAtomic);
+ SDValue Result = NewAtomic;
+
+ if (getTypeAction(VT) == TargetLowering::TypePromoteFloat) {
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ Result = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
+ NewAtomic);
+ }
+
// Legalize the chain result by replacing uses of the old value chain with the
// new one
ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1));
- return ResultCast;
+ return Result;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index d5c1b539adbd..0e46f8d68f83 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -75,6 +75,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
@@ -158,6 +160,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX: Res = PromoteIntRes_DIVFIX(N); break;
+
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
case ISD::ATOMIC_LOAD:
@@ -337,8 +342,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
// The input is widened to the same size. Convert to the widened value.
// Make sure that the outgoing value is not a vector, because this would
// make us bitcast between two vectors which are legalized in different ways.
- if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
- return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) {
+ SDValue Res =
+ DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+
+ // For big-endian targets we need to shift the casted value, or the
+ // interesting bits will end up in the wrong place.
+ if (DAG.getDataLayout().isBigEndian()) {
+ unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NOutVT, DAG.getDataLayout());
+ assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!");
+ Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res,
+ DAG.getConstant(ShiftAmt, dl, ShiftAmtTy));
+ }
+ return Res;
+ }
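+ // Worked example (illustrative): a v2i8 input widened to v4i8 and bitcast
+ // to i32 keeps its two original bytes in the high half on a big-endian
+ // target, so the result above is shifted right by 32 - 16 = 16 bits.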
// If the output type is also a vector and widening it to the same size
// as the widened input type would be a legal type, we can widen the bitcast
// and handle the promotion after.
@@ -365,15 +383,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
CreateStackStoreLoad(InOp, OutVT));
}
-// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount
+// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount
// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
-static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
- const TargetLowering &TLI,
+static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
SelectionDAG &DAG) {
EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- // If the value won't fit in the prefered type, just use something safe. It
- // will be legalized when the shift is expanded.
- if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits())
+ // If any possible shift value won't fit in the preferred type, just use
+ // something safe. It will be legalized when the shift is expanded.
+ if (!ShiftVT.isVector() &&
+ ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits()))
ShiftVT = MVT::i32;
return ShiftVT;
}
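// Illustrative check: shifts of an iN value need Log2_32_Ceil(N) bits of
// shift amount (6 bits for i64, for instance); only when the preferred
// shift type is narrower than that does the fallback to i32 kick in.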
@@ -385,7 +403,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
@@ -397,7 +415,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
@@ -592,8 +610,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
- N->getMask(), ExtPassThru, N->getMemoryVT(),
- N->getMemOperand(), ISD::EXTLOAD);
+ N->getOffset(), N->getMask(), ExtPassThru,
+ N->getMemoryVT(), N->getMemOperand(),
+ N->getAddressingMode(), ISD::EXTLOAD);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -604,7 +623,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
assert(NVT == ExtPassThru.getValueType() &&
- "Gather result type and the passThru agrument type should be the same");
+ "Gather result type and the passThru argument type should be the same");
SDLoc dl(N);
SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(),
@@ -762,6 +781,71 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
N->getOperand(2));
}
+static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
+ unsigned Scale, const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ EVT VT = LHS.getValueType();
+ bool Signed = N->getOpcode() == ISD::SDIVFIX;
+
+ SDLoc dl(N);
+ // See if we can perform the division in this type without widening.
+ if (SDValue V = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
+ DAG))
+ return V;
+
+ // If that didn't work, double the type width and try again. That must work,
+ // or something is wrong.
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getScalarSizeInBits() * 2);
+ if (Signed) {
+ LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT);
+ RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT);
+ } else {
+ LHS = DAG.getZExtOrTrunc(LHS, dl, WideVT);
+ RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT);
+ }
+
+ // TODO: Saturation.
+
+ SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
+ DAG);
+ assert(Res && "Expanding DIVFIX with wide type failed?");
+ return DAG.getZExtOrTrunc(Res, dl, VT);
+}
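+// Illustrative example with hypothetical values: an i8 udiv.fix with
+// scale 4 computes (LHS << 4) / RHS; widening to i16 first guarantees the
+// shifted dividend cannot overflow, and the quotient is truncated to i8.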
+
+SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op1Promoted, Op2Promoted;
+ bool Signed = N->getOpcode() == ISD::SDIVFIX;
+ if (Signed) {
+ Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ } else {
+ Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
+ }
+ EVT PromotedType = Op1Promoted.getValueType();
+ unsigned Scale = N->getConstantOperandVal(2);
+
+ SDValue Res;
+ // If the type is already legal and the operation is legal in that type, we
+ // should not early expand.
+ if (TLI.isTypeLegal(PromotedType)) {
+ TargetLowering::LegalizeAction Action =
+ TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale);
+ if (Action == TargetLowering::Legal || Action == TargetLowering::Custom)
+ Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+ Op2Promoted, N->getOperand(2));
+ }
+
+ if (!Res)
+ Res = earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG);
+
+ // TODO: Saturation.
+
+ return Res;
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
@@ -816,7 +900,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
- EVT InVT = N->getOperand(0).getValueType();
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ EVT InVT = N->getOperand(OpNo).getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT SVT = getSetCCResultType(InVT);
@@ -835,12 +920,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
}
SDLoc dl(N);
- assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
+ assert(SVT.isVector() == N->getOperand(OpNo).getValueType().isVector() &&
"Vector compare must return a vector result!");
// Get the SETCC result using the canonical SETCC type.
- SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
- N->getOperand(1), N->getOperand(2));
+ SDValue SetCC;
+ if (N->isStrictFPOpcode()) {
+ EVT VTs[] = {SVT, MVT::Other};
+ SDValue Opers[] = {N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3)};
+ SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers);
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1));
+ } else
+ SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
// Convert to the expected type.
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
@@ -1058,8 +1153,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
unsigned Shift = SmallVT.getScalarSizeInBits();
- EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(),
- TLI, DAG);
+ EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG);
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
DAG.getConstant(Shift, DL, ShiftTy));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
@@ -1176,6 +1270,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
@@ -1189,6 +1284,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
@@ -1209,7 +1305,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
- case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break;
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX: Res = PromoteIntOp_FIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
@@ -1465,6 +1563,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
SExtPromotedInteger(N->getOperand(0))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ SExtPromotedInteger(N->getOperand(1))), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
@@ -1486,11 +1589,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
SDLoc dl(N);
bool TruncateStore = false;
- if (OpNo == 3) {
+ if (OpNo == 4) {
Mask = PromoteTargetBoolean(Mask, DataVT);
// Update in place.
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
- NewOps[3] = Mask;
+ NewOps[4] = Mask;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
} else { // Data operand
assert(OpNo == 1 && "Unexpected operand for promotion");
@@ -1498,14 +1601,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
TruncateStore = true;
}
- return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
- N->getMemoryVT(), N->getMemOperand(),
+ return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(),
+ N->getOffset(), Mask, N->getMemoryVT(),
+ N->getMemOperand(), N->getAddressingMode(),
TruncateStore, N->isCompressingStore());
}
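// Note on the indices above: a masked store's operands are now
// (chain, data, base, offset, mask), so the mask moved from operand 3 to 4;
// masked loads gained the same offset operand, putting their mask at 3.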
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
unsigned OpNo) {
- assert(OpNo == 2 && "Only know how to promote the mask!");
+ assert(OpNo == 3 && "Only know how to promote the mask!");
EVT DataVT = N->getValueType(0);
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
@@ -1563,6 +1667,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
ZExtPromotedInteger(N->getOperand(0))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
SDLoc dl(N);
SDValue Op = GetPromotedInteger(N->getOperand(0));
@@ -1584,7 +1693,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) {
SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
return SDValue(
DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
@@ -1697,10 +1806,14 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
- case ISD::LLROUND: ExpandIntRes_LLROUND(N, Lo, Hi); break;
- case ISD::LLRINT: ExpandIntRes_LLRINT(N, Lo, Hi); break;
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_LLRINT:
+ case ISD::LLROUND:
+ case ISD::LLRINT: ExpandIntRes_LLROUND_LLRINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
@@ -1794,6 +1907,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
+
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
@@ -1817,7 +1933,11 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
- return ExpandChainLibCall(LC, Node, false);
+ EVT RetVT = Node->getValueType(0);
+ SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node),
+ Node->getOperand(0));
}
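// Sketch of the replacement: operand 0 is the in-chain and is passed to
// makeLibCall separately, so Ops starts at op_begin() + 1; this covers
// what the removed ExpandChainLibCall helper used to do by hand.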
/// N is a shift by a value that needs to be expanded,
@@ -2304,11 +2424,27 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
SDValue Ovf;
- bool HasOpCarry = TLI.isOperationLegalOrCustom(
- N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
- TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
+ unsigned CarryOp, NoCarryOp;
+ ISD::CondCode Cond;
+ switch(N->getOpcode()) {
+ case ISD::UADDO:
+ CarryOp = ISD::ADDCARRY;
+ NoCarryOp = ISD::ADD;
+ Cond = ISD::SETULT;
+ break;
+ case ISD::USUBO:
+ CarryOp = ISD::SUBCARRY;
+ NoCarryOp = ISD::SUB;
+ Cond = ISD::SETUGT;
+ break;
+ default:
+ llvm_unreachable("Node has unexpected Opcode");
+ }
- if (HasOpCarry) {
+ bool HasCarryOp = TLI.isOperationLegalOrCustom(
+ CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
+
+ if (HasCarryOp) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
GetExpandedInteger(LHS, LHSL, LHSH);
@@ -2317,22 +2453,19 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
- unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
- Hi = DAG.getNode(Opc, dl, VTList, HiOps);
+ Hi = DAG.getNode(CarryOp, dl, VTList, HiOps);
Ovf = Hi.getValue(1);
} else {
// Expand the result by simply replacing it with the equivalent
// non-overflow-checking operation.
- auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
- SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS);
+ SDValue Sum = DAG.getNode(NoCarryOp, dl, LHS.getValueType(), LHS, RHS);
SplitInteger(Sum, Lo, Hi);
// Calculate the overflow: addition overflows iff a + b < a, and subtraction
// overflows iff a - b > a.
- auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
}
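// For instance, with 8-bit operands 200 + 100 wraps to 44, and 44 < 200,
// so the SETULT compare above flags the unsigned-add overflow.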
@@ -2544,7 +2677,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT VT = N->getValueType(0);
- SDValue Op = N->getOperand(0);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
@@ -2552,8 +2687,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
- Lo, Hi);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
+ CallOptions, dl, Chain);
+ SplitInteger(Tmp.first, Lo, Hi);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
@@ -2561,75 +2700,94 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT VT = N->getValueType(0);
- SDValue Op = N->getOperand(0);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
- Lo, Hi);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
+ CallOptions, dl, Chain);
+ SplitInteger(Tmp.first, Lo, Hi);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
-void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
- if (VT == MVT::f32)
- LC = RTLIB::LLROUND_F32;
- else if (VT == MVT::f64)
- LC = RTLIB::LLROUND_F64;
- else if (VT == MVT::f80)
- LC = RTLIB::LLROUND_F80;
- else if (VT == MVT::f128)
- LC = RTLIB::LLROUND_F128;
- else if (VT == MVT::ppcf128)
- LC = RTLIB::LLROUND_PPCF128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
+void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
- SDValue Op = N->getOperand(0);
- if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
- Op = GetPromotedFloat(Op);
+ assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat &&
+ "Input type needs to be promoted!");
- SDLoc dl(N);
- EVT RetVT = N->getValueType(0);
- TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setSExt(true);
- SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
- Lo, Hi);
-}
+ EVT VT = Op.getValueType();
-void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
- if (VT == MVT::f32)
- LC = RTLIB::LLRINT_F32;
- else if (VT == MVT::f64)
- LC = RTLIB::LLRINT_F64;
- else if (VT == MVT::f80)
- LC = RTLIB::LLRINT_F80;
- else if (VT == MVT::f128)
- LC = RTLIB::LLRINT_F128;
- else if (VT == MVT::ppcf128)
- LC = RTLIB::LLRINT_PPCF128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
-
- SDValue Op = N->getOperand(0);
- if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
- Op = GetPromotedFloat(Op);
+ if (N->getOpcode() == ISD::LLROUND ||
+ N->getOpcode() == ISD::STRICT_LLROUND) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LLROUND_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLROUND_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLROUND_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLROUND_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLROUND_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
+ } else if (N->getOpcode() == ISD::LLRINT ||
+ N->getOpcode() == ISD::STRICT_LLRINT) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LLRINT_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLRINT_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLRINT_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLRINT_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLRINT_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
+ } else
+ llvm_unreachable("Unexpected opcode!");
SDLoc dl(N);
EVT RetVT = N->getValueType(0);
+ SDValue Chain = N->isStrictFPOpcode() ? N->getOperand(0) : SDValue();
+
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
- SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
- Lo, Hi);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
+ Op, CallOptions, dl,
+ Chain);
+ SplitInteger(Tmp.first, Lo, Hi);
+
+ if (N->isStrictFPOpcode())
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue &Lo, SDValue &Hi) {
+ if (N->isAtomic()) {
+ // Targets typically support a wider CAS than their atomic load instructions.
+ SDLoc dl(N);
+ EVT VT = N->getMemoryVT();
+ SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
+ VT, VTs, N->getOperand(0),
+ N->getOperand(1), Zero, Zero, N->getMemOperand());
+ ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
+ return;
+ }
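+ // The cmpxchg above compares against 0 and writes 0 back, so it can only
+ // ever store the value already present; it acts as an atomic wide load,
+ // with the success flag (value 1) unused and the chain taken from value 2.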
+
if (ISD::isNormalLoad(N)) {
ExpandRes_NormalLoad(N, Lo, Hi);
return;
@@ -2684,8 +2842,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
@@ -2709,8 +2866,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Alignment, MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -3068,6 +3224,13 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
}
+void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
+ N->getConstantOperandVal(2), TLI, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue &Lo, SDValue &Hi) {
SDValue LHS = Node->getOperand(0);
@@ -3596,9 +3759,11 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
+ case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::STRICT_UINT_TO_FP:
case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
case ISD::SHL:
@@ -3865,17 +4030,37 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
- SDValue Op = N->getOperand(0);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT DstVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
- return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
+
+ if (!IsStrict)
+ return Tmp.first;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (N->isAtomic()) {
+ // Targets typically support a wider CAS than their atomic store instructions.
+ SDLoc dl(N);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ N->getMemoryVT(),
+ N->getOperand(0), N->getOperand(2),
+ N->getOperand(1),
+ N->getMemOperand());
+ return Swap.getValue(1);
+ }
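+ // The ATOMIC_SWAP above stores the value atomically and returns the old
+ // contents; only the chain (value 1) is used, so the loaded result is dead.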
if (ISD::isNormalStore(N))
return ExpandOp_NormalStore(N, OpNo);
@@ -3965,81 +4150,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
- SDValue Op = N->getOperand(0);
- EVT SrcVT = Op.getValueType();
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT DstVT = N->getValueType(0);
- SDLoc dl(N);
-
- // The following optimization is valid only if every value in SrcVT (when
- // treated as signed) is representable in DstVT. Check that the mantissa
- // size of DstVT is >= than the number of bits in SrcVT -1.
- const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT);
- if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 &&
- TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
- // Do a signed conversion then adjust the result.
- SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
- SignedConv = TLI.LowerOperation(SignedConv, DAG);
-
- // The result of the signed conversion needs adjusting if the 'sign bit' of
- // the incoming integer was set. To handle this, we dynamically test to see
- // if it is set, and, if so, add a fudge factor.
-
- const uint64_t F32TwoE32 = 0x4F800000ULL;
- const uint64_t F32TwoE64 = 0x5F800000ULL;
- const uint64_t F32TwoE128 = 0x7F800000ULL;
-
- APInt FF(32, 0);
- if (SrcVT == MVT::i32)
- FF = APInt(32, F32TwoE32);
- else if (SrcVT == MVT::i64)
- FF = APInt(32, F32TwoE64);
- else if (SrcVT == MVT::i128)
- FF = APInt(32, F32TwoE128);
- else
- llvm_unreachable("Unsupported UINT_TO_FP!");
-
- // Check whether the sign bit is set.
- SDValue Lo, Hi;
- GetExpandedInteger(Op, Lo, Hi);
- SDValue SignSet = DAG.getSetCC(dl,
- getSetCCResultType(Hi.getValueType()),
- Hi,
- DAG.getConstant(0, dl, Hi.getValueType()),
- ISD::SETLT);
-
- // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
- SDValue FudgePtr =
- DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)),
- TLI.getPointerTy(DAG.getDataLayout()));
-
- // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
- SDValue Zero = DAG.getIntPtrConstant(0, dl);
- SDValue Four = DAG.getIntPtrConstant(4, dl);
- if (DAG.getDataLayout().isBigEndian())
- std::swap(Zero, Four);
- SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet,
- Zero, Four);
- unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
- FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(),
- FudgePtr, Offset);
- Alignment = std::min(Alignment, 4u);
-
- // Load the value out, extending it from f32 to the destination float type.
- // FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(
- ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
- Alignment);
- return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
- }
-
- // Otherwise, use a libcall.
- RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
- return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
+
+ if (!IsStrict)
+ return Tmp.first;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index b596c174a287..63ddb59fce68 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -204,7 +204,8 @@ bool DAGTypeLegalizer::run() {
// non-leaves.
for (SDNode &Node : DAG.allnodes()) {
if (Node.getNumOperands() == 0) {
- AddToWorklist(&Node);
+ Node.setNodeId(ReadyToProcess);
+ Worklist.push_back(&Node);
} else {
Node.setNodeId(Unanalyzed);
}
@@ -974,68 +975,6 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
}
-/// Convert the node into a libcall with the same prototype.
-SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
- bool isSigned) {
- TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setSExt(isSigned);
- unsigned NumOps = N->getNumOperands();
- SDLoc dl(N);
- if (NumOps == 0) {
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions,
- dl).first;
- } else if (NumOps == 1) {
- SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions,
- dl).first;
- } else if (NumOps == 2) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions,
- dl).first;
- }
- SmallVector<SDValue, 8> Ops(NumOps);
- for (unsigned i = 0; i < NumOps; ++i)
- Ops[i] = N->getOperand(i);
-
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first;
-}
-
-/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that
-/// the first operand is the in-chain.
-std::pair<SDValue, SDValue>
-DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
- bool isSigned) {
- SDValue InChain = Node->getOperand(0);
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
- EVT ArgVT = Node->getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Node->getOperand(i);
- Entry.Ty = ArgTy;
- Entry.IsSExt = isSigned;
- Entry.IsZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy(DAG.getDataLayout()));
-
- Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node))
- .setChain(InChain)
- .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
- std::move(Args))
- .setSExtResult(isSigned)
- .setZExtResult(!isSigned);
-
- std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo;
-}
-
/// Promote the given target boolean to a target boolean of the given type.
/// A target boolean is an integer value, not necessarily of type i1, the bits
/// of which conform to getBooleanContents.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 4afbae69128a..faae14444d51 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -215,10 +215,7 @@ private:
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
- SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
- std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
- SDNode *Node, bool isSigned);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
@@ -228,11 +225,6 @@ private:
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
SDValue &Lo, SDValue &Hi);
- void AddToWorklist(SDNode *N) {
- N->setNodeId(ReadyToProcess);
- Worklist.push_back(N);
- }
-
//===--------------------------------------------------------------------===//
// Integer Promotion Support: LegalizeIntegerTypes.cpp
//===--------------------------------------------------------------------===//
@@ -337,6 +329,7 @@ private:
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
SDValue PromoteIntRes_MULFIX(SDNode *N);
+ SDValue PromoteIntRes_DIVFIX(SDNode *N);
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
SDValue PromoteIntRes_ABS(SDNode *N);
@@ -362,9 +355,11 @@ private:
SDValue PromoteIntOp_Shift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_TRUNCATE(SDNode *N);
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
@@ -373,7 +368,7 @@ private:
SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_MULFIX(SDNode *N);
+ SDValue PromoteIntOp_FIX(SDNode *N);
SDValue PromoteIntOp_FPOWI(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
@@ -411,8 +406,7 @@ private:
void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_LLROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -435,6 +429,7 @@ private:
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_DIVFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -486,6 +481,8 @@ private:
// Convert Float Results to Integer.
void SoftenFloatResult(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);
+ SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
@@ -495,6 +492,7 @@ private:
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCBRT(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
SDValue SoftenFloatRes_FCOS(SDNode *N);
@@ -530,9 +528,9 @@ private:
// Convert Float Operand to Integer.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
- SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
SDValue SoftenFloatOp_LROUND(SDNode *N);
@@ -542,6 +540,7 @@ private:
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Float Expansion Support: LegalizeFloatTypes.cpp
@@ -559,10 +558,15 @@ private:
// Float Result Expansion.
void ExpandFloatResult(SDNode *N, unsigned ResNo);
void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCBRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -676,7 +680,6 @@ private:
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
- SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
@@ -688,7 +691,7 @@ private:
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
- SDValue ScalarizeVecRes_MULFIX(SDNode *N);
+ SDValue ScalarizeVecRes_FIX(SDNode *N);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -730,7 +733,7 @@ private:
void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
- void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -804,6 +807,7 @@ private:
SDValue WidenVSELECTAndMask(SDNode *N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
+ SDValue WidenVecRes_STRICT_FSETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
@@ -833,6 +837,7 @@ private:
SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
+ SDValue WidenVecOp_STRICT_FSETCC(SDNode* N);
SDValue WidenVecOp_VSELECT(SDNode *N);
SDValue WidenVecOp_Convert(SDNode *N);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 5562f400b6e1..c45c62cabc05 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -169,9 +169,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(IncrementSize, dl,
- StackPtr.getValueType()));
+ StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
@@ -248,6 +246,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
SDLoc dl(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
+ assert(!LD->isAtomic() && "Atomics cannot be split");
EVT ValueVT = LD->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = LD->getChain();
@@ -262,8 +261,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
MinAlign(Alignment, IncrementSize),
@@ -459,6 +457,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
SDLoc dl(N);
StoreSDNode *St = cast<StoreSDNode>(N);
+ assert(!St->isAtomic() && "Atomics cannot be split");
EVT ValueVT = St->getValue().getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = St->getChain();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 15c3a0b6cfad..7d0b1ee6ae07 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -75,95 +75,95 @@ class VectorLegalizer {
SDValue LegalizeOp(SDValue Op);
/// Assuming the node is legal, "legalize" the results.
- SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+ SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
+
+ /// Make sure Results are legal and update the translation cache.
+ SDValue RecursivelyLegalizeResults(SDValue Op,
+ MutableArrayRef<SDValue> Results);
+
+ /// Wrapper to interface LowerOperation with a vector of Results.
+ /// Returns false if the target wants to use default expansion. Otherwise
+ /// returns true. If return is true and the Results are empty, then the
+ /// returns true. If it returns true and Results is empty, the target
+ /// wants to keep the input node as is.
/// Implements unrolling a VSETCC.
- SDValue UnrollVSETCC(SDValue Op);
+ SDValue UnrollVSETCC(SDNode *Node);
/// Implement expand-based legalization of vector operations.
///
/// This is just a high-level routine to dispatch to specific code paths for
/// operations to legalize them.
- SDValue Expand(SDValue Op);
+ void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
/// FP_TO_SINT isn't legal.
- SDValue ExpandFP_TO_UINT(SDValue Op);
+ void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
/// SINT_TO_FLOAT and SHR on vectors isn't legal.
- SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+ void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
- SDValue ExpandSEXTINREG(SDValue Op);
+ SDValue ExpandSEXTINREG(SDNode *Node);
/// Implement expansion for ANY_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place and bitcasts to the proper
/// type. The contents of the bits in the extended part of each element are
/// undef.
- SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
+ SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
/// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place, bitcasts to the proper
/// type, then shifts left and arithmetic shifts right to introduce a sign
/// extension.
- SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
+ SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
/// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place and blends zeros into
/// the remaining lanes, finally bitcasting to the proper type.
- SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
-
- /// Implement expand-based legalization of ABS vector operations.
- /// If following expanding is legal/custom then do it:
- /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1))
- /// else unroll the operation.
- SDValue ExpandABS(SDValue Op);
+ SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
/// Expand bswap of vectors into a shuffle if legal.
- SDValue ExpandBSWAP(SDValue Op);
+ SDValue ExpandBSWAP(SDNode *Node);
/// Implement vselect in terms of XOR, AND, OR when blend is not
/// supported by the target.
- SDValue ExpandVSELECT(SDValue Op);
- SDValue ExpandSELECT(SDValue Op);
- SDValue ExpandLoad(SDValue Op);
- SDValue ExpandStore(SDValue Op);
- SDValue ExpandFNEG(SDValue Op);
- SDValue ExpandFSUB(SDValue Op);
- SDValue ExpandBITREVERSE(SDValue Op);
- SDValue ExpandCTPOP(SDValue Op);
- SDValue ExpandCTLZ(SDValue Op);
- SDValue ExpandCTTZ(SDValue Op);
- SDValue ExpandFunnelShift(SDValue Op);
- SDValue ExpandROT(SDValue Op);
- SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
- SDValue ExpandUADDSUBO(SDValue Op);
- SDValue ExpandSADDSUBO(SDValue Op);
- SDValue ExpandMULO(SDValue Op);
- SDValue ExpandAddSubSat(SDValue Op);
- SDValue ExpandFixedPointMul(SDValue Op);
- SDValue ExpandStrictFPOp(SDValue Op);
+ SDValue ExpandVSELECT(SDNode *Node);
+ SDValue ExpandSELECT(SDNode *Node);
+ std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
+ SDValue ExpandStore(SDNode *N);
+ SDValue ExpandFNEG(SDNode *Node);
+ void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFixedPointDiv(SDNode *Node);
+ SDValue ExpandStrictFPOp(SDNode *Node);
+ void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implements vector promotion.
///
/// This is essentially just bitcasting the operands to a different type and
/// bitcasting the result back to the original type.
- SDValue Promote(SDValue Op);
+ void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implements [SU]INT_TO_FP vector promotion.
///
/// This is a [zs]ext of the input operand to a larger integer type.
- SDValue PromoteINT_TO_FP(SDValue Op);
+ void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implements FP_TO_[SU]INT vector promotion of the result type.
///
/// It is promoted to a larger integer type. The result is then
/// truncated back to the original type.
- SDValue PromoteFP_TO_INT(SDValue Op);
+ void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
public:
VectorLegalizer(SelectionDAG& dag) :
@@ -219,11 +219,27 @@ bool VectorLegalizer::Run() {
return Changed;
}
-SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
+ assert(Op->getNumValues() == Result->getNumValues() &&
+ "Unexpected number of results");
// Generic legalization: just pass the operand through.
- for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
- AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
- return Result.getValue(Op.getResNo());
+ for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
+ return SDValue(Result, Op.getResNo());
+}
+
+SDValue
+VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
+ MutableArrayRef<SDValue> Results) {
+ assert(Results.size() == Op->getNumValues() &&
+ "Unexpected number of results");
+ // Make sure that the generated code is itself legal.
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ Results[i] = LegalizeOp(Results[i]);
+ AddLegalizedOperand(Op.getValue(i), Results[i]);
+ }
+
+ return Results[Op.getResNo()];
}
SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
@@ -232,18 +248,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
if (I != LegalizedNodes.end()) return I->second;
- SDNode* Node = Op.getNode();
-
// Legalize the operands
SmallVector<SDValue, 8> Ops;
- for (const SDValue &Op : Node->op_values())
- Ops.push_back(LegalizeOp(Op));
+ for (const SDValue &Oper : Op->op_values())
+ Ops.push_back(LegalizeOp(Oper));
- SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
- Op.getResNo());
+ SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
if (Op.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
ISD::LoadExtType ExtType = LD->getExtensionType();
if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
@@ -252,26 +265,29 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
LD->getMemoryVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
- return TranslateLegalizeResults(Op, Result);
- case TargetLowering::Custom:
- if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
- assert(Lowered->getNumValues() == Op->getNumValues() &&
- "Unexpected number of results");
- if (Lowered != Result) {
- // Make sure the new code is also legal.
- Lowered = LegalizeOp(Lowered);
- Changed = true;
- }
- return TranslateLegalizeResults(Op, Lowered);
+ return TranslateLegalizeResults(Op, Node);
+ case TargetLowering::Custom: {
+ SmallVector<SDValue, 2> ResultVals;
+ if (LowerOperationWrapper(Node, ResultVals)) {
+ if (ResultVals.empty())
+ return TranslateLegalizeResults(Op, Node);
+
+ Changed = true;
+ return RecursivelyLegalizeResults(Op, ResultVals);
}
LLVM_FALLTHROUGH;
- case TargetLowering::Expand:
+ }
+ case TargetLowering::Expand: {
Changed = true;
- return ExpandLoad(Op);
+ std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
+ AddLegalizedOperand(Op.getValue(0), Tmp.first);
+ AddLegalizedOperand(Op.getValue(1), Tmp.second);
+ return Op.getResNo() ? Tmp.first : Tmp.second;
+ }
}
}
} else if (Op.getOpcode() == ISD::STORE) {
- StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
EVT StVT = ST->getMemoryVT();
MVT ValVT = ST->getValue().getSimpleValueType();
if (StVT.isVector() && ST->isTruncatingStore()) {
@@ -280,19 +296,24 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
switch (TLI.getTruncStoreAction(ValVT, StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
- return TranslateLegalizeResults(Op, Result);
+ return TranslateLegalizeResults(Op, Node);
case TargetLowering::Custom: {
- SDValue Lowered = TLI.LowerOperation(Result, DAG);
- if (Lowered != Result) {
- // Make sure the new code is also legal.
- Lowered = LegalizeOp(Lowered);
+ SmallVector<SDValue, 1> ResultVals;
+ if (LowerOperationWrapper(Node, ResultVals)) {
+ if (ResultVals.empty())
+ return TranslateLegalizeResults(Op, Node);
+
Changed = true;
+ return RecursivelyLegalizeResults(Op, ResultVals);
}
- return TranslateLegalizeResults(Op, Lowered);
+ LLVM_FALLTHROUGH;
}
- case TargetLowering::Expand:
+ case TargetLowering::Expand: {
Changed = true;
- return ExpandStore(Op);
+ SDValue Chain = ExpandStore(Node);
+ AddLegalizedOperand(Op, Chain);
+ return Chain;
+ }
}
}
}
@@ -300,55 +321,41 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
bool HasVectorValueOrOp = false;
for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J)
HasVectorValueOrOp |= J->isVector();
- for (const SDValue &Op : Node->op_values())
- HasVectorValueOrOp |= Op.getValueType().isVector();
+ for (const SDValue &Oper : Node->op_values())
+ HasVectorValueOrOp |= Oper.getValueType().isVector();
if (!HasVectorValueOrOp)
- return TranslateLegalizeResults(Op, Result);
+ return TranslateLegalizeResults(Op, Node);
TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ EVT ValVT;
switch (Op.getOpcode()) {
default:
- return TranslateLegalizeResults(Op, Result);
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- case ISD::STRICT_FP_ROUND:
- case ISD::STRICT_FP_EXTEND:
+ return TranslateLegalizeResults(Op, Node);
+ case ISD::MERGE_VALUES:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ // This operation lies about being legal: when it claims to be legal,
+ // it should actually be expanded.
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
+ ValVT = Node->getValueType(0);
+ if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
+ ValVT = Node->getOperand(1).getValueType();
+ Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
// If we're asked to expand a strict vector floating-point operation,
// by default we're going to simply unroll it. That is usually the
// best approach, except in the case where the resulting strict (scalar)
// operations would themselves use the fallback mutation to non-strict.
// In that specific case, just do the fallback on the vector op.
- if (Action == TargetLowering::Expand &&
- TLI.getStrictFPOperationAction(Node->getOpcode(),
- Node->getValueType(0))
- == TargetLowering::Legal) {
- EVT EltVT = Node->getValueType(0).getVectorElementType();
+ if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
+ TargetLowering::Legal) {
+ EVT EltVT = ValVT.getVectorElementType();
if (TLI.getOperationAction(Node->getOpcode(), EltVT)
== TargetLowering::Expand &&
TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
@@ -454,7 +461,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
- case ISD::UMULFIXSAT: {
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -482,53 +491,90 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
+ SmallVector<SDValue, 8> ResultVals;
switch (Action) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Promote:
- Result = Promote(Op);
- Changed = true;
+ LLVM_DEBUG(dbgs() << "Promoting\n");
+ Promote(Node, ResultVals);
+ assert(!ResultVals.empty() && "No results for promotion?");
break;
case TargetLowering::Legal:
LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
break;
- case TargetLowering::Custom: {
+ case TargetLowering::Custom:
LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
- if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
- LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
- Result = Tmp1;
+ if (LowerOperationWrapper(Node, ResultVals))
break;
- }
LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
LLVM_FALLTHROUGH;
- }
case TargetLowering::Expand:
- Result = Expand(Op);
+ LLVM_DEBUG(dbgs() << "Expanding\n");
+ Expand(Node, ResultVals);
+ break;
}
- // Make sure that the generated code is itself legal.
- if (Result != Op) {
- Result = LegalizeOp(Result);
- Changed = true;
+ if (ResultVals.empty())
+ return TranslateLegalizeResults(Op, Node);
+
+ Changed = true;
+ return RecursivelyLegalizeResults(Op, ResultVals);
+}
+
+// FIXME: This is very similar to the X86 override of
+// TargetLowering::LowerOperationWrapper. Can we merge them somehow?
+bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+
+ if (!Res.getNode())
+ return false;
+
+ if (Res == SDValue(Node, 0))
+ return true;
+
+ // If the original node has one result, take the return value from
+ // LowerOperation as is. It might not be result number 0.
+ if (Node->getNumValues() == 1) {
+ Results.push_back(Res);
+ return true;
}
- // Note that LegalizeOp may be reentered even from single-use nodes, which
- // means that we always must cache transformed nodes.
- AddLegalizedOperand(Op, Result);
- return Result;
+ // If the original node has multiple results, then the returned node should
+ // have the same number of results.
+ assert((Node->getNumValues() == Res->getNumValues()) &&
+ "Lowering returned the wrong number of results!");
+
+ // Place the new result values based on the node's result numbers.
+ for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
+ Results.push_back(Res.getValue(I));
+
+ return true;
}
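LowerOperationWrapper's three-way contract (false: use default expansion; true with empty Results: keep the node as is; true with Results: legalize those values recursively) can be modeled standalone; a minimal sketch with illustrative names, not LLVM API:

#include <cassert>
#include <vector>

// Model of the LowerOperationWrapper contract as consumed by LegalizeOp above.
enum class Action { DefaultExpand, KeepNode, UseResults };

Action interpret(bool Lowered, const std::vector<int> &Results) {
  if (!Lowered)
    return Action::DefaultExpand;              // fall through to Expand
  return Results.empty() ? Action::KeepNode    // target keeps the node as is
                         : Action::UseResults; // recursively legalize results
}

int main() {
  assert(interpret(false, {}) == Action::DefaultExpand);
  assert(interpret(true, {}) == Action::KeepNode);
  assert(interpret(true, {42}) == Action::UseResults);
}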
-SDValue VectorLegalizer::Promote(SDValue Op) {
+void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// For a few operations there is a specific concept for promotion based on
// the operand's type.
- switch (Op.getOpcode()) {
+ switch (Node->getOpcode()) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
// "Promote" the operation by extending the operand.
- return PromoteINT_TO_FP(Op);
+ PromoteINT_TO_FP(Node, Results);
+ return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
// Promote the operation by extending the operand.
- return PromoteFP_TO_INT(Op);
+ PromoteFP_TO_INT(Node, Results);
+ return;
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
+ // These operations are used to do promotion, so they can't be promoted
+ // themselves.
+ llvm_unreachable("Don't know how to promote this operation!");
}
// There are currently two cases of vector promotion:
@@ -536,91 +582,128 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
// same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
// 2) Extending a vector of floats to a vector of the same number of larger
// floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
- MVT VT = Op.getSimpleValueType();
- assert(Op.getNode()->getNumValues() == 1 &&
+ assert(Node->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
- MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
- SDLoc dl(Op);
- SmallVector<SDValue, 4> Operands(Op.getNumOperands());
-
- for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
- if (Op.getOperand(j).getValueType().isVector())
- if (Op.getOperand(j)
+ MVT VT = Node->getSimpleValueType(0);
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ SDLoc dl(Node);
+ SmallVector<SDValue, 4> Operands(Node->getNumOperands());
+
+ for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
+ if (Node->getOperand(j).getValueType().isVector())
+ if (Node->getOperand(j)
.getValueType()
.getVectorElementType()
.isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
- Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
+ Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
else
- Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
else
- Operands[j] = Op.getOperand(j);
+ Operands[j] = Node->getOperand(j);
}
- Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
+ SDValue Res =
+ DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());
+
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
- return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, DAG.getIntPtrConstant(0, dl));
else
- return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+ Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+
+ Results.push_back(Res);
}
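For the floating-point flavor of this promotion the scalar shape is extend, operate, round back; a minimal standalone sketch using float-via-double as a stand-in for f16-via-f32 (double rounding can differ from a native narrow-type op in corner cases):

#include <cassert>

// FP promotion in miniature: FP_EXTEND the operands, run the operation at
// the promoted type, then FP_ROUND the result back.
float faddPromoted(float A, float B) {
  double WideA = A, WideB = B;    // FP_EXTEND
  double WideRes = WideA + WideB; // the operation at the promoted type
  return (float)WideRes;          // FP_ROUND back to the original type
}

int main() { assert(faddPromoted(1.5f, 2.25f) == 3.75f); }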
-SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
+void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
// INT_TO_FP operations may require the input operand be promoted even
// when the type is otherwise legal.
- MVT VT = Op.getOperand(0).getSimpleValueType();
- MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ bool IsStrict = Node->isStrictFPOpcode();
+ MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
"Vectors have different number of elements!");
- SDLoc dl(Op);
- SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+ SDLoc dl(Node);
+ SmallVector<SDValue, 4> Operands(Node->getNumOperands());
- unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
- ISD::SIGN_EXTEND;
- for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
- if (Op.getOperand(j).getValueType().isVector())
- Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
+ unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
+ ? ISD::ZERO_EXTEND
+ : ISD::SIGN_EXTEND;
+ for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
+ if (Node->getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
else
- Operands[j] = Op.getOperand(j);
+ Operands[j] = Node->getOperand(j);
+ }
+
+ if (IsStrict) {
+ SDValue Res = DAG.getNode(Node->getOpcode(), dl,
+ {Node->getValueType(0), MVT::Other}, Operands);
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ return;
}
- return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
+ SDValue Res =
+ DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
+ Results.push_back(Res);
}
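The operand-widening pattern above, in scalar form; u8 -> u32 -> float is an illustrative stand-in:

#include <cassert>
#include <cstdint>

// INT_TO_FP promotion in miniature: widen the integer operand first
// (ZERO_EXTEND for unsigned, SIGN_EXTEND for signed), then convert.
float u8ToFloat(uint8_t X) {
  uint32_t Wide = X;  // ZERO_EXTEND to the promoted type
  return (float)Wide; // UINT_TO_FP at the promoted type
}

int main() { assert(u8ToFloat(255) == 255.0f); }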
// For FP_TO_INT we promote the result type to a vector type with wider
// elements and then truncate the result. This is different from the default
// PromoteVector, which uses bitcast to promote, thus assuming that the
// promoted vector type has the same overall size.
-SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
- MVT VT = Op.getSimpleValueType();
- MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VT = Node->getSimpleValueType(0);
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ bool IsStrict = Node->isStrictFPOpcode();
assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
"Vectors have different number of elements!");
- unsigned NewOpc = Op->getOpcode();
+ unsigned NewOpc = Node->getOpcode();
// Change FP_TO_UINT to FP_TO_SINT if possible.
// TODO: Should we only do this if FP_TO_UINT itself isn't legal?
if (NewOpc == ISD::FP_TO_UINT &&
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
NewOpc = ISD::FP_TO_SINT;
- SDLoc dl(Op);
- SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
+ if (NewOpc == ISD::STRICT_FP_TO_UINT &&
+ TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
+ NewOpc = ISD::STRICT_FP_TO_SINT;
+
+ SDLoc dl(Node);
+ SDValue Promoted, Chain;
+ if (IsStrict) {
+ Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Chain = Promoted.getValue(1);
+ } else
+ Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));
// Assert that the converted value fits in the original type. If it doesn't
// (e.g. because the value being converted is too big), then the result of the
// original operation was undefined anyway, so the assert is still correct.
- Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
- : ISD::AssertSext,
- dl, NVT, Promoted,
+ if (Node->getOpcode() == ISD::FP_TO_UINT ||
+ Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
+ NewOpc = ISD::AssertZext;
+ else
+ NewOpc = ISD::AssertSext;
+
+ Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
DAG.getValueType(VT.getScalarType()));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
+ Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
+ Results.push_back(Promoted);
+ if (IsStrict)
+ Results.push_back(Chain);
}
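Scalar shape of the result-type promotion above, with float -> i32 -> i8 as an illustrative stand-in; inputs that do not fit the narrow type were undefined for the original node anyway:

#include <cassert>
#include <cstdint>

// FP_TO_INT promotion in miniature: convert to a wider integer type, then
// TRUNCATE back to the requested narrow type.
int8_t fpToS8(float X) {
  int32_t Wide = (int32_t)X; // FP_TO_SINT at the promoted type
  return (int8_t)Wide;       // TRUNCATE back (value assumed in range)
}

int main() { assert(fpToS8(-5.75f) == -5); }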
-SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
- LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
EVT SrcVT = LD->getMemoryVT();
EVT SrcEltVT = SrcVT.getScalarType();
@@ -629,7 +712,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
SDValue NewChain;
SDValue Value;
if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
- SDLoc dl(Op);
+ SDLoc dl(N);
SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
@@ -741,130 +824,157 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
}
NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
+ Value = DAG.getBuildVector(N->getValueType(0), dl, Vals);
} else {
- SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
- // Skip past MERGE_VALUE node if known.
- if (Scalarized->getOpcode() == ISD::MERGE_VALUES) {
- NewChain = Scalarized.getOperand(1);
- Value = Scalarized.getOperand(0);
- } else {
- NewChain = Scalarized.getValue(1);
- Value = Scalarized.getValue(0);
- }
+ std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
}
- AddLegalizedOperand(Op.getValue(0), Value);
- AddLegalizedOperand(Op.getValue(1), NewChain);
-
- return (Op.getResNo() ? NewChain : Value);
+ return std::make_pair(Value, NewChain);
}
-SDValue VectorLegalizer::ExpandStore(SDValue Op) {
- StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+SDValue VectorLegalizer::ExpandStore(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
- AddLegalizedOperand(Op, TF);
return TF;
}
-SDValue VectorLegalizer::Expand(SDValue Op) {
- switch (Op->getOpcode()) {
+void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
+ SDValue Tmp;
+ switch (Node->getOpcode()) {
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ Results.push_back(Node->getOperand(i));
+ return;
case ISD::SIGN_EXTEND_INREG:
- return ExpandSEXTINREG(Op);
+ Results.push_back(ExpandSEXTINREG(Node));
+ return;
case ISD::ANY_EXTEND_VECTOR_INREG:
- return ExpandANY_EXTEND_VECTOR_INREG(Op);
+ Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
+ return;
case ISD::SIGN_EXTEND_VECTOR_INREG:
- return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
+ Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
+ return;
case ISD::ZERO_EXTEND_VECTOR_INREG:
- return ExpandZERO_EXTEND_VECTOR_INREG(Op);
+ Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
+ return;
case ISD::BSWAP:
- return ExpandBSWAP(Op);
+ Results.push_back(ExpandBSWAP(Node));
+ return;
case ISD::VSELECT:
- return ExpandVSELECT(Op);
+ Results.push_back(ExpandVSELECT(Node));
+ return;
case ISD::SELECT:
- return ExpandSELECT(Op);
+ Results.push_back(ExpandSELECT(Node));
+ return;
case ISD::FP_TO_UINT:
- return ExpandFP_TO_UINT(Op);
+ ExpandFP_TO_UINT(Node, Results);
+ return;
case ISD::UINT_TO_FP:
- return ExpandUINT_TO_FLOAT(Op);
+ ExpandUINT_TO_FLOAT(Node, Results);
+ return;
case ISD::FNEG:
- return ExpandFNEG(Op);
+ Results.push_back(ExpandFNEG(Node));
+ return;
case ISD::FSUB:
- return ExpandFSUB(Op);
+ ExpandFSUB(Node, Results);
+ return;
case ISD::SETCC:
- return UnrollVSETCC(Op);
+ Results.push_back(UnrollVSETCC(Node));
+ return;
case ISD::ABS:
- return ExpandABS(Op);
+ if (TLI.expandABS(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::BITREVERSE:
- return ExpandBITREVERSE(Op);
+ ExpandBITREVERSE(Node, Results);
+ return;
case ISD::CTPOP:
- return ExpandCTPOP(Op);
+ if (TLI.expandCTPOP(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- return ExpandCTLZ(Op);
+ if (TLI.expandCTLZ(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- return ExpandCTTZ(Op);
+ if (TLI.expandCTTZ(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::FSHL:
case ISD::FSHR:
- return ExpandFunnelShift(Op);
+ if (TLI.expandFunnelShift(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::ROTL:
case ISD::ROTR:
- return ExpandROT(Op);
+ if (TLI.expandROT(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::FMINNUM:
case ISD::FMAXNUM:
- return ExpandFMINNUM_FMAXNUM(Op);
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::UADDO:
case ISD::USUBO:
- return ExpandUADDSUBO(Op);
+ ExpandUADDSUBO(Node, Results);
+ return;
case ISD::SADDO:
case ISD::SSUBO:
- return ExpandSADDSUBO(Op);
+ ExpandSADDSUBO(Node, Results);
+ return;
case ISD::UMULO:
case ISD::SMULO:
- return ExpandMULO(Op);
+ ExpandMULO(Node, Results);
+ return;
case ISD::USUBSAT:
case ISD::SSUBSAT:
case ISD::UADDSAT:
case ISD::SADDSAT:
- return ExpandAddSubSat(Op);
+ if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::SMULFIX:
case ISD::UMULFIX:
- return ExpandFixedPointMul(Op);
+ if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::SMULFIXSAT:
case ISD::UMULFIXSAT:
// FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet; it is not clear
// exactly why. Maybe it results in worse codegen than the unroll for some
// targets? This should probably be investigated, and if we still prefer to
// unroll, an explanation would be helpful.
- return DAG.UnrollVectorOp(Op.getNode());
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- return ExpandStrictFPOp(Op);
+ break;
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX:
+ Results.push_back(ExpandFixedPointDiv(Node));
+ return;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
+ ExpandStrictFPOp(Node, Results);
+ return;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
@@ -878,22 +988,23 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
- return TLI.expandVecReduce(Op.getNode(), DAG);
- default:
- return DAG.UnrollVectorOp(Op.getNode());
+ Results.push_back(TLI.expandVecReduce(Node, DAG));
+ return;
}
+
+ Results.push_back(DAG.UnrollVectorOp(Node));
}
-SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
+SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
// Lower a select instruction where the condition is a scalar and the
// operands are vectors. Lower this select to VSELECT and implement it
// using XOR, AND, and OR. The selector bit is broadcast.
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
+ EVT VT = Node->getValueType(0);
+ SDLoc DL(Node);
- SDValue Mask = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- SDValue Op2 = Op.getOperand(2);
+ SDValue Mask = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
assert(VT.isVector() && !Mask.getValueType().isVector()
&& Op1.getValueType() == Op2.getValueType() && "Invalid type");
@@ -907,7 +1018,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
// Generate a mask operand.
EVT MaskTy = VT.changeVectorElementTypeToInteger();
@@ -936,36 +1047,35 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
- return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+ return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}
-SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
// Make sure that the SRA and SHL instructions are available.
if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
- SDLoc DL(Op);
- EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
+ SDLoc DL(Node);
+ EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();
unsigned BW = VT.getScalarSizeInBits();
unsigned OrigBW = OrigTy.getScalarSizeInBits();
SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
- Op = Op.getOperand(0);
- Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
+ SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
}
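The SHL/SRA pair realizes the scalar identity sign_extend_inreg(x, b) == (x << (BW - b)) >> (BW - b) with an arithmetic right shift; a standalone sketch for 32-bit lanes, assuming the usual arithmetic shift on signed integers:

#include <cassert>
#include <cstdint>

// Sign-extend the low OrigBW bits of a 32-bit lane with SHL followed by SRA.
int32_t signExtendInReg(uint32_t X, unsigned OrigBW) {
  unsigned Sh = 32 - OrigBW;
  return (int32_t)(X << Sh) >> Sh; // >> on int32_t: arithmetic shift here
}

int main() {
  assert(signExtendInReg(0xFFu, 8) == -1);
  assert(signExtendInReg(0x7Fu, 8) == 127);
}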
// Generically expand a vector anyext in register to a shuffle of the relevant
// lanes into the appropriate locations, with other lanes left undef.
-SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
int NumElements = VT.getVectorNumElements();
- SDValue Src = Op.getOperand(0);
+ SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
int NumSrcElements = SrcVT.getVectorNumElements();
@@ -997,15 +1107,15 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
}
-SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue Src = Op.getOperand(0);
+SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
// First build an any-extend node which can be legalized above when we
// recurse through it.
- Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
// Now we need sign extend. Do this by shifting the elements. Even if these
// aren't legal operations, they have a better chance of being legalized
@@ -1021,11 +1131,11 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
// Generically expand a vector zext in register to a shuffle of the relevant
// lanes into the appropriate locations, a blend of zero into the high bits,
// and a bitcast to the wider element type.
-SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
int NumElements = VT.getVectorNumElements();
- SDValue Src = Op.getOperand(0);
+ SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
int NumSrcElements = SrcVT.getVectorNumElements();
@@ -1068,8 +1178,8 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
}
-SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
// Generate a byte wise shuffle mask for the BSWAP.
SmallVector<int, 16> ShuffleMask;
@@ -1078,20 +1188,24 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
// Only emit a shuffle if the mask is legal.
if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
- SDLoc DL(Op);
- Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+ SDLoc DL(Node);
+ SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
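createBSWAPShuffleMask just reverses the byte indices within each scalar; a standalone model (for two 4-byte elements it yields 3,2,1,0,7,6,5,4):

#include <cassert>
#include <vector>

// Byte-shuffle mask for BSWAP: reverse the bytes inside each scalar element.
std::vector<int> bswapShuffleMask(int NumElts, int ScalarSizeInBytes) {
  std::vector<int> Mask;
  for (int I = 0; I != NumElts; ++I)
    for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
      Mask.push_back(I * ScalarSizeInBytes + J);
  return Mask;
}

int main() {
  std::vector<int> Expected{3, 2, 1, 0, 7, 6, 5, 4};
  assert(bswapShuffleMask(2, 4) == Expected);
}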
-SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
- EVT VT = Op.getValueType();
+void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT VT = Node->getValueType(0);
// If we have the scalar operation, it's probably cheaper to unroll it.
- if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
- return DAG.UnrollVectorOp(Op.getNode());
+ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
+ SDValue Tmp = DAG.UnrollVectorOp(Node);
+ Results.push_back(Tmp);
+ return;
+ }
// If the vector element width is a whole number of bytes, test if it's legal
// to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
@@ -1108,35 +1222,39 @@ SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
- SDLoc DL(Op);
- Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+ SDLoc DL(Node);
+ SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
BSWAPMask);
Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ Results.push_back(Op);
+ return;
}
}
// If we have the appropriate vector bit operations, it is better to use them
// than unrolling and expanding each component.
- if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
- !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
- !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
- return DAG.UnrollVectorOp(Op.getNode());
-
- // Let LegalizeDAG handle this later.
- return Op;
+ if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
+ // Let LegalizeDAG handle this later.
+ return;
+
+ // Otherwise unroll.
+ SDValue Tmp = DAG.UnrollVectorOp(Node);
+ Results.push_back(Tmp);
}
-SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
+SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
- SDLoc DL(Op);
+ SDLoc DL(Node);
- SDValue Mask = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- SDValue Op2 = Op.getOperand(2);
+ SDValue Mask = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
EVT VT = Mask.getValueType();
@@ -1152,13 +1270,13 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
TLI.getBooleanContents(Op1.getValueType()) !=
TargetLowering::ZeroOrNegativeOneBooleanContent)
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
// If the mask and the type are different sizes, unroll the vector op. This
// can occur when getSetCCResultType returns something that is different in
// size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
if (VT.getSizeInBits() != Op1.getValueSizeInBits())
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
@@ -1173,46 +1291,61 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
- return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
-}
-
-SDValue VectorLegalizer::ExpandABS(SDValue Op) {
- // Attempt to expand using TargetLowering.
- SDValue Result;
- if (TLI.expandABS(Op.getNode(), Result, DAG))
- return Result;
-
- // Otherwise go ahead and unroll.
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}
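With ZeroOrNegativeOne booleans the blend above is the classic bitwise select; one lane in standalone form:

#include <cassert>
#include <cstdint>

// One lane of the VSELECT expansion: select(m, a, b) == (a & m) | (b & ~m)
// when the mask lane is all-ones (true) or all-zeros (false).
uint32_t bitwiseSelect(uint32_t Mask, uint32_t A, uint32_t B) {
  return (A & Mask) | (B & ~Mask);
}

int main() {
  assert(bitwiseSelect(0xFFFFFFFFu, 7, 9) == 7);
  assert(bitwiseSelect(0x00000000u, 7, 9) == 9);
}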
-SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
+void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
// Attempt to expand using TargetLowering.
SDValue Result, Chain;
- if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) {
- if (Op.getNode()->isStrictFPOpcode())
- // Relink the chain
- DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
- return Result;
+ if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
+ Results.push_back(Result);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Chain);
+ return;
}
// Otherwise go ahead and unroll.
- return DAG.UnrollVectorOp(Op.getNode());
+ if (Node->isStrictFPOpcode()) {
+ UnrollStrictFPOp(Node, Results);
+ return;
+ }
+
+ Results.push_back(DAG.UnrollVectorOp(Node));
}
-SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
- EVT VT = Op.getOperand(0).getValueType();
- SDLoc DL(Op);
+void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ bool IsStrict = Node->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
+ EVT VT = Src.getValueType();
+ SDLoc DL(Node);
// Attempt to expand using TargetLowering.
SDValue Result;
- if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG))
- return Result;
+ SDValue Chain;
+ if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
+ Results.push_back(Result);
+ if (IsStrict)
+ Results.push_back(Chain);
+ return;
+ }
// Make sure that the SINT_TO_FP and SRL instructions are available.
- if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
- return DAG.UnrollVectorOp(Op.getNode());
+ if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
+ TargetLowering::Expand) ||
+ (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
+ TargetLowering::Expand)) ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
+ if (IsStrict) {
+ UnrollStrictFPOp(Node, Results);
+ return;
+ }
+
+ Results.push_back(DAG.UnrollVectorOp(Node));
+ return;
+ }
unsigned BW = VT.getScalarSizeInBits();
assert((BW == 64 || BW == 32) &&
@@ -1227,153 +1360,141 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
// Two to the power of half-word-size.
- SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
+ SDValue TWOHW =
+ DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));
// Clear upper part of LO, lower HI
- SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
- SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
+
+ if (IsStrict) {
+ // Convert hi and lo to floats
+ // Scale the converted hi part back up to its original weight
+ // TODO: Can any fast-math-flags be set on these nodes?
+ SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
+ {Node->getValueType(0), MVT::Other},
+ {Node->getOperand(0), HI});
+ fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other},
+ {fHI.getValue(1), fHI, TWOHW});
+ SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
+ {Node->getValueType(0), MVT::Other},
+ {Node->getOperand(0), LO});
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
+ fLO.getValue(1));
+
+ // Add the two halves
+ SDValue Result =
+ DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
+ {TF, fHI, fLO});
+
+ Results.push_back(Result);
+ Results.push_back(Result.getValue(1));
+ return;
+ }
// Convert hi and lo to floats
// Scale the converted hi part back up to its original weight
// TODO: Can any fast-math-flags be set on these nodes?
- SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
- fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
- SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);
// Add the two halves
- return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+ Results.push_back(
+ DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
}
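The half-word split makes both halves non-negative so the signed conversion is safe; a standalone 32-bit model of the non-strict tail above:

#include <cassert>
#include <cstdint>

// uint -> float via two signed conversions: the hi and lo half-words are
// both non-negative, so SINT_TO_FP applies; recombine as hi * 2^16 + lo.
float uintToFloat(uint32_t X) {
  int32_t Hi = (int32_t)(X >> 16);     // SRL by half the bit width
  int32_t Lo = (int32_t)(X & 0xFFFFu); // AND with the half-word mask
  return (float)Hi * 65536.0f + (float)Lo;
}

int main() { assert(uintToFloat(0x80000000u) == 2147483648.0f); }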
-SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
- if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
- SDLoc DL(Op);
- SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
+SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) {
+ SDLoc DL(Node);
+ SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0));
// TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
- return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
- Zero, Op.getOperand(0));
+ return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero,
+ Node->getOperand(0));
}
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
}
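Subtracting from negative zero (rather than positive zero) keeps the sign of zero correct; standalone:

#include <cassert>
#include <cmath>

// FNEG via FSUB: -x == -0.0 - x, and fneg(+0.0) correctly yields -0.0.
float fnegViaFsub(float X) { return -0.0f - X; }

int main() {
  assert(fnegViaFsub(1.5f) == -1.5f);
  assert(std::signbit(fnegViaFsub(0.0f)));
}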
-SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
+void VectorLegalizer::ExpandFSUB(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
// For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
// we can defer this to operation legalization where it will be lowered as
// a+(-b).
- EVT VT = Op.getValueType();
+ EVT VT = Node->getValueType(0);
if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
TLI.isOperationLegalOrCustom(ISD::FADD, VT))
- return Op; // Defer to LegalizeDAG
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
+ return; // Defer to LegalizeDAG
-SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) {
- SDValue Result;
- if (TLI.expandCTPOP(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
- SDValue Result;
- if (TLI.expandCTLZ(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
+ SDValue Tmp = DAG.UnrollVectorOp(Node);
+ Results.push_back(Tmp);
}
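The deferral relies on the IEEE-754 identity a - b == a + (-b), which holds exactly; standalone check:

#include <cassert>

// FSUB rewritten as FADD of a negated operand.
float fsubViaFneg(float A, float B) { return A + (-B); }

int main() { assert(fsubViaFneg(5.5f, 2.25f) == 5.5f - 2.25f); }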
-SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) {
- SDValue Result;
- if (TLI.expandCTTZ(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) {
- SDValue Result;
- if (TLI.expandFunnelShift(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandROT(SDValue Op) {
- SDValue Result;
- if (TLI.expandROT(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {
- if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG))
- return Expanded;
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) {
+void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
SDValue Result, Overflow;
- TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG);
-
- if (Op.getResNo() == 0) {
- AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
- return Result;
- } else {
- AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
- return Overflow;
- }
+ TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
}
-SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) {
+void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
SDValue Result, Overflow;
- TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG);
-
- if (Op.getResNo() == 0) {
- AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
- return Result;
- } else {
- AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
- return Overflow;
- }
+ TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
}
-SDValue VectorLegalizer::ExpandMULO(SDValue Op) {
+void VectorLegalizer::ExpandMULO(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
SDValue Result, Overflow;
- if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG))
- std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode());
+ if (!TLI.expandMULO(Node, Result, Overflow, DAG))
+ std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);
- if (Op.getResNo() == 0) {
- AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
- return Result;
- } else {
- AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
- return Overflow;
- }
+ Results.push_back(Result);
+ Results.push_back(Overflow);
}
-SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) {
- if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG))
+SDValue VectorLegalizer::ExpandFixedPointDiv(SDNode *Node) {
+ SDNode *N = Node;
+ if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
+ N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
return Expanded;
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(N);
}
-SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) {
- if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG))
- return Expanded;
- return DAG.UnrollVectorOp(Op.getNode());
+void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
+ ExpandUINT_TO_FLOAT(Node, Results);
+ return;
+ }
+ if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
+ ExpandFP_TO_UINT(Node, Results);
+ return;
+ }
+
+ UnrollStrictFPOp(Node, Results);
}
-SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
- EVT VT = Op.getValueType();
+void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
- unsigned NumOpers = Op.getNumOperands();
+ unsigned NumOpers = Node->getNumOperands();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT ValueVTs[] = {EltVT, MVT::Other};
- SDValue Chain = Op.getOperand(0);
- SDLoc dl(Op);
+
+ EVT TmpEltVT = EltVT;
+ if (Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS)
+ TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TmpEltVT);
+
+ EVT ValueVTs[] = {TmpEltVT, MVT::Other};
+ SDValue Chain = Node->getOperand(0);
+ SDLoc dl(Node);
SmallVector<SDValue, 32> OpValues;
SmallVector<SDValue, 32> OpChains;
@@ -1387,7 +1508,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
// Now process the remaining operands.
for (unsigned j = 1; j < NumOpers; ++j) {
- SDValue Oper = Op.getOperand(j);
+ SDValue Oper = Node->getOperand(j);
EVT OperVT = Oper.getValueType();
if (OperVT.isVector())
@@ -1397,28 +1518,37 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
Opers.push_back(Oper);
}
- SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
+ SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
+ SDValue ScalarResult = ScalarOp.getValue(0);
+ SDValue ScalarChain = ScalarOp.getValue(1);
+
+ if (Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS)
+ ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
+ DAG.getConstant(APInt::getAllOnesValue
+ (EltVT.getSizeInBits()), dl, EltVT),
+ DAG.getConstant(0, dl, EltVT));
- OpValues.push_back(ScalarOp.getValue(0));
- OpChains.push_back(ScalarOp.getValue(1));
+ OpValues.push_back(ScalarResult);
+ OpChains.push_back(ScalarChain);
}
SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
- AddLegalizedOperand(Op.getValue(0), Result);
- AddLegalizedOperand(Op.getValue(1), NewChain);
-
- return Op.getResNo() ? NewChain : Result;
+ Results.push_back(Result);
+ Results.push_back(NewChain);
}
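The STRICT_FSETCC/STRICT_FSETCCS fixup above widens each scalar i1 compare into an all-ones or all-zeros lane; one lane in standalone form:

#include <cassert>
#include <cstdint>

// Per-lane SETCC fixup: an i1 compare result becomes all-ones (true) or
// all-zeros (false) at the vector element width.
uint32_t setccLane(bool Cmp) { return Cmp ? ~0u : 0u; }

int main() {
  assert(setccLane(true) == 0xFFFFFFFFu);
  assert(setccLane(false) == 0u);
}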
-SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
- SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue CC = Node->getOperand(2);
EVT TmpEltVT = LHS.getValueType().getVectorElementType();
- SDLoc dl(Op);
+ SDLoc dl(Node);
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
SDValue LHSElem = DAG.getNode(
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 3763e886cef2..974914d00d05 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TypeSize.h"
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
@@ -50,7 +51,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
- case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
@@ -146,35 +146,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMA:
R = ScalarizeVecRes_TernaryOp(N);
break;
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- case ISD::STRICT_FP_EXTEND:
+
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
R = ScalarizeVecRes_StrictFPOp(N);
break;
+
case ISD::UADDO:
case ISD::SADDO:
case ISD::USUBO:
@@ -187,7 +165,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
- R = ScalarizeVecRes_MULFIX(N);
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX:
+ R = ScalarizeVecRes_FIX(N);
break;
}
@@ -211,7 +191,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
Op0.getValueType(), Op0, Op1, Op2);
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = N->getOperand(2);
@@ -226,10 +206,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
EVT ValueVTs[] = {VT, MVT::Other};
SDLoc dl(N);
- SmallVector<SDValue, 4> Opers;
+ SmallVector<SDValue, 4> Opers(NumOpers);
// The Chain is the first operand.
- Opers.push_back(Chain);
+ Opers[0] = Chain;
// Now process the remaining operands.
for (unsigned i = 1; i < NumOpers; ++i) {
@@ -238,7 +218,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
if (Oper.getValueType().isVector())
Oper = GetScalarizedVector(Oper);
- Opers.push_back(Oper);
+ Opers[i] = Oper;
}
SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers);
@@ -326,18 +306,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
NewVT, Op, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) {
- EVT NewVT = N->getValueType(0).getVectorElementType();
- SDValue Op = GetScalarizedVector(N->getOperand(1));
- SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
- { NewVT, MVT::Other },
- { N->getOperand(0), Op, N->getOperand(2) });
- // Legalize the chain result - switch anything that used the old chain to
- // use the new one.
- ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- return Res;
-}
-
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FPOWI, SDLoc(N),
@@ -606,6 +574,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
@@ -699,7 +669,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) {
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
// Revectorize the result so the types line up with what the uses of this
// expression expect.
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+
+ // Do our own replacement and return SDValue() to tell the caller that we
+ // handled all replacements, since the caller can only handle a single result.
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
}
/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
@@ -804,7 +779,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+
+ // Do our own replacement and return SDValue() to tell the caller that we
+ // handled all replacements, since the caller can only handle a single result.
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
}
SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
@@ -901,13 +882,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FP_EXTEND:
- case ISD::STRICT_FP_EXTEND:
case ISD::FP_ROUND:
- case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
- case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
- case ISD::STRICT_FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
@@ -964,32 +941,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMA:
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
+
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
+
case ISD::UADDO:
case ISD::SADDO:
case ISD::USUBO:
@@ -1002,7 +960,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
- SplitVecRes_MULFIX(N, Lo, Hi);
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX:
+ SplitVecRes_FIX(N, Lo, Hi);
break;
}
@@ -1041,7 +1001,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
Op0Hi, Op1Hi, Op2Hi);
}
-void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
@@ -1206,9 +1166,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
- StackPtr =
- DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(IncrementSize, dl, StackPtr.getValueType()));
+ StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
@@ -1304,12 +1262,12 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
- SmallVector<SDValue, 4> OpsLo;
- SmallVector<SDValue, 4> OpsHi;
+ SmallVector<SDValue, 4> OpsLo(NumOps);
+ SmallVector<SDValue, 4> OpsHi(NumOps);
// The Chain is the first operand.
- OpsLo.push_back(Chain);
- OpsHi.push_back(Chain);
+ OpsLo[0] = Chain;
+ OpsHi[0] = Chain;
// Now process the remaining operands.
for (unsigned i = 1; i < NumOps; ++i) {
@@ -1327,8 +1285,8 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i);
}
- OpsLo.push_back(OpLo);
- OpsHi.push_back(OpHi);
+ OpsLo[i] = OpLo;
+ OpsHi[i] = OpHi;
}
EVT LoValueVTs[] = {LoVT, MVT::Other};
@@ -1572,12 +1530,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue &Lo, SDValue &Hi) {
+ assert(MLD->isUnindexed() && "Indexed masked load during type legalization!");
EVT LoVT, HiVT;
SDLoc dl(MLD);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
SDValue Ch = MLD->getChain();
SDValue Ptr = MLD->getBasePtr();
+ SDValue Offset = MLD->getOffset();
+ assert(Offset.isUndef() && "Unexpected indexed masked load offset");
SDValue Mask = MLD->getMask();
SDValue PassThru = MLD->getPassThru();
unsigned Alignment = MLD->getOriginalAlignment();
@@ -1609,8 +1570,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
- Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO,
- ExtType, MLD->isExpandingLoad());
+ Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
+ MMO, MLD->getAddressingMode(), ExtType,
+ MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
@@ -1621,8 +1583,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(),
MLD->getRanges());
- Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO,
- ExtType, MLD->isExpandingLoad());
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT,
+ MMO, MLD->getAddressingMode(), ExtType,
+ MLD->isExpandingLoad());
// Build a factor node to remember that this load is independent of the
// other one.
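The Lo/Hi split performed in this function is semantically simple: handle the first half of the lanes at the original address, advance the base pointer by the low half's store size (IncrementMemoryAddress above), then handle the remaining lanes. A standalone scalar model of an unindexed, non-extending masked load split in two; the helper and all names are illustrative.

#include <array>
#include <cstddef>
#include <cstdio>

// Scalar model of a masked load: each active lane loads from memory, each
// inactive lane keeps its pass-through value.
template <std::size_t N>
std::array<int, N> maskedLoad(const int *Ptr, const std::array<bool, N> &Mask,
                              const std::array<int, N> &PassThru) {
  std::array<int, N> Res{};
  for (std::size_t i = 0; i < N; ++i)
    Res[i] = Mask[i] ? Ptr[i] : PassThru[i];
  return Res;
}

int main() {
  int Mem[4] = {10, 20, 30, 40};
  std::array<bool, 2> MaskLo{true, false}, MaskHi{true, true};
  std::array<int, 2> Pass{-1, -1};

  // Low half at the original pointer...
  auto Lo = maskedLoad<2>(Mem, MaskLo, Pass);
  // ...then increment the address by the low half's store size.
  auto Hi = maskedLoad<2>(Mem + 2, MaskHi, Pass);

  std::printf("%d %d %d %d\n", Lo[0], Lo[1], Hi[0], Hi[1]); // 10 -1 30 40
}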
@@ -1747,24 +1710,6 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
- } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) {
- Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
- { N->getOperand(0), Lo, N->getOperand(2) });
- Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
- { N->getOperand(0), Hi, N->getOperand(2) });
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Lo.getValue(1), Hi.getValue(1));
- ReplaceValueWith(SDValue(N, 1), NewChain);
- } else if (N->isStrictFPOpcode()) {
- Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
- { N->getOperand(0), Lo });
- Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
- { N->getOperand(0), Hi });
- // Legalize the chain result - switch anything that used the old chain to
- // use the new one.
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Lo.getValue(1), Hi.getValue(1));
- ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
@@ -2003,9 +1948,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
+ if (N->getValueType(0).bitsLT(
+ N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
Res = SplitVecOp_TruncateHelper(N);
else
Res = SplitVecOp_UnaryOp(N);
@@ -2357,8 +2305,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed masked store of vector?");
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
+ SDValue Offset = N->getOffset();
+ assert(Offset.isUndef() && "Unexpected indexed masked store offset");
SDValue Mask = N->getMask();
SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();
@@ -2392,8 +2343,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
- N->isTruncatingStore(),
+ Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
N->isCompressingStore());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
@@ -2405,8 +2356,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
HiMemVT.getStoreSize(), Alignment, N->getAAInfo(),
N->getRanges());
- Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
- N->isTruncatingStore(), N->isCompressingStore());
+ Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
// Build a factor node to remember that this store is independent of the
// other one.
@@ -2562,7 +2514,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
//
// Without this transform, the original truncate would end up being
// scalarized, which is pretty much always a last resort.
- SDValue InVec = N->getOperand(0);
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ SDValue InVec = N->getOperand(OpNo);
EVT InVT = InVec->getValueType(0);
EVT OutVT = N->getValueType(0);
unsigned NumElements = OutVT.getVectorNumElements();
@@ -2606,8 +2559,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
NumElements/2);
- SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
- SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
+
+ SDValue HalfLo;
+ SDValue HalfHi;
+ SDValue Chain;
+ if (N->isStrictFPOpcode()) {
+    HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
+                         {N->getOperand(0), InLoVec});
+    HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
+                         {N->getOperand(0), InHiVec});
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
+ HalfHi.getValue(1));
+ } else {
+ HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
+ HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
+ }
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
@@ -2616,6 +2584,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
+
+ if (N->isStrictFPOpcode()) {
+ SDValue Res = DAG.getNode(
+ ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other},
+ {Chain, InterVec,
+ DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))});
+    // Relink the chain.
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
+ return Res;
+ }
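The helper's strategy is unchanged by the strict-FP additions: instead of scalarizing a wide-to-narrow conversion whose operand had to be split, truncate each half only to an intermediate element width, concatenate, and finish with one final legal truncation (or FP_ROUND). A standalone integer model of that two-stage narrowing; the widths are chosen arbitrarily.

#include <cstdint>
#include <cstdio>
#include <vector>

// One truncation step: narrow every element of the input vector.
template <typename Out, typename In>
std::vector<Out> truncateVec(const std::vector<In> &V) {
  std::vector<Out> R;
  for (In X : V)
    R.push_back(static_cast<Out>(X));
  return R;
}

int main() {
  std::vector<int32_t> In{0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F};

  // Split the operand and truncate each half only to the halfway width...
  std::vector<int32_t> LoHalf(In.begin(), In.begin() + 2);
  std::vector<int32_t> HiHalf(In.begin() + 2, In.end());
  std::vector<int16_t> InterLo = truncateVec<int16_t>(LoHalf);
  std::vector<int16_t> InterHi = truncateVec<int16_t>(HiHalf);

  // ...concatenate, then do one final truncation to the result width.
  std::vector<int16_t> Inter = InterLo;
  Inter.insert(Inter.end(), InterHi.begin(), InterHi.end());
  for (int8_t B : truncateVec<int8_t>(Inter))
    std::printf("%d ", B); // 3 7 11 15
  std::printf("\n");
}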
+
return IsFloat
? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
DAG.getTargetConstant(
@@ -2774,30 +2753,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryWithExtraScalarOp(N);
break;
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
Res = WidenVecRes_StrictFP(N);
break;
@@ -2843,13 +2801,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
- case ISD::STRICT_FP_EXTEND:
- case ISD::STRICT_FP_ROUND:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- Res = WidenVecRes_Convert_StrictFP(N);
- break;
-
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -3091,6 +3042,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ return WidenVecRes_STRICT_FSETCC(N);
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ return WidenVecRes_Convert_StrictFP(N);
+ default:
+ break;
+ }
+
// StrictFP op widening for operations that can trap.
unsigned NumOpers = N->getNumOperands();
unsigned Opcode = N->getOpcode();
@@ -3497,7 +3463,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
break;
- case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypePromoteInteger: {
// If the incoming type is a vector that is being promoted, then
// we know that the elements are arranged differently and that we
// must perform the conversion using a stack slot.
@@ -3506,11 +3472,24 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
// If the InOp is promoted to the same size, convert it. Otherwise,
// fall out of the switch and widen the promoted input.
- InOp = GetPromotedInteger(InOp);
- InVT = InOp.getValueType();
- if (WidenVT.bitsEq(InVT))
- return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ SDValue NInOp = GetPromotedInteger(InOp);
+ EVT NInVT = NInOp.getValueType();
+ if (WidenVT.bitsEq(NInVT)) {
+      // For big-endian targets we need to shift the input integer or the
+      // interesting bits will end up in the wrong place.
+ if (DAG.getDataLayout().isBigEndian()) {
+ unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NInVT, DAG.getDataLayout());
+ assert(ShiftAmt < WidenVT.getSizeInBits() && "Too large shift amount!");
+ NInOp = DAG.getNode(ISD::SHL, dl, NInVT, NInOp,
+ DAG.getConstant(ShiftAmt, dl, ShiftAmtTy));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NInOp);
+ }
+ InOp = NInOp;
+ InVT = NInVT;
break;
+ }
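The new shift matters because integer promotion widens a value at the numerically low end, while a big-endian bitcast hands out the most significant bytes first. A standalone model that simulates endianness explicitly: bitcasting a promoted 16-bit payload to bytes only keeps the payload where a big-endian reader expects it if the value is first shifted into the high bits.

#include <cstdint>
#include <cstdio>

// Serialize a 32-bit value as big-endian bytes, most significant first.
void bigEndianBytes(uint32_t V, uint8_t Out[4]) {
  for (int i = 0; i < 4; ++i)
    Out[i] = static_cast<uint8_t>(V >> (24 - 8 * i));
}

int main() {
  uint16_t In = 0xABCD;   // the pre-promotion payload
  uint32_t Promoted = In; // zero-extended: 0x0000ABCD

  uint8_t Naive[4], Shifted[4];
  bigEndianBytes(Promoted, Naive);
  // Shift by (widened bits - original bits) so the payload lands in the
  // bytes a big-endian consumer reads first.
  bigEndianBytes(Promoted << (32 - 16), Shifted);

  std::printf("naive:   %02X%02X%02X%02X\n", Naive[0], Naive[1], Naive[2],
              Naive[3]); // 0000ABCD - payload in the wrong place
  std::printf("shifted: %02X%02X%02X%02X\n", Shifted[0], Shifted[1],
              Shifted[2], Shifted[3]); // ABCD0000 - payload read first
}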
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypePromoteFloat:
case TargetLowering::TypeExpandInteger:
@@ -3748,10 +3727,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
WidenVT.getVectorNumElements());
Mask = ModifyToType(Mask, WideMaskVT, true);
- SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
- Mask, PassThru, N->getMemoryVT(),
- N->getMemOperand(), ExtType,
- N->isExpandingLoad());
+ SDValue Res = DAG.getMaskedLoad(
+ WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
+ PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
+ ExtType, N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -3798,6 +3777,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
WidenVT, N->getOperand(0));
}
+// Return true if this is a SETCC node or a strict version of it.
+static inline bool isSETCCOp(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ return true;
+ }
+ return false;
+}
+
// Return true if this is a node that could have two SETCCs as operands.
static inline bool isLogicalMaskOp(unsigned Opcode) {
switch (Opcode) {
@@ -3809,6 +3799,13 @@ static inline bool isLogicalMaskOp(unsigned Opcode) {
return false;
}
+// If N is a SETCC or a strict variant of it, return the type
+// of the compare operands.
+static inline EVT getSETCCOperandType(SDValue N) {
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ return N->getOperand(OpNo).getValueType();
+}
+
// This is used just for the assert in convertMask(). Check that this is
// either a SETCC or a SETCC previously handled by convertMask().
#ifndef NDEBUG
@@ -3831,7 +3828,7 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) {
return isSETCCorConvertedSETCC(N.getOperand(0)) &&
isSETCCorConvertedSETCC(N.getOperand(1));
- return (N.getOpcode() == ISD::SETCC ||
+ return (isSETCCOp(N.getOpcode()) ||
ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
}
#endif
@@ -3846,10 +3843,17 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
// Make a new Mask node, with a legal result VT.
+ SDValue Mask;
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
Ops.push_back(InMask->getOperand(i));
- SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
+ if (InMask->isStrictFPOpcode()) {
+ Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask),
+ { MaskVT, MVT::Other }, Ops);
+ ReplaceValueWith(InMask.getValue(1), Mask.getValue(1));
+  } else
+    Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
// If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
// extend or truncate is needed.
@@ -3902,7 +3906,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
if (N->getOpcode() != ISD::VSELECT)
return SDValue();
- if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode()))
+ if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode()))
return SDValue();
// If this is a split VSELECT that was already handled, do
@@ -3925,8 +3929,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
return SDValue();
// If there is support for an i1 vector mask, don't touch.
- if (Cond.getOpcode() == ISD::SETCC) {
- EVT SetCCOpVT = Cond->getOperand(0).getValueType();
+ if (isSETCCOp(Cond.getOpcode())) {
+ EVT SetCCOpVT = getSETCCOperandType(Cond);
while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
@@ -3957,17 +3961,17 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();
SDValue Mask;
- if (Cond->getOpcode() == ISD::SETCC) {
- EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType());
+ if (isSETCCOp(Cond->getOpcode())) {
+ EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond));
Mask = convertMask(Cond, MaskVT, ToMaskVT);
} else if (isLogicalMaskOp(Cond->getOpcode()) &&
- Cond->getOperand(0).getOpcode() == ISD::SETCC &&
- Cond->getOperand(1).getOpcode() == ISD::SETCC) {
+ isSETCCOp(Cond->getOperand(0).getOpcode()) &&
+ isSETCCOp(Cond->getOperand(1).getOpcode())) {
// Cond is (AND/OR/XOR (SETCC, SETCC))
SDValue SETCC0 = Cond->getOperand(0);
SDValue SETCC1 = Cond->getOperand(1);
- EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType());
- EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType());
+ EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0));
+ EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1));
unsigned ScalarBits0 = VT0.getScalarSizeInBits();
unsigned ScalarBits1 = VT1.getScalarSizeInBits();
unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
@@ -4119,6 +4123,47 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
WidenVT, InOp1, InOp2, N->getOperand(2));
}
+SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(1).getValueType().isVector() &&
+ "Operands must be vectors");
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ SDValue CC = N->getOperand(3);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+
+ // Fully unroll and reassemble.
+ SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 8> Chains(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue LHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue RHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
+ {Chain, LHSElem, RHSElem, CC});
+ Chains[i] = Scalars[i].getValue(1);
+ Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
+ DAG.getBoolConstant(true, dl, EltVT, VT),
+ DAG.getBoolConstant(false, dl, EltVT, VT));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(WidenVT, dl, Scalars);
+}
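Both STRICT_FSETCC widenings in this patch fall back to full unrolling: extract each pair of scalars, compare them on the shared incoming chain, collect the per-element chains into a TokenFactor, and rebuild the result vector, padding unused widened lanes with UNDEF. A standalone scalar model of the unroll-and-reassemble step, with 0 standing in for UNDEF padding and -1/0 for the all-ones/all-zeros boolean lanes.

#include <cstddef>
#include <cstdio>
#include <vector>

// Unroll a vector ordered less-than compare into scalar compares and
// reassemble a widened lane mask.
std::vector<int> unrolledSetLT(const std::vector<double> &L,
                               const std::vector<double> &R,
                               std::size_t WidenNumElts) {
  std::vector<int> Scalars(WidenNumElts, 0); // 0 stands in for UNDEF
  for (std::size_t i = 0; i < L.size(); ++i)
    Scalars[i] = L[i] < R[i] ? -1 : 0;
  return Scalars;
}

int main() {
  std::vector<double> L{1.0, 4.0, 2.0}, R{2.0, 3.0, 2.0};
  for (int M : unrolledSetLT(L, R, /*WidenNumElts=*/4))
    std::printf("%d ", M); // -1 0 0 0
  std::printf("\n");
}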
//===----------------------------------------------------------------------===//
// Widen Vector Operand
@@ -4150,6 +4195,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
@@ -4161,12 +4208,16 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::SINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::TRUNCATE:
Res = WidenVecOp_Convert(N);
break;
@@ -4297,13 +4348,21 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
SDValue Res;
if (N->isStrictFPOpcode()) {
- Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
- { N->getOperand(0), InOp });
+ if (Opcode == ISD::STRICT_FP_ROUND)
+ Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
+ { N->getOperand(0), InOp, N->getOperand(2) });
+ else
+ Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
+ { N->getOperand(0), InOp });
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- } else
- Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+ } else {
+ if (Opcode == ISD::FP_ROUND)
+ Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1));
+ else
+ Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+ }
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
@@ -4486,7 +4545,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
StVal.getValueType().getVectorNumElements() &&
"Mask and data vectors should have the same number of elements");
return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
- Mask, MST->getMemoryVT(), MST->getMemOperand(),
+ MST->getOffset(), Mask, MST->getMemoryVT(),
+ MST->getMemOperand(), MST->getAddressingMode(),
false, MST->isCompressingStore());
}
@@ -4580,6 +4640,44 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
return DAG.getNode(ExtendCode, dl, VT, CC);
}
+SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue LHS = GetWidenedVector(N->getOperand(1));
+ SDValue RHS = GetWidenedVector(N->getOperand(2));
+ SDValue CC = N->getOperand(3);
+ SDLoc dl(N);
+
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Unroll into a build vector.
+ SmallVector<SDValue, 8> Scalars(NumElts);
+ SmallVector<SDValue, 8> Chains(NumElts);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue LHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue RHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
+ {Chain, LHSElem, RHSElem, CC});
+ Chains[i] = Scalars[i].getValue(1);
+ Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
+ DAG.getBoolConstant(true, dl, EltVT, VT),
+ DAG.getBoolConstant(false, dl, EltVT, VT));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(VT, dl, Scalars);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
SDLoc dl(N);
SDValue Op = GetWidenedVector(N->getOperand(0));
@@ -4670,7 +4768,8 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned Width, EVT WidenVT,
unsigned Align = 0, unsigned WidenEx = 0) {
EVT WidenEltVT = WidenVT.getVectorElementType();
- unsigned WidenWidth = WidenVT.getSizeInBits();
+ const bool Scalable = WidenVT.isScalableVector();
+ unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize();
unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
unsigned AlignInBits = Align*8;
@@ -4681,23 +4780,27 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
// See if there is larger legal integer than the element type to load/store.
unsigned VT;
- for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
- VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
- EVT MemVT((MVT::SimpleValueType) VT);
- unsigned MemVTWidth = MemVT.getSizeInBits();
- if (MemVT.getSizeInBits() <= WidenEltWidth)
- break;
- auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
- if ((Action == TargetLowering::TypeLegal ||
- Action == TargetLowering::TypePromoteInteger) &&
- (WidenWidth % MemVTWidth) == 0 &&
- isPowerOf2_32(WidenWidth / MemVTWidth) &&
- (MemVTWidth <= Width ||
- (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
- if (MemVTWidth == WidenWidth)
- return MemVT;
- RetVT = MemVT;
- break;
+  // Don't bother looking for an integer type if the vector is scalable; skip
+  // straight to vector types.
+ if (!Scalable) {
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+ if ((Action == TargetLowering::TypeLegal ||
+ Action == TargetLowering::TypePromoteInteger) &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (MemVTWidth == WidenWidth)
+ return MemVT;
+ RetVT = MemVT;
+ break;
+ }
}
}
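FindMemType's integer loop, now skipped entirely for scalable vectors (there is no scalable scalar integer to fall back on), is a widest-first search for a chunk type that tiles the widened width into a power-of-two number of pieces. A standalone sketch of just that search criterion over plain bit widths; the candidate table is illustrative.

#include <cassert>

static bool isPowerOf2(unsigned X) { return X && !(X & (X - 1)); }

// Widest-first search for an integer chunk width that tiles WidenWidth into
// a power-of-two number of pieces and does not exceed the available Width.
unsigned findChunkWidth(unsigned WidenWidth, unsigned Width) {
  static const unsigned Candidates[] = {128, 64, 32, 16, 8};
  for (unsigned MemW : Candidates)
    if (WidenWidth % MemW == 0 && isPowerOf2(WidenWidth / MemW) &&
        MemW <= Width)
      return MemW;
  return 0; // no usable chunk; the real code then tries vector types
}

int main() {
  assert(findChunkWidth(/*WidenWidth=*/256, /*Width=*/96) == 64);
  assert(findChunkWidth(/*WidenWidth=*/256, /*Width=*/32) == 32);
}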
@@ -4706,7 +4809,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
- unsigned MemVTWidth = MemVT.getSizeInBits();
+ // Skip vector MVTs which don't match the scalable property of WidenVT.
+ if (Scalable != MemVT.isScalableVector())
+ continue;
+ unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinSize();
auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
if ((Action == TargetLowering::TypeLegal ||
Action == TargetLowering::TypePromoteInteger) &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index d4c1fb36475e..0e4d783e3505 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -910,10 +910,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
- if (MDNode *MD = DAG->getHeapAllocSite(N)) {
+ if (MDNode *MD = DAG->getHeapAllocSite(N))
if (NewInsn && NewInsn->isCall())
- MF.addCodeViewHeapAllocSite(NewInsn, MD);
- }
+ NewInsn->setHeapAllocMarker(MF, MD);
GluedNodes.pop_back();
}
@@ -923,9 +922,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (HasDbg)
ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
NewInsn);
+
if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
if (NewInsn && NewInsn->isCall())
- MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ NewInsn->setHeapAllocMarker(MF, MD);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 52a71b91d93f..313e07b5fdd6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -24,6 +24,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -63,6 +66,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -352,9 +356,9 @@ ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
(OldG << 2)); // New L bit.
}
-ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+static ISD::CondCode getSetCCInverseImpl(ISD::CondCode Op, bool isIntegerLike) {
unsigned Operation = Op;
- if (isInteger)
+ if (isIntegerLike)
Operation ^= 7; // Flip L, G, E bits, but not U.
else
Operation ^= 15; // Flip all of the condition bits.
@@ -365,6 +369,15 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
return ISD::CondCode(Operation);
}
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, EVT Type) {
+ return getSetCCInverseImpl(Op, Type.isInteger());
+}
+
+ISD::CondCode ISD::GlobalISel::getSetCCInverse(ISD::CondCode Op,
+ bool isIntegerLike) {
+ return getSetCCInverseImpl(Op, isIntegerLike);
+}
+
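The refactoring above preserves the old bit trick: an ISD::CondCode is a small mask of Unordered/Less/Greater/Equal predicates, so inverting an integer-like compare flips L, G and E (xor 7), while an FP compare must also flip the unordered bit (xor 15). A standalone sketch with a hypothetical 4-bit encoding laid out the same way.

#include <cassert>

// Hypothetical encoding: bit 0 = Equal, bit 1 = Greater, bit 2 = Less,
// bit 3 = Unordered.
enum Cond : unsigned {
  EQ = 0b0001,
  GT = 0b0010,
  LT = 0b0100,
  GE = GT | EQ,
  LE = LT | EQ,
  UNE = LT | GT | 0b1000, // unordered or not equal
};

Cond invert(Cond C, bool IsIntegerLike) {
  unsigned Bits = C;
  Bits ^= IsIntegerLike ? 7u   // flip L, G, E; leave U alone
                        : 15u; // flip all four condition bits
  return static_cast<Cond>(Bits);
}

int main() {
  assert(invert(LT, /*IsIntegerLike=*/true) == GE);   // !(a < b) == a >= b
  assert(invert(EQ, /*IsIntegerLike=*/false) == UNE); // FP inverse admits NaN
}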
/// For an integer comparison, return 1 if the comparison is a signed operation
/// and 2 if the result is an unsigned comparison. Return zero if the operation
/// does not depend on the sign of the input (setne and seteq).
@@ -385,7 +398,8 @@ static int isSignedOp(ISD::CondCode Opcode) {
}
ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
- bool IsInteger) {
+ EVT Type) {
+ bool IsInteger = Type.isInteger();
if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed integer setcc with an unsigned integer setcc.
return ISD::SETCC_INVALID;
@@ -405,7 +419,8 @@ ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
}
ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
- bool IsInteger) {
+ EVT Type) {
+ bool IsInteger = Type.isInteger();
if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed setcc with an unsigned setcc.
return ISD::SETCC_INVALID;
@@ -1005,7 +1020,9 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE,
Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- LegacyDivergenceAnalysis * Divergence) {
+ LegacyDivergenceAnalysis * Divergence,
+ ProfileSummaryInfo *PSIin,
+ BlockFrequencyInfo *BFIin) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
@@ -1014,6 +1031,8 @@ void SelectionDAG::init(MachineFunction &NewMF,
LibInfo = LibraryInfo;
Context = &MF->getFunction().getContext();
DA = Divergence;
+ PSI = PSIin;
+ BFI = BFIin;
}
SelectionDAG::~SelectionDAG() {
@@ -1023,6 +1042,11 @@ SelectionDAG::~SelectionDAG() {
delete DbgInfo;
}
+bool SelectionDAG::shouldOptForSize() const {
+ return MF->getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI);
+}
+
void SelectionDAG::allnodes_clear() {
assert(&*AllNodes.begin() == &EntryNode);
AllNodes.remove(AllNodes.begin());
@@ -1101,6 +1125,20 @@ SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
: getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
}
+std::pair<SDValue, SDValue>
+SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain,
+ const SDLoc &DL, EVT VT) {
+ assert(!VT.bitsEq(Op.getValueType()) &&
+ "Strict no-op FP extend/round not allowed.");
+ SDValue Res =
+ VT.bitsGT(Op.getValueType())
+ ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op})
+ : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
+ {Chain, Op, getIntPtrConstant(0, DL)});
+
+ return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1));
+}
+
SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ANY_EXTEND, DL, VT, Op) :
@@ -1279,7 +1317,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
}
SDValue Result(N, 0);
- if (VT.isVector())
+ if (VT.isScalableVector())
+ Result = getSplatVector(VT, DL, Result);
+ else if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
return Result;
@@ -1425,7 +1465,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = MF->getFunction().hasOptSize()
+ Alignment = shouldOptForSize()
? getDataLayout().getABITypeAlignment(C->getType())
: getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -2379,9 +2419,10 @@ SDValue SelectionDAG::getSplatValue(SDValue V) {
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
/// is less than the element bit-width of the shift node, return it.
-static const APInt *getValidShiftAmountConstant(SDValue V) {
+static const APInt *getValidShiftAmountConstant(SDValue V,
+ const APInt &DemandedElts) {
unsigned BitWidth = V.getScalarValueSizeInBits();
- if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) {
// Shifting more than the bitwidth is not valid.
const APInt &ShAmt = SA->getAPIntValue();
if (ShAmt.ult(BitWidth))
@@ -2392,13 +2433,16 @@ static const APInt *getValidShiftAmountConstant(SDValue V) {
/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
/// than the element bit-width of the shift node, return the minimum value.
-static const APInt *getValidMinimumShiftAmountConstant(SDValue V) {
+static const APInt *
+getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
unsigned BitWidth = V.getScalarValueSizeInBits();
auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
if (!BV)
return nullptr;
const APInt *MinShAmt = nullptr;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
if (!SA)
return nullptr;
@@ -2413,6 +2457,32 @@ static const APInt *getValidMinimumShiftAmountConstant(SDValue V) {
return MinShAmt;
}
+/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
+/// than the element bit-width of the shift node, return the maximum value.
+static const APInt *
+getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
+ unsigned BitWidth = V.getScalarValueSizeInBits();
+ auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
+ if (!BV)
+ return nullptr;
+ const APInt *MaxShAmt = nullptr;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!SA)
+ return nullptr;
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.uge(BitWidth))
+ return nullptr;
+ if (MaxShAmt && MaxShAmt->uge(ShAmt))
+ continue;
+ MaxShAmt = &ShAmt;
+ }
+ return MaxShAmt;
+}
+
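getValidMaximumShiftAmountConstant mirrors the minimum variant above, and both now consult DemandedElts so that lanes nobody reads can neither invalidate the bound nor tighten it incorrectly. A standalone sketch over plain integers, with nullopt playing the role of the nullptr "no valid bound" result.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

// Smallest and largest per-lane shift amount among the demanded lanes;
// nullopt if any demanded lane shifts by >= BitWidth (an invalid shift).
std::optional<std::pair<uint32_t, uint32_t>>
validShiftBounds(const std::vector<uint32_t> &ShAmts,
                 const std::vector<bool> &Demanded, uint32_t BitWidth) {
  std::optional<std::pair<uint32_t, uint32_t>> Bounds;
  for (std::size_t i = 0; i < ShAmts.size(); ++i) {
    if (!Demanded[i])
      continue; // ignore lanes nobody reads
    if (ShAmts[i] >= BitWidth)
      return std::nullopt; // one bad demanded lane spoils both bounds
    if (!Bounds)
      Bounds.emplace(ShAmts[i], ShAmts[i]);
    else
      Bounds.emplace(std::min(Bounds->first, ShAmts[i]),
                     std::max(Bounds->second, ShAmts[i]));
  }
  return Bounds;
}

int main() {
  // The out-of-range lane (40) is not demanded, so the bounds survive.
  auto B = validShiftBounds({3, 40, 7, 5}, {true, false, true, true}, 32);
  assert(B && B->first == 3 && B->second == 7);
}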
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. For vectors, the known bits are those that are shared by
/// every vector element.
@@ -2784,37 +2854,60 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
// If we know the result of a setcc has the top bits zero, use this info.
- if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
+ }
case ISD::SHL:
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned Shift = ShAmt->getZExtValue();
Known.Zero <<= Shift;
Known.One <<= Shift;
// Low bits are known zero.
Known.Zero.setLowBits(Shift);
+ break;
}
+
+ // No matter the shift amount, the trailing zeros will stay zero.
+ Known.Zero = APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros());
+ Known.One.clearAllBits();
+
+ // Minimum shift low bits are known zero.
+ if (const APInt *ShMinAmt =
+ getValidMinimumShiftAmountConstant(Op, DemandedElts))
+ Known.Zero.setLowBits(ShMinAmt->getZExtValue());
break;
case ISD::SRL:
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned Shift = ShAmt->getZExtValue();
Known.Zero.lshrInPlace(Shift);
Known.One.lshrInPlace(Shift);
// High bits are known zero.
Known.Zero.setHighBits(Shift);
- } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) {
- // Minimum shift high bits are known zero.
- Known.Zero.setHighBits(ShMinAmt->getZExtValue());
+ break;
}
+
+ // No matter the shift amount, the leading zeros will stay zero.
+ Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros());
+ Known.One.clearAllBits();
+
+ // Minimum shift high bits are known zero.
+ if (const APInt *ShMinAmt =
+ getValidMinimumShiftAmountConstant(Op, DemandedElts))
+ Known.Zero.setHighBits(ShMinAmt->getZExtValue());
break;
case ISD::SRA:
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
// Sign extend known zero/one bit (else is unknown).
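The rewritten SHL/SRL handling computes the operand's known bits up front, so something survives even when the shift amount is unknown: a left shift can never introduce a one below the operand's lowest possible set bit, so the minimum count of trailing zeros is preserved (and, symmetrically, leading zeros survive SRL). A standalone 8-bit model with a (KnownZero, KnownOne) mask pair.

#include <cassert>
#include <cstdint>

struct Known8 {
  uint8_t Zero; // bits known to be 0
  uint8_t One;  // bits known to be 1
};

// Known bits of (X << ShAmt) when ShAmt itself is unknown: only the
// preserved trailing zeros survive.
Known8 shlUnknownAmount(Known8 X) {
  unsigned MinTrailingZeros = 0;
  while (MinTrailingZeros < 8 && ((X.Zero >> MinTrailingZeros) & 1))
    ++MinTrailingZeros;
  Known8 R;
  R.Zero = static_cast<uint8_t>((1u << MinTrailingZeros) - 1);
  R.One = 0; // any set bit may move to any higher position
  return R;
}

int main() {
  // The operand is a multiple of 4 (low two bits known zero)...
  Known8 X{/*Zero=*/0x03, /*One=*/0x00};
  Known8 R = shlUnknownAmount(X);
  // ...so any left shift of it is still a multiple of 4.
  assert(R.Zero == 0x03 && R.One == 0x00);
}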
@@ -3336,20 +3429,20 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
KnownBits N0Known = computeKnownBits(N0);
bool overflow;
- (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow);
+ (void)N0Known.getMaxValue().uadd_ov(N1Known.getMaxValue(), overflow);
if (!overflow)
return OFK_Never;
}
// mulhi + 1 never overflow
if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
- (~N1Known.Zero & 0x01) == ~N1Known.Zero)
+ (N1Known.getMaxValue() & 0x01) == N1Known.getMaxValue())
return OFK_Never;
if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
KnownBits N0Known = computeKnownBits(N0);
- if ((~N0Known.Zero & 0x01) == ~N0Known.Zero)
+ if ((N0Known.getMaxValue() & 0x01) == N0Known.getMaxValue())
return OFK_Never;
}
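KnownBits::getMaxValue() is simply ~Known.Zero (every bit not known zero set to one), so this change is mostly about readability; the test itself stays "can even the two maximum possible values be added without wrapping". A standalone 32-bit model of that OFK_Never check.

#include <cassert>
#include <cstdint>

struct Known32 {
  uint32_t Zero, One;
  uint32_t maxValue() const { return ~Zero; } // unknown bits assumed 1
};

// Addition can never overflow if even the two maxima fit in 32 bits.
bool addNeverOverflows(Known32 A, Known32 B) {
  return uint64_t(A.maxValue()) + uint64_t(B.maxValue()) <= UINT32_MAX;
}

int main() {
  Known32 A{/*Zero=*/0xFFFF0000u, /*One=*/0}; // at most 0x0000FFFF
  Known32 B{/*Zero=*/0xFFFF0000u, /*One=*/1}; // at most 0x0000FFFF, odd
  assert(addNeverOverflows(A, B)); // 0xFFFF + 0xFFFF < 2^32
}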
@@ -3550,25 +3643,26 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp = VTBits - SrcVT.getScalarSizeInBits();
return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
}
-
case ISD::SRA:
- Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
- // SRA X, C -> adds C sign bits.
- if (ConstantSDNode *C =
- isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
- APInt ShiftVal = C->getAPIntValue();
- ShiftVal += Tmp;
- Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
- }
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // SRA X, C -> adds C sign bits.
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts))
+ Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
+ else if (const APInt *ShAmt =
+ getValidMinimumShiftAmountConstant(Op, DemandedElts))
+ Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
return Tmp;
case ISD::SHL:
- if (ConstantSDNode *C =
- isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
- // shl destroys sign bits.
- Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
- if (C->getAPIntValue().uge(VTBits) || // Bad shift.
- C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out.
- return Tmp - C->getZExtValue();
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
+ // shl destroys sign bits, ensure it doesn't shift out all sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (ShAmt->ult(Tmp))
+ return Tmp - ShAmt->getZExtValue();
+ } else if (const APInt *ShAmt =
+ getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (ShAmt->ult(Tmp))
+ return Tmp - ShAmt->getZExtValue();
}
break;
case ISD::AND:
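The rewritten SRA/SHL cases lean on two facts: an arithmetic shift right by C duplicates the sign bit C more times (clamped at the bit width), while a shift left by C destroys exactly C sign bits unless it would shift them all out. With only per-lane bounds available, the minimum shift is the conservative choice for SRA and the maximum for SHL. A standalone 32-bit model.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Known sign bits after shifting a 32-bit value whose operand has Tmp
// known sign bits.
unsigned signBitsAfterSRA(unsigned Tmp, unsigned ShAmt) {
  return std::min<uint64_t>(uint64_t(Tmp) + ShAmt, 32); // gains ShAmt copies
}
unsigned signBitsAfterSHL(unsigned Tmp, unsigned ShAmt) {
  return ShAmt < Tmp ? Tmp - ShAmt : 1; // 1 == nothing known beyond the MSB
}

int main() {
  // int32_t X = -3 (0xFFFFFFFD) has 30 sign bits.
  assert(signBitsAfterSRA(30, 4) == 32); // -3 >> 4 == -1: all sign bits
  assert(signBitsAfterSHL(30, 4) == 26); // -3 << 4 == 0xFFFFFFD0
}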
@@ -3648,11 +3742,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return VTBits;
break;
case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
// If setcc returns 0/-1, all bits are sign bits.
- if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
+ }
case ISD::ROTL:
case ISD::ROTR:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
@@ -4648,11 +4746,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
- // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
- OpOpcode == ISD::FSUB)
- return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
- Operand.getOperand(0), Flags);
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getOperand(0);
break;
@@ -4689,46 +4782,46 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
}
-static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
- const APInt &C2) {
+static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
+ const APInt &C2) {
switch (Opcode) {
- case ISD::ADD: return std::make_pair(C1 + C2, true);
- case ISD::SUB: return std::make_pair(C1 - C2, true);
- case ISD::MUL: return std::make_pair(C1 * C2, true);
- case ISD::AND: return std::make_pair(C1 & C2, true);
- case ISD::OR: return std::make_pair(C1 | C2, true);
- case ISD::XOR: return std::make_pair(C1 ^ C2, true);
- case ISD::SHL: return std::make_pair(C1 << C2, true);
- case ISD::SRL: return std::make_pair(C1.lshr(C2), true);
- case ISD::SRA: return std::make_pair(C1.ashr(C2), true);
- case ISD::ROTL: return std::make_pair(C1.rotl(C2), true);
- case ISD::ROTR: return std::make_pair(C1.rotr(C2), true);
- case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true);
- case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
- case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
- case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
- case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true);
- case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true);
- case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true);
- case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true);
+ case ISD::ADD: return C1 + C2;
+ case ISD::SUB: return C1 - C2;
+ case ISD::MUL: return C1 * C2;
+ case ISD::AND: return C1 & C2;
+ case ISD::OR: return C1 | C2;
+ case ISD::XOR: return C1 ^ C2;
+ case ISD::SHL: return C1 << C2;
+ case ISD::SRL: return C1.lshr(C2);
+ case ISD::SRA: return C1.ashr(C2);
+ case ISD::ROTL: return C1.rotl(C2);
+ case ISD::ROTR: return C1.rotr(C2);
+ case ISD::SMIN: return C1.sle(C2) ? C1 : C2;
+ case ISD::SMAX: return C1.sge(C2) ? C1 : C2;
+ case ISD::UMIN: return C1.ule(C2) ? C1 : C2;
+ case ISD::UMAX: return C1.uge(C2) ? C1 : C2;
+ case ISD::SADDSAT: return C1.sadd_sat(C2);
+ case ISD::UADDSAT: return C1.uadd_sat(C2);
+ case ISD::SSUBSAT: return C1.ssub_sat(C2);
+ case ISD::USUBSAT: return C1.usub_sat(C2);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
- return std::make_pair(C1.udiv(C2), true);
+ return C1.udiv(C2);
case ISD::UREM:
if (!C2.getBoolValue())
break;
- return std::make_pair(C1.urem(C2), true);
+ return C1.urem(C2);
case ISD::SDIV:
if (!C2.getBoolValue())
break;
- return std::make_pair(C1.sdiv(C2), true);
+ return C1.sdiv(C2);
case ISD::SREM:
if (!C2.getBoolValue())
break;
- return std::make_pair(C1.srem(C2), true);
+ return C1.srem(C2);
}
- return std::make_pair(APInt(1, 0), false);
+ return llvm::None;
}
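Switching FoldValue from std::pair<APInt, bool> to Optional<APInt> removes the dummy APInt(1, 0) that had to be manufactured on every failure path. A standalone sketch of the same shape using std::optional over 64-bit integers; only a few opcodes are shown.

#include <cassert>
#include <cstdint>
#include <optional>

enum class Op { Add, Mul, UDiv };

// Fold two constants; nullopt means "not foldable" (here: division by zero).
std::optional<uint64_t> foldValue(Op Opcode, uint64_t C1, uint64_t C2) {
  switch (Opcode) {
  case Op::Add:
    return C1 + C2;
  case Op::Mul:
    return C1 * C2;
  case Op::UDiv:
    if (C2 == 0)
      break; // no dummy value needed, unlike the old pair
    return C1 / C2;
  }
  return std::nullopt;
}

int main() {
  assert(foldValue(Op::Add, 2, 3) == 5u);
  assert(!foldValue(Op::UDiv, 2, 0)); // fold refused; caller keeps the node
}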
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
@@ -4736,12 +4829,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
const ConstantSDNode *C2) {
if (C1->isOpaque() || C2->isOpaque())
return SDValue();
-
- std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(),
- C2->getAPIntValue());
- if (!Folded.second)
- return SDValue();
- return getConstant(Folded.first, DL, VT);
+ if (Optional<APInt> Folded =
+ FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()))
+ return getConstant(Folded.getValue(), DL, VT);
+ return SDValue();
}
SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
@@ -5228,8 +5319,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"The result of EXTRACT_VECTOR_ELT must be at least as wide as the \
element type of the vector.");
- // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
- if (N1.isUndef())
+ // Extract from an undefined value or using an undefined index is undefined.
+ if (N1.isUndef() || N2.isUndef())
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
@@ -5506,6 +5597,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
return getUNDEF(VT);
+
+ // Undefined index can be assumed out-of-bounds, so that's UNDEF too.
+ if (N3.isUndef())
+ return getUNDEF(VT);
+
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (N2.isUndef())
+ return N1;
+
break;
}
case ISD::INSERT_SUBVECTOR: {
@@ -5697,10 +5797,19 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
return SDValue(nullptr, 0);
}
-SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset,
- const SDLoc &DL) {
+SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, int64_t Offset,
+ const SDLoc &DL,
+ const SDNodeFlags Flags) {
EVT VT = Base.getValueType();
- return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT));
+ return getMemBasePlusOffset(Base, getConstant(Offset, DL, VT), DL, Flags);
+}
+
+SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset,
+ const SDLoc &DL,
+ const SDNodeFlags Flags) {
+ assert(Offset.getValueType().isInteger());
+ EVT BasePtrVT = Ptr.getValueType();
+ return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags);
}
/// Returns true if memcpy source is constant data.
@@ -5722,12 +5831,13 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
SrcDelta + G->getOffset());
}
-static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF,
+ SelectionDAG &DAG) {
// On Darwin, -Os means optimize for size without hurting performance, so
// only really optimize for size when -Oz (MinSize) is used.
if (MF.getTarget().getTargetTriple().isOSDarwin())
return MF.getFunction().hasMinSize();
- return MF.getFunction().hasOptSize();
+ return DAG.shouldOptForSize();
}
static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
@@ -5777,7 +5887,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
+ bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -5960,7 +6070,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
+ bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -6066,7 +6176,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
+ bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -6557,7 +6667,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
- if (!Size)
+ if (!Size && MemVT.isScalableVector())
+ Size = MemoryLocation::UnknownSize;
+ else if (!Size)
Size = MemVT.getStoreSize();
MachineFunction &MF = getMachineFunction();
@@ -6951,16 +7063,22 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
}
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
- SDValue Ptr, SDValue Mask, SDValue PassThru,
- EVT MemVT, MachineMemOperand *MMO,
+ SDValue Base, SDValue Offset, SDValue Mask,
+ SDValue PassThru, EVT MemVT,
+ MachineMemOperand *MMO,
+ ISD::MemIndexedMode AM,
ISD::LoadExtType ExtTy, bool isExpanding) {
- SDVTList VTs = getVTList(VT, MVT::Other);
- SDValue Ops[] = { Chain, Ptr, Mask, PassThru };
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) &&
+ "Unindexed masked load with an offset!");
+ SDVTList VTs = Indexed ? getVTList(VT, Base.getValueType(), MVT::Other)
+ : getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru};
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
- dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
+ dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -6968,7 +7086,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
- ExtTy, isExpanding, MemVT, MMO);
+ AM, ExtTy, isExpanding, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -6978,27 +7096,45 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
return V;
}
+SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ MaskedLoadSDNode *LD = cast<MaskedLoadSDNode>(OrigLoad);
+  assert(LD->getOffset().isUndef() && "Masked load is already an indexed load!");
+ return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base,
+ Offset, LD->getMask(), LD->getPassThru(),
+ LD->getMemoryVT(), LD->getMemOperand(), AM,
+ LD->getExtensionType(), LD->isExpandingLoad());
+}
+
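The (Base, Offset, AM) triple added to getMaskedLoad above, and to getMaskedStore below, turns masked memory operations into potentially indexed accesses: a pre- or post-indexed form produces, besides the loaded value and the chain, the written-back address as an extra result. A standalone scalar model of a post-indexed load returning the value together with the incremented base; the struct is illustrative, not the SDNode result layout.

#include <cassert>

struct LoadResult {
  int Value;          // the loaded value
  const int *NewBase; // the written-back address (indexed forms only)
};

// Post-indexed load: read from Base, then advance Base by Offset elements.
LoadResult postIndexedLoad(const int *Base, int Offset) {
  return {*Base, Base + Offset};
}

int main() {
  int Mem[3] = {7, 8, 9};
  LoadResult R = postIndexedLoad(Mem, 2);
  assert(R.Value == 7 && *R.NewBase == 9);
}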
SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
- SDValue Val, SDValue Ptr, SDValue Mask,
- EVT MemVT, MachineMemOperand *MMO,
- bool IsTruncating, bool IsCompressing) {
+ SDValue Val, SDValue Base, SDValue Offset,
+ SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO,
+ ISD::MemIndexedMode AM, bool IsTruncating,
+ bool IsCompressing) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
- SDVTList VTs = getVTList(MVT::Other);
- SDValue Ops[] = { Chain, Val, Ptr, Mask };
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) &&
+ "Unindexed masked store with an offset!");
+ SDVTList VTs = Indexed ? getVTList(Base.getValueType(), MVT::Other)
+ : getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Val, Base, Offset, Mask};
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
- dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));
+ dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
- IsTruncating, IsCompressing, MemVT, MMO);
+ auto *N =
+ newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ IsTruncating, IsCompressing, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -7008,6 +7144,17 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ MaskedStoreSDNode *ST = cast<MaskedStoreSDNode>(OrigStore);
+ assert(ST->getOffset().isUndef() &&
+ "Masked store is already a indexed store!");
+ return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset,
+ ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(),
+ AM, ST->isTruncatingStore(), ST->isCompressingStore());
+}
+
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO,
@@ -7263,8 +7410,40 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
if (VTList.NumVTs == 1)
return getNode(Opcode, DL, VTList.VTs[0], Ops);
-#if 0
switch (Opcode) {
+ case ISD::STRICT_FP_EXTEND:
+ assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
+ "Invalid STRICT_FP_EXTEND!");
+ assert(VTList.VTs[0].isFloatingPoint() &&
+ Ops[1].getValueType().isFloatingPoint() && "Invalid FP cast!");
+ assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() &&
+ "STRICT_FP_EXTEND result type should be vector iff the operand "
+ "type is vector!");
+ assert((!VTList.VTs[0].isVector() ||
+ VTList.VTs[0].getVectorNumElements() ==
+ Ops[1].getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) &&
+ "Invalid fpext node, dst <= src!");
+ break;
+ case ISD::STRICT_FP_ROUND:
+ assert(VTList.NumVTs == 2 && Ops.size() == 3 && "Invalid STRICT_FP_ROUND!");
+ assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() &&
+ "STRICT_FP_ROUND result type should be vector iff the operand "
+ "type is vector!");
+ assert((!VTList.VTs[0].isVector() ||
+ VTList.VTs[0].getVectorNumElements() ==
+ Ops[1].getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(VTList.VTs[0].isFloatingPoint() &&
+ Ops[1].getValueType().isFloatingPoint() &&
+ VTList.VTs[0].bitsLT(Ops[1].getValueType()) &&
+ isa<ConstantSDNode>(Ops[2]) &&
+ (cast<ConstantSDNode>(Ops[2])->getZExtValue() == 0 ||
+ cast<ConstantSDNode>(Ops[2])->getZExtValue() == 1) &&
+ "Invalid STRICT_FP_ROUND!");
+ break;
+#if 0
// FIXME: figure out how to safely handle things like
// int foo(int x) { return 1 << (x & 255); }
// int bar() { return foo(256); }
@@ -7283,8 +7462,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
}
break;
- }
#endif
+ }
// Memoize the node unless it returns a flag.
SDNode *N;
@@ -7740,38 +7919,11 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
- case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
- case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
- case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
- case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
- case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
- case ISD::STRICT_FMA: NewOpc = ISD::FMA; break;
- case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break;
- case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
- case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
- case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break;
- case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break;
- case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break;
- case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break;
- case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break;
- case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break;
- case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break;
- case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break;
- case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break;
- case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break;
- case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break;
- case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
- case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
- case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break;
- case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break;
- case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break;
- case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break;
- case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break;
- case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break;
- case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
- case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break;
- case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break;
- case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break;
+#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break;
+#include "llvm/IR/ConstrainedOps.def"
}
assert(Node->getNumValues() == 2 && "Unexpected number of results!");
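
For readers unfamiliar with the .def-file technique that replaces the long hand-written switch above: the header is included with case-generating macros defined, so each entry expands to one case label. A minimal standalone sketch of the pattern — the two entries and all names here are invented, not the real ConstrainedOps.def contents:

// Sketch of the X-macro expansion; not the actual ConstrainedOps.def.
enum Opcode { FADD, FSUB, STRICT_FADD, STRICT_FSUB };

// Stand-in for the .def file: one macro invocation per operation.
#define SKETCH_OPS \
  INSTRUCTION(FAdd, 2, 1, experimental_constrained_fadd, FADD) \
  INSTRUCTION(FSub, 2, 1, experimental_constrained_fsub, FSUB)

static Opcode mutateStrictToPlain(Opcode Orig) {
  switch (Orig) {
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
  case STRICT_##DAGN: return DAGN;
  SKETCH_OPS // expands to: case STRICT_FADD: return FADD; case STRICT_FSUB: ...
#undef INSTRUCTION
  default: return Orig; // non-strict opcodes pass through
  }
}

int main() { return mutateStrictToPlain(STRICT_FADD) == FADD ? 0 : 1; }

Adding an operation to the .def file then updates every such switch at once, which is the point of the refactoring in this hunk.
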
@@ -8051,9 +8203,9 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
Expr = *Fragment;
}
// Clone the SDDbgValue and move it to To.
- SDDbgValue *Clone =
- getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(),
- Dbg->getDebugLoc(), Dbg->getOrder());
+ SDDbgValue *Clone = getDbgValue(
+ Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), Dbg->getDebugLoc(),
+ std::max(ToNode->getIROrder(), Dbg->getOrder()));
ClonedDVs.push_back(Clone);
if (InvalidateDbg) {
@@ -8831,7 +8983,9 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
// We check here that the size of the memory operand fits within the size of
// the MMO. This is because the MMO might indicate only a possible address
// range instead of specifying the affected memory addresses precisely.
- assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
+ // TODO: Make MachineMemOperands aware of scalable vectors.
+ assert(memvt.getStoreSize().getKnownMinSize() <= MMO->getSize() &&
+ "Size mismatch!");
}
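
The known-minimum comparison is needed because a scalable vector's store size is a minimum byte count multiplied by a runtime vscale, so only the minimum is checkable statically. A standalone sketch — TypeSizeSketch is invented here, standing in for the real size type:

#include <cassert>
#include <cstdint>

// Stand-in for a size that may scale with a runtime multiplier
// (vscale >= 1) when the type is a scalable vector.
struct TypeSizeSketch {
  uint64_t MinBytes;
  bool Scalable;
  uint64_t getKnownMinSize() const { return MinBytes; }
};

int main() {
  TypeSizeSketch SVE{16, true}; // e.g. <vscale x 4 x i32>: at least 16 bytes
  uint64_t MMOSize = 16;        // the MMO records only the minimum today
  assert(SVE.getKnownMinSize() <= MMOSize && "Size mismatch!");
}
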
/// Profile - Gather unique data for the node.
@@ -9245,11 +9399,11 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
/// it cannot be inferred.
unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
- const GlobalValue *GV;
+ const GlobalValue *GV = nullptr;
int64_t GVOffset = 0;
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType());
- KnownBits Known(IdxWidth);
+ unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
+ KnownBits Known(PtrWidth);
llvm::computeKnownBits(GV, Known, getDataLayout());
unsigned AlignBits = Known.countMinTrailingZeros();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8c15563fcd23..728d963a916f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -27,11 +27,13 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -84,6 +86,8 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -722,7 +726,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
unsigned IntermediateNumElts = IntermediateVT.isVector() ?
IntermediateVT.getVectorNumElements() : 1;
- // Convert the vector to the appropiate type if necessary.
+ // Convert the vector to the appropriate type if necessary.
unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
EVT BuiltVectorTy = EVT::getVectorVT(
@@ -1021,6 +1025,8 @@ void SelectionDAGBuilder::clear() {
UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
+ PendingConstrainedFP.clear();
+ PendingConstrainedFPStrict.clear();
CurInst = nullptr;
HasTailCall = false;
SDNodeOrder = LowestSDNodeOrder;
@@ -1031,50 +1037,66 @@ void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}
-SDValue SelectionDAGBuilder::getRoot() {
- if (PendingLoads.empty())
- return DAG.getRoot();
-
- if (PendingLoads.size() == 1) {
- SDValue Root = PendingLoads[0];
- DAG.setRoot(Root);
- PendingLoads.clear();
- return Root;
- }
-
- // Otherwise, we have to make a token factor node.
- SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads);
- PendingLoads.clear();
- DAG.setRoot(Root);
- return Root;
-}
-
-SDValue SelectionDAGBuilder::getControlRoot() {
+// Update DAG root to include dependencies on Pending chains.
+SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
SDValue Root = DAG.getRoot();
- if (PendingExports.empty())
+ if (Pending.empty())
return Root;
- // Turn all of the CopyToReg chains into one factored node.
+ // Add the current root to Pending, unless we already indirectly
+ // depend on it.
if (Root.getOpcode() != ISD::EntryToken) {
- unsigned i = 0, e = PendingExports.size();
+ unsigned i = 0, e = Pending.size();
for (; i != e; ++i) {
- assert(PendingExports[i].getNode()->getNumOperands() > 1);
- if (PendingExports[i].getNode()->getOperand(0) == Root)
+ assert(Pending[i].getNode()->getNumOperands() > 1);
+ if (Pending[i].getNode()->getOperand(0) == Root)
break; // Don't add the root if we already indirectly depend on it.
}
if (i == e)
- PendingExports.push_back(Root);
+ Pending.push_back(Root);
}
- Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
- PendingExports);
- PendingExports.clear();
+ if (Pending.size() == 1)
+ Root = Pending[0];
+ else
+ Root = DAG.getTokenFactor(getCurSDLoc(), Pending);
+
DAG.setRoot(Root);
+ Pending.clear();
return Root;
}
+SDValue SelectionDAGBuilder::getMemoryRoot() {
+ return updateRoot(PendingLoads);
+}
+
+SDValue SelectionDAGBuilder::getRoot() {
+ // Chain up all pending constrained intrinsics together with all
+ // pending loads, by simply appending them to PendingLoads and
+ // then calling getMemoryRoot().
+ PendingLoads.reserve(PendingLoads.size() +
+ PendingConstrainedFP.size() +
+ PendingConstrainedFPStrict.size());
+ PendingLoads.append(PendingConstrainedFP.begin(),
+ PendingConstrainedFP.end());
+ PendingLoads.append(PendingConstrainedFPStrict.begin(),
+ PendingConstrainedFPStrict.end());
+ PendingConstrainedFP.clear();
+ PendingConstrainedFPStrict.clear();
+ return getMemoryRoot();
+}
+
+SDValue SelectionDAGBuilder::getControlRoot() {
+ // We need to emit pending fpexcept.strict constrained intrinsics,
+ // so append them to the PendingExports list.
+ PendingExports.append(PendingConstrainedFPStrict.begin(),
+ PendingConstrainedFPStrict.end());
+ PendingConstrainedFPStrict.clear();
+ return updateRoot(PendingExports);
+}
+
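
A compact standalone model of what updateRoot does above: fold every pending chain, plus the old root, into one token-factor node so later nodes are ordered after all of them. Chains are modelled as plain ints and the DAG details are elided, so this is a sketch of the policy, not the implementation:

#include <utility>
#include <vector>

using Chain = int;
static const Chain EntryToken = 0;
static std::vector<std::vector<Chain>> TokenFactors; // operands per node

static Chain getTokenFactor(std::vector<Chain> Ops) {
  TokenFactors.push_back(std::move(Ops));
  return -static_cast<Chain>(TokenFactors.size()); // negative = token factor
}

static Chain updateRoot(Chain Root, std::vector<Chain> &Pending) {
  if (Pending.empty())
    return Root; // nothing new to order against
  // Keep the old root reachable (the real code also skips it when some
  // pending chain already depends on it directly).
  if (Root != EntryToken)
    Pending.push_back(Root);
  Chain NewRoot = Pending.size() == 1 ? Pending[0] : getTokenFactor(Pending);
  Pending.clear();
  return NewRoot;
}

int main() {
  std::vector<Chain> PendingLoads{1, 2};
  Chain Root = updateRoot(EntryToken, PendingLoads); // token factor of {1, 2}
  return Root < 0 ? 0 : 1;
}

The three accessors then differ only in which lists they drain: getMemoryRoot flushes PendingLoads alone, getRoot first folds both constrained-FP lists into PendingLoads, and getControlRoot flushes PendingExports plus the fpexcept.strict list.
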
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
if (I.isTerminator()) {
@@ -1104,6 +1126,15 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
Node->intersectFlagsWith(IncomingFlags);
}
}
+ // Constrained FP intrinsics with fpexcept.ignore should also get
+ // the NoFPExcept flag.
+ if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(&I))
+ if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore)
+ if (SDNode *Node = getNodeForIRValue(&I)) {
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoFPExcept(true);
+ Node->setFlags(Flags);
+ }
if (!I.isTerminator() && !HasTailCall &&
!isStatepoint(&I)) // statepoints handle their exports internally
@@ -2746,8 +2777,9 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
- assert(!I.hasOperandBundlesOtherThan(
- {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
+ LLVMContext::OB_funclet,
+ LLVMContext::OB_cfguardtarget}) &&
"Cannot lower invokes with arbitrary operand bundles yet!");
const Value *Callee(I.getCalledValue());
@@ -3033,7 +3065,7 @@ static bool isVectorReductionOp(const User *I) {
if (!Visited.insert(User).second)
continue;
- for (const auto &U : User->users()) {
+ for (const auto *U : User->users()) {
auto Inst = dyn_cast<Instruction>(U);
if (!Inst)
return false;
@@ -3119,6 +3151,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
if (isVectorReductionOp(&I)) {
Flags.setVectorReduction(true);
LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
+
+ // If no flags are set we will propagate the incoming flags; if any flags
+ // are set, we will intersect them with the incoming flags, so we need to
+ // copy the FMF flags here.
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
+ Flags.copyFMF(*FPOp);
+ }
}
SDValue Op1 = getValue(I.getOperand(0));
@@ -4039,9 +4078,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Root;
bool ConstantMemory = false;
- if (isVolatile || NumValues > MaxParallelChains)
+ if (isVolatile)
// Serialize volatile loads with other side effects.
Root = getRoot();
+ else if (NumValues > MaxParallelChains)
+ Root = getMemoryRoot();
else if (AA &&
AA->pointsToConstantMemory(MemoryLocation(
SV,
@@ -4216,10 +4257,9 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue Src = getValue(SrcV);
SDValue Ptr = getValue(PtrV);
- SDValue Root = getRoot();
+ SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
- EVT PtrVT = Ptr.getValueType();
unsigned Alignment = I.getAlignment();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
@@ -4245,8 +4285,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
Root = Chain;
ChainI = 0;
}
- SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
- DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
+ SDValue Add = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags);
SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
@@ -4292,6 +4331,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
@@ -4303,11 +4343,14 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
MachineMemOperand *MMO =
DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(PtrOperand),
- MachineMemOperand::MOStore, VT.getStoreSize(),
+ MachineMemOperand::MOStore,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(),
Alignment, AAInfo);
- SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
- MMO, false /* Truncating */,
- IsCompressing);
+ SDValue StoreNode =
+ DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
+ ISD::UNINDEXED, false /* Truncating */, IsCompressing);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
}
@@ -4346,9 +4389,10 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
unsigned FinalIndex = GEP->getNumOperands() - 1;
Value *IndexVal = GEP->getOperand(FinalIndex);
+ gep_type_iterator GTI = gep_type_begin(*GEP);
// Ensure all the other indices are 0.
- for (unsigned i = 1; i < FinalIndex; ++i) {
+ for (unsigned i = 1; i < FinalIndex; ++i, ++GTI) {
auto *C = dyn_cast<Constant>(GEP->getOperand(i));
if (!C)
return false;
@@ -4361,18 +4405,39 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
// The operands of the GEP may be defined in another basic block.
// In this case we'll not find nodes for the operands.
- if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
+ if (!SDB->findValue(Ptr))
+ return false;
+ Constant *C = dyn_cast<Constant>(IndexVal);
+ if (!C && !SDB->findValue(IndexVal))
return false;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
- Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
- SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ StructType *STy = GTI.getStructTypeOrNull();
+
+ if (STy) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ if (isa<VectorType>(C->getType())) {
+ C = C->getSplatValue();
+ // FIXME: Can getSplatValue return nullptr for a structure?
+ // If not, the following check can be removed.
+ if (!C)
+ return false;
+ }
+ auto *CI = cast<ConstantInt>(C);
+ Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ Index = DAG.getConstant(SL->getElementOffset(CI->getZExtValue()),
+ SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ } else {
+ Scale = DAG.getTargetConstant(
+ DL.getTypeAllocSize(GEP->getResultElementType()),
+ SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ Index = SDB->getValue(IndexVal);
+ }
Base = SDB->getValue(Ptr);
- Index = SDB->getValue(IndexVal);
IndexType = ISD::SIGNED_SCALED;
- if (!Index.getValueType().isVector()) {
+ if (STy || !Index.getValueType().isVector()) {
unsigned GEPWidth = GEP->getType()->getVectorNumElements();
EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
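
When the last GEP index selects a struct field (the STy case above), the scatter/gather index becomes the field's constant byte offset and the scale collapses to 1. A hand-rolled sketch of that offset computation — no LLVM types; the struct and its layout are invented for illustration:

#include <cstdint>
#include <cstdio>
#include <vector>

// Stand-in for a struct layout query: precomputed field byte offsets.
struct StructLayoutSketch {
  std::vector<uint64_t> FieldOffsets;
  uint64_t getElementOffset(uint64_t FieldNo) const {
    return FieldOffsets[FieldNo];
  }
};

int main() {
  // struct { int32_t a; int64_t b; } with b aligned to 8 bytes.
  StructLayoutSketch SL{{0, 8}};
  uint64_t FieldNo = 1;                          // GEP index picks field 'b'
  uint64_t Index = SL.getElementOffset(FieldNo); // becomes the constant 8
  uint64_t Scale = 1;                            // no per-element scaling left
  std::printf("Index=%llu Scale=%llu\n", (unsigned long long)Index,
              (unsigned long long)Scale);
}
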
@@ -4383,7 +4448,7 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
- // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask)
+ // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
const Value *Ptr = I.getArgOperand(1);
SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));
@@ -4407,7 +4472,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
- MachineMemOperand::MOStore, VT.getStoreSize(),
+ MachineMemOperand::MOStore,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(),
Alignment, AAInfo);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
@@ -4415,7 +4483,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
- SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
+ SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
Ops, MMO, IndexType);
DAG.setRoot(Scatter);
@@ -4452,6 +4520,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
@@ -4462,22 +4531,29 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
- bool AddToChain =
- !AA || !AA->pointsToConstantMemory(MemoryLocation(
- PtrOperand,
- LocationSize::precise(
- DAG.getDataLayout().getTypeStoreSize(I.getType())),
- AAInfo));
+ MemoryLocation ML;
+ if (VT.isScalableVector())
+ ML = MemoryLocation(PtrOperand);
+ else
+ ML = MemoryLocation(PtrOperand, LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(I.getType())),
+ AAInfo);
+ bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO =
DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(PtrOperand),
- MachineMemOperand::MOLoad, VT.getStoreSize(),
+ MachineMemOperand::MOLoad,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(),
Alignment, AAInfo, Ranges);
- SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
- ISD::NON_EXTLOAD, IsExpanding);
+ SDValue Load =
+ DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
+ ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
if (AddToChain)
PendingLoads.push_back(Load.getValue(1));
setValue(&I, Load);
@@ -4524,7 +4600,10 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
MachineMemOperand *MMO =
DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
- MachineMemOperand::MOLoad, VT.getStoreSize(),
+ MachineMemOperand::MOLoad,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(),
Alignment, AAInfo, Ranges);
if (!UniformBase) {
@@ -4634,10 +4713,10 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Ops[3];
Ops[0] = getRoot();
- Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
- TLI.getFenceOperandTy(DAG.getDataLayout()));
- Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
- TLI.getFenceOperandTy(DAG.getDataLayout()));
+ Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl,
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
+ Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
@@ -5344,8 +5423,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
if (Val == 0)
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
- const Function &F = DAG.getMachineFunction().getFunction();
- if (!F.hasOptSize() ||
+ bool OptForSize = DAG.shouldOptForSize();
+ if (!OptForSize ||
// If optimizing for size, don't insert too many multiplies.
// This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
@@ -5382,6 +5461,60 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
+static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
+ SDValue LHS, SDValue RHS, SDValue Scale,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ EVT VT = LHS.getValueType();
+ bool Signed = Opcode == ISD::SDIVFIX;
+ LLVMContext &Ctx = *DAG.getContext();
+
+ // If the type is legal but the operation isn't, this node might survive all
+ // the way to operation legalization. If we end up there and we do not have
+ // the ability to widen the type (if VT*2 is not legal), we cannot expand the
+ // node.
+
+ // Coax the legalizer into expanding the node during type legalization instead
+ // by bumping the size by one bit. This will force it to Promote, enabling the
+ // early expansion and avoiding the need to expand later.
+
+ // We don't have to do this if Scale is 0; that can always be expanded.
+
+ // FIXME: We wouldn't have to do this (or any of the early
+ // expansion/promotion) if it was possible to expand a libcall of an
+ // illegal type during operation legalization. But it's not, so things
+ // get a bit hacky.
+ unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
+ if (ScaleInt > 0 &&
+ (TLI.isTypeLegal(VT) ||
+ (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
+ TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
+ Opcode, VT, ScaleInt);
+ if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
+ EVT PromVT;
+ if (VT.isScalarInteger())
+ PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1);
+ else if (VT.isVector()) {
+ PromVT = VT.getVectorElementType();
+ PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1);
+ PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
+ } else
+ llvm_unreachable("Wrong VT for DIVFIX?");
+ if (Signed) {
+ LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT);
+ RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT);
+ } else {
+ LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
+ RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
+ }
+ // TODO: Saturation.
+ SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
+ return DAG.getZExtOrTrunc(Res, DL, VT);
+ }
+ }
+
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale);
+}
+
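
To make the promotion above concrete: a fixed-point division with scale S computes (LHS << S) / RHS in the same fixed-point format, so the intermediate needs extra bits. A standalone Q16.16 sketch; saturation, per the TODO above, is not handled:

#include <cstdint>
#include <cstdio>

// Q16.16 signed fixed-point division: result = (LHS << 16) / RHS, kept in
// the same format. The widened intermediate is the same reason the code
// above nudges the legalizer toward a wider type.
static int32_t sdivFixQ16(int32_t LHS, int32_t RHS) {
  int64_t Wide = (static_cast<int64_t>(LHS) << 16) / RHS;
  return static_cast<int32_t>(Wide); // no saturation, matching the TODO
}

int main() {
  // 1.5 / 0.5 == 3.0: 0x18000 / 0x8000 -> 0x30000 in Q16.16.
  std::printf("%#x\n", static_cast<unsigned>(sdivFixQ16(0x18000, 0x8000)));
}
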
// getUnderlyingArgRegs - Find underlying registers used for a truncated,
// bitcasted, or split argument. Returns a list of <Register, size in bits>
static void
@@ -5474,7 +5607,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
// is an argument. But since we already has used %a1 to describe a parameter
// we should not handle that last dbg.value here (that would result in an
// incorrect hoisting of the DBG_VALUE to the function entry).
- // Notice that we allow one dbg.value per IR level argument, to accomodate
+ // Notice that we allow one dbg.value per IR level argument, to accommodate
// for the situation with fragments above.
if (VariableIsFunctionInputArg) {
unsigned ArgNo = Arg->getArgNo();
@@ -5489,7 +5622,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
- bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
@@ -5511,7 +5643,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
- IsIndirect = IsDbgDeclare;
}
}
@@ -5530,15 +5661,38 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
= [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) {
unsigned Offset = 0;
for (auto RegAndSize : SplitRegs) {
+ // If the expression is already a fragment, the current register
+ // offset+size might extend beyond the fragment. In this case, only
+ // the register bits that are inside the fragment are relevant.
+ int RegFragmentSizeInBits = RegAndSize.second;
+ if (auto ExprFragmentInfo = Expr->getFragmentInfo()) {
+ uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits;
+ // The register is entirely outside the expression fragment,
+ // so it is irrelevant for debug info.
+ if (Offset >= ExprFragmentSizeInBits)
+ break;
+ // The register is partially outside the expression fragment, only
+ // the low bits within the fragment are relevant for debug info.
+ if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) {
+ RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset;
+ }
+ }
+
auto FragmentExpr = DIExpression::createFragmentExpression(
- Expr, Offset, RegAndSize.second);
- if (!FragmentExpr)
+ Expr, Offset, RegFragmentSizeInBits);
+ Offset += RegAndSize.second;
+ // If a valid fragment expression cannot be created, the variable's
+ // correct value cannot be determined and so it is set as Undef.
+ if (!FragmentExpr) {
+ SDDbgValue *SDV = DAG.getConstantDbgValue(
+ Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
continue;
+ }
assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?");
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
RegAndSize.first, Variable, *FragmentExpr));
- Offset += RegAndSize.second;
}
};
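
The clipping above matters when a variable fragment is narrower than the registers carrying it. A standalone arithmetic sketch: a 64-bit value split across two 32-bit registers, described by an expression fragment covering only bits [0, 48), so the second register contributes just its low 16 bits:

#include <algorithm>
#include <cstdio>

int main() {
  const unsigned RegSizeInBits = 32, ExprFragmentSizeInBits = 48;
  unsigned Offset = 0;
  for (int Reg = 0; Reg < 2; ++Reg) {
    if (Offset >= ExprFragmentSizeInBits)
      break; // register entirely outside the fragment
    unsigned RegFragment =
        std::min(RegSizeInBits, ExprFragmentSizeInBits - Offset);
    std::printf("reg %d covers fragment bits [%u, %u)\n", Reg, Offset,
                Offset + RegFragment);
    Offset += RegSizeInBits;
  }
  // Prints: reg 0 covers [0, 32), reg 1 covers [32, 48).
}
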
@@ -5555,7 +5709,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
Op = MachineOperand::CreateReg(VMI->second, false);
- IsIndirect = IsDbgDeclare;
} else if (ArgRegsAndSizes.size() > 1) {
// This was split due to the calling convention, and no virtual register
// mapping exists for the value.
@@ -5569,9 +5722,26 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- IsIndirect = (Op->isReg()) ? IsIndirect : true;
- if (IsIndirect)
+
+ // If the argument arrives in a stack slot, then what the IR thought was a
+ // normal Value is actually in memory, and we must add a deref to load it.
+ if (Op->isFI()) {
+ int FI = Op->getIndex();
+ unsigned Size = DAG.getMachineFunction().getFrameInfo().getObjectSize(FI);
+ if (Expr->isImplicit()) {
+ SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
+ Expr = DIExpression::prependOpcodes(Expr, Ops);
+ } else {
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
+ }
+ }
+
+ // If this location was specified with a dbg.declare, then it and its
+ // expression calculate the address of the variable. Append a deref to
+ // force it to be a memory location.
+ if (IsDbgDeclare)
Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
*Op, Variable, Expr));
@@ -5603,20 +5773,20 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
/*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
switch (Intrinsic) {
case Intrinsic::smul_fix:
return ISD::SMULFIX;
case Intrinsic::umul_fix:
return ISD::UMULFIX;
+ case Intrinsic::smul_fix_sat:
+ return ISD::SMULFIXSAT;
+ case Intrinsic::umul_fix_sat:
+ return ISD::UMULFIXSAT;
+ case Intrinsic::sdiv_fix:
+ return ISD::SDIVFIX;
+ case Intrinsic::udiv_fix:
+ return ISD::UDIVFIX;
default:
llvm_unreachable("Unhandled fixed point intrinsic");
}
@@ -5687,12 +5857,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
RegName, getValue(RegValue)));
return;
}
- case Intrinsic::setjmp:
- lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]);
- return;
- case Intrinsic::longjmp:
- lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]);
- return;
case Intrinsic::memcpy: {
const auto &MCI = cast<MemCpyInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
@@ -5706,7 +5870,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
- SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol,
false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
@@ -5722,7 +5887,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1);
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
- SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)));
updateDAGForMaybeTailCall(MS);
return;
@@ -5740,7 +5906,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
- SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MM);
@@ -6102,44 +6269,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return;
- case Intrinsic::experimental_constrained_fadd:
- case Intrinsic::experimental_constrained_fsub:
- case Intrinsic::experimental_constrained_fmul:
- case Intrinsic::experimental_constrained_fdiv:
- case Intrinsic::experimental_constrained_frem:
- case Intrinsic::experimental_constrained_fma:
- case Intrinsic::experimental_constrained_fptosi:
- case Intrinsic::experimental_constrained_fptoui:
- case Intrinsic::experimental_constrained_fptrunc:
- case Intrinsic::experimental_constrained_fpext:
- case Intrinsic::experimental_constrained_sqrt:
- case Intrinsic::experimental_constrained_pow:
- case Intrinsic::experimental_constrained_powi:
- case Intrinsic::experimental_constrained_sin:
- case Intrinsic::experimental_constrained_cos:
- case Intrinsic::experimental_constrained_exp:
- case Intrinsic::experimental_constrained_exp2:
- case Intrinsic::experimental_constrained_log:
- case Intrinsic::experimental_constrained_log10:
- case Intrinsic::experimental_constrained_log2:
- case Intrinsic::experimental_constrained_lrint:
- case Intrinsic::experimental_constrained_llrint:
- case Intrinsic::experimental_constrained_rint:
- case Intrinsic::experimental_constrained_nearbyint:
- case Intrinsic::experimental_constrained_maxnum:
- case Intrinsic::experimental_constrained_minnum:
- case Intrinsic::experimental_constrained_ceil:
- case Intrinsic::experimental_constrained_floor:
- case Intrinsic::experimental_constrained_lround:
- case Intrinsic::experimental_constrained_llround:
- case Intrinsic::experimental_constrained_round:
- case Intrinsic::experimental_constrained_trunc:
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC:
+#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI.isFMAFasterThanFMulAndFAdd(VT)) {
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
@@ -6307,7 +6445,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
case Intrinsic::smul_fix:
- case Intrinsic::umul_fix: {
+ case Intrinsic::umul_fix:
+ case Intrinsic::smul_fix_sat:
+ case Intrinsic::umul_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -6315,20 +6455,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Op1.getValueType(), Op1, Op2, Op3));
return;
}
- case Intrinsic::smul_fix_sat: {
+ case Intrinsic::sdiv_fix:
+ case Intrinsic::udiv_fix: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
- setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
- Op3));
- return;
- }
- case Intrinsic::umul_fix_sat: {
- SDValue Op1 = getValue(I.getArgOperand(0));
- SDValue Op2 = getValue(I.getArgOperand(1));
- SDValue Op3 = getValue(I.getArgOperand(2));
- setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
- Op3));
+ setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
+ Op1, Op2, Op3, DAG, TLI));
return;
}
case Intrinsic::stacksave: {
@@ -6681,7 +6814,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Add the offset to the FP.
Value *FP = I.getArgOperand(1);
SDValue FPVal = getValue(FP);
- SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
+ SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl);
setValue(&I, Add);
return;
@@ -6876,142 +7009,82 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
const ConstrainedFPIntrinsic &FPI) {
SDLoc sdl = getCurSDLoc();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
+ ValueVTs.push_back(MVT::Other); // Out chain
+
+ // We do not need to serialize constrained FP intrinsics against
+ // each other or against (nonvolatile) loads, so they can be
+ // chained like loads.
+ SDValue Chain = DAG.getRoot();
+ SmallVector<SDValue, 4> Opers;
+ Opers.push_back(Chain);
+ if (FPI.isUnaryOp()) {
+ Opers.push_back(getValue(FPI.getArgOperand(0)));
+ } else if (FPI.isTernaryOp()) {
+ Opers.push_back(getValue(FPI.getArgOperand(0)));
+ Opers.push_back(getValue(FPI.getArgOperand(1)));
+ Opers.push_back(getValue(FPI.getArgOperand(2)));
+ } else {
+ Opers.push_back(getValue(FPI.getArgOperand(0)));
+ Opers.push_back(getValue(FPI.getArgOperand(1)));
+ }
+
unsigned Opcode;
switch (FPI.getIntrinsicID()) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::experimental_constrained_fadd:
- Opcode = ISD::STRICT_FADD;
- break;
- case Intrinsic::experimental_constrained_fsub:
- Opcode = ISD::STRICT_FSUB;
- break;
- case Intrinsic::experimental_constrained_fmul:
- Opcode = ISD::STRICT_FMUL;
- break;
- case Intrinsic::experimental_constrained_fdiv:
- Opcode = ISD::STRICT_FDIV;
- break;
- case Intrinsic::experimental_constrained_frem:
- Opcode = ISD::STRICT_FREM;
- break;
- case Intrinsic::experimental_constrained_fma:
- Opcode = ISD::STRICT_FMA;
- break;
- case Intrinsic::experimental_constrained_fptosi:
- Opcode = ISD::STRICT_FP_TO_SINT;
- break;
- case Intrinsic::experimental_constrained_fptoui:
- Opcode = ISD::STRICT_FP_TO_UINT;
- break;
- case Intrinsic::experimental_constrained_fptrunc:
- Opcode = ISD::STRICT_FP_ROUND;
- break;
- case Intrinsic::experimental_constrained_fpext:
- Opcode = ISD::STRICT_FP_EXTEND;
- break;
- case Intrinsic::experimental_constrained_sqrt:
- Opcode = ISD::STRICT_FSQRT;
- break;
- case Intrinsic::experimental_constrained_pow:
- Opcode = ISD::STRICT_FPOW;
- break;
- case Intrinsic::experimental_constrained_powi:
- Opcode = ISD::STRICT_FPOWI;
- break;
- case Intrinsic::experimental_constrained_sin:
- Opcode = ISD::STRICT_FSIN;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC: \
+ Opcode = ISD::STRICT_##DAGN; \
break;
- case Intrinsic::experimental_constrained_cos:
- Opcode = ISD::STRICT_FCOS;
- break;
- case Intrinsic::experimental_constrained_exp:
- Opcode = ISD::STRICT_FEXP;
- break;
- case Intrinsic::experimental_constrained_exp2:
- Opcode = ISD::STRICT_FEXP2;
- break;
- case Intrinsic::experimental_constrained_log:
- Opcode = ISD::STRICT_FLOG;
- break;
- case Intrinsic::experimental_constrained_log10:
- Opcode = ISD::STRICT_FLOG10;
- break;
- case Intrinsic::experimental_constrained_log2:
- Opcode = ISD::STRICT_FLOG2;
- break;
- case Intrinsic::experimental_constrained_lrint:
- Opcode = ISD::STRICT_LRINT;
- break;
- case Intrinsic::experimental_constrained_llrint:
- Opcode = ISD::STRICT_LLRINT;
- break;
- case Intrinsic::experimental_constrained_rint:
- Opcode = ISD::STRICT_FRINT;
- break;
- case Intrinsic::experimental_constrained_nearbyint:
- Opcode = ISD::STRICT_FNEARBYINT;
- break;
- case Intrinsic::experimental_constrained_maxnum:
- Opcode = ISD::STRICT_FMAXNUM;
- break;
- case Intrinsic::experimental_constrained_minnum:
- Opcode = ISD::STRICT_FMINNUM;
- break;
- case Intrinsic::experimental_constrained_ceil:
- Opcode = ISD::STRICT_FCEIL;
- break;
- case Intrinsic::experimental_constrained_floor:
- Opcode = ISD::STRICT_FFLOOR;
- break;
- case Intrinsic::experimental_constrained_lround:
- Opcode = ISD::STRICT_LROUND;
- break;
- case Intrinsic::experimental_constrained_llround:
- Opcode = ISD::STRICT_LLROUND;
- break;
- case Intrinsic::experimental_constrained_round:
- Opcode = ISD::STRICT_FROUND;
+#include "llvm/IR/ConstrainedOps.def"
+ }
+
+ // A few strict DAG nodes carry additional operands that are not
+ // set up by the default code above.
+ switch (Opcode) {
+ default: break;
+ case ISD::STRICT_FP_ROUND:
+ Opers.push_back(
+ DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
break;
- case Intrinsic::experimental_constrained_trunc:
- Opcode = ISD::STRICT_FTRUNC;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
+ Opers.push_back(DAG.getCondCode(getFCmpCondCode(FPCmp->getPredicate())));
break;
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue Chain = getRoot();
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
- ValueVTs.push_back(MVT::Other); // Out chain
+ }
SDVTList VTs = DAG.getVTList(ValueVTs);
- SDValue Result;
- if (Opcode == ISD::STRICT_FP_ROUND)
- Result = DAG.getNode(Opcode, sdl, VTs,
- { Chain, getValue(FPI.getArgOperand(0)),
- DAG.getTargetConstant(0, sdl,
- TLI.getPointerTy(DAG.getDataLayout())) });
- else if (FPI.isUnaryOp())
- Result = DAG.getNode(Opcode, sdl, VTs,
- { Chain, getValue(FPI.getArgOperand(0)) });
- else if (FPI.isTernaryOp())
- Result = DAG.getNode(Opcode, sdl, VTs,
- { Chain, getValue(FPI.getArgOperand(0)),
- getValue(FPI.getArgOperand(1)),
- getValue(FPI.getArgOperand(2)) });
- else
- Result = DAG.getNode(Opcode, sdl, VTs,
- { Chain, getValue(FPI.getArgOperand(0)),
- getValue(FPI.getArgOperand(1)) });
-
- if (FPI.getExceptionBehavior() !=
- ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) {
- SDNodeFlags Flags;
- Flags.setFPExcept(true);
- Result->setFlags(Flags);
- }
+ SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers);
assert(Result.getNode()->getNumValues() == 2);
+
+ // Push node to the appropriate list so that future instructions can be
+ // chained up correctly.
SDValue OutChain = Result.getValue(1);
- DAG.setRoot(OutChain);
+ switch (FPI.getExceptionBehavior().getValue()) {
+ case fp::ExceptionBehavior::ebIgnore:
+ // The only reason why ebIgnore nodes still need to be chained is that
+ // they might depend on the current rounding mode, and therefore must
+ // not be moved across instructions that may change that mode.
+ LLVM_FALLTHROUGH;
+ case fp::ExceptionBehavior::ebMayTrap:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks.
+ PendingConstrainedFP.push_back(OutChain);
+ break;
+ case fp::ExceptionBehavior::ebStrict:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks or read floating-point exception flags.
+ // In addition, they cannot be optimized out even if unused.
+ PendingConstrainedFPStrict.push_back(OutChain);
+ break;
+ }
+
SDValue FPResult = Result.getValue(0);
setValue(&FPI, FPResult);
}
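
The switch above encodes a per-behavior chaining policy. A standalone sketch of just that policy — the enum and lists are invented here to mirror the members declared in SelectionDAGBuilder.h below:

#include <vector>

enum class ExceptionBehavior { ebIgnore, ebMayTrap, ebStrict };

struct PendingLists {
  std::vector<int> ConstrainedFP;       // reorderable against memory ops
  std::vector<int> ConstrainedFPStrict; // also pinned before terminators
};

static void chainResult(PendingLists &P, ExceptionBehavior EB, int OutChain) {
  switch (EB) {
  case ExceptionBehavior::ebIgnore:  // still chained: may read rounding mode
  case ExceptionBehavior::ebMayTrap: // must not cross FP-environment changes
    P.ConstrainedFP.push_back(OutChain);
    break;
  case ExceptionBehavior::ebStrict:  // additionally must not be deleted
    P.ConstrainedFPStrict.push_back(OutChain);
    break;
  }
}

int main() {
  PendingLists P;
  chainResult(P, ExceptionBehavior::ebStrict, /*OutChain=*/42);
  return P.ConstrainedFPStrict.size() == 1 ? 0 : 1;
}
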
@@ -7102,13 +7175,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // We can't tail call inside a function with a swifterror argument. Lowering
- // does not support this yet. It would have to move into the swifterror
- // register before the call.
- auto *Caller = CS.getInstruction()->getParent()->getParent();
- if (TLI.supportSwiftError() &&
- Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
- isTailCall = false;
+ if (isTailCall) {
+ // Avoid emitting tail calls in functions with the disable-tail-calls
+ // attribute.
+ auto *Caller = CS.getInstruction()->getParent()->getParent();
+ if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
+ "true")
+ isTailCall = false;
+
+ // We can't tail call inside a function with a swifterror argument. Lowering
+ // does not support this yet. It would have to move into the swifterror
+ // register before the call.
+ if (TLI.supportSwiftError() &&
+ Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ isTailCall = false;
+ }
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
@@ -7142,6 +7223,18 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
isTailCall = false;
}
+ // If call site has a cfguardtarget operand bundle, create and add an
+ // additional ArgListEntry.
+ if (auto Bundle = CS.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
+ TargetLowering::ArgListEntry Entry;
+ Value *V = Bundle->Inputs[0];
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode;
+ Entry.Ty = V->getType();
+ Entry.IsCFGuardTarget = true;
+ Args.push_back(Entry);
+ }
+
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI->LowerCallTo.
if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
@@ -7374,7 +7467,8 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// In the mempcpy context we need to pass in a false value for isTailCall
// because the return pointer needs to be adjusted by the size of
// the copied memory.
- SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Align, isVol,
false, /*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
@@ -7683,8 +7777,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
- assert(!I.hasOperandBundlesOtherThan(
- {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ // CFGuardTarget bundles are lowered in LowerCallTo.
+ assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
+ LLVMContext::OB_funclet,
+ LLVMContext::OB_cfguardtarget}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledValue());
@@ -8182,10 +8278,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
switch (OpInfo.Type) {
case InlineAsm::isOutput:
- if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
- ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
- OpInfo.ConstraintType == TargetLowering::C_Other) &&
- OpInfo.isIndirect)) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
@@ -8197,12 +8290,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
- break;
- } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
- OpInfo.ConstraintType == TargetLowering::C_Other) &&
- !OpInfo.isIndirect) ||
- OpInfo.ConstraintType == TargetLowering::C_Register ||
- OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
+ } else {
// Otherwise, this outputs to a register (directly for C_Register /
// C_RegisterClass, and a target-defined fashion for
// C_Immediate/C_Other). Find a register that we can use.
@@ -8285,8 +8373,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Treat indirect 'X' constraint as memory.
- if ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
- OpInfo.ConstraintType == TargetLowering::C_Other) &&
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
@@ -8339,8 +8426,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
- OpInfo.ConstraintType == TargetLowering::C_Register ||
- OpInfo.ConstraintType == TargetLowering::C_Immediate) &&
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
"Unknown constraint type!");
// TODO: Support this.
@@ -8678,7 +8764,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
Callee = getValue(CI.getCalledValue());
NullPtr = DAG.getIntPtrConstant(0, DL, true);
- // The stackmap intrinsic only records the live variables (the arguemnts
+ // The stackmap intrinsic only records the live variables (the arguments
// passed to it) and emits NOPS (if requested). Unlike the patchpoint
// intrinsic, this won't be lowered to a function call. This means we don't
// have to worry about calling conventions and target specific lowering code.
@@ -9027,6 +9113,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.IsReturned = false;
Entry.IsSwiftSelf = false;
Entry.IsSwiftError = false;
+ Entry.IsCFGuardTarget = false;
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.NumFixedArgs += 1;
@@ -9139,6 +9226,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setSwiftSelf();
if (Args[i].IsSwiftError)
Flags.setSwiftError();
+ if (Args[i].IsCFGuardTarget)
+ Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
if (Args[i].IsInAlloca) {
@@ -9214,9 +9303,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
+ // For scalable vectors the scalable part is currently handled
+ // by individual targets, so we just use the known minimum size here.
ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
- i < CLI.NumFixedArgs,
- i, j*Parts[j].getValueType().getStoreSize());
+ i < CLI.NumFixedArgs, i,
+ j*Parts[j].getValueType().getStoreSize().getKnownMinSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
@@ -9487,7 +9578,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
/// Try to elide argument copies from memory into a local alloca. Succeeds if
/// ArgVal is a load from a suitable fixed stack object.
static void tryToElideArgumentCopy(
- FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
+ FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains,
DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
@@ -9507,9 +9598,9 @@ static void tryToElideArgumentCopy(
assert(ArgCopyIter != ArgCopyElisionCandidates.end());
const AllocaInst *AI = ArgCopyIter->second.first;
int FixedIndex = FINode->getIndex();
- int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
+ int &AllocaIndex = FuncInfo.StaticAllocaMap[AI];
int OldIndex = AllocaIndex;
- MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
+ MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
LLVM_DEBUG(
dbgs() << " argument copy elision failed due to bad fixed stack "
@@ -9518,7 +9609,7 @@ static void tryToElideArgumentCopy(
}
unsigned RequiredAlignment = AI->getAlignment();
if (!RequiredAlignment) {
- RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
+ RequiredAlignment = FuncInfo.MF->getDataLayout().getABITypeAlignment(
AI->getAllocatedType());
}
if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
@@ -9584,7 +9675,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// flag to ask the target to give us the memory location of that argument if
// available.
ArgCopyElisionMapTy ArgCopyElisionCandidates;
- findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
+ findArgumentCopyElisionCandidates(DL, FuncInfo.get(),
+ ArgCopyElisionCandidates);
// Set up the incoming argument description vector.
for (const Argument &Arg : F.args()) {
@@ -9685,8 +9777,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
unsigned NumRegs = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
+ // For scalable vectors, use the minimum size; individual targets
+ // are responsible for handling scalable vector arguments and
+ // return values.
ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
- ArgNo, PartBase+i*RegisterVT.getStoreSize());
+ ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
@@ -9699,7 +9794,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (NeedsRegBlock && Value == NumValues - 1)
Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
- PartBase += VT.getStoreSize();
+ PartBase += VT.getStoreSize().getKnownMinSize();
}
}
@@ -9769,7 +9864,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Elide the copying store if the target loaded this argument from a
// suitable fixed stack object.
if (Ins[i].Flags.isCopyElisionCandidate()) {
- tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
+ tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
InVals[i], ArgHasUses);
}
@@ -9795,7 +9890,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
unsigned NumParts = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
- // Even an apparant 'unused' swifterror argument needs to be returned. So
+ // Even an apparent 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
// function.
if (ArgHasUses || isSwiftErrorArg) {
@@ -10508,7 +10603,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
return;
}
- SL->findJumpTables(Clusters, &SI, DefaultMBB);
+ SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI());
SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
@@ -10557,3 +10652,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
}
}
+
+void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
+ SDValue N = getValue(I.getOperand(0));
+ setValue(&I, N);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index bfcf30b430b6..18e0edf7fc04 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -143,6 +143,20 @@ private:
/// tokenfactor for them just before terminator instructions.
SmallVector<SDValue, 8> PendingExports;
+ /// Similar to loads, nodes corresponding to constrained FP intrinsics are
+ /// bunched up and emitted when necessary. These can be moved across each
+ /// other and any (normal) memory operation (load or store), but not across
+ /// calls or instructions having unspecified side effects. As a special
+ /// case, constrained FP intrinsics using fpexcept.strict may not be deleted
+ /// even if otherwise unused, so they need to be chained before any
+ /// terminator instruction (like PendingExports). We track the latter
+ /// set of nodes in a separate list.
+ SmallVector<SDValue, 8> PendingConstrainedFP;
+ SmallVector<SDValue, 8> PendingConstrainedFPStrict;
+
+ /// Update root to include all chains from the Pending list.
+ SDValue updateRoot(SmallVectorImpl<SDValue> &Pending);
+
/// A unique monotonically increasing number used to order the SDNodes we
/// create.
unsigned SDNodeOrder;
@@ -447,12 +461,18 @@ public:
/// Return the current virtual root of the Selection DAG, flushing any
/// PendingLoad items. This must be done before emitting a store or any other
- /// node that may need to be ordered after any prior load instructions.
+ /// memory node that may need to be ordered after any prior load instructions.
+ SDValue getMemoryRoot();
+
+ /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict)
+ /// items. This must be done before emitting any call or any other node
+ /// that may need to be ordered after FP instructions due to other side
+ /// effects.
SDValue getRoot();
/// Similar to getRoot, but instead of flushing all the PendingLoad items,
- /// flush all the PendingExports items. It is necessary to do this before
- /// emitting a terminator instruction.
+ /// flush all the PendingExports (and PendingConstrainedFPStrict) items.
+ /// It is necessary to do this before emitting a terminator instruction.
SDValue getControlRoot();
SDLoc getCurSDLoc() const {
@@ -742,6 +762,7 @@ private:
void visitAtomicStore(const StoreInst &I);
void visitLoadFromSwiftError(const LoadInst &I);
void visitStoreToSwiftError(const StoreInst &I);
+ void visitFreeze(const FreezeInst &I);
void visitInlineAsm(ImmutableCallSite CS);
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index bc10f7621239..6fd71393bf38 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -186,7 +186,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMINNUM_IEEE: return "fminnum_ieee";
case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee";
case ISD::FMINIMUM: return "fminimum";
+ case ISD::STRICT_FMINIMUM: return "strict_fminimum";
case ISD::FMAXIMUM: return "fmaximum";
+ case ISD::STRICT_FMAXIMUM: return "strict_fmaximum";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::STRICT_FSQRT: return "strict_fsqrt";
@@ -270,6 +272,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FPOWI: return "strict_fpowi";
case ISD::SETCC: return "setcc";
case ISD::SETCCCARRY: return "setcccarry";
+ case ISD::STRICT_FSETCC: return "strict_fsetcc";
+ case ISD::STRICT_FSETCCS: return "strict_fsetccs";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
@@ -308,6 +312,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UMULFIX: return "umulfix";
case ISD::UMULFIXSAT: return "umulfixsat";
+ case ISD::SDIVFIX: return "sdivfix";
+ case ISD::UDIVFIX: return "udivfix";
+
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
case ISD::ZERO_EXTEND: return "zero_extend";
@@ -324,7 +331,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::STRICT_SINT_TO_FP: return "strict_sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::STRICT_UINT_TO_FP: return "strict_uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
@@ -541,6 +550,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasVectorReduction())
OS << " vector-reduction";
+ if (getFlags().hasNoFPExcept())
+ OS << " nofpexcept";
+
if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
if (!MN->memoperands_empty()) {
OS << "<";
@@ -685,6 +697,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (doExt)
OS << " from " << MLd->getMemoryVT().getEVTString();
+ const char *AM = getIndexedModeName(MLd->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
if (MLd->isExpandingLoad())
OS << ", expanding";
@@ -696,6 +712,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (MSt->isTruncatingStore())
OS << ", trunc to " << MSt->getMemoryVT().getEVTString();
+ const char *AM = getIndexedModeName(MSt->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
if (MSt->isCompressingStore())
OS << ", compressing";
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1f07a241a824..6c57c72d47a7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -27,8 +27,10 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/FastISel.h"
@@ -71,10 +73,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -147,17 +151,17 @@ static cl::opt<bool>
ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
cl::desc("Pop up a window to show dags before legalize types"));
static cl::opt<bool>
-ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
- cl::desc("Pop up a window to show dags before legalize"));
+ ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post "
+ "legalize types dag combine pass"));
+static cl::opt<bool>
+ ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
static cl::opt<bool>
ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
cl::desc("Pop up a window to show dags before the second "
"dag combine pass"));
static cl::opt<bool>
-ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
- cl::desc("Pop up a window to show dags before the post legalize types"
- " dag combine pass"));
-static cl::opt<bool>
ViewISelDAGs("view-isel-dags", cl::Hidden,
cl::desc("Pop up a window to show isel dags as they are selected"));
static cl::opt<bool>
@@ -167,12 +171,10 @@ static cl::opt<bool>
ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
cl::desc("Pop up a window to show SUnit dags after they are processed"));
#else
-static const bool ViewDAGCombine1 = false,
- ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
- ViewDAGCombine2 = false,
- ViewDAGCombineLT = false,
- ViewISelDAGs = false, ViewSchedDAGs = false,
- ViewSUnitDAGs = false;
+static const bool ViewDAGCombine1 = false, ViewLegalizeTypesDAGs = false,
+ ViewDAGCombineLT = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false, ViewISelDAGs = false,
+ ViewSchedDAGs = false, ViewSUnitDAGs = false;
#endif
//===---------------------------------------------------------------------===//
@@ -305,28 +307,22 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
- CodeGenOpt::Level OL) :
- MachineFunctionPass(ID), TM(tm),
- FuncInfo(new FunctionLoweringInfo()),
- SwiftError(new SwiftErrorValueTracking()),
- CurDAG(new SelectionDAG(tm, OL)),
- SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)),
- AA(), GFI(),
- OptLevel(OL),
- DAGSize(0) {
- initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
- initializeBranchProbabilityInfoWrapperPassPass(
- *PassRegistry::getPassRegistry());
- initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
- initializeTargetLibraryInfoWrapperPassPass(
- *PassRegistry::getPassRegistry());
- }
+SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL)
+ : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()),
+ SwiftError(new SwiftErrorValueTracking()),
+ CurDAG(new SelectionDAG(tm, OL)),
+ SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError,
+ OL)),
+ AA(), GFI(), OptLevel(OL), DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
SelectionDAGISel::~SelectionDAGISel() {
- delete SDB;
delete CurDAG;
- delete FuncInfo;
delete SwiftError;
}
@@ -340,6 +336,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -442,13 +440,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
CurDAG->init(*MF, *ORE, this, LibInfo,
- getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
+ getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI);
FuncInfo->set(Fn, *MF, CurDAG);
SwiftError->setFunction(*MF);
@@ -735,23 +737,20 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
}
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
- SmallPtrSet<SDNode*, 16> VisitedNodes;
+ SmallPtrSet<SDNode *, 16> Added;
SmallVector<SDNode*, 128> Worklist;
Worklist.push_back(CurDAG->getRoot().getNode());
+ Added.insert(CurDAG->getRoot().getNode());
KnownBits Known;
do {
SDNode *N = Worklist.pop_back_val();
- // If we've already seen this node, ignore it.
- if (!VisitedNodes.insert(N).second)
- continue;
-
    // Add all chain operands to the worklist.
for (const SDValue &Op : N->op_values())
- if (Op.getValueType() == MVT::Other)
+ if (Op.getValueType() == MVT::Other && Added.insert(Op.getNode()).second)
Worklist.push_back(Op.getNode());
// If this is a CopyToReg with a vreg dest, process it.
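
The rewritten loop marks nodes in Added at push time instead of filtering
duplicates at pop time, so the worklist never holds the same node twice. The
same pattern in miniature (plain C++, with std::set standing in for
SmallPtrSet):

    #include <cassert>
    #include <set>
    #include <vector>

    int main() {
      // A tiny DAG: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}.
      std::vector<int> Graph[4] = {{1, 2}, {3}, {3}, {}};
      std::set<int> Added;
      std::vector<int> Worklist{0};
      Added.insert(0);
      int Visits = 0;
      while (!Worklist.empty()) {
        int N = Worklist.back();
        Worklist.pop_back();
        ++Visits;
        for (int Succ : Graph[N])
          if (Added.insert(Succ).second)   // only push unseen nodes
            Worklist.push_back(Succ);
      }
      assert(Visits == 4);                 // node 3 is visited once, not twice
    }
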
@@ -793,8 +792,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
FuncInfo->MBB->getBasicBlock()->getName());
#endif
#ifdef NDEBUG
- if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
- ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewDAGCombineLT ||
+ ViewLegalizeDAGs || ViewDAGCombine2 || ViewISelDAGs || ViewSchedDAGs ||
ViewSUnitDAGs)
#endif
{
@@ -1159,10 +1158,30 @@ void SelectionDAGISel::DoInstructionSelection() {
// we convert them to normal FP opcodes instead at this point. This
// will allow them to be handled by existing target-specific instruction
// selectors.
- if (Node->isStrictFPOpcode() &&
- (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
- != TargetLowering::Legal))
- Node = CurDAG->mutateStrictFPToFP(Node);
+ if (!TLI->isStrictFPEnabled() && Node->isStrictFPOpcode()) {
+ // For some opcodes, we need to call TLI->getOperationAction using
+ // the first operand type instead of the result type. Note that this
+ // must match what SelectionDAGLegalize::LegalizeOp is doing.
+ EVT ActionVT;
+ switch (Node->getOpcode()) {
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ ActionVT = Node->getOperand(1).getValueType();
+ break;
+ default:
+ ActionVT = Node->getValueType(0);
+ break;
+ }
+ if (TLI->getOperationAction(Node->getOpcode(), ActionVT)
+ == TargetLowering::Expand)
+ Node = CurDAG->mutateStrictFPToFP(Node);
+ }
LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
Node->dump(CurDAG));
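
For the opcodes listed in the switch, the legalization action has to be looked
up under the first operand's type rather than the result type, mirroring
SelectionDAGLegalize::LegalizeOp. A scaled-down sketch of that dispatch
(hypothetical enums for illustration, not the LLVM API):

    #include <cassert>

    enum class Opc { StrictFAdd, StrictSIntToFP, StrictFSetCC };
    enum class VT { i32, f32, f64 };

    // Conversions from integer and the FP compares key the action-table
    // lookup on operand 1; everything else uses the result type.
    static VT actionQueryVT(Opc O, VT ResultVT, VT Op1VT) {
      switch (O) {
      case Opc::StrictSIntToFP:
      case Opc::StrictFSetCC:
        return Op1VT;
      default:
        return ResultVT;
      }
    }

    int main() {
      // strict_sint_to_fp (i32 -> f64) is looked up under i32, not f64.
      assert(actionQueryVT(Opc::StrictSIntToFP, VT::f64, VT::i32) == VT::i32);
      assert(actionQueryVT(Opc::StrictFAdd, VT::f64, VT::f64) == VT::f64);
    }
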
@@ -1280,20 +1299,20 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
/// side-effect free and is either dead or folded into a generated instruction.
/// Return false if it needs to be emitted.
static bool isFoldedOrDeadInstruction(const Instruction *I,
- FunctionLoweringInfo *FuncInfo) {
+ const FunctionLoweringInfo &FuncInfo) {
return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
!I->isTerminator() && // Terminators aren't folded.
- !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
- !I->isEHPad() && // EH pad instructions aren't folded.
- !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !I->isEHPad() && // EH pad instructions aren't folded.
+ !FuncInfo.isExportedInst(I); // Exported instrs must be computed.
}
/// Collect llvm.dbg.declare information. This is done after argument lowering
/// in case the declarations refer to arguments.
-static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
- MachineFunction *MF = FuncInfo->MF;
+static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
+ MachineFunction *MF = FuncInfo.MF;
const DataLayout &DL = MF->getDataLayout();
- for (const BasicBlock &BB : *FuncInfo->Fn) {
+ for (const BasicBlock &BB : *FuncInfo.Fn) {
for (const Instruction &I : BB) {
const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I);
if (!DI)
@@ -1315,11 +1334,11 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
// intrinsic and handle this during isel like dbg.value.
int FI = std::numeric_limits<int>::max();
if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
- auto SI = FuncInfo->StaticAllocaMap.find(AI);
- if (SI != FuncInfo->StaticAllocaMap.end())
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
FI = SI->second;
} else if (const auto *Arg = dyn_cast<Argument>(Address))
- FI = FuncInfo->getArgumentFrameIndex(Arg);
+ FI = FuncInfo.getArgumentFrameIndex(Arg);
if (FI == std::numeric_limits<int>::max())
continue;
@@ -1353,7 +1372,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
FuncInfo->InsertPt = FuncInfo->MBB->begin();
- CurDAG->setFunctionLoweringInfo(FuncInfo);
+ CurDAG->setFunctionLoweringInfo(FuncInfo.get());
if (!FastIS) {
LowerArguments(Fn);
@@ -1393,7 +1412,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (FastIS && Inserted)
FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
- processDbgDeclares(FuncInfo);
+ processDbgDeclares(*FuncInfo);
// Iterate over all basic blocks in the function.
StackProtector &SP = getAnalysis<StackProtector>();
@@ -1453,7 +1472,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
const Instruction *Inst = &*std::prev(BI);
// If we no longer require this instruction, skip it.
- if (isFoldedOrDeadInstruction(Inst, FuncInfo) ||
+ if (isFoldedOrDeadInstruction(Inst, *FuncInfo) ||
ElidedArgCopyInstrs.count(Inst)) {
--NumFastIselRemaining;
continue;
@@ -1473,7 +1492,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
const Instruction *BeforeInst = Inst;
while (BeforeInst != &*Begin) {
BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst));
- if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+ if (!isFoldedOrDeadInstruction(BeforeInst, *FuncInfo))
break;
}
if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
@@ -1589,7 +1608,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// But if FastISel was run, we already selected some of the block.
// If we emitted a tail-call, we need to delete any previously emitted
// instruction that follows it.
- if (HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end())
+ if (FastIS && HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end())
FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());
}
@@ -2230,10 +2249,13 @@ void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) {
void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
SDLoc dl(Op);
- MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
- const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1));
+ const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0));
+
+ EVT VT = Op->getValueType(0);
+ LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT();
Register Reg =
- TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0),
+ TLI->getRegisterByName(RegStr->getString().data(), Ty,
CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyFromReg(
Op->getOperand(0), dl, Reg, Op->getValueType(0));
@@ -2244,10 +2266,13 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
SDLoc dl(Op);
- MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
- const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- Register Reg = TLI->getRegisterByName(RegStr->getString().data(),
- Op->getOperand(2).getValueType(),
+ MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1));
+ const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0));
+
+ EVT VT = Op->getOperand(2).getValueType();
+ LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT();
+
+ Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty,
CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyToReg(
Op->getOperand(0), dl, Reg, Op->getOperand(2));
@@ -3176,13 +3201,19 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case OPC_CheckFoldableChainNode: {
assert(NodeStack.size() != 1 && "No parent node");
// Verify that all intermediate nodes between the root and this one have
- // a single use.
+ // a single use (ignoring chains, which are handled in UpdateChains).
bool HasMultipleUses = false;
- for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
- if (!NodeStack[i].getNode()->hasOneUse()) {
- HasMultipleUses = true;
- break;
- }
+ for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) {
+ unsigned NNonChainUses = 0;
+ SDNode *NS = NodeStack[i].getNode();
+ for (auto UI = NS->use_begin(), UE = NS->use_end(); UI != UE; ++UI)
+ if (UI.getUse().getValueType() != MVT::Other)
+ if (++NNonChainUses > 1) {
+ HasMultipleUses = true;
+ break;
+ }
+ if (HasMultipleUses) break;
+ }
if (HasMultipleUses) break;
// Check to see that the target thinks this is profitable to fold and that
@@ -3433,6 +3464,17 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr)
Ops.push_back(InputGlue);
+ // Check whether any matched node could raise an FP exception. Since all
+ // such nodes must have a chain, it suffices to check ChainNodesMatched.
+ // We need to perform this check before potentially modifying one of the
+ // nodes via MorphNode.
+ bool MayRaiseFPException = false;
+ for (auto *N : ChainNodesMatched)
+ if (mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept()) {
+ MayRaiseFPException = true;
+ break;
+ }
+
// Create the node.
MachineSDNode *Res = nullptr;
bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo ||
@@ -3464,6 +3506,14 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
Ops, EmitNodeInfo));
}
+ // Set the NoFPExcept flag when no original matched node could
+ // raise an FP exception, but the new node potentially might.
+ if (!MayRaiseFPException && mayRaiseFPException(Res)) {
+ SDNodeFlags Flags = Res->getFlags();
+ Flags.setNoFPExcept(true);
+ Res->setFlags(Flags);
+ }
+
// If the node had chain/glue results, update our notion of the current
// chain and glue.
if (EmitNodeInfo & OPFL_GlueOutput) {
@@ -3619,6 +3669,21 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
}
}
+/// Return whether the node may raise an FP exception.
+bool SelectionDAGISel::mayRaiseFPException(SDNode *N) const {
+ // For machine opcodes, consult the MCID flag.
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ return MCID.mayRaiseFPException();
+ }
+
+ // For ISD opcodes, only StrictFP opcodes may raise an FP
+ // exception.
+ if (N->isTargetOpcode())
+ return N->isTargetStrictFPOpcode();
+ return N->isStrictFPOpcode();
+}
+
bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index fad98b6f50dc..c628f379e415 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -384,7 +384,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// can consider allowing spills of smaller values to larger slots
// (i.e. change the '==' in the assert below to a '>=').
MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
- assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() &&
+ assert((MFI.getObjectSize(Index) * 8) ==
+ (int64_t)Incoming.getValueSizeInBits() &&
"Bad spill: stack slot does not match!");
// Note: Using the alignment of the spill slot (rather than the abi or
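
The added int64_t cast keeps both sides of the assert in one signedness:
MFI.getObjectSize returns int64_t while getValueSizeInBits yields an unsigned
quantity, and a mixed comparison both triggers -Wsign-compare and silently
converts the signed side. A tiny stand-alone illustration of the conversion
surprise:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t Signed = -1;
      uint64_t Unsigned = 0xFFFFFFFFFFFFFFFFull;
      // In a mixed ==, the signed operand is converted to uint64_t first,
      // so -1 silently compares equal to 2^64 - 1.
      assert(Signed == Unsigned);
      // Casting one side, as the patched assert does, makes the intended
      // comparison explicit and warning-free.
      assert((uint64_t)Signed == Unsigned);
    }
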
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9ab1324533f1..24ab65171a17 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -52,6 +52,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
SDValue &Chain) const {
const Function &F = DAG.getMachineFunction().getFunction();
+ // First, check if tail calls have been disabled in this function.
+ if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+ return false;
+
// Conservatively require the attributes of the call to match those of
// the return. Ignore NoAlias and NonNull because they don't affect the
// call sequence.
@@ -122,7 +126,11 @@ std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
ArrayRef<SDValue> Ops,
MakeLibCallOptions CallOptions,
- const SDLoc &dl) const {
+ const SDLoc &dl,
+ SDValue InChain) const {
+ if (!InChain)
+ InChain = DAG.getEntryNode();
+
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
@@ -158,7 +166,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
}
CLI.setDebugLoc(dl)
- .setChain(DAG.getEntryNode())
+ .setChain(InChain)
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setNoReturn(CallOptions.DoesNotReturn)
.setDiscardResult(!CallOptions.IsReturnValueUsed)
@@ -277,6 +285,22 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
ISD::CondCode &CCCode,
const SDLoc &dl, const SDValue OldLHS,
const SDValue OldRHS) const {
+ SDValue Chain;
+ return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
+ OldRHS, Chain);
+}
+
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl, const SDValue OldLHS,
+ const SDValue OldRHS,
+ SDValue &Chain,
+ bool IsSignaling) const {
+  // FIXME: Currently we cannot really respect all IEEE predicates due to
+  // libgcc not supporting them. We can update this code when libgcc provides
+  // such functions.
+
assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
&& "Unsupported setcc type!");
@@ -320,25 +344,18 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
(VT == MVT::f64) ? RTLIB::OGT_F64 :
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
+ case ISD::SETO:
+ ShouldInvertCC = true;
+ LLVM_FALLTHROUGH;
case ISD::SETUO:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
break;
- case ISD::SETO:
- LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
- (VT == MVT::f64) ? RTLIB::O_F64 :
- (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
- break;
case ISD::SETONE:
- // SETONE = SETOLT | SETOGT
- LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 :
- (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
- LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
- (VT == MVT::f64) ? RTLIB::OGT_F64 :
- (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
- break;
+ // SETONE = O && UNE
+ ShouldInvertCC = true;
+ LLVM_FALLTHROUGH;
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
@@ -382,24 +399,33 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
EVT OpsVT[2] = { OldLHS.getValueType(),
OldRHS.getValueType() };
CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
- NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
+ auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
+ NewLHS = Call.first;
NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
- if (ShouldInvertCC)
- CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
-
- if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
- SDValue Tmp = DAG.getNode(
- ISD::SETCC, dl,
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
- NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
- NewLHS = DAG.getNode(
- ISD::SETCC, dl,
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
- NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
- NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ if (ShouldInvertCC) {
+ assert(RetVT.isInteger());
+ CCCode = getSetCCInverse(CCCode, RetVT);
+ }
+
+ if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
+ // Update Chain.
+ Chain = Call.second;
+ } else {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
+ SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
+ auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
+ CCCode = getCmpLibcallCC(LC2);
+ if (ShouldInvertCC)
+ CCCode = getSetCCInverse(CCCode, RetVT);
+ NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
+ Call2.second);
+ NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
+ Tmp.getValueType(), Tmp, NewLHS);
NewRHS = SDValue();
}
}
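
When a second libcall is required (LC2 != UNKNOWN_LIBCALL), the softened
compare issues both calls, tests each result against zero, and combines the
two setccs with OR (or AND once ShouldInvertCC flips the predicates). Sketch
for SETUEQ on f32, using stand-in functions that follow the usual libgcc
soft-float comparison semantics rather than the real RTLIB plumbing:

    #include <cassert>
    #include <cmath>

    // Assumed semantics: __unordsf2 is nonzero iff an operand is NaN,
    // __eqsf2 is zero iff the operands are ordered and equal.
    static int unordsf2(float A, float B) {
      return std::isnan(A) || std::isnan(B);
    }
    static int eqsf2(float A, float B) { return !(A == B); }

    // SETUEQ: LC1 = UO_F32 compared SETNE, LC2 = OEQ_F32 compared SETEQ,
    // and the two boolean results are ORed.
    static bool setueq(float A, float B) {
      bool Unordered = unordsf2(A, B) != 0;
      bool Equal = eqsf2(A, B) == 0;
      return Unordered || Equal;
    }

    int main() {
      assert(setueq(1.0f, 1.0f));
      assert(!setueq(1.0f, 2.0f));
      assert(setueq(NAN, 1.0f));   // unordered operands satisfy UEQ
    }
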
@@ -693,6 +719,27 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
return Op.getOperand(1);
break;
}
+ case ISD::SETCC: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // If (1) we only need the sign-bit, (2) the setcc operands are the same
+ // width as the setcc result, and (3) the result of a setcc conforms to 0 or
+ // -1, we may be able to bypass the setcc.
+ if (DemandedBits.isSignMask() &&
+ Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() &&
+ getBooleanContents(Op0.getValueType()) ==
+ BooleanContent::ZeroOrNegativeOneBooleanContent) {
+ // If we're testing X < 0, then this compare isn't needed - just use X!
+ // FIXME: We're limiting to integer types here, but this should also work
+ // if we don't care about FP signed-zero. The use of SETLT with FP means
+ // that we don't care about NaNs.
+ if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
+ (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
+ return Op0;
+ }
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
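
The new SETCC case lets the multi-use simplifier bypass the compare outright:
with zero-or-negative-one booleans, `setlt x, 0` produces a mask whose sign
bit always matches the sign bit of x, so when only that bit is demanded x can
be used directly. A small check of the invariant:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      for (int32_t X : {-7, 0, 7}) {
        // ZeroOrNegativeOneBooleanContent: true is an all-ones mask.
        int32_t SetCC = (X < 0) ? -1 : 0;
        uint32_t SignMask = 0x80000000u;
        assert(((uint32_t)SetCC & SignMask) == ((uint32_t)X & SignMask));
      }
    }
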
@@ -1251,7 +1298,7 @@ bool TargetLowering::SimplifyDemandedBits(
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
- getBooleanContents(VT) ==
+ getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
@@ -1538,6 +1585,16 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero = Known2.Zero.reverseBits();
break;
}
+ case ISD::BSWAP: {
+ SDValue Src = Op.getOperand(0);
+ APInt DemandedSrcBits = DemandedBits.byteSwap();
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known.One = Known2.One.byteSwap();
+ Known.Zero = Known2.Zero.byteSwap();
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
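
The BSWAP case propagates demand through the swap by byte-swapping the
demanded-bits mask itself: a byte demanded in the result maps to the mirrored
byte of the source. A quick sanity check of that mapping:

    #include <cassert>
    #include <cstdint>

    static uint32_t bswap32(uint32_t V) {
      return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) |
             (V << 24);
    }

    int main() {
      uint32_t DemandedBits = 0x000000FFu;    // only the result's low byte
      uint32_t DemandedSrcBits = bswap32(DemandedBits);
      assert(DemandedSrcBits == 0xFF000000u); // the source's top byte
    }
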
@@ -1753,15 +1810,11 @@ bool TargetLowering::SimplifyDemandedBits(
// undesirable.
break;
- auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
- if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
+ SDValue ShAmt = Src.getOperand(1);
+ auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
+ if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
break;
-
- SDValue Shift = Src.getOperand(1);
- uint64_t ShVal = ShAmt->getZExtValue();
-
- if (TLO.LegalTypes())
- Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
+ uint64_t ShVal = ShAmtC->getZExtValue();
APInt HighBits =
APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
@@ -1771,10 +1824,12 @@ bool TargetLowering::SimplifyDemandedBits(
if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
+ if (TLO.LegalTypes())
+ ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
SDValue NewTrunc =
TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
return TLO.CombineTo(
- Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt));
}
break;
}
@@ -1818,6 +1873,17 @@ bool TargetLowering::SimplifyDemandedBits(
Depth + 1))
return true;
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedSrcBits.isAllOnesValue() ||
+ !DemandedSrcElts.isAllOnesValue()) {
+ if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
Known = Known2;
if (BitWidth > EltBitWidth)
Known = Known.zext(BitWidth, false /* => any extend */);
@@ -2808,7 +2874,8 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
// Note that where Y is variable and is known to have at most one bit set
// (for example, if it is Z & 1) we cannot do this; the expressions are not
// equivalent when Y == 0.
- Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ assert(OpVT.isInteger());
+ Cond = ISD::getSetCCInverse(Cond, OpVT);
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(Cond, N0.getSimpleValueType()))
return DAG.getSetCC(DL, VT, N0, Zero, Cond);
@@ -2897,7 +2964,8 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
// What if we invert constants? (and the target predicate)
I1.negate();
I01.negate();
- NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
+ assert(XVT.isInteger());
+ NewCond = getSetCCInverse(NewCond, XVT);
if (!checkConstants())
return SDValue();
// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
@@ -3052,6 +3120,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAGCombinerInfo &DCI,
const SDLoc &dl) const {
SelectionDAG &DAG = DCI.DAG;
+ const DataLayout &Layout = DAG.getDataLayout();
EVT OpVT = N0.getValueType();
// Constant fold or commute setcc.
@@ -3132,7 +3201,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
SDValue Zero = DAG.getConstant(0, dl, CTVT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
- ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true);
+ assert(CTVT.isInteger());
+ ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
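
The surrounding combine rewrites (ctpop x) ==/!= 1 without a population
count: a value has exactly one bit set iff it is nonzero and clearing its
lowest set bit leaves zero. The same idiom in plain C++:

    #include <cassert>
    #include <cstdint>

    static bool ctpopIsOne(uint32_t X) {
      return X != 0 && (X & (X - 1)) == 0;   // the classic power-of-two test
    }

    int main() {
      assert(ctpopIsOne(64));
      assert(!ctpopIsOne(0));
      assert(!ctpopIsOne(6));   // two bits set
    }
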
@@ -3223,7 +3293,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode InvCond = ISD::getSetCCInverse(
cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
- TopSetCC.getOperand(0).getValueType().isInteger());
+ TopSetCC.getOperand(0).getValueType());
return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
TopSetCC.getOperand(1),
InvCond);
@@ -3256,7 +3326,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
if (Mask.isSubsetOf(newMask)) {
- if (DAG.getDataLayout().isLittleEndian())
+ if (Layout.isLittleEndian())
bestOffset = (uint64_t)offset * (width/8);
else
bestOffset = (origWidth/width - offset - 1) * (width/8);
@@ -3272,11 +3342,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
if (newVT.isRound() &&
shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
- EVT PtrType = Lod->getOperand(1).getValueType();
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
- Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
- DAG.getConstant(bestOffset, dl, PtrType));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl);
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(
newVT, dl, Lod->getChain(), Ptr,
@@ -3332,8 +3400,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (DCI.isBeforeLegalizeOps() ||
(isOperationLegal(ISD::SETCC, newVT) &&
isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
- EVT NewSetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
+ EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
@@ -3379,14 +3446,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
- isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
+ (N0.getValueType() == MVT::i1 ||
+ getBooleanContents(N0.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent)) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
if (TrueWhenTrue)
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- CC = ISD::getSetCCInverse(CC,
- N0.getOperand(0).getValueType().isInteger());
+ CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
@@ -3420,10 +3489,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, Val, N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
- } else if (N1C->isOne() &&
- (VT == MVT::i1 ||
- getBooleanContents(N0->getValueType(0)) ==
- ZeroOrOneBooleanContent)) {
+ } else if (N1C->isOne()) {
SDValue Op0 = N0;
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
@@ -3431,10 +3497,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if ((Op0.getOpcode() == ISD::XOR) &&
Op0.getOperand(0).getOpcode() == ISD::SETCC &&
Op0.getOperand(1).getOpcode() == ISD::SETCC) {
- // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
- Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
- return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
- Cond);
+ SDValue XorLHS = Op0.getOperand(0);
+ SDValue XorRHS = Op0.getOperand(1);
+ // Ensure that the input setccs return an i1 type or 0/1 value.
+ if (Op0.getValueType() == MVT::i1 ||
+ (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent &&
+ getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent)) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
+ }
}
if (Op0.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Op0.getOperand(1)) &&
@@ -3611,14 +3685,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
(VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
N0.getOpcode() == ISD::AND) {
- auto &DL = DAG.getDataLayout();
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize());
+ EVT ShiftTy =
+ getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
unsigned ShCt = AndRHS->getAPIntValue().logBase2();
if (AndRHS->getAPIntValue().isPowerOf2() &&
- ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShCt, dl, ShiftTy)));
@@ -3628,7 +3702,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Perform the xform if C1 is a single bit.
unsigned ShCt = C1.logBase2();
if (C1.isPowerOf2() &&
- ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShCt, dl, ShiftTy)));
@@ -3639,6 +3713,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (C1.getMinSignedBits() <= 64 &&
!isLegalICmpImmediate(C1.getSExtValue())) {
+ EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
// (X & -256) == 256 -> (X >> 8) == 1
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
@@ -3646,15 +3721,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
- auto &DL = DAG.getDataLayout();
- EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
- !DCI.isBeforeLegalize());
- EVT CmpTy = N0.getValueType();
- SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
- DAG.getConstant(ShiftBits, dl,
- ShiftTy));
- SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
- return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
+ SDValue Shift =
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
}
}
} else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
@@ -3676,14 +3749,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
NewC.lshrInPlace(ShiftBits);
if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
- isLegalICmpImmediate(NewC.getSExtValue())) {
- auto &DL = DAG.getDataLayout();
- EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
- !DCI.isBeforeLegalize());
- EVT CmpTy = N0.getValueType();
- SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
+ isLegalICmpImmediate(NewC.getSExtValue()) &&
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShiftBits, dl, ShiftTy));
- SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
+ SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
}
}
@@ -4480,6 +4550,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
+ // Indirect 'other' or 'immediate' constraints are not allowed.
+ if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
+ CType == TargetLowering::C_Register ||
+ CType == TargetLowering::C_RegisterClass))
+ continue;
+
// If this is an 'other' or 'immediate' constraint, see if the operand is
// valid for it. For example, on X86 we might have an 'rI' constraint. If
// the operand is an integer in the range [0..31] we want to use I (saving a
@@ -4905,7 +4981,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
ISD::CondCode Cond,
DAGCombinerInfo &DCI,
const SDLoc &DL) const {
- SmallVector<SDNode *, 2> Built;
+ SmallVector<SDNode *, 5> Built;
if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
DCI, DL, Built)) {
for (SDNode *N : Built)
@@ -4940,26 +5016,44 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
if (!isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
- // TODO: Could support comparing with non-zero too.
- ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!CompTarget || !CompTarget->isNullValue())
- return SDValue();
-
- bool HadOneDivisor = false;
- bool AllDivisorsAreOnes = true;
+ bool ComparingWithAllZeros = true;
+ bool AllComparisonsWithNonZerosAreTautological = true;
+ bool HadTautologicalLanes = false;
+ bool AllLanesAreTautological = true;
bool HadEvenDivisor = false;
bool AllDivisorsArePowerOfTwo = true;
- SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
+ bool HadTautologicalInvertedLanes = false;
+ SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
- auto BuildUREMPattern = [&](ConstantSDNode *C) {
+ auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (C->isNullValue())
+ if (CDiv->isNullValue())
return false;
- const APInt &D = C->getAPIntValue();
- // If all divisors are ones, we will prefer to avoid the fold.
- HadOneDivisor |= D.isOneValue();
- AllDivisorsAreOnes &= D.isOneValue();
+ const APInt &D = CDiv->getAPIntValue();
+ const APInt &Cmp = CCmp->getAPIntValue();
+
+ ComparingWithAllZeros &= Cmp.isNullValue();
+
+  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+  // if C2 is not less than C1, the comparison is always false.
+  // But we will only be able to produce the comparison that will give the
+  // opposite tautological answer. So this lane would need to be fixed up.
+ bool TautologicalInvertedLane = D.ule(Cmp);
+ HadTautologicalInvertedLanes |= TautologicalInvertedLane;
+
+ // If all lanes are tautological (either all divisors are ones, or divisor
+ // is not greater than the constant we are comparing with),
+ // we will prefer to avoid the fold.
+ bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
+ HadTautologicalLanes |= TautologicalLane;
+ AllLanesAreTautological &= TautologicalLane;
+
+  // If we are comparing with non-zero, we'll need to subtract said
+  // comparison value from the LHS. But there is no point in doing that if
+  // every lane where we are comparing with non-zero is tautological.
+ if (!Cmp.isNullValue())
+ AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
@@ -4981,19 +5075,27 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
- // Q = floor((2^W - 1) / D)
- APInt Q = APInt::getAllOnesValue(W).udiv(D);
+ // Q = floor((2^W - 1) u/ D)
+ // R = ((2^W - 1) u% D)
+ APInt Q, R;
+ APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
+
+    // If we are comparing with zero, then that comparison constant is okay;
+    // otherwise it may need to be one less than that.
+ if (Cmp.ugt(R))
+ Q -= 1;
assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
- // If the divisor is 1 the result can be constant-folded.
- if (D.isOneValue()) {
+ // If the lane is tautological the result can be constant-folded.
+ if (TautologicalLane) {
// Set P and K amount to a bogus values so we can try to splat them.
P = 0;
K = -1;
- assert(Q.isAllOnesValue() &&
- "Expecting all-ones comparison for one divisor");
+      // And ensure that the comparison constant is tautological: it will
+      // always compare true/false.
+ Q = -1;
}
PAmts.push_back(DAG.getConstant(P, DL, SVT));
@@ -5007,11 +5109,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue D = REMNode.getOperand(1);
// Collect the values from each element.
- if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
+ if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
return SDValue();
- // If this is a urem by a one, avoid the fold since it can be constant-folded.
- if (AllDivisorsAreOnes)
+ // If all lanes are tautological, the result can be constant-folded.
+ if (AllLanesAreTautological)
return SDValue();
// If this is a urem by a powers-of-two, avoid the fold since it can be
@@ -5021,7 +5123,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue PVal, KVal, QVal;
if (VT.isVector()) {
- if (HadOneDivisor) {
+ if (HadTautologicalLanes) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep '0's.
turnVectorIntoSplatVector(PAmts, isNullConstant);
@@ -5040,6 +5142,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
QVal = QAmts[0];
}
+ if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
+ if (!isOperationLegalOrCustom(ISD::SUB, VT))
+ return SDValue(); // FIXME: Could/should use `ISD::ADD`?
+ assert(CompTargetNode.getValueType() == N.getValueType() &&
+ "Expecting that the types on LHS and RHS of comparisons match.");
+ N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
+ }
+
// (mul N, P)
SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
Created.push_back(Op0.getNode());
@@ -5058,8 +5168,41 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
}
// UREM: (setule/setugt (rotr (mul N, P), K), Q)
- return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
- ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+ SDValue NewCC =
+ DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+ if (!HadTautologicalInvertedLanes)
+ return NewCC;
+
+  // If any lanes previously compared always-false, the NewCC will give
+  // an always-true result for them, so we need to fix up those lanes.
+  // Or the other way around for the inequality predicate.
+ assert(VT.isVector() && "Can/should only get here for vectors.");
+ Created.push_back(NewCC.getNode());
+
+  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+  // if C2 is not less than C1, the comparison is always false.
+  // But we have produced the comparison that will give the
+  // opposite tautological answer. So these lanes would need to be fixed up.
+ SDValue TautologicalInvertedChannels =
+ DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
+ Created.push_back(TautologicalInvertedChannels.getNode());
+
+ if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
+ // If we have a vector select, let's replace the comparison results in the
+ // affected lanes with the correct tautological result.
+    SDValue Replacement =
+        DAG.getBoolConstant(Cond != ISD::SETEQ, DL, SETCCVT, SETCCVT);
+ return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
+ Replacement, NewCC);
+ }
+
+ // Else, we can just invert the comparison result in the appropriate lanes.
+ if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
+ return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
+ TautologicalInvertedChannels);
+
+ return SDValue(); // Don't know how to lower.
}
/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
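
For the scalar zero-comparison case, prepareUREMEqFold builds the classic
Granlund-Montgomery divisibility test: with D = D0 * 2^K (D0 odd), P the
multiplicative inverse of D0 mod 2^W, and Q = floor((2^W - 1) / D), the
predicate `x u% D == 0` equals `rotr(x * P, K) u<= Q`. A brute-force check of
that identity in plain C++ (assumed W = 16 and D = 6; not the DAG code, and
none of the non-zero-comparison or tautological-lane handling applies here):

    #include <cassert>
    #include <cstdint>

    static uint16_t rotr16(uint16_t V, unsigned K) {
      return K == 0 ? V : (uint16_t)((V >> K) | (V << (16 - K)));
    }

    int main() {
      const uint16_t D = 6, D0 = 3;   // D = D0 * 2^K with D0 odd, K = 1
      const unsigned K = 1;
      uint16_t P = 1;                 // multiplicative inverse of D0 mod 2^16
      while ((uint16_t)(P * D0) != 1)
        ++P;
      const uint16_t Q = 0xFFFFu / D; // floor((2^16 - 1) / D)
      for (uint32_t X = 0; X <= 0xFFFFu; ++X) {
        bool Rem = (uint16_t)X % D == 0;
        bool Fold = rotr16((uint16_t)(X * P), K) <= Q;
        assert(Rem == Fold);
      }
    }
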
@@ -5544,7 +5687,14 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
ForCodeSize, Depth + 1);
char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
ForCodeSize, Depth + 1);
- if (V0 >= V1) {
+ // TODO: This is a hack. It is possible that costs have changed between now
+ // and the initial calls to isNegatibleForFree(). That is because we
+ // are rewriting the expression, and that may change the number of
+ // uses (and therefore the cost) of values. If the negation costs are
+ // equal, only negate this value if it is a constant. Otherwise, try
+ // operand 1. A better fix would eliminate uses as a cost factor or
+ // track the change in uses as we rewrite the expression.
+ if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) {
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
SDValue Neg0 = getNegatedExpression(
Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
@@ -5954,6 +6104,8 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
EVT DstVT = Node->getValueType(0);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+ EVT DstSetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
// Only expand vector types if we have the appropriate vector bit operations.
unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
@@ -5980,7 +6132,15 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
}
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
- SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+ SDValue Sel;
+
+ if (Node->isStrictFPOpcode()) {
+ Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
+ Node->getOperand(0), /*IsSignaling*/ true);
+ Chain = Sel.getValue(1);
+ } else {
+ Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+ }
bool Strict = Node->isStrictFPOpcode() ||
shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
@@ -5989,28 +6149,29 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
// signmask then offset (the result of which should be fully representable).
// Sel = Src < 0x8000000000000000
- // Val = select Sel, Src, Src - 0x8000000000000000
- // Ofs = select Sel, 0, 0x8000000000000000
- // Result = fp_to_sint(Val) ^ Ofs
+ // FltOfs = select Sel, 0, 0x8000000000000000
+ // IntOfs = select Sel, 0, 0x8000000000000000
+ // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
// TODO: Should any fast-math-flags be set for the FSUB?
- SDValue SrcBiased;
- if (Node->isStrictFPOpcode())
- SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
- { Node->getOperand(0), Src, Cst });
- else
- SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
- SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
- SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
- DAG.getConstant(SignMask, dl, DstVT));
+ SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
+ DAG.getConstantFP(0.0, dl, SrcVT), Cst);
+ Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
+ SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
+ DAG.getConstant(0, dl, DstVT),
+ DAG.getConstant(SignMask, dl, DstVT));
SDValue SInt;
if (Node->isStrictFPOpcode()) {
+ SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
+ { Chain, Src, FltOfs });
SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
- { SrcBiased.getValue(1), Val });
+ { Val.getValue(1), Val });
Chain = SInt.getValue(1);
- } else
+ } else {
+ SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
- Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
+ }
+ Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
} else {
// Expand based on maximum range of FP_TO_SINT:
// True = fp_to_sint(Src)
@@ -6023,14 +6184,17 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
False = DAG.getNode(ISD::XOR, dl, DstVT, False,
DAG.getConstant(SignMask, dl, DstVT));
+ Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
Result = DAG.getSelect(dl, DstVT, Sel, True, False);
}
return true;
}
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
+ SDValue &Chain,
SelectionDAG &DAG) const {
- SDValue Src = Node->getOperand(0);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
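
The first branch of expandFP_TO_UINT biases inputs at or above 2^(n-1) down
into the signed range before the conversion and XORs the sign bit back into
the result afterwards. A plain-C++ rendering of that data flow, assuming an
f64 to u64 conversion with truncation (the strict DAG form additionally
threads the chain and uses a signaling compare):

    #include <cassert>
    #include <cstdint>

    static uint64_t f64_to_u64(double Src) {
      const double Cst = 9223372036854775808.0;          // 2^63
      bool Sel = Src < Cst;                              // Sel = Src < 2^63
      double FltOfs = Sel ? 0.0 : Cst;                   // FltOfs select
      uint64_t IntOfs = Sel ? 0 : 0x8000000000000000ull; // IntOfs select
      int64_t SInt = (int64_t)(Src - FltOfs);            // fp_to_sint
      return (uint64_t)SInt ^ IntOfs;                    // Result
    }

    int main() {
      assert(f64_to_u64(0.0) == 0);
      assert(f64_to_u64(123.5) == 123);
      assert(f64_to_u64(18446744073709549568.0) == 18446744073709549568ull);
    }
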
@@ -6052,17 +6216,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
return false;
// For unsigned conversions, convert them to signed conversions using the
- // algorithm from the x86_64 __floatundidf in compiler_rt.
- SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
-
- SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
- SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
- SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
- SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
- SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
-
- SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
- SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+ // algorithm from the x86_64 __floatundisf in compiler_rt.
// TODO: This really should be implemented using a branch rather than a
// select. We happen to get lucky and machinesink does the right
@@ -6073,6 +6227,37 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue SignBitTest = DAG.getSetCC(
dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+
+ SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
+ SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
+
+ SDValue Slow, Fast;
+ if (Node->isStrictFPOpcode()) {
+ // In strict mode, we must avoid spurious exceptions, and therefore
+ // must make sure to only emit a single STRICT_SINT_TO_FP.
+ SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src);
+ Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other },
+ { Node->getOperand(0), InCvt });
+ Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
+ { Fast.getValue(1), Fast, Fast });
+ Chain = Slow.getValue(1);
+ // The STRICT_SINT_TO_FP inherits the exception mode from the
+ // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
+ // never raise any exception.
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept());
+ Fast->setFlags(Flags);
+ Flags.setNoFPExcept(true);
+ Slow->setFlags(Flags);
+ } else {
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
+ Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+ Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
+ }
+
Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
return true;
}
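
This path converts a wide unsigned value by halving it when the sign bit is
set, keeping the discarded low bit ORed in as a sticky bit, converting
signed, and doubling; the strict variant selects the conversion input so only
one STRICT_SINT_TO_FP is ever emitted. The non-strict arithmetic, modeled
directly in C++ (assuming round-to-nearest-even):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    static double u64_to_f64(uint64_t Src) {
      if ((int64_t)Src >= 0)                 // SignBitTest false: Fast path
        return (double)(int64_t)Src;
      uint64_t Or = (Src >> 1) | (Src & 1);  // Shr | And: sticky low bit
      double SignCvt = (double)(int64_t)Or;  // now within the signed range
      return SignCvt + SignCvt;              // Slow = fadd SignCvt, SignCvt
    }

    int main() {
      for (uint64_t V : {0ull, 1ull, 0x8000000000000000ull,
                         0x8000000000000001ull, 0xFFFFFFFFFFFFFFFFull})
        assert(u64_to_f64(V) == (double)V);
    }
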
@@ -6105,8 +6290,18 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
- SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
- Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ if (Node->isStrictFPOpcode()) {
+ SDValue HiSub =
+ DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
+ {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
+ Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
+ {HiSub.getValue(1), LoFlt, HiSub});
+ Chain = Result.getValue(1);
+ } else {
+ SDValue HiSub =
+ DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ }
return true;
}
@@ -6150,6 +6345,26 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
}
}
+ // If none of the above worked, but there are no NaNs, then expand to
+ // a compare/select sequence. This is required for correctness since
+ // InstCombine might have canonicalized a fcmp+select sequence to a
+ // FMINNUM/FMAXNUM node. If we were to fall through to the default
+ // expansion to libcall, we might introduce a link-time dependency
+ // on libm into a file that originally did not have one.
+ if (Node->getFlags().hasNoNaNs()) {
+ ISD::CondCode Pred =
+ Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+ SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
+ // Copy FMF flags, but always set the no-signed-zeros flag
+ // as this is implied by the FMINNUM/FMAXNUM semantics.
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoSignedZeros(true);
+ SelCC->setFlags(Flags);
+ return SelCC;
+ }
+
return SDValue();
}
@@ -6342,8 +6557,9 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
return true;
}
-SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
- SelectionDAG &DAG) const {
+std::pair<SDValue, SDValue>
+TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
+ SelectionDAG &DAG) const {
SDLoc SL(LD);
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
@@ -6377,7 +6593,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
- return DAG.getMergeValues({Value, NewChain}, SL);
+ return std::make_pair(Value, NewChain);
}
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
@@ -6471,10 +6687,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
LoadedVT.isVector()) {
// Scalarize the load and let the individual components be handled.
- SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
- if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
- return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
- return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
+ return scalarizeVectorLoad(LD, DAG);
}
// Expand to a (misaligned) integer load of the same size,
@@ -6807,7 +7020,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
- return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
+ return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
//===----------------------------------------------------------------------===//
@@ -7096,6 +7309,86 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
return Result;
}
+SDValue
+TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
+ SDValue LHS, SDValue RHS,
+ unsigned Scale, SelectionDAG &DAG) const {
+ assert((Opcode == ISD::SDIVFIX ||
+ Opcode == ISD::UDIVFIX) &&
+ "Expected a fixed point division opcode");
+
+ EVT VT = LHS.getValueType();
+ bool Signed = Opcode == ISD::SDIVFIX;
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ // If there is enough room in the type to upscale the LHS or downscale the
+ // RHS before the division, we can perform it in this type without having to
+ // resize. For signed operations, the LHS headroom is the number of
+  // redundant sign bits, and for unsigned ones it is the number of leading
+  // zeroes. The headroom for the RHS is the number of trailing zeroes.
+ unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
+ : DAG.computeKnownBits(LHS).countMinLeadingZeros();
+ unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
+
+ if (LHSLead + RHSTrail < Scale)
+ return SDValue();
+
+ unsigned LHSShift = std::min(LHSLead, Scale);
+ unsigned RHSShift = Scale - LHSShift;
+
+ // At this point, we know that if we shift the LHS up by LHSShift and the
+ // RHS down by RHSShift, we can emit a regular division with a final scaling
+ // factor of Scale.
+
+ EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ if (LHSShift)
+ LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
+ DAG.getConstant(LHSShift, dl, ShiftTy));
+ if (RHSShift)
+ RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
+ DAG.getConstant(RHSShift, dl, ShiftTy));
+
+ SDValue Quot;
+ if (Signed) {
+ // For signed operations, if the resulting quotient is negative and the
+ // remainder is nonzero, subtract 1 from the quotient to round towards
+ // negative infinity.
+ SDValue Rem;
+ // FIXME: Ideally we would always produce an SDIVREM here, but if the
+ // type isn't legal, SDIVREM cannot be expanded. There is no reason why
+ // we couldn't just form a libcall, but the type legalizer doesn't do it.
+ if (isTypeLegal(VT) &&
+ isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
+ Quot = DAG.getNode(ISD::SDIVREM, dl,
+ DAG.getVTList(VT, VT),
+ LHS, RHS);
+ Rem = Quot.getValue(1);
+ Quot = Quot.getValue(0);
+ } else {
+ Quot = DAG.getNode(ISD::SDIV, dl, VT,
+ LHS, RHS);
+ Rem = DAG.getNode(ISD::SREM, dl, VT,
+ LHS, RHS);
+ }
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
+ SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
+ SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
+ SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
+ SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
+ DAG.getConstant(1, dl, VT));
+ Quot = DAG.getSelect(dl, VT,
+ DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
+ Sub1, Quot);
+ } else
+ Quot = DAG.getNode(ISD::UDIV, dl, VT,
+ LHS, RHS);
+
+ // TODO: Saturation.
+
+ return Quot;
+}
+
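
(Aside: the rounding in this expansion is easy to check against a scalar model. Below is a minimal standalone C++ sketch, assuming 32-bit fixed-point values and a 64-bit widening multiply in place of the headroom/shift dance; the names are illustrative and this is not the DAG implementation itself.)

#include <cassert>
#include <cstdint>
#include <iostream>

// Divide two fixed-point numbers with 'Scale' fractional bits, rounding
// the quotient toward negative infinity like the expansion above.
static int32_t sdivfix_model(int32_t LHS, int32_t RHS, unsigned Scale) {
  // Pre-scale the dividend so the integer division produces a quotient
  // with 'Scale' fractional bits. The real expansion checks headroom to
  // stay in the original width; the model simply widens to 64 bits.
  int64_t Wide = (int64_t)LHS * ((int64_t)1 << Scale);
  int64_t Quot = Wide / RHS; // C++ division truncates toward zero
  int64_t Rem = Wide % RHS;
  // Truncation rounded the wrong way iff the exact result is negative
  // and inexact: subtract 1 to round toward negative infinity.
  if (Rem != 0 && ((Wide < 0) != (RHS < 0)))
    --Quot;
  return (int32_t)Quot;
}

int main() {
  // Q16.16: 1.5 / 2.0 == 0.75.
  assert(sdivfix_model(3 << 15, 2 << 16, 16) == 3 << 14);
  // Q16.16: -1.0 / 3.0 rounds down to -21846/65536, not up to -21845.
  std::cout << sdivfix_model(-(1 << 16), 3 << 16, 16) << "\n";
}
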
void TargetLowering::expandUADDSUBO(
SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
SDLoc dl(Node);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 17a4d76c4c80..45427dc41e6e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
index 412a00095b9b..85dd4f59fa13 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -73,6 +73,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index db520d4e6403..4abf9ea41b65 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -24,9 +23,11 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "sjljehprepare"
@@ -175,9 +176,9 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// that needs to be restored on all exits from the function. This is an alloca
// because the value needs to be added to the global context list.
auto &DL = F.getParent()->getDataLayout();
- unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy);
- FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(),
- nullptr, Align, "fn_context", &EntryBB->front());
+ const Align Alignment(DL.getPrefTypeAlignment(FunctionContextTy));
+ FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(), nullptr,
+ Alignment, "fn_context", &EntryBB->front());
// Fill in the function context structure.
for (LandingPadInst *LPI : LPads) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
index 9fff873324d0..6664b58eccf8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -18,6 +19,16 @@ using namespace llvm;
#define DEBUG_TYPE "slotindexes"
char SlotIndexes::ID = 0;
+
+SlotIndexes::SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+}
+
+SlotIndexes::~SlotIndexes() {
+ // The indexList's nodes are all allocated in the BumpPtrAllocator.
+ indexList.clearAndLeakNodesUnsafely();
+}
+
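
(Aside: clearAndLeakNodesUnsafely() is the right call here precisely because the nodes live in a BumpPtrAllocator. A tiny stand-in model of the pattern, with simplified types that are not LLVM's:)

#include <cstddef>
#include <new>
#include <vector>

// Crude stand-in for a bump allocator: it owns every block it hands out
// and reclaims them all at once in its destructor.
struct Arena {
  std::vector<char *> Blocks;
  void *allocate(std::size_t N) {
    Blocks.push_back(new char[N]);
    return Blocks.back();
  }
  ~Arena() {
    for (char *B : Blocks)
      delete[] B;
  }
};

struct Node { Node *Next = nullptr; };

struct IndexList {
  Node *Head = nullptr;
  // Equivalent of clearAndLeakNodesUnsafely(): drop the links without
  // deleting the nodes; the storage belongs to the arena, so individual
  // deletion here would be invalid (and pointless).
  void clearAndLeakNodesUnsafely() { Head = nullptr; }
};

int main() {
  Arena A;
  IndexList L;
  // Nodes are carved out of the arena, not new'd individually.
  for (int i = 0; i != 4; ++i) {
    Node *N = new (A.allocate(sizeof(Node))) Node();
    N->Next = L.Head;
    L.Head = N;
  }
  L.clearAndLeakNodesUnsafely(); // ~Arena() reclaims the storage
}
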
INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE,
"Slot index numbering", false, false)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
index 11452fdb747a..36a0ddf67b19 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include <algorithm>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
index 641b54205d62..b6e81116286f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
@@ -48,6 +48,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -1003,7 +1004,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// zone are okay, despite the fact that we don't have a good way
// for validating all of the usages of the calculation.
#ifndef NDEBUG
- bool TouchesMemory = I.mayLoad() || I.mayStore();
+ bool TouchesMemory = I.mayLoadOrStore();
// If we *don't* protect the user from escaped allocas, don't bother
// validating the instructions.
if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index fb2abf3daa7f..5ccfacfc26dc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
index 383c91259ffc..e16587c44a55 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
@@ -260,7 +260,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
// Create a LiveOutReg for each bit that is set in the register mask.
for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg)
- if ((Mask[Reg / 32] >> Reg % 32) & 1)
+ if ((Mask[Reg / 32] >> (Reg % 32)) & 1)
LiveOuts.push_back(createLiveOutReg(Reg, TRI));
// We don't need to keep track of a register if its super-register is already
@@ -294,14 +294,13 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
return LiveOuts;
}
-void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
+void StackMaps::recordStackMapOpers(const MCSymbol &MILabel,
+ const MachineInstr &MI, uint64_t ID,
MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
bool recordResult) {
MCContext &OutContext = AP.OutStreamer->getContext();
- MCSymbol *MILabel = OutContext.createTempSymbol();
- AP.OutStreamer->EmitLabel(MILabel);
-
+
LocationVec Locations;
LiveOutVec LiveOuts;
@@ -340,7 +339,7 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
// Create an expression to calculate the offset of the callsite from function
// entry.
const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(MILabel, OutContext),
+ MCSymbolRefExpr::create(&MILabel, OutContext),
MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext);
CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations),
@@ -360,22 +359,23 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
FnInfos.insert(std::make_pair(AP.CurrentFnSym, FunctionInfo(FrameSize)));
}
-void StackMaps::recordStackMap(const MachineInstr &MI) {
+void StackMaps::recordStackMap(const MCSymbol &L, const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap");
StackMapOpers opers(&MI);
const int64_t ID = MI.getOperand(PatchPointOpers::IDPos).getImm();
- recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), opers.getVarIdx()),
+ recordStackMapOpers(L, MI, ID, std::next(MI.operands_begin(),
+ opers.getVarIdx()),
MI.operands_end());
}
-void StackMaps::recordPatchPoint(const MachineInstr &MI) {
+void StackMaps::recordPatchPoint(const MCSymbol &L, const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint");
PatchPointOpers opers(&MI);
const int64_t ID = opers.getID();
auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx());
- recordStackMapOpers(MI, ID, MOI, MI.operands_end(),
+ recordStackMapOpers(L, MI, ID, MOI, MI.operands_end(),
opers.isAnyReg() && opers.hasDef());
#ifndef NDEBUG
@@ -390,14 +390,14 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
#endif
}
-void StackMaps::recordStatepoint(const MachineInstr &MI) {
+void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint");
StatepointOpers opers(&MI);
// Record all the deopt and gc operands (they're contiguous and run from the
// initial index to the end of the operand list)
const unsigned StartIdx = opers.getVarIdx();
- recordStackMapOpers(MI, opers.getID(), MI.operands_begin() + StartIdx,
+ recordStackMapOpers(L, MI, opers.getID(), MI.operands_begin() + StartIdx,
MI.operands_end(), false);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
index 5683d1db473c..4e2189884bb1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -61,6 +62,10 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
char StackProtector::ID = 0;
+StackProtector::StackProtector() : FunctionPass(ID), SSPBufferSize(8) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+}
+
INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE,
"Insert stack protectors", false, true)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
index 9c8143c55dc2..7ae758323280 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 83acf7f80715..c2cd8fa0324e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -42,7 +42,9 @@ SwitchCG::getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB) {
+ MachineBasicBlock *DefaultMBB,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
#ifndef NDEBUG
// Clusters must be non-empty, sorted, and only contain Range clusters.
assert(!Clusters.empty());
@@ -80,7 +82,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
assert(Range >= NumCases);
// Cheap case: the whole range may be suitable for jump table.
- if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range, PSI, BFI)) {
CaseCluster JTCluster;
if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
Clusters[0] = JTCluster;
@@ -138,7 +140,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
assert(NumCases < UINT64_MAX / 100);
assert(Range >= NumCases);
- if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range, PSI, BFI)) {
unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
int64_t NumEntries = j - i + 1;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
index ba348b4a9d41..648bf48b7d17 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
@@ -12,12 +12,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -37,6 +40,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -55,6 +60,11 @@ public:
EarlyTailDuplicate() : TailDuplicateBase(ID, true) {
initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry());
}
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::NoPHIs);
+ }
};
} // end anonymous namespace
@@ -74,7 +84,11 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
return false;
auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false);
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
+ Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false);
bool MadeChange = false;
while (Duplicator.tailDuplicateBlocks())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
index 03c68a37e459..cd1278fd4d8d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -19,13 +19,16 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -77,6 +80,8 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPIin,
+ const MachineBlockFrequencyInfo *MBFIin,
+ ProfileSummaryInfo *PSIin,
bool LayoutModeIn, unsigned TailDupSizeIn) {
MF = &MFin;
TII = MF->getSubtarget().getInstrInfo();
@@ -84,6 +89,8 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
MRI = &MF->getRegInfo();
MMI = &MF->getMMI();
MBPI = MBPIin;
+ MBFI = MBFIin;
+ PSI = PSIin;
TailDupSize = TailDupSizeIn;
assert(MBPI != nullptr && "Machine Branch Probability Info required");
@@ -555,14 +562,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
unsigned MaxDuplicateCount;
- if (TailDupSize == 0 &&
- TailDuplicateSize.getNumOccurrences() == 0 &&
- MF->getFunction().hasOptSize())
- MaxDuplicateCount = 1;
- else if (TailDupSize == 0)
+ bool OptForSize = MF->getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI);
+ if (TailDupSize == 0)
MaxDuplicateCount = TailDuplicateSize;
else
MaxDuplicateCount = TailDupSize;
+ if (OptForSize)
+ MaxDuplicateCount = 1;
// If the block to be duplicated ends in an unanalyzable fallthrough, don't
// duplicate it.
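
(Aside: the restructured logic above applies the size heuristic last, so a profile-cold block is capped at one duplicate no matter what was requested. A scalar model of the selection, with illustrative names:)

// Pick the tail-duplication limit. OptForSize is applied after both the
// target-requested size and the command-line default, so it always wins.
static unsigned maxDuplicateCount(unsigned TailDupSize,
                                  unsigned DefaultLimit, bool OptForSize) {
  unsigned Max = (TailDupSize == 0) ? DefaultLimit : TailDupSize;
  if (OptForSize)
    Max = 1;
  return Max;
}
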
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 9eeacc2584cb..bc59be890c97 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -60,6 +60,19 @@ bool TargetFrameLowering::needsFrameIndexResolution(
return MF.getFrameInfo().hasStackObjects();
}
+void TargetFrameLowering::getCalleeSaves(const MachineFunction &MF,
+ BitVector &CalleeSaves) const {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ CalleeSaves.resize(TRI.getNumRegs());
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid())
+ return;
+
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ CalleeSaves.set(Info.getReg());
+}
+
void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 6cae3b869501..a98c627dab09 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -1015,19 +1016,16 @@ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
}
// Default implementation of CreateTargetMIHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfo::
-CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
- const ScheduleDAG *DAG) const {
- return (ScheduleHazardRecognizer *)
- new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler");
+ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetMIHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
+ return new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler");
}
// Default implementation of CreateTargetPostRAHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
- return (ScheduleHazardRecognizer *)
- new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
+ return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
//===----------------------------------------------------------------------===//
@@ -1121,18 +1119,64 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
}
Optional<ParamLoadedValue>
-TargetInstrInfo::describeLoadedValue(const MachineInstr &MI) const {
+TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const {
const MachineFunction *MF = MI.getMF();
- const MachineOperand *Op = nullptr;
- DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});;
- const MachineOperand *SrcRegOp, *DestRegOp;
-
- if (isCopyInstr(MI, SrcRegOp, DestRegOp)) {
- Op = SrcRegOp;
- return ParamLoadedValue(*Op, Expr);
- } else if (MI.isMoveImmediate()) {
- Op = &MI.getOperand(1);
- return ParamLoadedValue(*Op, Expr);
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});
+ int64_t Offset;
+
+ // To simplify the sub-register handling, verify that we only need to
+ // consider physical registers.
+ assert(MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs));
+
+ if (auto DestSrc = isCopyInstr(MI)) {
+ Register DestReg = DestSrc->Destination->getReg();
+
+ if (Reg == DestReg)
+ return ParamLoadedValue(*DestSrc->Source, Expr);
+
+ // Cases where super- or sub-registers need to be described should
+ // be handled by the target's hook implementation.
+ assert(!TRI->isSuperOrSubRegisterEq(Reg, DestReg) &&
+ "TargetInstrInfo::describeLoadedValue can't describe super- or "
+ "sub-regs for copy instructions");
+ return None;
+ } else if (auto RegImm = isAddImmediate(MI, Reg)) {
+ Register SrcReg = RegImm->Reg;
+ Offset = RegImm->Imm;
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, Offset);
+ return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
+ } else if (MI.hasOneMemOperand()) {
+ // Only describe memory which provably does not escape the function. As
+ // described in llvm.org/PR43343, escaped memory may be clobbered by the
+ // callee (or by another thread).
+ const auto &TII = MF->getSubtarget().getInstrInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ const MachineMemOperand *MMO = MI.memoperands()[0];
+ const PseudoSourceValue *PSV = MMO->getPseudoValue();
+
+ // If the address points to "special" memory (e.g. a spill slot), it's
+ // sufficient to check that it isn't aliased by any high-level IR value.
+ if (!PSV || PSV->mayAlias(&MFI))
+ return None;
+
+ const MachineOperand *BaseOp;
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+ return None;
+
+ assert(MI.getNumExplicitDefs() == 1 &&
+ "Can currently only handle mem instructions with a single define");
+
+ // TODO: In what way do we need to take Reg into consideration here?
+
+ SmallVector<uint64_t, 8> Ops;
+ DIExpression::appendOffset(Ops, Offset);
+ Ops.push_back(dwarf::DW_OP_deref_size);
+ Ops.push_back(MMO->getSize());
+ Expr = DIExpression::prependOpcodes(Expr, Ops);
+ return ParamLoadedValue(*BaseOp, Expr);
}
return None;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 9b23012f47e3..e5a7b70d82c8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -88,6 +88,14 @@ static cl::opt<unsigned> OptsizeJumpTableDensity(
cl::desc("Minimum density for building a jump table in "
"an optsize function"));
+// FIXME: This option exists only to test, during development, whether strict
+// fp operations are processed correctly, by preventing them from being
+// mutated into normal fp operations. Once the backend supports strict float
+// operations, this option will be meaningless.
+static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation",
+ cl::desc("Don't mutate strict-float node to a legalize node"),
+ cl::init(false), cl::Hidden);
+
static bool darwinHasSinCos(const Triple &TT) {
assert(TT.isOSDarwin() && "should be called with darwin triple");
// Don't bother with 32 bit x86.
@@ -148,7 +156,6 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::OLE_F128, "__lekf2");
setLibcallName(RTLIB::OGT_F128, "__gtkf2");
setLibcallName(RTLIB::UO_F128, "__unordkf2");
- setLibcallName(RTLIB::O_F128, "__unordkf2");
}
// A few names are different on particular architectures or environments.
@@ -556,10 +563,6 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
CCs[RTLIB::UO_F64] = ISD::SETNE;
CCs[RTLIB::UO_F128] = ISD::SETNE;
CCs[RTLIB::UO_PPCF128] = ISD::SETNE;
- CCs[RTLIB::O_F32] = ISD::SETEQ;
- CCs[RTLIB::O_F64] = ISD::SETEQ;
- CCs[RTLIB::O_F128] = ISD::SETEQ;
- CCs[RTLIB::O_PPCF128] = ISD::SETEQ;
}
/// NOTE: The TargetMachine owns TLOF.
@@ -572,8 +575,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
MaxGluedStoresPerMemcpy = 0;
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
- UseUnderscoreSetJmp = false;
- UseUnderscoreLongJmp = false;
HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
JumpIsExpensive = JumpIsExpensiveOverride;
@@ -585,6 +586,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
BooleanVectorContents = UndefinedBooleanContent;
SchedPreferenceInfo = Sched::ILP;
GatherAllAliasesMaxDepth = 18;
+ IsStrictFPEnabled = DisableStrictNodeMutation;
// TODO: the default will be switched to 0 in the next commit, along
// with the Target-specific changes necessary.
MaxAtomicSizeInBitsSupported = 1024;
@@ -624,6 +626,8 @@ void TargetLoweringBase::initActions() {
IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
setIndexedLoadAction(IM, VT, Expand);
setIndexedStoreAction(IM, VT, Expand);
+ setIndexedMaskedLoadAction(IM, VT, Expand);
+ setIndexedMaskedStoreAction(IM, VT, Expand);
}
// Most backends expect to see the node which just returns the value loaded.
@@ -654,6 +658,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMULFIXSAT, VT, Expand);
setOperationAction(ISD::UMULFIX, VT, Expand);
setOperationAction(ISD::UMULFIXSAT, VT, Expand);
+ setOperationAction(ISD::SDIVFIX, VT, Expand);
+ setOperationAction(ISD::UDIVFIX, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -687,6 +693,7 @@ void TargetLoweringBase::initActions() {
// These operations default to expand for vector types.
if (VT.isVector()) {
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
@@ -694,38 +701,9 @@ void TargetLoweringBase::initActions() {
}
// Constrained floating-point operations default to expand.
- setOperationAction(ISD::STRICT_FADD, VT, Expand);
- setOperationAction(ISD::STRICT_FSUB, VT, Expand);
- setOperationAction(ISD::STRICT_FMUL, VT, Expand);
- setOperationAction(ISD::STRICT_FDIV, VT, Expand);
- setOperationAction(ISD::STRICT_FREM, VT, Expand);
- setOperationAction(ISD::STRICT_FMA, VT, Expand);
- setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
- setOperationAction(ISD::STRICT_FPOW, VT, Expand);
- setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
- setOperationAction(ISD::STRICT_FSIN, VT, Expand);
- setOperationAction(ISD::STRICT_FCOS, VT, Expand);
- setOperationAction(ISD::STRICT_FEXP, VT, Expand);
- setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
- setOperationAction(ISD::STRICT_FLOG, VT, Expand);
- setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
- setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
- setOperationAction(ISD::STRICT_LRINT, VT, Expand);
- setOperationAction(ISD::STRICT_LLRINT, VT, Expand);
- setOperationAction(ISD::STRICT_FRINT, VT, Expand);
- setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
- setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
- setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
- setOperationAction(ISD::STRICT_LROUND, VT, Expand);
- setOperationAction(ISD::STRICT_LLROUND, VT, Expand);
- setOperationAction(ISD::STRICT_FROUND, VT, Expand);
- setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
- setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
- setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
- setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
- setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
- setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand);
- setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand);
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ setOperationAction(ISD::STRICT_##DAGN, VT, Expand);
+#include "llvm/IR/ConstrainedOps.def"
// For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
@@ -1332,8 +1310,11 @@ void TargetLoweringBase::computeRegisterProperties(
MVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
- NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
+ unsigned NumRegisters = getVectorTypeBreakdownMVT(VT, IntermediateVT,
NumIntermediates, RegisterVT, this);
+ NumRegistersForVT[i] = NumRegisters;
+ assert(NumRegistersForVT[i] == NumRegisters &&
+ "NumRegistersForVT size cannot represent NumRegisters!");
RegisterTypeForVT[i] = RegisterVT;
MVT NVT = VT.getPow2VectorType();
@@ -1456,6 +1437,28 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
return NumVectorRegs;
}
+bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI,
+ uint64_t NumCases,
+ uint64_t Range,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) const {
+ // FIXME: This function checks the maximum table size and density, but not
+ // the minimum size. It would be nice if the minimum size check were also
+ // folded into this function. Currently, the minimum size check is performed
+ // in findJumpTables() in SelectionDAGBuilder and in
+ // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
+ const bool OptForSize =
+ SI->getParent()->getParent()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI);
+ const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
+ const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
+
+ // Check whether the number of cases is small enough and
+ // the range is dense enough for a jump table.
+ return (OptForSize || Range <= MaxJumpTableSize) &&
+ (NumCases * 100 >= Range * MinDensity);
+}
+
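
(Aside: the check above is just an integer density test, kept in multiply form so no division is needed. A standalone model with assumed threshold constants rather than LLVM's:)

#include <cstdint>
#include <iostream>

// A table is worthwhile if at least MinDensity percent of the entries in
// [min-case, max-case] are real cases.
static bool suitableForJumpTable(uint64_t NumCases, uint64_t Range,
                                 bool OptForSize) {
  const unsigned MinDensity = OptForSize ? 40 : 10; // percent (assumed)
  const uint64_t MaxJumpTableSize = UINT64_MAX;     // unrestricted (assumed)
  return (OptForSize || Range <= MaxJumpTableSize) &&
         (NumCases * 100 >= Range * MinDensity);
}

int main() {
  std::cout << suitableForJumpTable(40, 100, false)  // 40% dense: 1
            << suitableForJumpTable(4, 100, false)   // 4% dense: 0
            << "\n";
}
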
/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
@@ -1641,6 +1644,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case ExtractValue: return ISD::MERGE_VALUES;
case InsertValue: return ISD::MERGE_VALUES;
case LandingPad: return 0;
+ case Freeze: return 0;
}
llvm_unreachable("Unknown instruction type encountered!");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 4978f4b9500b..8cb9814300d1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -273,7 +273,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
Streamer.SwitchSection(S);
- for (const auto &Operand : LinkerOptions->operands()) {
+ for (const auto *Operand : LinkerOptions->operands()) {
if (cast<MDNode>(Operand)->getNumOperands() != 2)
report_fatal_error("invalid llvm.linker.options");
for (const auto &Option : cast<MDNode>(Operand)->operands()) {
@@ -289,7 +289,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
Streamer.SwitchSection(S);
- for (const auto &Operand : DependentLibraries->operands()) {
+ for (const auto *Operand : DependentLibraries->operands()) {
Streamer.EmitBytes(
cast<MDString>(cast<MDNode>(Operand)->getOperand(0))->getString());
Streamer.EmitIntValue(0, 1);
@@ -885,7 +885,7 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
Module &M) const {
// Emit the linker options if present.
if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
- for (const auto &Option : LinkerOptions->operands()) {
+ for (const auto *Option : LinkerOptions->operands()) {
SmallVector<std::string, 4> StrOptions;
for (const auto &Piece : cast<MDNode>(Option)->operands())
StrOptions.push_back(cast<MDString>(Piece)->getString());
@@ -1449,7 +1449,7 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
// linker.
MCSection *Sec = getDrectveSection();
Streamer.SwitchSection(Sec);
- for (const auto &Option : LinkerOptions->operands()) {
+ for (const auto *Option : LinkerOptions->operands()) {
for (const auto &Piece : cast<MDNode>(Option)->operands()) {
// Lead with a space for consistency with our dllexport implementation.
std::string Directive(" ");
@@ -1849,18 +1849,66 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
SC, Kind, /* BeginSymbolName */ nullptr);
}
+ if (Kind.isMergeableCString()) {
+ if (!Kind.isMergeable1ByteCString())
+ report_fatal_error("Unhandled multi-byte mergeable string kind.");
+
+ unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GO));
+
+ unsigned EntrySize = getEntrySizeForKind(Kind);
+ std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
+ SmallString<128> Name;
+ Name = SizeSpec + utostr(Align);
+
+ return getContext().getXCOFFSection(
+ Name, XCOFF::XMC_RO, XCOFF::XTY_SD,
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO),
+ Kind, /* BeginSymbolName */ nullptr);
+ }
+
if (Kind.isText())
return TextSection;
- if (Kind.isData())
+ if (Kind.isData() || Kind.isReadOnlyWithRel())
+ // TODO: We may put this under option control, because a user may want to
+ // have read-only data with relocations placed into a read-only section by
+ // the compiler.
+ return DataSection;
+
+ // Zero-initialized data must be emitted to the .data section, because
+ // external-linkage control sections that get mapped to the .bss section
+ // will be linked as tentative definitions, which is only appropriate for
+ // SectionKind::Common.
+ if (Kind.isBSS())
return DataSection;
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
report_fatal_error("XCOFF other section types not yet implemented.");
}
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForJumpTable(
+ const Function &F, const TargetMachine &TM) const {
+ assert (!TM.getFunctionSections() && "Unique sections not supported on XCOFF"
+ " yet.");
+ assert (!F.getComdat() && "Comdat not supported on XCOFF.");
+ // TODO: Enable emitting jump tables to unique sections when we support it.
+ return ReadOnlySection;
+}
+
bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
bool UsesLabelDifference, const Function &F) const {
- report_fatal_error("TLOF XCOFF not yet implemented.");
+ return false;
+}
+
+/// Given a mergeable constant with the specified size and relocation
+/// information, return a section that it should be placed in.
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ unsigned &Align) const {
+ // TODO: Enable emitting constant pools to unique sections when we support it.
+ return ReadOnlySection;
}
void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx,
@@ -1891,6 +1939,7 @@ XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(
const GlobalObject *GO) {
switch (GO->getLinkage()) {
case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
return XCOFF::C_HIDEXT;
case GlobalValue::ExternalLinkage:
case GlobalValue::CommonLinkage:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 039748d817ca..d794a261ecb2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -28,20 +28,8 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
- // TODO: Remove support for old `fp elim` function attributes after fully
- // migrate to use "frame-pointer"
- if (!F.hasFnAttribute("frame-pointer")) {
- // Check to see if we should eliminate all frame pointers.
- if (F.getFnAttribute("no-frame-pointer-elim").getValueAsString() == "true")
- return true;
-
- // Check to see if we should eliminate non-leaf frame pointers.
- if (F.hasFnAttribute("no-frame-pointer-elim-non-leaf"))
- return MF.getFrameInfo().hasCalls();
-
+ if (!F.hasFnAttribute("frame-pointer"))
return false;
- }
-
StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString();
if (FP == "all")
return true;
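
(Aside: after this cleanup only the "frame-pointer" attribute is consulted. A sketch of the resulting policy; the "non-leaf" branch is an assumption based on the attribute's usual values, and the types are stand-ins, not the LLVM API:)

#include <string>

// Decide whether frame-pointer elimination must be disabled, given the
// function's "frame-pointer" attribute value (empty if absent).
static bool disableFPElim(const std::string &FP, bool HasCalls) {
  if (FP == "all")
    return true;     // keep the frame pointer everywhere
  if (FP == "non-leaf")
    return HasCalls; // keep it only in functions that make calls (assumed)
  return false;      // "none" or attribute absent: allow elimination
}
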
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
index f1f4f65adf7c..41cb511ad9b4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Pass.h"
@@ -38,8 +39,8 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Threading.h"
#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
@@ -178,10 +179,10 @@ static cl::opt<CFLAAType> UseCFLAA(
/// Option names for limiting the codegen pipeline.
/// Those are used in error reporting and we didn't want
/// to duplicate their names all over the place.
-static const char *StartAfterOptName = "start-after";
-static const char *StartBeforeOptName = "start-before";
-static const char *StopAfterOptName = "stop-after";
-static const char *StopBeforeOptName = "stop-before";
+static const char StartAfterOptName[] = "start-after";
+static const char StartBeforeOptName[] = "start-before";
+static const char StopAfterOptName[] = "stop-after";
+static const char StopBeforeOptName[] = "stop-before";
static cl::opt<std::string>
StartAfterOpt(StringRef(StartAfterOptName),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index 59eb2f9c88cb..63766df4d2be 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -54,6 +54,10 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const {
return getSchedModel().PostRAScheduler;
}
+bool TargetSubtargetInfo::enablePostRAMachineScheduler() const {
+ return enableMachineScheduler() && enablePostRAScheduler();
+}
+
bool TargetSubtargetInfo::useAA() const {
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index ea971809d4e4..2b1ffab74b6f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1287,7 +1287,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
// If the instruction is convertible to 3 Addr, instead
- // of returning try 3 Addr transformation aggresively and
+ // of returning try 3 Addr transformation aggressively and
// use this variable to check later. Because it might be better.
// For example, we can just use `leal (%rsi,%rdi), %eax` and `ret`
// instead of the following code.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
index 1c2c8aef55bb..4522484222f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
@@ -1,4 +1,4 @@
-//===----- ARMCodeGenPrepare.cpp ------------------------------------------===//
+//===----- TypePromotion.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,51 +7,48 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// This pass inserts intrinsics to handle small types that would otherwise be
-/// promoted during legalization. Here we can manually promote types or insert
-/// intrinsics which can handle narrow types that aren't supported by the
-/// register classes.
-//
+/// This is an opcode-based type promotion pass for small types that would
+/// otherwise be promoted during legalisation. It works around the limitations
+/// of SelectionDAG for cyclic regions. The search begins from the operands of
+/// icmp instructions, from which a tree of non-wrapping or safely wrapping
+/// instructions is built, checked, and promoted if possible.
+///
//===----------------------------------------------------------------------===//
-#include "ARM.h"
-#include "ARMSubtarget.h"
-#include "ARMTargetMachine.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#define DEBUG_TYPE "arm-codegenprepare"
+#define DEBUG_TYPE "type-promotion"
+#define PASS_NAME "Type Promotion"
using namespace llvm;
static cl::opt<bool>
-DisableCGP("arm-disable-cgp", cl::Hidden, cl::init(true),
- cl::desc("Disable ARM specific CodeGenPrepare pass"));
-
-static cl::opt<bool>
-EnableDSP("arm-enable-scalar-dsp", cl::Hidden, cl::init(false),
- cl::desc("Use DSP instructions for scalar operations"));
-
-static cl::opt<bool>
-EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false),
- cl::desc("Use DSP instructions for scalar operations\
- with immediate operands"));
+DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Disable type promotion pass"));
// The goal of this pass is to enable more efficient code generation for
// operations on narrow types (i.e. types with < 32-bits) and this is a
@@ -107,23 +104,18 @@ EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false),
namespace {
class IRPromoter {
+ LLVMContext &Ctx;
+ IntegerType *OrigTy = nullptr;
+ unsigned PromotedWidth = 0;
+ SetVector<Value*> &Visited;
+ SetVector<Value*> &Sources;
+ SetVector<Instruction*> &Sinks;
+ SmallVectorImpl<Instruction*> &SafeWrap;
+ IntegerType *ExtTy = nullptr;
SmallPtrSet<Value*, 8> NewInsts;
SmallPtrSet<Instruction*, 4> InstsToRemove;
DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap;
SmallPtrSet<Value*, 8> Promoted;
- Module *M = nullptr;
- LLVMContext &Ctx;
- // The type we promote to: always i32
- IntegerType *ExtTy = nullptr;
- // The type of the value that the search began from, either i8 or i16.
- // This defines the max range of the values that we allow in the promoted
- // tree.
- IntegerType *OrigTy = nullptr;
- SetVector<Value*> *Visited;
- SmallPtrSetImpl<Value*> *Sources;
- SmallPtrSetImpl<Instruction*> *Sinks;
- SmallPtrSetImpl<Instruction*> *SafeToPromote;
- SmallPtrSetImpl<Instruction*> *SafeWrap;
void ReplaceAllUsersOfWith(Value *From, Value *To);
void PrepareWrappingAdds(void);
@@ -134,46 +126,69 @@ class IRPromoter {
void Cleanup(void);
public:
- IRPromoter(Module *M) : M(M), Ctx(M->getContext()),
- ExtTy(Type::getInt32Ty(Ctx)) { }
-
+ IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width,
+ SetVector<Value*> &visited, SetVector<Value*> &sources,
+ SetVector<Instruction*> &sinks,
+ SmallVectorImpl<Instruction*> &wrap) :
+ Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
+ Sources(sources), Sinks(sinks), SafeWrap(wrap) {
+ ExtTy = IntegerType::get(Ctx, PromotedWidth);
+ assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits()
+ && "Original type not smaller than extended type");
+ }
- void Mutate(Type *OrigTy,
- SetVector<Value*> &Visited,
- SmallPtrSetImpl<Value*> &Sources,
- SmallPtrSetImpl<Instruction*> &Sinks,
- SmallPtrSetImpl<Instruction*> &SafeToPromote,
- SmallPtrSetImpl<Instruction*> &SafeWrap);
+ void Mutate();
};
-class ARMCodeGenPrepare : public FunctionPass {
- const ARMSubtarget *ST = nullptr;
- IRPromoter *Promoter = nullptr;
- std::set<Value*> AllVisited;
+class TypePromotion : public FunctionPass {
+ unsigned TypeSize = 0;
+ LLVMContext *Ctx = nullptr;
+ unsigned RegisterBitWidth = 0;
+ SmallPtrSet<Value*, 16> AllVisited;
SmallPtrSet<Instruction*, 8> SafeToPromote;
- SmallPtrSet<Instruction*, 4> SafeWrap;
-
+ SmallVector<Instruction*, 4> SafeWrap;
+
+ // Does V have a result type of the same size as TypeSize?
+ bool EqualTypeSize(Value *V);
+ // Does V have a result type of the same size as, or narrower than, TypeSize?
+ bool LessOrEqualTypeSize(Value *V);
+ // Does V have a result type that is wider than TypeSize?
+ bool GreaterThanTypeSize(Value *V);
+ // Does V have a result type that is narrower than TypeSize?
+ bool LessThanTypeSize(Value *V);
+ // Should V be a leaf in the promote tree?
+ bool isSource(Value *V);
+ // Should V be a root in the promotion tree?
+ bool isSink(Value *V);
+ // Should we change the result type of V? It will result in the users of V
+ // being visited.
+ bool shouldPromote(Value *V);
+ // Is I an add or a sub, which isn't marked as nuw, but where a wrapping
+ // result won't affect the computation?
bool isSafeWrap(Instruction *I);
+ // Can V have its integer type promoted, or can the type be ignored?
+ bool isSupportedType(Value *V);
+ // Is V an instruction with a supported opcode, or another value that we can
+ // handle, such as a constant or a basic block?
bool isSupportedValue(Value *V);
+ // Is V an instruction whose result can be trivially promoted, or which has
+ // safe wrapping?
bool isLegalToPromote(Value *V);
- bool TryToPromote(Value *V);
+ bool TryToPromote(Value *V, unsigned PromotedWidth);
public:
static char ID;
- static unsigned TypeSize;
- Type *OrigTy = nullptr;
- ARMCodeGenPrepare() : FunctionPass(ID) {}
+ TypePromotion() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
}
- StringRef getPassName() const override { return "ARM IR optimizations"; }
+ StringRef getPassName() const override { return PASS_NAME; }
- bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
- bool doFinalization(Module &M) override;
};
}
@@ -187,41 +202,20 @@ static bool GenerateSignBits(Value *V) {
Opc == Instruction::SRem || Opc == Instruction::SExt;
}
-static bool EqualTypeSize(Value *V) {
- return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
+bool TypePromotion::EqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() == TypeSize;
}
-static bool LessOrEqualTypeSize(Value *V) {
- return V->getType()->getScalarSizeInBits() <= ARMCodeGenPrepare::TypeSize;
+bool TypePromotion::LessOrEqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() <= TypeSize;
}
-static bool GreaterThanTypeSize(Value *V) {
- return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize;
+bool TypePromotion::GreaterThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() > TypeSize;
}
-static bool LessThanTypeSize(Value *V) {
- return V->getType()->getScalarSizeInBits() < ARMCodeGenPrepare::TypeSize;
-}
-
-/// Some instructions can use 8- and 16-bit operands, and we don't need to
-/// promote anything larger. We disallow booleans to make life easier when
-/// dealing with icmps but allow any other integer that is <= 16 bits. Void
-/// types are accepted so we can handle switches.
-static bool isSupportedType(Value *V) {
- Type *Ty = V->getType();
-
- // Allow voids and pointers, these won't be promoted.
- if (Ty->isVoidTy() || Ty->isPointerTy())
- return true;
-
- if (auto *Ld = dyn_cast<LoadInst>(V))
- Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
-
- if (!isa<IntegerType>(Ty) ||
- cast<IntegerType>(V->getType())->getBitWidth() == 1)
- return false;
-
- return LessOrEqualTypeSize(V);
+bool TypePromotion::LessThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() < TypeSize;
}
/// Return true if the given value is a source in the use-def chain, producing
@@ -231,7 +225,7 @@ static bool isSupportedType(Value *V) {
/// return values because we only accept ones that guarantee a zeroext ret val.
/// Many arguments will have the zeroext attribute too, so those would be free
/// too.
-static bool isSource(Value *V) {
+bool TypePromotion::isSource(Value *V) {
if (!isa<IntegerType>(V->getType()))
return false;
@@ -252,7 +246,7 @@ static bool isSource(Value *V) {
/// Return true if V will require any promoted values to be truncated for
/// the IR to remain valid. We can't mutate the value type of these
/// instructions.
-static bool isSink(Value *V) {
+bool TypePromotion::isSink(Value *V) {
// TODO The truncate also isn't actually necessary because we would already
// have proved that the data value is kept within the range of the original data
// type.
@@ -278,7 +272,7 @@ static bool isSink(Value *V) {
}
/// Return whether this instruction can safely wrap.
-bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) {
+bool TypePromotion::isSafeWrap(Instruction *I) {
// We can support a, potentially, wrapping instruction (I) if:
// - It is only used by an unsigned icmp.
// - The icmp uses a constant.
@@ -374,7 +368,7 @@ bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) {
Total += OverflowConst->getValue().getBitWidth() < 32 ?
OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();
- APInt Max = APInt::getAllOnesValue(ARMCodeGenPrepare::TypeSize);
+ APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize);
if (Total.getBitWidth() > Max.getBitWidth()) {
if (Total.ugt(Max.zext(Total.getBitWidth())))
@@ -385,12 +379,13 @@ bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) {
} else if (Total.ugt(Max))
return false;
- LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
- SafeWrap.insert(I);
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for "
+ << *I << "\n");
+ SafeWrap.push_back(I);
return true;
}
-static bool shouldPromote(Value *V) {
+bool TypePromotion::shouldPromote(Value *V) {
if (!isa<IntegerType>(V->getType()) || isSink(V))
return false;
@@ -422,32 +417,12 @@ static bool isPromotedResultSafe(Value *V) {
return cast<Instruction>(V)->hasNoUnsignedWrap();
}
-/// Return the intrinsic for the instruction that can perform the same
-/// operation but on a narrow type. This is using the parallel dsp intrinsics
-/// on scalar values.
-static Intrinsic::ID getNarrowIntrinsic(Instruction *I) {
- // Whether we use the signed or unsigned versions of these intrinsics
- // doesn't matter because we're not using the GE bits that they set in
- // the APSR.
- switch(I->getOpcode()) {
- default:
- break;
- case Instruction::Add:
- return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_uadd16 :
- Intrinsic::arm_uadd8;
- case Instruction::Sub:
- return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_usub16 :
- Intrinsic::arm_usub8;
- }
- llvm_unreachable("unhandled opcode for narrow intrinsic");
-}
-
void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
SmallVector<Instruction*, 4> Users;
Instruction *InstTo = dyn_cast<Instruction>(To);
bool ReplacedAll = true;
- LLVM_DEBUG(dbgs() << "ARM CGP: Replacing " << *From << " with " << *To
+ LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To
<< "\n");
for (Use &U : From->uses()) {
@@ -468,18 +443,18 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
}
void IRPromoter::PrepareWrappingAdds() {
- LLVM_DEBUG(dbgs() << "ARM CGP: Prepare underflowing adds.\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n");
IRBuilder<> Builder{Ctx};
// For adds that safely wrap and use a negative immediate as operand 1, we
// create an equivalent instruction using a positive immediate.
// That positive immediate can then be zext along with all the other
// immediates later.
- for (auto *I : *SafeWrap) {
+ for (auto *I : SafeWrap) {
if (I->getOpcode() != Instruction::Add)
continue;
- LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n");
assert((isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
"Wrapping should have a negative immediate as the second operand");
@@ -494,10 +469,10 @@ void IRPromoter::PrepareWrappingAdds() {
}
InstsToRemove.insert(I);
I->replaceAllUsesWith(NewVal);
- LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n");
}
for (auto *I : NewInsts)
- Visited->insert(I);
+ Visited.insert(I);
}
void IRPromoter::ExtendSources() {
@@ -505,7 +480,7 @@ void IRPromoter::ExtendSources() {
auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
assert(V->getType() != ExtTy && "zext already extends to i32");
- LLVM_DEBUG(dbgs() << "ARM CGP: Inserting ZExt for " << *V << "\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n");
Builder.SetInsertPoint(InsertPt);
if (auto *I = dyn_cast<Instruction>(V))
Builder.SetCurrentDebugLocation(I->getDebugLoc());
@@ -523,8 +498,8 @@ void IRPromoter::ExtendSources() {
};
// Now, insert extending instructions between the sources and their users.
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n");
- for (auto V : *Sources) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n");
+ for (auto V : Sources) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n");
if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I);
@@ -539,18 +514,18 @@ void IRPromoter::ExtendSources() {
}
void IRPromoter::PromoteTree() {
- LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
IRBuilder<> Builder{Ctx};
// Mutate the types of the instructions within the tree. Here we handle
// constant operands.
- for (auto *V : *Visited) {
- if (Sources->count(V))
+ for (auto *V : Visited) {
+ if (Sources.count(V))
continue;
auto *I = cast<Instruction>(V);
- if (Sinks->count(I))
+ if (Sinks.count(I))
continue;
for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
@@ -565,43 +540,16 @@ void IRPromoter::PromoteTree() {
I->setOperand(i, UndefValue::get(ExtTy));
}
- if (shouldPromote(I)) {
+ // Mutate the result type, unless this is an icmp.
+ if (!isa<ICmpInst>(I)) {
I->mutateType(ExtTy);
Promoted.insert(I);
}
}
-
- // Finally, any instructions that should be promoted but haven't yet been,
- // need to be handled using intrinsics.
- for (auto *V : *Visited) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
- continue;
-
- if (Sources->count(I) || Sinks->count(I))
- continue;
-
- if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I))
- continue;
-
- assert(EnableDSP && "DSP intrinisc insertion not enabled!");
-
- // Replace unsafe instructions with appropriate intrinsic calls.
- LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
- << *I << "\n");
- Function *DSPInst =
- Intrinsic::getDeclaration(M, getNarrowIntrinsic(I));
- Builder.SetInsertPoint(I);
- Builder.SetCurrentDebugLocation(I->getDebugLoc());
- Value *Args[] = { I->getOperand(0), I->getOperand(1) };
- CallInst *Call = Builder.CreateCall(DSPInst, Args);
- NewInsts.insert(Call);
- ReplaceAllUsersOfWith(I, Call);
- }
}
void IRPromoter::TruncateSinks() {
- LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n");
IRBuilder<> Builder{Ctx};
@@ -609,10 +557,10 @@ void IRPromoter::TruncateSinks() {
if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
return nullptr;
- if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources->count(V))
+ if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources.count(V))
return nullptr;
- LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for "
+ LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for "
<< *V << "\n");
Builder.SetInsertPoint(cast<Instruction>(V));
auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
@@ -623,8 +571,8 @@ void IRPromoter::TruncateSinks() {
// Fix up any stores or returns that use the results of the promoted
// chain.
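// For example, "store i8 %val, i8* %p", where %val now carries the
// promoted type, gets "%t = trunc i32 %val to i8" inserted in front of it
// and is rewritten to store %t instead.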
- for (auto I : *Sinks) {
- LLVM_DEBUG(dbgs() << "ARM CGP: For Sink: " << *I << "\n");
+ for (auto I : Sinks) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n");
// Handle calls separately as we need to iterate over arg operands.
if (auto *Call = dyn_cast<CallInst>(I)) {
@@ -661,10 +609,10 @@ void IRPromoter::TruncateSinks() {
}
void IRPromoter::Cleanup() {
- LLVM_DEBUG(dbgs() << "ARM CGP: Cleanup..\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
// Some zexts will now have become redundant, along with their trunc
// operands, so remove them
- for (auto V : *Visited) {
+ for (auto V : Visited) {
if (!isa<ZExtInst>(V))
continue;
@@ -674,7 +622,7 @@ void IRPromoter::Cleanup() {
Value *Src = ZExt->getOperand(0);
if (ZExt->getSrcTy() == ZExt->getDestTy()) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast: " << *ZExt
+ LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
<< "\n");
ReplaceAllUsersOfWith(ZExt, Src);
continue;
@@ -693,25 +641,18 @@ void IRPromoter::Cleanup() {
}
for (auto *I : InstsToRemove) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n");
I->dropAllReferences();
I->eraseFromParent();
}
-
- InstsToRemove.clear();
- NewInsts.clear();
- TruncTysMap.clear();
- Promoted.clear();
- SafeToPromote->clear();
- SafeWrap->clear();
}
void IRPromoter::ConvertTruncs() {
- LLVM_DEBUG(dbgs() << "ARM CGP: Converting truncs..\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n");
IRBuilder<> Builder{Ctx};
- for (auto *V : *Visited) {
- if (!isa<TruncInst>(V) || Sources->count(V))
+ for (auto *V : Visited) {
+ if (!isa<TruncInst>(V) || Sources.count(V))
continue;
auto *Trunc = cast<TruncInst>(V);
@@ -731,25 +672,9 @@ void IRPromoter::ConvertTruncs() {
}
}
-void IRPromoter::Mutate(Type *OrigTy,
- SetVector<Value*> &Visited,
- SmallPtrSetImpl<Value*> &Sources,
- SmallPtrSetImpl<Instruction*> &Sinks,
- SmallPtrSetImpl<Instruction*> &SafeToPromote,
- SmallPtrSetImpl<Instruction*> &SafeWrap) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
- << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
-
- assert(isa<IntegerType>(OrigTy) && "expected integer type");
- this->OrigTy = cast<IntegerType>(OrigTy);
- assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() &&
- "original type not smaller than extended type");
-
- this->Visited = &Visited;
- this->Sources = &Sources;
- this->Sinks = &Sinks;
- this->SafeToPromote = &SafeToPromote;
- this->SafeWrap = &SafeWrap;
+void IRPromoter::Mutate() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains from "
+ << OrigTy->getBitWidth() << " to " << PromotedWidth << "-bits\n");
// Cache original types of the values that will likely need truncating
for (auto *I : Sinks) {
@@ -779,9 +704,7 @@ void IRPromoter::Mutate(Type *OrigTy,
// Insert zext instructions between sources and their users.
ExtendSources();
- // Promote visited instructions, mutating their types in place. Also insert
- // DSP intrinsics, if enabled, for adds and subs which would be unsafe to
- // promote.
+ // Promote visited instructions, mutating their types in place.
PromoteTree();
// Convert any truncs that aren't sources into AND masks.
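// For example, "%n = trunc i32 %x to i8" can be performed in the wider
// type as "%n = and i32 %x, 255".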
@@ -794,14 +717,32 @@ void IRPromoter::Mutate(Type *OrigTy,
// clear the data structures.
Cleanup();
- LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n");
+}
+
+/// We disallow booleans to make life easier when dealing with icmps but allow
+/// any other integer that fits in a scalar register. Void types are accepted
+/// so we can handle switches.
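+/// For example, with a 32-bit register width, i8 and i16 values are
+/// candidates for promotion, i1 values are rejected outright, and integers
+/// wider than 32 bits are unsupported.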
+bool TypePromotion::isSupportedType(Value *V) {
+ Type *Ty = V->getType();
+
+ // Allow voids and pointers, these won't be promoted.
+ if (Ty->isVoidTy() || Ty->isPointerTy())
+ return true;
+
+ if (!isa<IntegerType>(Ty) ||
+ cast<IntegerType>(Ty)->getBitWidth() == 1 ||
+ cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth)
+ return false;
+
+ return LessOrEqualTypeSize(V);
}
/// We accept most instructions, as well as Arguments and ConstantInts. We
/// disallow casts other than zexts and truncs and only allow calls if their
/// return value is zeroext. We don't allow opcodes that can introduce sign
/// bits.
-bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
+bool TypePromotion::isSupportedValue(Value *V) {
if (auto *I = dyn_cast<Instruction>(V)) {
switch (I->getOpcode()) {
default:
@@ -849,7 +790,7 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
/// Check that the type of V would be promoted and that the original type is
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
-bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
+bool TypePromotion::isLegalToPromote(Value *V) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
@@ -862,51 +803,24 @@ bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
SafeToPromote.insert(I);
return true;
}
-
- if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
- return false;
-
- // If promotion is not safe, can we use a DSP instruction to natively
- // handle the narrow type?
- if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
- return false;
-
- if (ST->isThumb() && !ST->hasThumb2())
- return false;
-
- // TODO
- // Would it be profitable? For Thumb code, these parallel DSP instructions
- // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
- // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
- // halved. They also do not take immediates as operands.
- for (auto &Op : I->operands()) {
- if (isa<Constant>(Op)) {
- if (!EnableDSPWithImms)
- return false;
- }
- }
- LLVM_DEBUG(dbgs() << "ARM CGP: Will use an intrinsic for: " << *I << "\n");
- return true;
+ return false;
}
-bool ARMCodeGenPrepare::TryToPromote(Value *V) {
- OrigTy = V->getType();
+bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
+ Type *OrigTy = V->getType();
TypeSize = OrigTy->getPrimitiveSizeInBits();
- if (TypeSize > 16 || TypeSize < 8)
- return false;
-
SafeToPromote.clear();
SafeWrap.clear();
if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
return false;
- LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = "
- << TypeSize << "\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
+ << TypeSize << " bits to " << PromotedWidth << "\n");
SetVector<Value*> WorkList;
- SmallPtrSet<Value*, 8> Sources;
- SmallPtrSet<Instruction*, 4> Sinks;
+ SetVector<Value*> Sources;
+ SetVector<Instruction*> Sinks;
SetVector<Value*> CurrentVisited;
WorkList.insert(V);
@@ -923,7 +837,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
return true;
if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n");
return false;
}
@@ -979,64 +893,100 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
}
}
- LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
+ LLVM_DEBUG(dbgs() << "IR Promotion: Visited nodes:\n";
for (auto *I : CurrentVisited)
I->dump();
);
+
unsigned ToPromote = 0;
+ unsigned NonFreeArgs = 0;
+ SmallPtrSet<BasicBlock*, 4> Blocks;
for (auto *V : CurrentVisited) {
- if (Sources.count(V))
+ if (auto *I = dyn_cast<Instruction>(V))
+ Blocks.insert(I->getParent());
+
+ if (Sources.count(V)) {
+ if (auto *Arg = dyn_cast<Argument>(V))
+ if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr())
+ ++NonFreeArgs;
continue;
+ }
+
if (Sinks.count(cast<Instruction>(V)))
continue;
- ++ToPromote;
- }
+ ++ToPromote;
+ }
+
+ // DAG optimisations should be able to handle these cases better, especially
+ // for function arguments.
+ if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size())))
+ return false;
if (ToPromote < 2)
return false;
- Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote,
- SafeWrap);
+ IRPromoter Promoter(*Ctx, cast<IntegerType>(OrigTy), PromotedWidth,
+ CurrentVisited, Sources, Sinks, SafeWrap);
+ Promoter.Mutate();
return true;
}
-bool ARMCodeGenPrepare::doInitialization(Module &M) {
- Promoter = new IRPromoter(&M);
- return false;
-}
-
-bool ARMCodeGenPrepare::runOnFunction(Function &F) {
- if (skipFunction(F) || DisableCGP)
+bool TypePromotion::runOnFunction(Function &F) {
+ if (skipFunction(F) || DisablePromotion)
return false;
- auto *TPC = &getAnalysis<TargetPassConfig>();
+ LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n");
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
return false;
- const TargetMachine &TM = TPC->getTM<TargetMachine>();
- ST = &TM.getSubtarget<ARMSubtarget>(F);
bool MadeChange = false;
- LLVM_DEBUG(dbgs() << "ARM CGP: Running on " << F.getName() << "\n");
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const TargetMachine &TM = TPC->getTM<TargetMachine>();
+ const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F);
+ const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
+ const TargetTransformInfo &TII =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ RegisterBitWidth = TII.getRegisterBitWidth(false);
+ Ctx = &F.getParent()->getContext();
// Search up from icmps to try to promote their operands.
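// On a typical 32-bit target, for instance, an i16 operand is not a legal
// type: getTypeAction reports TypePromoteInteger, getTypeToTransformTo
// yields i32, and TryToPromote is called with a promoted width of 32.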
for (BasicBlock &BB : F) {
- auto &Insts = BB.getInstList();
- for (auto &I : Insts) {
+ for (auto &I : BB) {
if (AllVisited.count(&I))
continue;
- if (isa<ICmpInst>(I)) {
- auto &CI = cast<ICmpInst>(I);
+ if (!isa<ICmpInst>(&I))
+ continue;
+
+ auto *ICmp = cast<ICmpInst>(&I);
+ // Skip signed or pointer compares
+ if (ICmp->isSigned() ||
+ !isa<IntegerType>(ICmp->getOperand(0)->getType()))
+ continue;
- // Skip signed or pointer compares
- if (CI.isSigned() || !isa<IntegerType>(CI.getOperand(0)->getType()))
- continue;
+ LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");
- LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
+ for (auto &Op : ICmp->operands()) {
+ if (auto *I = dyn_cast<Instruction>(Op)) {
+ EVT SrcVT = TLI->getValueType(DL, I->getType());
+ if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
+ break;
- for (auto &Op : CI.operands()) {
- if (auto *I = dyn_cast<Instruction>(Op))
- MadeChange |= TryToPromote(I);
+ if (TLI->getTypeAction(ICmp->getContext(), SrcVT) !=
+ TargetLowering::TypePromoteInteger)
+ break;
+
+ EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT);
+ if (RegisterBitWidth < PromotedVT.getSizeInBits()) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
+ << "for promoted type\n");
+ break;
+ }
+
+ MadeChange |= TryToPromote(I, PromotedVT.getSizeInBits());
+ break;
}
}
}
@@ -1046,24 +996,16 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) {
});
}
if (MadeChange)
- LLVM_DEBUG(dbgs() << "After ARMCodeGenPrepare: " << F << "\n");
+ LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n");
return MadeChange;
}
-bool ARMCodeGenPrepare::doFinalization(Module &M) {
- delete Promoter;
- return false;
-}
-
-INITIALIZE_PASS_BEGIN(ARMCodeGenPrepare, DEBUG_TYPE,
- "ARM IR optimizations", false, false)
-INITIALIZE_PASS_END(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations",
- false, false)
+INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
-char ARMCodeGenPrepare::ID = 0;
-unsigned ARMCodeGenPrepare::TypeSize = 0;
+char TypePromotion::ID = 0;
-FunctionPass *llvm::createARMCodeGenPreparePass() {
- return new ARMCodeGenPrepare();
+FunctionPass *llvm::createTypePromotionPass() {
+ return new TypePromotion();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 3289eff71336..b770e1d94488 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
index 73b862d51c0f..41cbdf035558 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
@@ -11,6 +11,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TypeSize.h"
using namespace llvm;
EVT EVT::changeExtendedTypeToInteger() const {
@@ -101,12 +102,12 @@ unsigned EVT::getExtendedVectorNumElements() const {
return cast<VectorType>(LLVMTy)->getNumElements();
}
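+// Returning TypeSize rather than a plain unsigned lets extended types
+// report scalable sizes: a fixed type such as i64 yields TypeSize::Fixed(64),
+// while a scalable vector's bit width is known only up to a runtime
+// multiple of vscale.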
-unsigned EVT::getExtendedSizeInBits() const {
+TypeSize EVT::getExtendedSizeInBits() const {
assert(isExtended() && "Type is not extended!");
if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
- return ITy->getBitWidth();
+ return TypeSize::Fixed(ITy->getBitWidth());
if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
- return VTy->getBitWidth();
+ return VTy->getPrimitiveSizeInBits();
llvm_unreachable("Unrecognized extended type!");
}
@@ -119,139 +120,14 @@ std::string EVT::getEVTString() const {
+ getVectorElementType().getEVTString();
if (isInteger())
return "i" + utostr(getSizeInBits());
+ if (isFloatingPoint())
+ return "f" + utostr(getSizeInBits());
llvm_unreachable("Invalid EVT!");
- case MVT::i1: return "i1";
- case MVT::i8: return "i8";
- case MVT::i16: return "i16";
- case MVT::i32: return "i32";
- case MVT::i64: return "i64";
- case MVT::i128: return "i128";
- case MVT::f16: return "f16";
- case MVT::f32: return "f32";
- case MVT::f64: return "f64";
- case MVT::f80: return "f80";
- case MVT::f128: return "f128";
case MVT::ppcf128: return "ppcf128";
case MVT::isVoid: return "isVoid";
case MVT::Other: return "ch";
case MVT::Glue: return "glue";
case MVT::x86mmx: return "x86mmx";
- case MVT::v1i1: return "v1i1";
- case MVT::v2i1: return "v2i1";
- case MVT::v4i1: return "v4i1";
- case MVT::v8i1: return "v8i1";
- case MVT::v16i1: return "v16i1";
- case MVT::v32i1: return "v32i1";
- case MVT::v64i1: return "v64i1";
- case MVT::v128i1: return "v128i1";
- case MVT::v256i1: return "v256i1";
- case MVT::v512i1: return "v512i1";
- case MVT::v1024i1: return "v1024i1";
- case MVT::v1i8: return "v1i8";
- case MVT::v2i8: return "v2i8";
- case MVT::v4i8: return "v4i8";
- case MVT::v8i8: return "v8i8";
- case MVT::v16i8: return "v16i8";
- case MVT::v32i8: return "v32i8";
- case MVT::v64i8: return "v64i8";
- case MVT::v128i8: return "v128i8";
- case MVT::v256i8: return "v256i8";
- case MVT::v1i16: return "v1i16";
- case MVT::v2i16: return "v2i16";
- case MVT::v3i16: return "v3i16";
- case MVT::v4i16: return "v4i16";
- case MVT::v8i16: return "v8i16";
- case MVT::v16i16: return "v16i16";
- case MVT::v32i16: return "v32i16";
- case MVT::v64i16: return "v64i16";
- case MVT::v128i16: return "v128i16";
- case MVT::v1i32: return "v1i32";
- case MVT::v2i32: return "v2i32";
- case MVT::v3i32: return "v3i32";
- case MVT::v4i32: return "v4i32";
- case MVT::v5i32: return "v5i32";
- case MVT::v8i32: return "v8i32";
- case MVT::v16i32: return "v16i32";
- case MVT::v32i32: return "v32i32";
- case MVT::v64i32: return "v64i32";
- case MVT::v128i32: return "v128i32";
- case MVT::v256i32: return "v256i32";
- case MVT::v512i32: return "v512i32";
- case MVT::v1024i32:return "v1024i32";
- case MVT::v2048i32:return "v2048i32";
- case MVT::v1i64: return "v1i64";
- case MVT::v2i64: return "v2i64";
- case MVT::v4i64: return "v4i64";
- case MVT::v8i64: return "v8i64";
- case MVT::v16i64: return "v16i64";
- case MVT::v32i64: return "v32i64";
- case MVT::v1i128: return "v1i128";
- case MVT::v1f32: return "v1f32";
- case MVT::v2f32: return "v2f32";
- case MVT::v2f16: return "v2f16";
- case MVT::v3f16: return "v3f16";
- case MVT::v4f16: return "v4f16";
- case MVT::v8f16: return "v8f16";
- case MVT::v16f16: return "v16f16";
- case MVT::v32f16: return "v32f16";
- case MVT::v3f32: return "v3f32";
- case MVT::v4f32: return "v4f32";
- case MVT::v5f32: return "v5f32";
- case MVT::v8f32: return "v8f32";
- case MVT::v16f32: return "v16f32";
- case MVT::v32f32: return "v32f32";
- case MVT::v64f32: return "v64f32";
- case MVT::v128f32: return "v128f32";
- case MVT::v256f32: return "v256f32";
- case MVT::v512f32: return "v512f32";
- case MVT::v1024f32:return "v1024f32";
- case MVT::v2048f32:return "v2048f32";
- case MVT::v1f64: return "v1f64";
- case MVT::v2f64: return "v2f64";
- case MVT::v4f64: return "v4f64";
- case MVT::v8f64: return "v8f64";
- case MVT::nxv1i1: return "nxv1i1";
- case MVT::nxv2i1: return "nxv2i1";
- case MVT::nxv4i1: return "nxv4i1";
- case MVT::nxv8i1: return "nxv8i1";
- case MVT::nxv16i1: return "nxv16i1";
- case MVT::nxv32i1: return "nxv32i1";
- case MVT::nxv1i8: return "nxv1i8";
- case MVT::nxv2i8: return "nxv2i8";
- case MVT::nxv4i8: return "nxv4i8";
- case MVT::nxv8i8: return "nxv8i8";
- case MVT::nxv16i8: return "nxv16i8";
- case MVT::nxv32i8: return "nxv32i8";
- case MVT::nxv1i16: return "nxv1i16";
- case MVT::nxv2i16: return "nxv2i16";
- case MVT::nxv4i16: return "nxv4i16";
- case MVT::nxv8i16: return "nxv8i16";
- case MVT::nxv16i16:return "nxv16i16";
- case MVT::nxv32i16:return "nxv32i16";
- case MVT::nxv1i32: return "nxv1i32";
- case MVT::nxv2i32: return "nxv2i32";
- case MVT::nxv4i32: return "nxv4i32";
- case MVT::nxv8i32: return "nxv8i32";
- case MVT::nxv16i32:return "nxv16i32";
- case MVT::nxv32i32:return "nxv32i32";
- case MVT::nxv1i64: return "nxv1i64";
- case MVT::nxv2i64: return "nxv2i64";
- case MVT::nxv4i64: return "nxv4i64";
- case MVT::nxv8i64: return "nxv8i64";
- case MVT::nxv16i64:return "nxv16i64";
- case MVT::nxv32i64:return "nxv32i64";
- case MVT::nxv2f16: return "nxv2f16";
- case MVT::nxv4f16: return "nxv4f16";
- case MVT::nxv8f16: return "nxv8f16";
- case MVT::nxv1f32: return "nxv1f32";
- case MVT::nxv2f32: return "nxv2f32";
- case MVT::nxv4f32: return "nxv4f32";
- case MVT::nxv8f32: return "nxv8f32";
- case MVT::nxv16f32:return "nxv16f32";
- case MVT::nxv1f64: return "nxv1f64";
- case MVT::nxv2f64: return "nxv2f64";
- case MVT::nxv4f64: return "nxv4f64";
- case MVT::nxv8f64: return "nxv8f64";
case MVT::Metadata:return "Metadata";
case MVT::Untyped: return "Untyped";
case MVT::exnref : return "exnref";
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 865a1cfbf43a..1582f12ad580 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -87,6 +87,8 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
index cdf79374e974..87958a738c67 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -20,17 +20,19 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 119c3fd1ec7f..4847a0c3e842 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp
new file mode 100644
index 000000000000..65b2a1bdf1fc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -0,0 +1,17 @@
+//=== DWARFLinker.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DWARFLinker/DWARFLinker.h"
+
+namespace llvm {
+
+AddressesMap::~AddressesMap() {}
+
+DwarfEmitter::~DwarfEmitter() {}
+
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
new file mode 100644
index 000000000000..e4de01676dca
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
@@ -0,0 +1,144 @@
+//===- DWARFLinkerCompileUnit.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
+#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h"
+
+namespace llvm {
+
+/// Check if the DIE at \p Idx is in the scope of a function.
+static bool inFunctionScope(CompileUnit &U, unsigned Idx) {
+ while (Idx) {
+ if (U.getOrigUnit().getDIEAtIndex(Idx).getTag() == dwarf::DW_TAG_subprogram)
+ return true;
+ Idx = U.getInfo(Idx).ParentIdx;
+ }
+ return false;
+}
+
+uint16_t CompileUnit::getLanguage() {
+ if (!Language) {
+ DWARFDie CU = getOrigUnit().getUnitDIE();
+ Language = dwarf::toUnsigned(CU.find(dwarf::DW_AT_language), 0);
+ }
+ return Language;
+}
+
+void CompileUnit::markEverythingAsKept() {
+ unsigned Idx = 0;
+
+ setHasInterestingContent();
+
+ for (auto &I : Info) {
+ // Mark everything that wasn't explicitly marked for pruning.
+ I.Keep = !I.Prune;
+ auto DIE = OrigUnit.getDIEAtIndex(Idx++);
+
+ // Try to guess which DIEs must go to the accelerator tables. We do that
+ // just for variables, because functions will be handled depending on
+ // whether they carry a DW_AT_low_pc attribute or not.
+ if (DIE.getTag() != dwarf::DW_TAG_variable &&
+ DIE.getTag() != dwarf::DW_TAG_constant)
+ continue;
+
+ Optional<DWARFFormValue> Value;
+ if (!(Value = DIE.find(dwarf::DW_AT_location))) {
+ if ((Value = DIE.find(dwarf::DW_AT_const_value)) &&
+ !inFunctionScope(*this, I.ParentIdx))
+ I.InDebugMap = true;
+ continue;
+ }
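+ // A location block starting with DW_OP_addr, e.g. "DW_OP_addr 0x1000",
+ // embeds a static address and so may need fixing up via the debug map.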
+ if (auto Block = Value->getAsBlock()) {
+ if (Block->size() > OrigUnit.getAddressByteSize() &&
+ (*Block)[0] == dwarf::DW_OP_addr)
+ I.InDebugMap = true;
+ }
+ }
+}
+
+uint64_t CompileUnit::computeNextUnitOffset() {
+ NextUnitOffset = StartOffset;
+ if (NewUnit) {
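+ // For 32-bit DWARF v4 and earlier the unit header is a 4-byte
+ // unit_length, a 2-byte version, a 4-byte debug_abbrev_offset and a
+ // 1-byte address_size, for 11 bytes in total.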
+ NextUnitOffset += 11 /* Header size */;
+ NextUnitOffset += NewUnit->getUnitDie().getSize();
+ }
+ return NextUnitOffset;
+}
+
+/// Keep track of a forward cross-cu reference from this unit
+/// to \p Die that lives in \p RefUnit.
+void CompileUnit::noteForwardReference(DIE *Die, const CompileUnit *RefUnit,
+ DeclContext *Ctxt, PatchLocation Attr) {
+ ForwardDIEReferences.emplace_back(Die, RefUnit, Ctxt, Attr);
+}
+
+void CompileUnit::fixupForwardReferences() {
+ for (const auto &Ref : ForwardDIEReferences) {
+ DIE *RefDie;
+ const CompileUnit *RefUnit;
+ PatchLocation Attr;
+ DeclContext *Ctxt;
+ std::tie(RefDie, RefUnit, Ctxt, Attr) = Ref;
+ if (Ctxt && Ctxt->getCanonicalDIEOffset())
+ Attr.set(Ctxt->getCanonicalDIEOffset());
+ else
+ Attr.set(RefDie->getOffset() + RefUnit->getStartOffset());
+ }
+}
+
+void CompileUnit::addLabelLowPc(uint64_t LabelLowPc, int64_t PcOffset) {
+ Labels.insert({LabelLowPc, PcOffset});
+}
+
+void CompileUnit::addFunctionRange(uint64_t FuncLowPc, uint64_t FuncHighPc,
+ int64_t PcOffset) {
+ // Don't add empty ranges to the interval map. They are a problem because
+ // the interval map expects half open intervals. This is safe because they
+ // are empty anyway.
+ if (FuncHighPc != FuncLowPc)
+ Ranges.insert(FuncLowPc, FuncHighPc, PcOffset);
+ this->LowPc = std::min(LowPc, FuncLowPc + PcOffset);
+ this->HighPc = std::max(HighPc, FuncHighPc + PcOffset);
+}
+
+void CompileUnit::noteRangeAttribute(const DIE &Die, PatchLocation Attr) {
+ if (Die.getTag() != dwarf::DW_TAG_compile_unit)
+ RangeAttributes.push_back(Attr);
+ else
+ UnitRangeAttribute = Attr;
+}
+
+void CompileUnit::noteLocationAttribute(PatchLocation Attr, int64_t PcOffset) {
+ LocationAttributes.emplace_back(Attr, PcOffset);
+}
+
+void CompileUnit::addNamespaceAccelerator(const DIE *Die,
+ DwarfStringPoolEntryRef Name) {
+ Namespaces.emplace_back(Name, Die);
+}
+
+void CompileUnit::addObjCAccelerator(const DIE *Die,
+ DwarfStringPoolEntryRef Name,
+ bool SkipPubSection) {
+ ObjC.emplace_back(Name, Die, SkipPubSection);
+}
+
+void CompileUnit::addNameAccelerator(const DIE *Die,
+ DwarfStringPoolEntryRef Name,
+ bool SkipPubSection) {
+ Pubnames.emplace_back(Name, Die, SkipPubSection);
+}
+
+void CompileUnit::addTypeAccelerator(const DIE *Die,
+ DwarfStringPoolEntryRef Name,
+ bool ObjcClassImplementation,
+ uint32_t QualifiedNameHash) {
+ Pubtypes.emplace_back(Name, Die, QualifiedNameHash, ObjcClassImplementation);
+}
+
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp
new file mode 100644
index 000000000000..077fd4494241
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp
@@ -0,0 +1,209 @@
+//===- DWARFLinkerDeclContext.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+
+namespace llvm {
+
+/// Set the last DIE/CU a context was seen in and possibly invalidate the
+/// context if it is ambiguous.
+///
+/// In the current implementation, we don't handle overloaded functions well,
+/// because the argument types are not taken into account when computing the
+/// DeclContext tree.
+///
+/// Some of this is mitigated by using mangled names that do contain the
+/// argument types, but sometimes (e.g. with function templates) we don't have
+/// that. In that case, just do not unique anything that refers to the contexts
+/// we are not able to distinguish.
+///
+/// If a context that is not a namespace appears twice in the same CU, we know
+/// it is ambiguous. Make it invalid.
+bool DeclContext::setLastSeenDIE(CompileUnit &U, const DWARFDie &Die) {
+ if (LastSeenCompileUnitID == U.getUniqueID()) {
+ DWARFUnit &OrigUnit = U.getOrigUnit();
+ uint32_t FirstIdx = OrigUnit.getDIEIndex(LastSeenDIE);
+ U.getInfo(FirstIdx).Ctxt = nullptr;
+ return false;
+ }
+
+ LastSeenCompileUnitID = U.getUniqueID();
+ LastSeenDIE = Die;
+ return true;
+}
+
+PointerIntPair<DeclContext *, 1> DeclContextTree::getChildDeclContext(
+ DeclContext &Context, const DWARFDie &DIE, CompileUnit &U,
+ UniquingStringPool &StringPool, bool InClangModule) {
+ unsigned Tag = DIE.getTag();
+
+ // FIXME: dsymutil-classic compat: We should bail out here if we
+ // have a specification or an abstract_origin. We will get the
+ // parent context wrong here.
+
+ switch (Tag) {
+ default:
+ // By default stop gathering child contexts.
+ return PointerIntPair<DeclContext *, 1>(nullptr);
+ case dwarf::DW_TAG_module:
+ break;
+ case dwarf::DW_TAG_compile_unit:
+ return PointerIntPair<DeclContext *, 1>(&Context);
+ case dwarf::DW_TAG_subprogram:
+ // Do not unique anything inside CU local functions.
+ if ((Context.getTag() == dwarf::DW_TAG_namespace ||
+ Context.getTag() == dwarf::DW_TAG_compile_unit) &&
+ !dwarf::toUnsigned(DIE.find(dwarf::DW_AT_external), 0))
+ return PointerIntPair<DeclContext *, 1>(nullptr);
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_TAG_member:
+ case dwarf::DW_TAG_namespace:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_typedef:
+ // Artificial things might be ambiguous because they might be created on
+ // demand. For example, implicitly defined constructors are ambiguous
+ // because of the way we identify contexts, and they won't be generated
+ // every time everywhere.
+ if (dwarf::toUnsigned(DIE.find(dwarf::DW_AT_artificial), 0))
+ return PointerIntPair<DeclContext *, 1>(nullptr);
+ break;
+ }
+
+ const char *Name = DIE.getName(DINameKind::LinkageName);
+ const char *ShortName = DIE.getName(DINameKind::ShortName);
+ StringRef NameRef;
+ StringRef ShortNameRef;
+ StringRef FileRef;
+
+ if (Name)
+ NameRef = StringPool.internString(Name);
+ else if (Tag == dwarf::DW_TAG_namespace)
+ // FIXME: For dsymutil-classic compatibility. I think uniquing within
+ // anonymous namespaces is wrong. There is no ODR guarantee there.
+ NameRef = StringPool.internString("(anonymous namespace)");
+
+ if (ShortName && ShortName != Name)
+ ShortNameRef = StringPool.internString(ShortName);
+ else
+ ShortNameRef = NameRef;
+
+ if (Tag != dwarf::DW_TAG_class_type && Tag != dwarf::DW_TAG_structure_type &&
+ Tag != dwarf::DW_TAG_union_type &&
+ Tag != dwarf::DW_TAG_enumeration_type && NameRef.empty())
+ return PointerIntPair<DeclContext *, 1>(nullptr);
+
+ unsigned Line = 0;
+ unsigned ByteSize = std::numeric_limits<uint32_t>::max();
+
+ if (!InClangModule) {
+ // Gather some discriminating data about the DeclContext we will be
+ // creating: File, line number and byte size. This shouldn't be necessary,
+ // because the ODR is just about names, but given that we do some
+ // approximations with overloaded functions and anonymous namespaces, use
+ // these additional data points to make the process safer.
+ //
+ // This is disabled for clang modules, because forward declarations of
+ // module-defined types do not have a file and line.
+ ByteSize = dwarf::toUnsigned(DIE.find(dwarf::DW_AT_byte_size),
+ std::numeric_limits<uint64_t>::max());
+ if (Tag != dwarf::DW_TAG_namespace || !Name) {
+ if (unsigned FileNum =
+ dwarf::toUnsigned(DIE.find(dwarf::DW_AT_decl_file), 0)) {
+ if (const auto *LT = U.getOrigUnit().getContext().getLineTableForUnit(
+ &U.getOrigUnit())) {
+ // FIXME: dsymutil-classic compatibility. I'd rather not
+ // unique anything in anonymous namespaces, but if we do, then
+ // verify that the file and line correspond.
+ if (!Name && Tag == dwarf::DW_TAG_namespace)
+ FileNum = 1;
+
+ if (LT->hasFileAtIndex(FileNum)) {
+ Line = dwarf::toUnsigned(DIE.find(dwarf::DW_AT_decl_line), 0);
+ // Cache the resolved paths based on the index in the line table,
+ // because calling realpath is expensive.
+ StringRef ResolvedPath = U.getResolvedPath(FileNum);
+ if (!ResolvedPath.empty()) {
+ FileRef = ResolvedPath;
+ } else {
+ std::string File;
+ bool FoundFileName = LT->getFileNameByIndex(
+ FileNum, U.getOrigUnit().getCompilationDir(),
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+ File);
+ (void)FoundFileName;
+ assert(FoundFileName && "Must get file name from line table");
+ // Second level of caching, this time based on the file's parent
+ // path.
+ FileRef = PathResolver.resolve(File, StringPool);
+ U.setResolvedPath(FileNum, FileRef);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (!Line && NameRef.empty())
+ return PointerIntPair<DeclContext *, 1>(nullptr);
+
+ // We hash NameRef, which is the mangled name, in order to get most
+ // overloaded functions resolve correctly.
+ //
+ // Strictly speaking, hashing the Tag is only necessary for a
+ // DW_TAG_module, to prevent uniquing of a module and a namespace
+ // with the same name.
+ //
+ // FIXME: dsymutil-classic won't unique the same type presented
+ // once as a struct and once as a class. We use the Tag in the fully
+ // qualified name hash to get the same effect.
+ unsigned Hash = hash_combine(Context.getQualifiedNameHash(), Tag, NameRef);
+
+ // FIXME: dsymutil-classic compatibility: when we don't have a name,
+ // use the filename.
+ if (Tag == dwarf::DW_TAG_namespace && NameRef == "(anonymous namespace)")
+ Hash = hash_combine(Hash, FileRef);
+
+ // Now look if this context already exists.
+ DeclContext Key(Hash, Line, ByteSize, Tag, NameRef, FileRef, Context);
+ auto ContextIter = Contexts.find(&Key);
+
+ if (ContextIter == Contexts.end()) {
+ // The context wasn't found.
+ bool Inserted;
+ DeclContext *NewContext =
+ new (Allocator) DeclContext(Hash, Line, ByteSize, Tag, NameRef, FileRef,
+ Context, DIE, U.getUniqueID());
+ std::tie(ContextIter, Inserted) = Contexts.insert(NewContext);
+ assert(Inserted && "Failed to insert DeclContext");
+ (void)Inserted;
+ } else if (Tag != dwarf::DW_TAG_namespace &&
+ !(*ContextIter)->setLastSeenDIE(U, DIE)) {
+ // The context was found, but it is ambiguous with another context
+ // in the same file. Mark it invalid.
+ return PointerIntPair<DeclContext *, 1>(*ContextIter, /* Invalid= */ 1);
+ }
+
+ assert(ContextIter != Contexts.end());
+ // FIXME: dsymutil-classic compatibility. Union types aren't
+ // uniqued, but their children might be.
+ if ((Tag == dwarf::DW_TAG_subprogram &&
+ Context.getTag() != dwarf::DW_TAG_structure_type &&
+ Context.getTag() != dwarf::DW_TAG_class_type) ||
+ (Tag == dwarf::DW_TAG_union_type))
+ return PointerIntPair<DeclContext *, 1>(*ContextIter, /* Invalid= */ 1);
+
+ return PointerIntPair<DeclContext *, 1>(*ContextIter);
+}
+
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
index 51a5a9e9243e..2562c633bb99 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
@@ -14,7 +14,7 @@
using namespace llvm;
using namespace llvm::codeview;
-template <typename RecordT> RecordT createRecord(const CVSymbol &sym) {
+template <typename RecordT> static RecordT createRecord(const CVSymbol &sym) {
RecordT record(static_cast<SymbolRecordKind>(sym.kind()));
cantFail(SymbolDeserializer::deserializeAs<RecordT>(sym, record));
return record;
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index aba0e96d606e..f9fca74a2199 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -15,6 +15,7 @@
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/Support/Error.h"
using namespace llvm;
@@ -202,21 +203,6 @@ private:
const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
-static bool isIdRecord(TypeLeafKind K) {
- switch (K) {
- case TypeLeafKind::LF_FUNC_ID:
- case TypeLeafKind::LF_MFUNC_ID:
- case TypeLeafKind::LF_STRING_ID:
- case TypeLeafKind::LF_SUBSTR_LIST:
- case TypeLeafKind::LF_BUILDINFO:
- case TypeLeafKind::LF_UDT_SRC_LINE:
- case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
- return true;
- default:
- return false;
- }
-}
-
void TypeStreamMerger::addMapping(TypeIndex Idx) {
if (!IsSecondPass) {
assert(IndexMap.size() == slotForIndex(CurIndex) &&
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 875f5e9989a0..575edba51ee8 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -277,7 +277,7 @@ Optional<DWARFFormValue>
AppleAcceleratorTable::Entry::lookup(HeaderData::AtomType Atom) const {
assert(HdrData && "Dereferencing end iterator?");
assert(HdrData->Atoms.size() == Values.size());
- for (const auto &Tuple : zip_first(HdrData->Atoms, Values)) {
+ for (auto Tuple : zip_first(HdrData->Atoms, Values)) {
if (std::get<0>(Tuple).first == Atom)
return std::get<1>(Tuple);
}
@@ -531,7 +531,7 @@ DWARFDebugNames::Entry::Entry(const NameIndex &NameIdx, const Abbrev &Abbr)
Optional<DWARFFormValue>
DWARFDebugNames::Entry::lookup(dwarf::Index Index) const {
assert(Abbr->Attributes.size() == Values.size());
- for (const auto &Tuple : zip_first(Abbr->Attributes, Values)) {
+ for (auto Tuple : zip_first(Abbr->Attributes, Values)) {
if (std::get<0>(Tuple).Index == Index)
return std::get<1>(Tuple);
}
@@ -565,7 +565,7 @@ void DWARFDebugNames::Entry::dump(ScopedPrinter &W) const {
W.printHex("Abbrev", Abbr->Code);
W.startLine() << formatv("Tag: {0}\n", Abbr->Tag);
assert(Abbr->Attributes.size() == Values.size());
- for (const auto &Tuple : zip_first(Abbr->Attributes, Values)) {
+ for (auto Tuple : zip_first(Abbr->Attributes, Values)) {
W.startLine() << formatv("{0}: ", std::get<0>(Tuple).Index);
std::get<1>(Tuple).dump(W.getOStream());
W.getOStream() << '\n';
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
index ef6da08d34aa..ddf307de2221 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
@@ -7,19 +7,23 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
-
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
void DWARFAddressRange::dump(raw_ostream &OS, uint32_t AddressSize,
- DIDumpOptions DumpOpts) const {
+ DIDumpOptions DumpOpts,
+ const DWARFObject *Obj) const {
OS << (DumpOpts.DisplayRawContents ? " " : "[");
OS << format("0x%*.*" PRIx64 ", ", AddressSize * 2, AddressSize * 2, LowPC)
<< format("0x%*.*" PRIx64, AddressSize * 2, AddressSize * 2, HighPC);
OS << (DumpOpts.DisplayRawContents ? "" : ")");
+
+ if (Obj)
+ DWARFFormValue::dumpAddressSection(*Obj, OS, DumpOpts, SectionIndex);
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const DWARFAddressRange &R) {
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index c06d85d50609..aaa6d5250f23 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -288,6 +288,7 @@ static void dumpRnglistsSection(
static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts,
DWARFDataExtractor Data,
const MCRegisterInfo *MRI,
+ const DWARFObject &Obj,
Optional<uint64_t> DumpOffset) {
uint64_t Offset = 0;
@@ -299,13 +300,21 @@ static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts,
}
Header.dump(OS, DumpOpts);
- DataExtractor LocData(Data.getData(),
- Data.isLittleEndian(), Header.getAddrSize());
- DWARFDebugLoclists Loclists;
uint64_t EndOffset = Header.length() + Header.getHeaderOffset();
- Loclists.parse(LocData, Offset, EndOffset, Header.getVersion());
- Loclists.dump(OS, 0, MRI, DumpOpts, DumpOffset);
+ Data.setAddressSize(Header.getAddrSize());
+ DWARFDebugLoclists Loc(Data, Header.getVersion());
+ if (DumpOffset) {
+ if (DumpOffset >= Offset && DumpOffset < EndOffset) {
+ Offset = *DumpOffset;
+ Loc.dumpLocationList(&Offset, OS, /*BaseAddr=*/None, MRI, Obj, nullptr,
+ DumpOpts, /*Indent=*/0);
+ OS << "\n";
+ return;
+ }
+ } else {
+ Loc.dumpRange(Offset, EndOffset - Offset, OS, MRI, Obj, DumpOpts);
+ }
Offset = EndOffset;
}
}
@@ -380,21 +389,45 @@ void DWARFContext::dump(
dumpDebugType(".debug_types.dwo", dwo_types_section_units());
}
+ DIDumpOptions LLDumpOpts = DumpOpts;
+ if (LLDumpOpts.Verbose)
+ LLDumpOpts.DisplayRawContents = true;
+
if (const auto *Off = shouldDump(Explicit, ".debug_loc", DIDT_ID_DebugLoc,
DObj->getLocSection().Data)) {
- getDebugLoc()->dump(OS, getRegisterInfo(), DumpOpts, *Off);
+ getDebugLoc()->dump(OS, getRegisterInfo(), *DObj, LLDumpOpts, *Off);
}
if (const auto *Off =
shouldDump(Explicit, ".debug_loclists", DIDT_ID_DebugLoclists,
DObj->getLoclistsSection().Data)) {
DWARFDataExtractor Data(*DObj, DObj->getLoclistsSection(), isLittleEndian(),
0);
- dumpLoclistsSection(OS, DumpOpts, Data, getRegisterInfo(), *Off);
+ dumpLoclistsSection(OS, LLDumpOpts, Data, getRegisterInfo(), *DObj, *Off);
+ }
+ if (const auto *Off =
+ shouldDump(ExplicitDWO, ".debug_loclists.dwo", DIDT_ID_DebugLoclists,
+ DObj->getLoclistsDWOSection().Data)) {
+ DWARFDataExtractor Data(*DObj, DObj->getLoclistsDWOSection(),
+ isLittleEndian(), 0);
+ dumpLoclistsSection(OS, LLDumpOpts, Data, getRegisterInfo(), *DObj, *Off);
}
+
if (const auto *Off =
shouldDump(ExplicitDWO, ".debug_loc.dwo", DIDT_ID_DebugLoc,
DObj->getLocDWOSection().Data)) {
- getDebugLocDWO()->dump(OS, 0, getRegisterInfo(), DumpOpts, *Off);
+ DWARFDataExtractor Data(*DObj, DObj->getLocDWOSection(), isLittleEndian(),
+ 4);
+ DWARFDebugLoclists Loc(Data, /*Version=*/4);
+ if (*Off) {
+ uint64_t Offset = **Off;
+ Loc.dumpLocationList(&Offset, OS,
+ /*BaseAddr=*/None, getRegisterInfo(), *DObj, nullptr,
+ LLDumpOpts, /*Indent=*/0);
+ OS << "\n";
+ } else {
+ Loc.dumpRange(0, Data.getData().size(), OS, getRegisterInfo(), *DObj,
+ LLDumpOpts);
+ }
}
if (const auto *Off = shouldDump(Explicit, ".debug_frame", DIDT_ID_DebugFrame,
@@ -409,6 +442,9 @@ void DWARFContext::dump(
if (Explicit || !getDebugMacro()->empty()) {
OS << "\n.debug_macinfo contents:\n";
getDebugMacro()->dump(OS);
+ } else if (ExplicitDWO || !getDebugMacroDWO()->empty()) {
+ OS << "\n.debug_macinfo.dwo contents:\n";
+ getDebugMacroDWO()->dump(OS);
}
}
@@ -715,32 +751,16 @@ const DWARFDebugLoc *DWARFContext::getDebugLoc() {
if (Loc)
return Loc.get();
- Loc.reset(new DWARFDebugLoc);
// Assume all units have the same address byte size.
- if (getNumCompileUnits()) {
- DWARFDataExtractor LocData(*DObj, DObj->getLocSection(), isLittleEndian(),
- getUnitAtIndex(0)->getAddressByteSize());
- Loc->parse(LocData);
- }
+ auto LocData =
+ getNumCompileUnits()
+ ? DWARFDataExtractor(*DObj, DObj->getLocSection(), isLittleEndian(),
+ getUnitAtIndex(0)->getAddressByteSize())
+ : DWARFDataExtractor("", isLittleEndian(), 0);
+ Loc.reset(new DWARFDebugLoc(std::move(LocData)));
return Loc.get();
}
-const DWARFDebugLoclists *DWARFContext::getDebugLocDWO() {
- if (LocDWO)
- return LocDWO.get();
-
- LocDWO.reset(new DWARFDebugLoclists());
- // Assume all compile units have the same address byte size.
- // FIXME: We don't need AddressSize for split DWARF since relocatable
- // addresses cannot appear there. At the moment DWARFExpression requires it.
- DataExtractor LocData(DObj->getLocDWOSection().Data, isLittleEndian(), 4);
- // Use version 4. DWO does not support the DWARF v5 .debug_loclists yet and
- // that means we are parsing the new style .debug_loc (pre-standatized version
- // of the .debug_loclists).
- LocDWO->parse(LocData, 0, LocData.getData().size(), 4 /* Version */);
- return LocDWO.get();
-}
-
const DWARFDebugAranges *DWARFContext::getDebugAranges() {
if (Aranges)
return Aranges.get();
@@ -781,6 +801,17 @@ const DWARFDebugFrame *DWARFContext::getEHFrame() {
return DebugFrame.get();
}
+const DWARFDebugMacro *DWARFContext::getDebugMacroDWO() {
+ if (MacroDWO)
+ return MacroDWO.get();
+
+ DataExtractor MacinfoDWOData(DObj->getMacinfoDWOSection(), isLittleEndian(),
+ 0);
+ MacroDWO.reset(new DWARFDebugMacro());
+ MacroDWO->parse(MacinfoDWOData);
+ return MacroDWO.get();
+}
+
const DWARFDebugMacro *DWARFContext::getDebugMacro() {
if (Macro)
return Macro.get();
@@ -843,7 +874,7 @@ DWARFContext::getLineTableForUnit(DWARFUnit *U) {
}
Expected<const DWARFDebugLine::LineTable *> DWARFContext::getLineTableForUnit(
- DWARFUnit *U, std::function<void(Error)> RecoverableErrorCallback) {
+ DWARFUnit *U, function_ref<void(Error)> RecoverableErrorCallback) {
if (!Line)
Line.reset(new DWARFDebugLine);
@@ -1027,19 +1058,56 @@ static Optional<uint64_t> getTypeSize(DWARFDie Type, uint64_t PointerSize) {
return Optional<uint64_t>();
}
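+// For example, "DW_OP_fbreg -8" yields an offset of -8, as does
+// "DW_OP_fbreg -8, DW_OP_deref" (the shape Fortran arrays produce); longer
+// expressions such as "DW_OP_breg29 0, DW_OP_stack_value" are rejected.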
+static Optional<int64_t>
+getExpressionFrameOffset(ArrayRef<uint8_t> Expr,
+ Optional<unsigned> FrameBaseReg) {
+ if (!Expr.empty() &&
+ (Expr[0] == DW_OP_fbreg ||
+ (FrameBaseReg && Expr[0] == DW_OP_breg0 + *FrameBaseReg))) {
+ unsigned Count;
+ int64_t Offset = decodeSLEB128(Expr.data() + 1, &Count, Expr.end());
+ // A single DW_OP_fbreg or DW_OP_breg.
+ if (Expr.size() == Count + 1)
+ return Offset;
+ // Same + DW_OP_deref (Fortran arrays look like this).
+ if (Expr.size() == Count + 2 && Expr[Count + 1] == DW_OP_deref)
+ return Offset;
+ // Fallthrough. Do not accept e.g. (DW_OP_breg W29, DW_OP_stack_value).
+ }
+ return None;
+}
+
void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram,
DWARFDie Die, std::vector<DILocal> &Result) {
if (Die.getTag() == DW_TAG_variable ||
Die.getTag() == DW_TAG_formal_parameter) {
DILocal Local;
- if (auto NameAttr = Subprogram.find(DW_AT_name))
- if (Optional<const char *> Name = NameAttr->getAsCString())
- Local.FunctionName = *Name;
- if (auto LocationAttr = Die.find(DW_AT_location))
- if (Optional<ArrayRef<uint8_t>> Location = LocationAttr->getAsBlock())
- if (!Location->empty() && (*Location)[0] == DW_OP_fbreg)
- Local.FrameOffset =
- decodeSLEB128(Location->data() + 1, nullptr, Location->end());
+ if (const char *Name = Subprogram.getSubroutineName(DINameKind::ShortName))
+ Local.FunctionName = Name;
+
+ Optional<unsigned> FrameBaseReg;
+ if (auto FrameBase = Subprogram.find(DW_AT_frame_base))
+ if (Optional<ArrayRef<uint8_t>> Expr = FrameBase->getAsBlock())
+ if (!Expr->empty() && (*Expr)[0] >= DW_OP_reg0 &&
+ (*Expr)[0] <= DW_OP_reg31) {
+ FrameBaseReg = (*Expr)[0] - DW_OP_reg0;
+ }
+
+ if (Expected<std::vector<DWARFLocationExpression>> Loc =
+ Die.getLocations(DW_AT_location)) {
+ for (const auto &Entry : *Loc) {
+ if (Optional<int64_t> FrameOffset =
+ getExpressionFrameOffset(Entry.Expr, FrameBaseReg)) {
+ Local.FrameOffset = *FrameOffset;
+ break;
+ }
+ }
+ } else {
+ // FIXME: missing DW_AT_location is OK here, but other errors should be
+ // reported to the user.
+ consumeError(Loc.takeError());
+ }
+
if (auto TagOffsetAttr = Die.find(DW_AT_LLVM_tag_offset))
Local.TagOffset = TagOffsetAttr->getAsUnsignedConstant();
@@ -1377,6 +1445,7 @@ class DWARFObjInMemory final : public DWARFObject {
DWARFSectionMap LocSection;
DWARFSectionMap LoclistsSection;
+ DWARFSectionMap LoclistsDWOSection;
DWARFSectionMap LineSection;
DWARFSectionMap RangesSection;
DWARFSectionMap RnglistsSection;
@@ -1403,6 +1472,7 @@ class DWARFObjInMemory final : public DWARFObject {
return StringSwitch<DWARFSectionMap *>(Name)
.Case("debug_loc", &LocSection)
.Case("debug_loclists", &LoclistsSection)
+ .Case("debug_loclists.dwo", &LoclistsDWOSection)
.Case("debug_line", &LineSection)
.Case("debug_frame", &FrameSection)
.Case("eh_frame", &EHFrameSection)
@@ -1431,6 +1501,7 @@ class DWARFObjInMemory final : public DWARFObject {
StringRef ArangesSection;
StringRef StrSection;
StringRef MacinfoSection;
+ StringRef MacinfoDWOSection;
StringRef AbbrevDWOSection;
StringRef StrDWOSection;
StringRef CUIndexSection;
@@ -1450,6 +1521,7 @@ class DWARFObjInMemory final : public DWARFObject {
.Case("debug_aranges", &ArangesSection)
.Case("debug_str", &StrSection)
.Case("debug_macinfo", &MacinfoSection)
+ .Case("debug_macinfo.dwo", &MacinfoDWOSection)
.Case("debug_abbrev.dwo", &AbbrevDWOSection)
.Case("debug_str.dwo", &StrDWOSection)
.Case("debug_cu_index", &CUIndexSection)
@@ -1733,6 +1805,9 @@ public:
const DWARFSection &getRnglistsDWOSection() const override {
return RnglistsDWOSection;
}
+ const DWARFSection &getLoclistsDWOSection() const override {
+ return LoclistsDWOSection;
+ }
const DWARFSection &getAddrSection() const override { return AddrSection; }
StringRef getCUIndexSection() const override { return CUIndexSection; }
StringRef getGdbIndexSection() const override { return GdbIndexSection; }
@@ -1773,6 +1848,7 @@ public:
return RnglistsSection;
}
StringRef getMacinfoSection() const override { return MacinfoSection; }
+ StringRef getMacinfoDWOSection() const override { return MacinfoDWOSection; }
const DWARFSection &getPubnamesSection() const override { return PubnamesSection; }
const DWARFSection &getPubtypesSection() const override { return PubtypesSection; }
const DWARFSection &getGnuPubnamesSection() const override {
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
index ca6043109cdb..fa157e868851 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
@@ -113,10 +113,10 @@ void DWARFDebugAranges::construct() {
Endpoints.shrink_to_fit();
}
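+// CU offsets are section offsets, which DWARF64 allows to exceed 32 bits,
+// so the lookup now returns a uint64_t, with -1ULL as the not-found value.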
-uint32_t DWARFDebugAranges::findAddress(uint64_t Address) const {
+uint64_t DWARFDebugAranges::findAddress(uint64_t Address) const {
RangeCollIterator It =
partition_point(Aranges, [=](Range R) { return R.HighPC() <= Address; });
if (It != Aranges.end() && It->LowPC <= Address)
return It->CUOffset;
- return -1U;
+ return -1ULL;
}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index dbee28ff5ab1..11adb1e47640 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -39,7 +39,7 @@ struct ContentDescriptor {
using ContentDescriptors = SmallVector<ContentDescriptor, 4>;
-} // end anonmyous namespace
+} // end anonymous namespace
void DWARFDebugLine::ContentTypeTracker::trackContentType(
dwarf::LineNumberEntryFormat ContentType) {
@@ -190,20 +190,11 @@ parseV2DirFileTables(const DWARFDataExtractor &DebugLineData,
// the end of the prologue.
static llvm::Expected<ContentDescriptors>
parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
- uint64_t EndPrologueOffset,
DWARFDebugLine::ContentTypeTracker *ContentTypes) {
ContentDescriptors Descriptors;
int FormatCount = DebugLineData.getU8(OffsetPtr);
bool HasPath = false;
for (int I = 0; I != FormatCount; ++I) {
- if (*OffsetPtr >= EndPrologueOffset)
- return createStringError(
- errc::invalid_argument,
- "failed to parse entry content descriptions at offset "
- "0x%8.8" PRIx64
- " because offset extends beyond the prologue end at offset "
- "0x%8.8" PRIx64,
- *OffsetPtr, EndPrologueOffset);
ContentDescriptor Descriptor;
Descriptor.Type =
dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr));
@@ -224,29 +215,20 @@ parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
static Error
parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
- uint64_t *OffsetPtr, uint64_t EndPrologueOffset,
- const dwarf::FormParams &FormParams,
+ uint64_t *OffsetPtr, const dwarf::FormParams &FormParams,
const DWARFContext &Ctx, const DWARFUnit *U,
DWARFDebugLine::ContentTypeTracker &ContentTypes,
std::vector<DWARFFormValue> &IncludeDirectories,
std::vector<DWARFDebugLine::FileNameEntry> &FileNames) {
// Get the directory entry description.
llvm::Expected<ContentDescriptors> DirDescriptors =
- parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset, nullptr);
+ parseV5EntryFormat(DebugLineData, OffsetPtr, nullptr);
if (!DirDescriptors)
return DirDescriptors.takeError();
// Get the directory entries, according to the format described above.
int DirEntryCount = DebugLineData.getU8(OffsetPtr);
for (int I = 0; I != DirEntryCount; ++I) {
- if (*OffsetPtr >= EndPrologueOffset)
- return createStringError(
- errc::invalid_argument,
- "failed to parse directory entry at offset "
- "0x%8.8" PRIx64
- " because offset extends beyond the prologue end at offset "
- "0x%8.8" PRIx64,
- *OffsetPtr, EndPrologueOffset);
for (auto Descriptor : *DirDescriptors) {
DWARFFormValue Value(Descriptor.Form);
switch (Descriptor.Type) {
@@ -267,22 +249,14 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
}
// Get the file entry description.
- llvm::Expected<ContentDescriptors> FileDescriptors = parseV5EntryFormat(
- DebugLineData, OffsetPtr, EndPrologueOffset, &ContentTypes);
+ llvm::Expected<ContentDescriptors> FileDescriptors =
+ parseV5EntryFormat(DebugLineData, OffsetPtr, &ContentTypes);
if (!FileDescriptors)
return FileDescriptors.takeError();
// Get the file entries, according to the format described above.
int FileEntryCount = DebugLineData.getU8(OffsetPtr);
for (int I = 0; I != FileEntryCount; ++I) {
- if (*OffsetPtr >= EndPrologueOffset)
- return createStringError(
- errc::invalid_argument,
- "failed to parse file entry at offset "
- "0x%8.8" PRIx64
- " because offset extends beyond the prologue end at offset "
- "0x%8.8" PRIx64,
- *OffsetPtr, EndPrologueOffset);
DWARFDebugLine::FileNameEntry FileEntry;
for (auto Descriptor : *FileDescriptors) {
DWARFFormValue Value(Descriptor.Form);
@@ -373,9 +347,9 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
}
if (getVersion() >= 5) {
- if (Error e = parseV5DirFileTables(
- DebugLineData, OffsetPtr, EndPrologueOffset, FormParams, Ctx, U,
- ContentTypes, IncludeDirectories, FileNames)) {
+ if (Error E =
+ parseV5DirFileTables(DebugLineData, OffsetPtr, FormParams, Ctx, U,
+ ContentTypes, IncludeDirectories, FileNames)) {
return joinErrors(
createStringError(
errc::invalid_argument,
@@ -383,7 +357,7 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
" found an invalid directory or file table description at"
" 0x%8.8" PRIx64,
PrologueOffset, *OffsetPtr),
- std::move(e));
+ std::move(E));
}
} else
parseV2DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset,
@@ -453,14 +427,18 @@ DWARFDebugLine::LineTable::LineTable() { clear(); }
void DWARFDebugLine::LineTable::dump(raw_ostream &OS,
DIDumpOptions DumpOptions) const {
Prologue.dump(OS, DumpOptions);
- OS << '\n';
if (!Rows.empty()) {
+ OS << '\n';
Row::dumpTableHeader(OS);
for (const Row &R : Rows) {
R.dump(OS);
}
}
+
+ // Terminate the table with a final blank line to clearly delineate it from
+ // later dumps.
+ OS << '\n';
}
void DWARFDebugLine::LineTable::clear() {
@@ -510,7 +488,7 @@ DWARFDebugLine::getLineTable(uint64_t Offset) const {
Expected<const DWARFDebugLine::LineTable *> DWARFDebugLine::getOrParseLineTable(
DWARFDataExtractor &DebugLineData, uint64_t Offset, const DWARFContext &Ctx,
- const DWARFUnit *U, std::function<void(Error)> RecoverableErrorCallback) {
+ const DWARFUnit *U, function_ref<void(Error)> RecoverableErrorCallback) {
if (!DebugLineData.isValidOffset(Offset))
return createStringError(errc::invalid_argument, "offset 0x%8.8" PRIx64
" is not a valid debug line section offset",
@@ -531,7 +509,7 @@ Expected<const DWARFDebugLine::LineTable *> DWARFDebugLine::getOrParseLineTable(
Error DWARFDebugLine::LineTable::parse(
DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
const DWARFContext &Ctx, const DWARFUnit *U,
- std::function<void(Error)> RecoverableErrorCallback, raw_ostream *OS) {
+ function_ref<void(Error)> RecoverableErrorCallback, raw_ostream *OS) {
const uint64_t DebugLineOffset = *OffsetPtr;
clear();
@@ -548,8 +526,23 @@ Error DWARFDebugLine::LineTable::parse(
if (PrologueErr)
return PrologueErr;
- const uint64_t EndOffset =
- DebugLineOffset + Prologue.TotalLength + Prologue.sizeofTotalLength();
+ uint64_t ProgramLength = Prologue.TotalLength + Prologue.sizeofTotalLength();
+ if (!DebugLineData.isValidOffsetForDataOfSize(DebugLineOffset,
+ ProgramLength)) {
+ assert(DebugLineData.size() > DebugLineOffset &&
+ "prologue parsing should handle invalid offset");
+ uint64_t BytesRemaining = DebugLineData.size() - DebugLineOffset;
+ RecoverableErrorCallback(
+ createStringError(errc::invalid_argument,
+ "line table program with offset 0x%8.8" PRIx64
+ " has length 0x%8.8" PRIx64 " but only 0x%8.8" PRIx64
+ " bytes are available",
+ DebugLineOffset, ProgramLength, BytesRemaining));
+ // Continue by capping the length at the number of remaining bytes.
+ ProgramLength = BytesRemaining;
+ }
+
+ const uint64_t EndOffset = DebugLineOffset + ProgramLength;
// See if we should tell the data extractor the address size.
if (DebugLineData.getAddressSize() == 0)
@@ -595,12 +588,12 @@ Error DWARFDebugLine::LineTable::parse(
// address is that of the byte after the last target machine instruction
// of the sequence.
State.Row.EndSequence = true;
- State.appendRowToMatrix();
if (OS) {
*OS << "\n";
OS->indent(12);
State.Row.dump(*OS);
}
+ State.appendRowToMatrix();
State.resetRowAndSequence();
break;
@@ -614,19 +607,28 @@ Error DWARFDebugLine::LineTable::parse(
//
// Make sure the extractor knows the address size. If not, infer it
// from the size of the operand.
- if (DebugLineData.getAddressSize() == 0)
+ {
+ uint8_t ExtractorAddressSize = DebugLineData.getAddressSize();
+ if (ExtractorAddressSize != Len - 1 && ExtractorAddressSize != 0)
+ RecoverableErrorCallback(createStringError(
+ errc::invalid_argument,
+ "mismatching address size at offset 0x%8.8" PRIx64
+ " expected 0x%2.2" PRIx8 " found 0x%2.2" PRIx64,
+ ExtOffset, ExtractorAddressSize, Len - 1));
+
+ // Assume that the line table is correct and temporarily override the
+ // address size.
DebugLineData.setAddressSize(Len - 1);
- else if (DebugLineData.getAddressSize() != Len - 1) {
- return createStringError(errc::invalid_argument,
- "mismatching address size at offset 0x%8.8" PRIx64
- " expected 0x%2.2" PRIx8 " found 0x%2.2" PRIx64,
- ExtOffset, DebugLineData.getAddressSize(),
- Len - 1);
+ State.Row.Address.Address = DebugLineData.getRelocatedAddress(
+ OffsetPtr, &State.Row.Address.SectionIndex);
+
+ // Restore the address size if the extractor already had it.
+ if (ExtractorAddressSize != 0)
+ DebugLineData.setAddressSize(ExtractorAddressSize);
+
+ if (OS)
+ *OS << format(" (0x%16.16" PRIx64 ")", State.Row.Address.Address);
}
- State.Row.Address.Address = DebugLineData.getRelocatedAddress(
- OffsetPtr, &State.Row.Address.SectionIndex);
- if (OS)
- *OS << format(" (0x%16.16" PRIx64 ")", State.Row.Address.Address);
break;
case DW_LNE_define_file:
@@ -813,7 +815,7 @@ Error DWARFDebugLine::LineTable::parse(
// column register of the state machine.
State.Row.Isa = DebugLineData.getULEB128(OffsetPtr);
if (OS)
- *OS << " (" << State.Row.Isa << ")";
+ *OS << " (" << (uint64_t)State.Row.Isa << ")";
break;
default:
@@ -888,9 +890,11 @@ Error DWARFDebugLine::LineTable::parse(
}
if (!State.Sequence.Empty)
- RecoverableErrorCallback(
- createStringError(errc::illegal_byte_sequence,
- "last sequence in debug line table is not terminated!"));
+ RecoverableErrorCallback(createStringError(
+ errc::illegal_byte_sequence,
+ "last sequence in debug line table at offset 0x%8.8" PRIx64
+ " is not terminated",
+ DebugLineOffset));
// Sort all sequences so that address lookup will work faster.
if (!Sequences.empty()) {
@@ -1046,7 +1050,10 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex(
if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
return false;
const FileNameEntry &Entry = getFileNameEntry(FileIndex);
- StringRef FileName = Entry.Name.getAsCString().getValue();
+ Optional<const char *> Name = Entry.Name.getAsCString();
+ if (!Name)
+ return false;
+ StringRef FileName = *Name;
if (Kind != FileLineInfoKind::AbsoluteFilePath ||
isPathAbsoluteOnWindowsOrPosix(FileName)) {
Result = FileName;
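
The line-table entry points now take their recoverable-error callback as a
non-owning llvm::function_ref rather than std::function, so a caller can pass
a bare lambda with no allocation. A minimal caller sketch, assuming the
post-patch headers (parseLineTable is an illustrative wrapper, not part of
the API):

#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/WithColor.h"

using namespace llvm;

// Parse (or fetch the cached) line table at Offset, routing recoverable
// problems to the warning stream instead of aborting the parse.
static Expected<const DWARFDebugLine::LineTable *>
parseLineTable(DWARFDebugLine &DebugLine, DWARFDataExtractor &Data,
               uint64_t Offset, const DWARFContext &Ctx, const DWARFUnit *U) {
  return DebugLine.getOrParseLineTable(Data, Offset, Ctx, U, [](Error E) {
    WithColor::warning() << toString(std::move(E)) << '\n';
  });
}
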
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index 4f7b01130a47..0c5f9a9c54ec 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -22,6 +22,89 @@
#include <cstdint>
using namespace llvm;
+using object::SectionedAddress;
+
+namespace {
+class DWARFLocationInterpreter {
+ Optional<object::SectionedAddress> Base;
+ std::function<Optional<object::SectionedAddress>(uint32_t)> LookupAddr;
+
+public:
+ DWARFLocationInterpreter(
+ Optional<object::SectionedAddress> Base,
+ std::function<Optional<object::SectionedAddress>(uint32_t)> LookupAddr)
+ : Base(Base), LookupAddr(std::move(LookupAddr)) {}
+
+ Expected<Optional<DWARFLocationExpression>>
+ Interpret(const DWARFLocationEntry &E);
+};
+} // namespace
+
+static Error createResolverError(uint32_t Index, unsigned Kind) {
+ return createStringError(errc::invalid_argument,
+ "Unable to resolve indirect address %u for: %s",
+ Index, dwarf::LocListEncodingString(Kind).data());
+}
+
+Expected<Optional<DWARFLocationExpression>>
+DWARFLocationInterpreter::Interpret(const DWARFLocationEntry &E) {
+ switch (E.Kind) {
+ case dwarf::DW_LLE_end_of_list:
+ return None;
+ case dwarf::DW_LLE_base_addressx: {
+ Base = LookupAddr(E.Value0);
+ if (!Base)
+ return createResolverError(E.Value0, E.Kind);
+ return None;
+ }
+ case dwarf::DW_LLE_startx_endx: {
+ Optional<SectionedAddress> LowPC = LookupAddr(E.Value0);
+ if (!LowPC)
+ return createResolverError(E.Value0, E.Kind);
+ Optional<SectionedAddress> HighPC = LookupAddr(E.Value1);
+ if (!HighPC)
+ return createResolverError(E.Value1, E.Kind);
+ return DWARFLocationExpression{
+ DWARFAddressRange{LowPC->Address, HighPC->Address, LowPC->SectionIndex},
+ E.Loc};
+ }
+ case dwarf::DW_LLE_startx_length: {
+ Optional<SectionedAddress> LowPC = LookupAddr(E.Value0);
+ if (!LowPC)
+ return createResolverError(E.Value0, E.Kind);
+ return DWARFLocationExpression{DWARFAddressRange{LowPC->Address,
+ LowPC->Address + E.Value1,
+ LowPC->SectionIndex},
+ E.Loc};
+ }
+ case dwarf::DW_LLE_offset_pair: {
+ if (!Base) {
+ return createStringError(inconvertibleErrorCode(),
+ "Unable to resolve location list offset pair: "
+ "Base address not defined");
+ }
+ DWARFAddressRange Range{Base->Address + E.Value0, Base->Address + E.Value1,
+ Base->SectionIndex};
+ if (Range.SectionIndex == SectionedAddress::UndefSection)
+ Range.SectionIndex = E.SectionIndex;
+ return DWARFLocationExpression{Range, E.Loc};
+ }
+ case dwarf::DW_LLE_default_location:
+ return DWARFLocationExpression{None, E.Loc};
+ case dwarf::DW_LLE_base_address:
+ Base = SectionedAddress{E.Value0, E.SectionIndex};
+ return None;
+ case dwarf::DW_LLE_start_end:
+ return DWARFLocationExpression{
+ DWARFAddressRange{E.Value0, E.Value1, E.SectionIndex}, E.Loc};
+ case dwarf::DW_LLE_start_length:
+ return DWARFLocationExpression{
+ DWARFAddressRange{E.Value0, E.Value0 + E.Value1, E.SectionIndex},
+ E.Loc};
+ default:
+ llvm_unreachable("unreachable locations list kind");
+ }
+}
// When directly dumping the .debug_loc without a compile unit, we have to guess
// at the DWARF version. This only affects DW_OP_call_ref, which is a rare
@@ -35,126 +118,180 @@ static void dumpExpression(raw_ostream &OS, ArrayRef<uint8_t> Data,
DWARFExpression(Extractor, dwarf::DWARF_VERSION, AddressSize).print(OS, MRI, U);
}
-void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, uint64_t BaseAddress,
- bool IsLittleEndian,
- unsigned AddressSize,
- const MCRegisterInfo *MRI, DWARFUnit *U,
- DIDumpOptions DumpOpts,
- unsigned Indent) const {
- for (const Entry &E : Entries) {
- OS << '\n';
+bool DWARFLocationTable::dumpLocationList(uint64_t *Offset, raw_ostream &OS,
+ Optional<SectionedAddress> BaseAddr,
+ const MCRegisterInfo *MRI,
+ const DWARFObject &Obj, DWARFUnit *U,
+ DIDumpOptions DumpOpts,
+ unsigned Indent) const {
+ DWARFLocationInterpreter Interp(
+ BaseAddr, [U](uint32_t Index) -> Optional<SectionedAddress> {
+ if (U)
+ return U->getAddrOffsetSectionItem(Index);
+ return None;
+ });
+ OS << format("0x%8.8" PRIx64 ": ", *Offset);
+ Error E = visitLocationList(Offset, [&](const DWARFLocationEntry &E) {
+ Expected<Optional<DWARFLocationExpression>> Loc = Interp.Interpret(E);
+ if (!Loc || DumpOpts.DisplayRawContents)
+ dumpRawEntry(E, OS, Indent, DumpOpts, Obj);
+ if (Loc && *Loc) {
+ OS << "\n";
+ OS.indent(Indent);
+ if (DumpOpts.DisplayRawContents)
+ OS << " => ";
+
+ DIDumpOptions RangeDumpOpts(DumpOpts);
+ RangeDumpOpts.DisplayRawContents = false;
+ if (Loc.get()->Range)
+ Loc.get()->Range->dump(OS, Data.getAddressSize(), RangeDumpOpts, &Obj);
+ else
+ OS << "<default>";
+ }
+ if (!Loc)
+ consumeError(Loc.takeError());
+
+ if (E.Kind != dwarf::DW_LLE_base_address &&
+ E.Kind != dwarf::DW_LLE_base_addressx &&
+ E.Kind != dwarf::DW_LLE_end_of_list) {
+ OS << ": ";
+ dumpExpression(OS, E.Loc, Data.isLittleEndian(), Data.getAddressSize(),
+ MRI, U);
+ }
+ return true;
+ });
+ if (E) {
+ OS << "\n";
OS.indent(Indent);
- OS << format("[0x%*.*" PRIx64 ", ", AddressSize * 2, AddressSize * 2,
- BaseAddress + E.Begin);
- OS << format(" 0x%*.*" PRIx64 ")", AddressSize * 2, AddressSize * 2,
- BaseAddress + E.End);
- OS << ": ";
-
- dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI, U);
+ OS << "error: " << toString(std::move(E));
+ return false;
}
+ return true;
}
-DWARFDebugLoc::LocationList const *
-DWARFDebugLoc::getLocationListAtOffset(uint64_t Offset) const {
- auto It = partition_point(
- Locations, [=](const LocationList &L) { return L.Offset < Offset; });
- if (It != Locations.end() && It->Offset == Offset)
- return &(*It);
- return nullptr;
+Error DWARFLocationTable::visitAbsoluteLocationList(
+ uint64_t Offset, Optional<SectionedAddress> BaseAddr,
+ std::function<Optional<SectionedAddress>(uint32_t)> LookupAddr,
+ function_ref<bool(Expected<DWARFLocationExpression>)> Callback) const {
+ DWARFLocationInterpreter Interp(BaseAddr, std::move(LookupAddr));
+ return visitLocationList(&Offset, [&](const DWARFLocationEntry &E) {
+ Expected<Optional<DWARFLocationExpression>> Loc = Interp.Interpret(E);
+ if (!Loc)
+ return Callback(Loc.takeError());
+ if (*Loc)
+ return Callback(**Loc);
+ return true;
+ });
}
-void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI, DIDumpOptions DumpOpts,
- Optional<uint64_t> Offset) const {
- auto DumpLocationList = [&](const LocationList &L) {
- OS << format("0x%8.8" PRIx64 ": ", L.Offset);
- L.dump(OS, 0, IsLittleEndian, AddressSize, MRI, nullptr, DumpOpts, 12);
- OS << "\n";
- };
-
- if (Offset) {
- if (auto *L = getLocationListAtOffset(*Offset))
- DumpLocationList(*L);
- return;
- }
-
- for (const LocationList &L : Locations) {
- DumpLocationList(L);
- if (&L != &Locations.back())
+void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI,
+ const DWARFObject &Obj, DIDumpOptions DumpOpts,
+ Optional<uint64_t> DumpOffset) const {
+ auto BaseAddr = None;
+ unsigned Indent = 12;
+ if (DumpOffset) {
+ dumpLocationList(&*DumpOffset, OS, BaseAddr, MRI, Obj, nullptr, DumpOpts,
+ Indent);
+ } else {
+ uint64_t Offset = 0;
+ StringRef Separator;
+ bool CanContinue = true;
+ while (CanContinue && Data.isValidOffset(Offset)) {
+ OS << Separator;
+ Separator = "\n";
+
+ CanContinue = dumpLocationList(&Offset, OS, BaseAddr, MRI, Obj, nullptr,
+ DumpOpts, Indent);
OS << '\n';
+ }
}
}
-Expected<DWARFDebugLoc::LocationList>
-DWARFDebugLoc::parseOneLocationList(const DWARFDataExtractor &Data,
- uint64_t *Offset) {
- LocationList LL;
- LL.Offset = *Offset;
- AddressSize = Data.getAddressSize();
+Error DWARFDebugLoc::visitLocationList(
+ uint64_t *Offset,
+ function_ref<bool(const DWARFLocationEntry &)> Callback) const {
DataExtractor::Cursor C(*Offset);
-
- // 2.6.2 Location Lists
- // A location list entry consists of:
while (true) {
- Entry E;
-
- // 1. A beginning address offset. ...
- E.Begin = Data.getRelocatedAddress(C);
+ uint64_t SectionIndex;
+ uint64_t Value0 = Data.getRelocatedAddress(C);
+ uint64_t Value1 = Data.getRelocatedAddress(C, &SectionIndex);
- // 2. An ending address offset. ...
- E.End = Data.getRelocatedAddress(C);
-
- if (Error Err = C.takeError())
- return std::move(Err);
+ DWARFLocationEntry E;
// The end of any given location list is marked by an end of list entry,
// which consists of a 0 for the beginning address offset and a 0 for the
- // ending address offset.
- if (E.Begin == 0 && E.End == 0) {
- *Offset = C.tell();
- return LL;
- }
-
- if (E.Begin != (AddressSize == 4 ? -1U : -1ULL)) {
+ // ending address offset. A beginning offset of 0xff...f marks the base
+ // address selection entry.
+ if (Value0 == 0 && Value1 == 0) {
+ E.Kind = dwarf::DW_LLE_end_of_list;
+ } else if (Value0 == (Data.getAddressSize() == 4 ? -1U : -1ULL)) {
+ E.Kind = dwarf::DW_LLE_base_address;
+ E.Value0 = Value1;
+ E.SectionIndex = SectionIndex;
+ } else {
+ E.Kind = dwarf::DW_LLE_offset_pair;
+ E.Value0 = Value0;
+ E.Value1 = Value1;
+ E.SectionIndex = SectionIndex;
unsigned Bytes = Data.getU16(C);
// A single location description describing the location of the object...
Data.getU8(C, E.Loc, Bytes);
}
- LL.Entries.push_back(std::move(E));
+ if (!C)
+ return C.takeError();
+ if (!Callback(E) || E.Kind == dwarf::DW_LLE_end_of_list)
+ break;
}
+ *Offset = C.tell();
+ return Error::success();
}
-void DWARFDebugLoc::parse(const DWARFDataExtractor &data) {
- IsLittleEndian = data.isLittleEndian();
- AddressSize = data.getAddressSize();
-
- uint64_t Offset = 0;
- while (Offset < data.getData().size()) {
- if (auto LL = parseOneLocationList(data, &Offset))
- Locations.push_back(std::move(*LL));
- else {
- logAllUnhandledErrors(LL.takeError(), WithColor::error());
- break;
- }
+void DWARFDebugLoc::dumpRawEntry(const DWARFLocationEntry &Entry,
+ raw_ostream &OS, unsigned Indent,
+ DIDumpOptions DumpOpts,
+ const DWARFObject &Obj) const {
+ uint64_t Value0, Value1;
+ switch (Entry.Kind) {
+ case dwarf::DW_LLE_base_address:
+ Value0 = Data.getAddressSize() == 4 ? -1U : -1ULL;
+ Value1 = Entry.Value0;
+ break;
+ case dwarf::DW_LLE_offset_pair:
+ Value0 = Entry.Value0;
+ Value1 = Entry.Value1;
+ break;
+ case dwarf::DW_LLE_end_of_list:
+ Value0 = Value1 = 0;
+ return;
+ default:
+ llvm_unreachable("Not possible in DWARF4!");
}
+ OS << '\n';
+ OS.indent(Indent);
+ OS << '(' << format_hex(Value0, 2 + Data.getAddressSize() * 2) << ", "
+ << format_hex(Value1, 2 + Data.getAddressSize() * 2) << ')';
+ DWARFFormValue::dumpAddressSection(Obj, OS, DumpOpts, Entry.SectionIndex);
}
-Expected<DWARFDebugLoclists::LocationList>
-DWARFDebugLoclists::parseOneLocationList(const DataExtractor &Data,
- uint64_t *Offset, unsigned Version) {
- LocationList LL;
- LL.Offset = *Offset;
- DataExtractor::Cursor C(*Offset);
+Error DWARFDebugLoclists::visitLocationList(
+ uint64_t *Offset, function_ref<bool(const DWARFLocationEntry &)> F) const {
- // dwarf::DW_LLE_end_of_list_entry is 0 and indicates the end of the list.
- while (auto Kind = Data.getU8(C)) {
- Entry E;
- E.Kind = Kind;
- E.Offset = C.tell() - 1;
- switch (Kind) {
+ DataExtractor::Cursor C(*Offset);
+ bool Continue = true;
+ while (Continue) {
+ DWARFLocationEntry E;
+ E.Kind = Data.getU8(C);
+ switch (E.Kind) {
+ case dwarf::DW_LLE_end_of_list:
+ break;
case dwarf::DW_LLE_base_addressx:
E.Value0 = Data.getULEB128(C);
break;
+ case dwarf::DW_LLE_startx_endx:
+ E.Value0 = Data.getULEB128(C);
+ E.Value1 = Data.getULEB128(C);
+ break;
case dwarf::DW_LLE_startx_length:
E.Value0 = Data.getULEB128(C);
// Pre-DWARF 5 has different interpretation of the length field. We have
@@ -164,176 +301,109 @@ DWARFDebugLoclists::parseOneLocationList(const DataExtractor &Data,
else
E.Value1 = Data.getULEB128(C);
break;
- case dwarf::DW_LLE_start_length:
- E.Value0 = Data.getAddress(C);
- E.Value1 = Data.getULEB128(C);
- break;
case dwarf::DW_LLE_offset_pair:
E.Value0 = Data.getULEB128(C);
E.Value1 = Data.getULEB128(C);
+ E.SectionIndex = SectionedAddress::UndefSection;
+ break;
+ case dwarf::DW_LLE_default_location:
break;
case dwarf::DW_LLE_base_address:
- E.Value0 = Data.getAddress(C);
+ E.Value0 = Data.getRelocatedAddress(C, &E.SectionIndex);
+ break;
+ case dwarf::DW_LLE_start_end:
+ E.Value0 = Data.getRelocatedAddress(C, &E.SectionIndex);
+ E.Value1 = Data.getRelocatedAddress(C);
+ break;
+ case dwarf::DW_LLE_start_length:
+ E.Value0 = Data.getRelocatedAddress(C, &E.SectionIndex);
+ E.Value1 = Data.getULEB128(C);
break;
default:
cantFail(C.takeError());
return createStringError(errc::illegal_byte_sequence,
- "LLE of kind %x not supported", (int)Kind);
+ "LLE of kind %x not supported", (int)E.Kind);
}
- if (Kind != dwarf::DW_LLE_base_address &&
- Kind != dwarf::DW_LLE_base_addressx) {
+ if (E.Kind != dwarf::DW_LLE_base_address &&
+ E.Kind != dwarf::DW_LLE_base_addressx &&
+ E.Kind != dwarf::DW_LLE_end_of_list) {
unsigned Bytes = Version >= 5 ? Data.getULEB128(C) : Data.getU16(C);
// A single location description describing the location of the object...
Data.getU8(C, E.Loc, Bytes);
}
- LL.Entries.push_back(std::move(E));
+ if (!C)
+ return C.takeError();
+ Continue = F(E) && E.Kind != dwarf::DW_LLE_end_of_list;
}
- if (Error Err = C.takeError())
- return std::move(Err);
- Entry E;
- E.Kind = dwarf::DW_LLE_end_of_list;
- E.Offset = C.tell() - 1;
- LL.Entries.push_back(E);
*Offset = C.tell();
- return LL;
-}
-
-void DWARFDebugLoclists::parse(DataExtractor data, uint64_t Offset, uint64_t EndOffset, uint16_t Version) {
- IsLittleEndian = data.isLittleEndian();
- AddressSize = data.getAddressSize();
-
- while (Offset < EndOffset) {
- if (auto LL = parseOneLocationList(data, &Offset, Version))
- Locations.push_back(std::move(*LL));
- else {
- logAllUnhandledErrors(LL.takeError(), WithColor::error());
- return;
- }
- }
-}
-
-DWARFDebugLoclists::LocationList const *
-DWARFDebugLoclists::getLocationListAtOffset(uint64_t Offset) const {
- auto It = partition_point(
- Locations, [=](const LocationList &L) { return L.Offset < Offset; });
- if (It != Locations.end() && It->Offset == Offset)
- return &(*It);
- return nullptr;
+ return Error::success();
}
-void DWARFDebugLoclists::Entry::dump(raw_ostream &OS, uint64_t &BaseAddr,
- bool IsLittleEndian, unsigned AddressSize,
- const MCRegisterInfo *MRI, DWARFUnit *U,
- DIDumpOptions DumpOpts, unsigned Indent,
- size_t MaxEncodingStringLength) const {
- if (DumpOpts.Verbose) {
- OS << "\n";
- OS.indent(Indent);
- auto EncodingString = dwarf::LocListEncodingString(Kind);
- // Unsupported encodings should have been reported during parsing.
- assert(!EncodingString.empty() && "Unknown loclist entry encoding");
- OS << format("%s%*c", EncodingString.data(),
- MaxEncodingStringLength - EncodingString.size() + 1, '(');
- switch (Kind) {
- case dwarf::DW_LLE_startx_length:
- case dwarf::DW_LLE_start_length:
- case dwarf::DW_LLE_offset_pair:
- OS << format("0x%*.*" PRIx64 ", 0x%*.*" PRIx64, AddressSize * 2,
- AddressSize * 2, Value0, AddressSize * 2, AddressSize * 2,
- Value1);
- break;
- case dwarf::DW_LLE_base_addressx:
- case dwarf::DW_LLE_base_address:
- OS << format("0x%*.*" PRIx64, AddressSize * 2, AddressSize * 2,
- Value0);
- break;
- case dwarf::DW_LLE_end_of_list:
- break;
- }
- OS << ')';
- }
- auto PrintPrefix = [&] {
- OS << "\n";
- OS.indent(Indent);
- if (DumpOpts.Verbose)
- OS << format("%*s", MaxEncodingStringLength, (const char *)"=> ");
- };
- switch (Kind) {
- case dwarf::DW_LLE_startx_length:
- PrintPrefix();
- OS << "Addr idx " << Value0 << " (w/ length " << Value1 << "): ";
- break;
- case dwarf::DW_LLE_start_length:
- PrintPrefix();
- DWARFAddressRange(Value0, Value0 + Value1)
- .dump(OS, AddressSize, DumpOpts);
- OS << ": ";
+void DWARFDebugLoclists::dumpRawEntry(const DWARFLocationEntry &Entry,
+ raw_ostream &OS, unsigned Indent,
+ DIDumpOptions DumpOpts,
+ const DWARFObject &Obj) const {
+ size_t MaxEncodingStringLength = 0;
+#define HANDLE_DW_LLE(ID, NAME) \
+ MaxEncodingStringLength = std::max(MaxEncodingStringLength, \
+ dwarf::LocListEncodingString(ID).size());
+#include "llvm/BinaryFormat/Dwarf.def"
+
+ OS << "\n";
+ OS.indent(Indent);
+ StringRef EncodingString = dwarf::LocListEncodingString(Entry.Kind);
+ // Unsupported encodings should have been reported during parsing.
+ assert(!EncodingString.empty() && "Unknown loclist entry encoding");
+ OS << format("%-*s(", MaxEncodingStringLength, EncodingString.data());
+ unsigned FieldSize = 2 + 2 * Data.getAddressSize();
+ switch (Entry.Kind) {
+ case dwarf::DW_LLE_end_of_list:
+ case dwarf::DW_LLE_default_location:
break;
+ case dwarf::DW_LLE_startx_endx:
+ case dwarf::DW_LLE_startx_length:
case dwarf::DW_LLE_offset_pair:
- PrintPrefix();
- DWARFAddressRange(BaseAddr + Value0, BaseAddr + Value1)
- .dump(OS, AddressSize, DumpOpts);
- OS << ": ";
+ case dwarf::DW_LLE_start_end:
+ case dwarf::DW_LLE_start_length:
+ OS << format_hex(Entry.Value0, FieldSize) << ", "
+ << format_hex(Entry.Value1, FieldSize);
break;
case dwarf::DW_LLE_base_addressx:
- if (!DumpOpts.Verbose)
- return;
- break;
- case dwarf::DW_LLE_end_of_list:
- if (!DumpOpts.Verbose)
- return;
+ case dwarf::DW_LLE_base_address:
+ OS << format_hex(Entry.Value0, FieldSize);
break;
+ }
+ OS << ')';
+ switch (Entry.Kind) {
case dwarf::DW_LLE_base_address:
- BaseAddr = Value0;
- if (!DumpOpts.Verbose)
- return;
+ case dwarf::DW_LLE_start_end:
+ case dwarf::DW_LLE_start_length:
+ DWARFFormValue::dumpAddressSection(Obj, OS, DumpOpts, Entry.SectionIndex);
break;
default:
- llvm_unreachable("unreachable locations list kind");
+ break;
}
-
- dumpExpression(OS, Loc, IsLittleEndian, AddressSize, MRI, U);
-}
-void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr,
- bool IsLittleEndian,
- unsigned AddressSize,
- const MCRegisterInfo *MRI,
- DWARFUnit *U,
- DIDumpOptions DumpOpts,
- unsigned Indent) const {
- size_t MaxEncodingStringLength = 0;
- if (DumpOpts.Verbose)
- for (const auto &Entry : Entries)
- MaxEncodingStringLength =
- std::max(MaxEncodingStringLength,
- dwarf::LocListEncodingString(Entry.Kind).size());
-
- for (const Entry &E : Entries)
- E.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, U, DumpOpts, Indent,
- MaxEncodingStringLength);
}
-void DWARFDebugLoclists::dump(raw_ostream &OS, uint64_t BaseAddr,
- const MCRegisterInfo *MRI, DIDumpOptions DumpOpts,
- Optional<uint64_t> Offset) const {
- auto DumpLocationList = [&](const LocationList &L) {
- OS << format("0x%8.8" PRIx64 ": ", L.Offset);
- L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, nullptr, DumpOpts,
- /*Indent=*/12);
- OS << "\n";
- };
-
- if (Offset) {
- if (auto *L = getLocationListAtOffset(*Offset))
- DumpLocationList(*L);
+void DWARFDebugLoclists::dumpRange(uint64_t StartOffset, uint64_t Size,
+ raw_ostream &OS, const MCRegisterInfo *MRI,
+ const DWARFObject &Obj,
+ DIDumpOptions DumpOpts) {
+ if (!Data.isValidOffsetForDataOfSize(StartOffset, Size)) {
+ OS << "Invalid dump range\n";
return;
}
-
- for (const LocationList &L : Locations) {
- DumpLocationList(L);
- if (&L != &Locations.back())
- OS << '\n';
+ uint64_t Offset = StartOffset;
+ StringRef Separator;
+ bool CanContinue = true;
+ while (CanContinue && Offset < StartOffset + Size) {
+ OS << Separator;
+ Separator = "\n";
+
+ CanContinue = dumpLocationList(&Offset, OS, /*BaseAddr=*/None, MRI, Obj,
+ nullptr, DumpOpts, /*Indent=*/12);
+ OS << '\n';
}
}
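
The rewritten tables expose a single streaming primitive, visitLocationList,
and layer dumping and absolute-address interpretation on top of it. A sketch
of a client that gathers the raw entries of one list, assuming the post-patch
DWARFLocationTable interface (collectRawEntries is illustrative):

#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include "llvm/Support/Error.h"
#include <vector>

using namespace llvm;

// Collect the raw DW_LLE_* entries of the list starting at Offset, up to and
// including the terminating DW_LLE_end_of_list entry.
static Expected<std::vector<DWARFLocationEntry>>
collectRawEntries(const DWARFLocationTable &Table, uint64_t Offset) {
  std::vector<DWARFLocationEntry> Entries;
  if (Error Err = Table.visitLocationList(
          &Offset, [&](const DWARFLocationEntry &Entry) {
            Entries.push_back(Entry);
            return true; // returning false stops the walk early
          }))
    return std::move(Err);
  return Entries;
}
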
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
index 9a0e770aed3d..8cb259ebc622 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -17,36 +17,39 @@ using namespace dwarf;
void DWARFDebugMacro::dump(raw_ostream &OS) const {
unsigned IndLevel = 0;
- for (const Entry &E : Macros) {
- // There should not be DW_MACINFO_end_file when IndLevel is Zero. However,
- // this check handles the case of corrupted ".debug_macinfo" section.
- if (IndLevel > 0)
- IndLevel -= (E.Type == DW_MACINFO_end_file);
- // Print indentation.
- for (unsigned I = 0; I < IndLevel; I++)
- OS << " ";
- IndLevel += (E.Type == DW_MACINFO_start_file);
+ for (const auto &Macros : MacroLists) {
+ for (const Entry &E : Macros) {
+ // There should not be DW_MACINFO_end_file when IndLevel is Zero. However,
+ // this check handles the case of corrupted ".debug_macinfo" section.
+ if (IndLevel > 0)
+ IndLevel -= (E.Type == DW_MACINFO_end_file);
+ // Print indentation.
+ for (unsigned I = 0; I < IndLevel; I++)
+ OS << " ";
+ IndLevel += (E.Type == DW_MACINFO_start_file);
- WithColor(OS, HighlightColor::Macro).get() << MacinfoString(E.Type);
- switch (E.Type) {
- default:
- // Got a corrupted ".debug_macinfo" section (invalid macinfo type).
- break;
- case DW_MACINFO_define:
- case DW_MACINFO_undef:
- OS << " - lineno: " << E.Line;
- OS << " macro: " << E.MacroStr;
- break;
- case DW_MACINFO_start_file:
- OS << " - lineno: " << E.Line;
- OS << " filenum: " << E.File;
- break;
- case DW_MACINFO_end_file:
- break;
- case DW_MACINFO_vendor_ext:
- OS << " - constant: " << E.ExtConstant;
- OS << " string: " << E.ExtStr;
- break;
+ WithColor(OS, HighlightColor::Macro).get() << MacinfoString(E.Type);
+ switch (E.Type) {
+ default:
+ // Got a corrupted ".debug_macinfo" section (invalid macinfo type).
+ break;
+ case DW_MACINFO_define:
+ case DW_MACINFO_undef:
+ OS << " - lineno: " << E.Line;
+ OS << " macro: " << E.MacroStr;
+ break;
+ case DW_MACINFO_start_file:
+ OS << " - lineno: " << E.Line;
+ OS << " filenum: " << E.File;
+ break;
+ case DW_MACINFO_end_file:
+ break;
+ case DW_MACINFO_vendor_ext:
+ OS << " - constant: " << E.ExtConstant;
+ OS << " string: " << E.ExtStr;
+ break;
+ }
+ OS << "\n";
}
OS << "\n";
}
@@ -54,15 +57,21 @@ void DWARFDebugMacro::dump(raw_ostream &OS) const {
void DWARFDebugMacro::parse(DataExtractor data) {
uint64_t Offset = 0;
+ MacroList *M = nullptr;
while (data.isValidOffset(Offset)) {
+ if (!M) {
+ MacroLists.emplace_back();
+ M = &MacroLists.back();
+ }
// A macro list entry consists of:
- Entry E;
+ M->emplace_back();
+ Entry &E = M->back();
// 1. Macinfo type
E.Type = data.getULEB128(&Offset);
if (E.Type == 0) {
- // Reached end of ".debug_macinfo" section.
- return;
+ // Reached end of a ".debug_macinfo" section contribution.
+ continue;
}
switch (E.Type) {
@@ -70,7 +79,6 @@ void DWARFDebugMacro::parse(DataExtractor data) {
// Got a corrupted ".debug_macinfo" section (invalid macinfo type).
// Push the corrupted entry to the list and halt parsing.
E.Type = DW_MACINFO_invalid;
- Macros.push_back(E);
return;
case DW_MACINFO_define:
case DW_MACINFO_undef:
@@ -94,7 +102,5 @@ void DWARFDebugMacro::parse(DataExtractor data) {
E.ExtStr = data.getCStr(&Offset);
break;
}
-
- Macros.push_back(E);
}
}
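
parse() now records one macro list per ".debug_macinfo" contribution instead
of flattening everything into a single list, and dump() separates the lists
with blank lines. Usage is unchanged; a sketch, assuming MacinfoData already
holds the raw section bytes (dumpMacinfo is illustrative):

#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Parse and print every contribution in the section.
static void dumpMacinfo(StringRef MacinfoData, bool IsLittleEndian) {
  DWARFDebugMacro Macro;
  Macro.parse(DataExtractor(MacinfoData, IsLittleEndian, /*AddressSize=*/0));
  Macro.dump(outs());
}
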
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
index f6785b89e86d..9ae4c5b73ebe 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
@@ -114,12 +114,21 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint64_t End,
DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
llvm::Optional<object::SectionedAddress> BaseAddr, DWARFUnit &U) const {
+ return getAbsoluteRanges(BaseAddr, [&](uint32_t Index) {
+ return U.getAddrOffsetSectionItem(Index);
+ });
+}
+
+DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
+ Optional<object::SectionedAddress> BaseAddr,
+ function_ref<Optional<object::SectionedAddress>(uint32_t)>
+ LookupPooledAddress) const {
DWARFAddressRangesVector Res;
for (const RangeListEntry &RLE : Entries) {
if (RLE.EntryKind == dwarf::DW_RLE_end_of_list)
break;
if (RLE.EntryKind == dwarf::DW_RLE_base_addressx) {
- BaseAddr = U.getAddrOffsetSectionItem(RLE.Value0);
+ BaseAddr = LookupPooledAddress(RLE.Value0);
if (!BaseAddr)
BaseAddr = {RLE.Value0, -1ULL};
continue;
@@ -152,7 +161,7 @@ DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
E.HighPC = E.LowPC + RLE.Value1;
break;
case dwarf::DW_RLE_startx_length: {
- auto Start = U.getAddrOffsetSectionItem(RLE.Value0);
+ auto Start = LookupPooledAddress(RLE.Value0);
if (!Start)
Start = {0, -1ULL};
E.SectionIndex = Start->SectionIndex;
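
The new getAbsoluteRanges overload decouples range-list resolution from
DWARFUnit: any source of pooled addresses can be supplied as a callback. A
sketch that resolves a list with no .debug_addr pool available, in which case
the indexed DW_RLE_*x entries take the fallback values handled above
(resolveWithoutPool is illustrative):

#include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"

using namespace llvm;

// Resolve a range list without an address pool; indexed entries fall back
// to the placeholder values inside getAbsoluteRanges.
static DWARFAddressRangesVector
resolveWithoutPool(const DWARFDebugRnglist &List,
                   Optional<object::SectionedAddress> BaseAddr) {
  return List.getAbsoluteRanges(
      BaseAddr,
      [](uint32_t) -> Optional<object::SectionedAddress> { return None; });
}
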
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index cec194e8b6b3..c1dc3b68c6ab 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -62,16 +62,10 @@ static void dumpRanges(const DWARFObject &Obj, raw_ostream &OS,
if (!DumpOpts.ShowAddresses)
return;
- ArrayRef<SectionName> SectionNames;
- if (DumpOpts.Verbose)
- SectionNames = Obj.getSectionNames();
-
for (const DWARFAddressRange &R : Ranges) {
OS << '\n';
OS.indent(Indent);
- R.dump(OS, AddressSize);
-
- DWARFFormValue::dumpAddressSection(Obj, OS, DumpOpts, R.SectionIndex);
+ R.dump(OS, AddressSize, DumpOpts, &Obj);
}
}
@@ -79,7 +73,6 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
DWARFUnit *U, unsigned Indent,
DIDumpOptions DumpOpts) {
DWARFContext &Ctx = U->getContext();
- const DWARFObject &Obj = Ctx.getDWARFObj();
const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
if (FormValue.isFormClass(DWARFFormValue::FC_Block) ||
FormValue.isFormClass(DWARFFormValue::FC_Exprloc)) {
@@ -91,49 +84,24 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
return;
}
- FormValue.dump(OS, DumpOpts);
- const auto &DumpLL = [&](auto ExpectedLL) {
- if (ExpectedLL) {
- uint64_t BaseAddr = 0;
- if (Optional<object::SectionedAddress> BA = U->getBaseAddress())
- BaseAddr = BA->Address;
- auto LLDumpOpts = DumpOpts;
- LLDumpOpts.Verbose = false;
- ExpectedLL->dump(OS, BaseAddr, Ctx.isLittleEndian(), Obj.getAddressSize(),
- MRI, U, LLDumpOpts, Indent);
- } else {
- OS << '\n';
- OS.indent(Indent);
- OS << formatv("error extracting location list: {0}",
- fmt_consume(ExpectedLL.takeError()));
- }
- };
if (FormValue.isFormClass(DWARFFormValue::FC_SectionOffset)) {
uint64_t Offset = *FormValue.getAsSectionOffset();
- if (!U->isDWOUnit() && !U->getLocSection()->Data.empty()) {
- DWARFDebugLoc DebugLoc;
- DWARFDataExtractor Data(Obj, *U->getLocSection(), Ctx.isLittleEndian(),
- Obj.getAddressSize());
- DumpLL(DebugLoc.parseOneLocationList(Data, &Offset));
- return;
- }
- bool UseLocLists = !U->isDWOUnit();
- StringRef LoclistsSectionData =
- UseLocLists ? Obj.getLoclistsSection().Data : U->getLocSectionData();
+ if (FormValue.getForm() == DW_FORM_loclistx) {
+ FormValue.dump(OS, DumpOpts);
- if (!LoclistsSectionData.empty()) {
- DataExtractor Data(LoclistsSectionData, Ctx.isLittleEndian(),
- Obj.getAddressSize());
-
- // Old-style location list were used in DWARF v4 (.debug_loc.dwo section).
- // Modern locations list (.debug_loclists) are used starting from v5.
- // Ideally we should take the version from the .debug_loclists section
- // header, but using CU's version for simplicity.
- DumpLL(DWARFDebugLoclists::parseOneLocationList(
- Data, &Offset, UseLocLists ? U->getVersion() : 4));
+ if (auto LoclistOffset = U->getLoclistOffset(Offset))
+ Offset = *LoclistOffset;
+ else
+ return;
}
+ U->getLocationTable().dumpLocationList(&Offset, OS, U->getBaseAddress(),
+ MRI, Ctx.getDWARFObj(), U, DumpOpts,
+ Indent);
+ return;
}
+
+ FormValue.dump(OS, DumpOpts);
}
/// Dump the name encoded in the type tag.
@@ -311,7 +279,8 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
else
FormValue.dump(OS, DumpOpts);
}
- } else if (DWARFAttribute::mayHaveLocationDescription(Attr))
+ } else if (Form == dwarf::Form::DW_FORM_exprloc ||
+ DWARFAttribute::mayHaveLocationDescription(Attr))
dumpLocation(OS, FormValue, U, sizeof(BaseIndent) + Indent + 4, DumpOpts);
else
FormValue.dump(OS, DumpOpts);
@@ -441,6 +410,10 @@ Optional<uint64_t> DWARFDie::getRangesBaseAttribute() const {
return toSectionOffset(find({DW_AT_rnglists_base, DW_AT_GNU_ranges_base}));
}
+Optional<uint64_t> DWARFDie::getLocBaseAttribute() const {
+ return toSectionOffset(find(DW_AT_loclists_base));
+}
+
Optional<uint64_t> DWARFDie::getHighPC(uint64_t LowPC) const {
if (auto FormValue = find(DW_AT_high_pc)) {
if (auto Address = FormValue->getAsAddress()) {
@@ -516,6 +489,37 @@ bool DWARFDie::addressRangeContainsAddress(const uint64_t Address) const {
return false;
}
+Expected<DWARFLocationExpressionsVector>
+DWARFDie::getLocations(dwarf::Attribute Attr) const {
+ Optional<DWARFFormValue> Location = find(Attr);
+ if (!Location)
+ return createStringError(inconvertibleErrorCode(), "No %s",
+ dwarf::AttributeString(Attr).data());
+
+ if (Optional<uint64_t> Off = Location->getAsSectionOffset()) {
+ uint64_t Offset = *Off;
+
+ if (Location->getForm() == DW_FORM_loclistx) {
+ if (auto LoclistOffset = U->getLoclistOffset(Offset))
+ Offset = *LoclistOffset;
+ else
+ return createStringError(inconvertibleErrorCode(),
+ "Loclist table not found");
+ }
+ return U->findLoclistFromOffset(Offset);
+ }
+
+ if (Optional<ArrayRef<uint8_t>> Expr = Location->getAsBlock()) {
+ return DWARFLocationExpressionsVector{
+ DWARFLocationExpression{None, to_vector<4>(*Expr)}};
+ }
+
+ return createStringError(
+ inconvertibleErrorCode(), "Unsupported %s encoding: %s",
+ dwarf::AttributeString(Attr).data(),
+ dwarf::FormEncodingString(Location->getForm()).data());
+}
+
const char *DWARFDie::getSubroutineName(DINameKind Kind) const {
if (!isSubroutineDIE())
return nullptr;
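
DWARFDie::getLocations gives consumers a single entry point for location
attributes, covering inline exprloc blocks, plain section offsets, and
DW_FORM_loclistx indirection. A consumer sketch, assuming the post-patch API
(it also exercises the stream operator added in DWARFLocationExpression.cpp
below; printDieLocations is illustrative):

#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Print every location expression attached to DW_AT_location, regardless of
// how the attribute was encoded.
static void printDieLocations(const DWARFDie &Die) {
  Expected<DWARFLocationExpressionsVector> Locs =
      Die.getLocations(dwarf::DW_AT_location);
  if (!Locs) {
    consumeError(Locs.takeError());
    return;
  }
  for (const DWARFLocationExpression &Loc : *Locs)
    outs() << Loc << '\n';
}
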
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
index 5009b1b7b412..7d817d8a9925 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -93,6 +93,8 @@ static DescVector getDescriptions() {
Descriptions[DW_OP_implicit_value] =
Desc(Op::Dwarf3, Op::SizeLEB, Op::SizeBlock);
Descriptions[DW_OP_stack_value] = Desc(Op::Dwarf3);
+ Descriptions[DW_OP_WASM_location] =
+ Desc(Op::Dwarf4, Op::SizeLEB, Op::SignedSizeLEB);
Descriptions[DW_OP_GNU_push_tls_address] = Desc(Op::Dwarf3);
Descriptions[DW_OP_addrx] = Desc(Op::Dwarf4, Op::SizeLEB);
Descriptions[DW_OP_GNU_addr_index] = Desc(Op::Dwarf4, Op::SizeLEB);
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 26090638b34c..e97ae81345b8 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -312,6 +312,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
case DW_FORM_udata:
case DW_FORM_ref_udata:
case DW_FORM_rnglistx:
+ case DW_FORM_loclistx:
Value.uval = Data.getULEB128(OffsetPtr);
break;
case DW_FORM_string:
@@ -411,7 +412,7 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
if (A)
dumpSectionedAddress(AddrOS, DumpOpts, *A);
else
- OS << "<no .debug_addr section>";
+ OS << "<unresolved>";
break;
}
case DW_FORM_flag_present:
@@ -551,6 +552,10 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
OS << format("indexed (0x%x) rangelist = ", (uint32_t)UValue);
break;
+ case DW_FORM_loclistx:
+ OS << format("indexed (0x%x) loclist = ", (uint32_t)UValue);
+ break;
+
// Should be formatted to 64-bit for DWARF64.
case DW_FORM_sec_offset:
AddrOS << format("0x%08x", (uint32_t)UValue);
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFLocationExpression.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFLocationExpression.cpp
new file mode 100644
index 000000000000..1cf73a666778
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFLocationExpression.cpp
@@ -0,0 +1,19 @@
+//===- DWARFLocationExpression.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/FormatVariadic.h"
+
+using namespace llvm;
+
+raw_ostream &llvm::operator<<(raw_ostream &OS,
+ const DWARFLocationExpression &Loc) {
+ return OS << Loc.Range << ": "
+ << formatv("{0}", make_range(Loc.Expr.begin(), Loc.Expr.end()));
+}
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index a56402a707ad..7bb019466161 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -175,18 +175,37 @@ DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
const DWARFSection *AOS, const DWARFSection &LS, bool LE,
bool IsDWO, const DWARFUnitVector &UnitVector)
: Context(DC), InfoSection(Section), Header(Header), Abbrev(DA),
- RangeSection(RS), LocSection(LocSection), LineSection(LS),
- StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS),
- isLittleEndian(LE), IsDWO(IsDWO), UnitVector(UnitVector) {
+ RangeSection(RS), LineSection(LS), StringSection(SS),
+ StringOffsetSection(SOS), AddrOffsetSection(AOS), isLittleEndian(LE),
+ IsDWO(IsDWO), UnitVector(UnitVector) {
clear();
- // For split DWARF we only need to keep track of the location list section's
- // data (no relocations), and if we are reading a package file, we need to
- // adjust the location list data based on the index entries.
if (IsDWO) {
- LocSectionData = LocSection->Data;
+ // If we are reading a package file, we need to adjust the location list
+ // data based on the index entries.
+ StringRef Data = LocSection->Data;
if (auto *IndexEntry = Header.getIndexEntry())
if (const auto *C = IndexEntry->getOffset(DW_SECT_LOC))
- LocSectionData = LocSectionData.substr(C->Offset, C->Length);
+ Data = Data.substr(C->Offset, C->Length);
+
+ DWARFDataExtractor DWARFData =
+ Header.getVersion() >= 5
+ ? DWARFDataExtractor(Context.getDWARFObj(),
+ Context.getDWARFObj().getLoclistsDWOSection(),
+ isLittleEndian, getAddressByteSize())
+ : DWARFDataExtractor(Data, isLittleEndian, getAddressByteSize());
+ LocTable =
+ std::make_unique<DWARFDebugLoclists>(DWARFData, Header.getVersion());
+
+ } else if (Header.getVersion() >= 5) {
+ LocTable = std::make_unique<DWARFDebugLoclists>(
+ DWARFDataExtractor(Context.getDWARFObj(),
+ Context.getDWARFObj().getLoclistsSection(),
+ isLittleEndian, getAddressByteSize()),
+ Header.getVersion());
+ } else {
+ LocTable = std::make_unique<DWARFDebugLoc>(
+ DWARFDataExtractor(Context.getDWARFObj(), *LocSection, isLittleEndian,
+ getAddressByteSize()));
}
}
@@ -209,7 +228,9 @@ DWARFUnit::getAddrOffsetSectionItem(uint32_t Index) const {
if (I != R.end() && std::next(I) == R.end())
return (*I)->getAddrOffsetSectionItem(Index);
}
- uint64_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize();
+ if (!AddrOffsetSectionBase)
+ return None;
+ uint64_t Offset = *AddrOffsetSectionBase + Index * getAddressByteSize();
if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize())
return None;
DWARFDataExtractor DA(Context.getDWARFObj(), *AddrOffsetSection,
@@ -238,23 +259,26 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
const DWARFUnitIndex *Index,
const DWARFUnitIndex::Entry *Entry) {
Offset = *offset_ptr;
+ Error Err = Error::success();
IndexEntry = Entry;
if (!IndexEntry && Index)
IndexEntry = Index->getFromOffset(*offset_ptr);
- Length = debug_info.getRelocatedValue(4, offset_ptr);
+ Length = debug_info.getRelocatedValue(4, offset_ptr, nullptr, &Err);
FormParams.Format = DWARF32;
if (Length == dwarf::DW_LENGTH_DWARF64) {
- Length = debug_info.getU64(offset_ptr);
+ Length = debug_info.getU64(offset_ptr, &Err);
FormParams.Format = DWARF64;
}
- FormParams.Version = debug_info.getU16(offset_ptr);
+ FormParams.Version = debug_info.getU16(offset_ptr, &Err);
if (FormParams.Version >= 5) {
- UnitType = debug_info.getU8(offset_ptr);
- FormParams.AddrSize = debug_info.getU8(offset_ptr);
- AbbrOffset = debug_info.getRelocatedValue(FormParams.getDwarfOffsetByteSize(), offset_ptr);
+ UnitType = debug_info.getU8(offset_ptr, &Err);
+ FormParams.AddrSize = debug_info.getU8(offset_ptr, &Err);
+ AbbrOffset = debug_info.getRelocatedValue(
+ FormParams.getDwarfOffsetByteSize(), offset_ptr, nullptr, &Err);
} else {
- AbbrOffset = debug_info.getRelocatedValue(FormParams.getDwarfOffsetByteSize(), offset_ptr);
- FormParams.AddrSize = debug_info.getU8(offset_ptr);
+ AbbrOffset = debug_info.getRelocatedValue(
+ FormParams.getDwarfOffsetByteSize(), offset_ptr, nullptr, &Err);
+ FormParams.AddrSize = debug_info.getU8(offset_ptr, &Err);
// Fake a unit type based on the section type. This isn't perfect,
// but distinguishing compile and type units is generally enough.
if (SectionKind == DW_SECT_TYPES)
@@ -274,11 +298,14 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
AbbrOffset = AbbrEntry->Offset;
}
if (isTypeUnit()) {
- TypeHash = debug_info.getU64(offset_ptr);
- TypeOffset =
- debug_info.getUnsigned(offset_ptr, FormParams.getDwarfOffsetByteSize());
+ TypeHash = debug_info.getU64(offset_ptr, &Err);
+ TypeOffset = debug_info.getUnsigned(
+ offset_ptr, FormParams.getDwarfOffsetByteSize(), &Err);
} else if (UnitType == DW_UT_split_compile || UnitType == DW_UT_skeleton)
- DWOId = debug_info.getU64(offset_ptr);
+ DWOId = debug_info.getU64(offset_ptr, &Err);
+
+ if (errorToBool(std::move(Err)))
+ return false;
// Header fields all parsed, capture the size of this unit header.
assert(*offset_ptr - Offset <= 255 && "unexpected header size");
@@ -305,8 +332,9 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
// Parse the rangelist table header, including the optional array of offsets
// following it (DWARF v5 and later).
-static Expected<DWARFDebugRnglistTable>
-parseRngListTableHeader(DWARFDataExtractor &DA, uint64_t Offset,
+template<typename ListTableType>
+static Expected<ListTableType>
+parseListTableHeader(DWARFDataExtractor &DA, uint64_t Offset,
DwarfFormat Format) {
// We are expected to be called with Offset 0 or pointing just past the table
// header. Correct Offset in the latter case so that it points to the start
@@ -314,12 +342,12 @@ parseRngListTableHeader(DWARFDataExtractor &DA, uint64_t Offset,
if (Offset > 0) {
uint64_t HeaderSize = DWARFListTableHeader::getHeaderSize(Format);
if (Offset < HeaderSize)
- return createStringError(errc::invalid_argument, "Did not detect a valid"
- " range list table with base = 0x%" PRIx64 "\n",
+ return createStringError(errc::invalid_argument, "did not detect a valid"
+ " list table with base = 0x%" PRIx64 "\n",
Offset);
Offset -= HeaderSize;
}
- llvm::DWARFDebugRnglistTable Table;
+ ListTableType Table;
if (Error E = Table.extractHeaderAndOffsets(DA, &Offset))
return std::move(E);
return Table;
@@ -339,7 +367,8 @@ void DWARFUnit::clear() {
Abbrevs = nullptr;
BaseAddr.reset();
RangeSectionBase = 0;
- AddrOffsetSectionBase = 0;
+ LocSectionBase = 0;
+ AddrOffsetSectionBase = None;
clearDIEs(false);
DWO.reset();
}
@@ -427,13 +456,15 @@ Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
if (Optional<uint64_t> DWOId = toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
Header.setDWOId(*DWOId);
if (!IsDWO) {
- assert(AddrOffsetSectionBase == 0);
+ assert(AddrOffsetSectionBase == None);
assert(RangeSectionBase == 0);
- AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base), 0);
+ assert(LocSectionBase == 0);
+ AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base));
if (!AddrOffsetSectionBase)
AddrOffsetSectionBase =
- toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0);
+ toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base));
RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
+ LocSectionBase = toSectionOffset(UnitDie.find(DW_AT_loclists_base), 0);
}
// In general, in DWARF v5 and beyond we derive the start of the unit's
@@ -471,8 +502,8 @@ Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
// extracted lazily.
DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection,
isLittleEndian, 0);
- auto TableOrError = parseRngListTableHeader(RangesDA, RangeSectionBase,
- Header.getFormat());
+ auto TableOrError = parseListTableHeader<DWARFDebugRnglistTable>(
+ RangesDA, RangeSectionBase, Header.getFormat());
if (!TableOrError)
return createStringError(errc::invalid_argument,
"parsing a range list table: " +
@@ -485,6 +516,37 @@ Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
if (IsDWO && RngListTable)
RangeSectionBase = RngListTable->getHeaderSize();
}
+
+ // In a split DWARF unit, there is no DW_AT_loclists_base attribute.
+ // Set LocSectionBase to point past the table header.
+ if (IsDWO)
+ setLocSection(&Context.getDWARFObj().getLoclistsDWOSection(),
+ DWARFListTableHeader::getHeaderSize(Header.getFormat()));
+ else
+ setLocSection(&Context.getDWARFObj().getLoclistsSection(),
+ toSectionOffset(UnitDie.find(DW_AT_loclists_base), 0));
+
+ if (LocSection->Data.size()) {
+ if (IsDWO)
+ LoclistTableHeader.emplace(".debug_loclists.dwo", "locations");
+ else
+ LoclistTableHeader.emplace(".debug_loclists", "locations");
+
+ uint64_t HeaderSize = DWARFListTableHeader::getHeaderSize(Header.getFormat());
+ uint64_t Offset = getLocSectionBase();
+ DWARFDataExtractor Data(Context.getDWARFObj(), *LocSection,
+ isLittleEndian, getAddressByteSize());
+ if (Offset < HeaderSize)
+ return createStringError(errc::invalid_argument,
+ "did not detect a valid"
+ " list table with base = 0x%" PRIx64 "\n",
+ Offset);
+ Offset -= HeaderSize;
+ if (Error E = LoclistTableHeader->extract(Data, &Offset))
+ return createStringError(errc::invalid_argument,
+ "parsing a loclist table: " +
+ toString(std::move(E)));
+ }
}
// Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
@@ -500,7 +562,9 @@ bool DWARFUnit::parseDWO() {
DWARFDie UnitDie = getUnitDIE();
if (!UnitDie)
return false;
- auto DWOFileName = dwarf::toString(UnitDie.find(DW_AT_GNU_dwo_name));
+ auto DWOFileName = getVersion() >= 5
+ ? dwarf::toString(UnitDie.find(DW_AT_dwo_name))
+ : dwarf::toString(UnitDie.find(DW_AT_GNU_dwo_name));
if (!DWOFileName)
return false;
auto CompilationDir = dwarf::toString(UnitDie.find(DW_AT_comp_dir));
@@ -522,13 +586,14 @@ bool DWARFUnit::parseDWO() {
return false;
DWO = std::shared_ptr<DWARFCompileUnit>(std::move(DWOContext), DWOCU);
// Share .debug_addr and .debug_ranges section with compile unit in .dwo
- DWO->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase);
+ if (AddrOffsetSectionBase)
+ DWO->setAddrOffsetSection(AddrOffsetSection, *AddrOffsetSectionBase);
if (getVersion() >= 5) {
DWO->setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0);
DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection,
isLittleEndian, 0);
- if (auto TableOrError = parseRngListTableHeader(RangesDA, RangeSectionBase,
- Header.getFormat()))
+ if (auto TableOrError = parseListTableHeader<DWARFDebugRnglistTable>(
+ RangesDA, RangeSectionBase, Header.getFormat()))
DWO->RngListTable = TableOrError.get();
else
WithColor::error() << "parsing a range list table: "
@@ -575,7 +640,7 @@ DWARFUnit::findRnglistFromOffset(uint64_t Offset) {
Expected<DWARFAddressRangesVector>
DWARFUnit::findRnglistFromIndex(uint32_t Index) {
if (auto Offset = getRnglistOffset(Index))
- return findRnglistFromOffset(*Offset + RangeSectionBase);
+ return findRnglistFromOffset(*Offset);
if (RngListTable)
return createStringError(errc::invalid_argument,
@@ -599,6 +664,30 @@ Expected<DWARFAddressRangesVector> DWARFUnit::collectAddressRanges() {
return *CUDIERangesOrError;
}
+Expected<DWARFLocationExpressionsVector>
+DWARFUnit::findLoclistFromOffset(uint64_t Offset) {
+ DWARFLocationExpressionsVector Result;
+
+ Error InterpretationError = Error::success();
+
+ Error ParseError = getLocationTable().visitAbsoluteLocationList(
+ Offset, getBaseAddress(),
+ [this](uint32_t Index) { return getAddrOffsetSectionItem(Index); },
+ [&](Expected<DWARFLocationExpression> L) {
+ if (L)
+ Result.push_back(std::move(*L));
+ else
+ InterpretationError =
+ joinErrors(L.takeError(), std::move(InterpretationError));
+ return !InterpretationError;
+ });
+
+ if (ParseError || InterpretationError)
+ return joinErrors(std::move(ParseError), std::move(InterpretationError));
+
+ return Result;
+}
+
void DWARFUnit::updateAddressDieMap(DWARFDie Die) {
if (Die.isSubroutineDIE()) {
auto DIERangesOrError = Die.getAddressRanges();
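
DWARFUnit now owns a polymorphic location table: DWARFDebugLoclists for v5
and split units, DWARFDebugLoc otherwise, so clients no longer choose a
parser by hand. A sketch that resolves one list to absolute expressions
through the unit, assuming the interfaces introduced above
(dumpAbsoluteLocations is illustrative):

#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Walk the location list at Offset, resolving address indices through the
// unit's .debug_addr contribution, and print each absolute expression.
static Error dumpAbsoluteLocations(DWARFUnit &U, uint64_t Offset) {
  return U.getLocationTable().visitAbsoluteLocationList(
      Offset, U.getBaseAddress(),
      [&U](uint32_t Index) { return U.getAddrOffsetSectionItem(Index); },
      [](Expected<DWARFLocationExpression> Loc) {
        if (!Loc) {
          consumeError(Loc.takeError());
          return false; // stop at the first interpretation error
        }
        outs() << *Loc << '\n';
        return true;
      });
}
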
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index bf499b6ee092..1fd6c1d7d282 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -196,6 +196,14 @@ unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
NumUnitErrors++;
}
+ // According to DWARF Debugging Information Format Version 5,
+ // 3.1.2 Skeleton Compilation Unit Entries:
+ // "A skeleton compilation unit has no children."
+ if (Die.getTag() == dwarf::DW_TAG_skeleton_unit && Die.hasChildren()) {
+ error() << "Skeleton compilation unit has children.\n";
+ NumUnitErrors++;
+ }
+
DieRangeInfo RI;
NumUnitErrors += verifyDieRanges(Die, RI);
@@ -468,27 +476,21 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
ReportError("DIE has invalid DW_AT_stmt_list encoding:");
break;
case DW_AT_location: {
- auto VerifyLocationExpr = [&](ArrayRef<uint8_t> D) {
+ if (Expected<std::vector<DWARFLocationExpression>> Loc =
+ Die.getLocations(DW_AT_location)) {
DWARFUnit *U = Die.getDwarfUnit();
- DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), 0);
- DWARFExpression Expression(Data, U->getVersion(),
- U->getAddressByteSize());
- bool Error = llvm::any_of(Expression, [](DWARFExpression::Operation &Op) {
- return Op.isError();
- });
- if (Error || !Expression.verify(U))
- ReportError("DIE contains invalid DWARF expression:");
- };
- if (Optional<ArrayRef<uint8_t>> Expr = AttrValue.Value.getAsBlock()) {
- // Verify inlined location.
- VerifyLocationExpr(*Expr);
- } else if (auto LocOffset = AttrValue.Value.getAsSectionOffset()) {
- // Verify location list.
- if (auto DebugLoc = DCtx.getDebugLoc())
- if (auto LocList = DebugLoc->getLocationListAtOffset(*LocOffset))
- for (const auto &Entry : LocList->Entries)
- VerifyLocationExpr(Entry.Loc);
- }
+ for (const auto &Entry : *Loc) {
+ DataExtractor Data(toStringRef(Entry.Expr), DCtx.isLittleEndian(), 0);
+ DWARFExpression Expression(Data, U->getVersion(),
+ U->getAddressByteSize());
+ bool Error = any_of(Expression, [](DWARFExpression::Operation &Op) {
+ return Op.isError();
+ });
+ if (Error || !Expression.verify(U))
+ ReportError("DIE contains invalid DWARF expression:");
+ }
+ } else
+ ReportError(toString(Loc.takeError()));
break;
}
case DW_AT_specification:
@@ -640,7 +642,7 @@ unsigned DWARFVerifier::verifyDebugInfoReferences() {
// getting the DIE by offset and emitting an error
OS << "Verifying .debug_info references...\n";
unsigned NumErrors = 0;
- for (const std::pair<uint64_t, std::set<uint64_t>> &Pair :
+ for (const std::pair<const uint64_t, std::set<uint64_t>> &Pair :
ReferenceToDIEOffsets) {
if (DCtx.getDIEForOffset(Pair.first))
continue;
@@ -968,7 +970,7 @@ DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
constexpr BucketInfo(uint32_t Bucket, uint32_t Index)
: Bucket(Bucket), Index(Index) {}
- bool operator<(const BucketInfo &RHS) const { return Index < RHS.Index; };
+ bool operator<(const BucketInfo &RHS) const { return Index < RHS.Index; }
};
uint32_t NumErrors = 0;
@@ -1278,36 +1280,24 @@ unsigned DWARFVerifier::verifyNameIndexEntries(
}
static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) {
- Optional<DWARFFormValue> Location = Die.findRecursively(DW_AT_location);
- if (!Location)
+ Expected<std::vector<DWARFLocationExpression>> Loc =
+ Die.getLocations(DW_AT_location);
+ if (!Loc) {
+ consumeError(Loc.takeError());
return false;
-
- auto ContainsInterestingOperators = [&](ArrayRef<uint8_t> D) {
- DWARFUnit *U = Die.getDwarfUnit();
- DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), U->getAddressByteSize());
+ }
+ DWARFUnit *U = Die.getDwarfUnit();
+ for (const auto &Entry : *Loc) {
+ DataExtractor Data(toStringRef(Entry.Expr), DCtx.isLittleEndian(),
+ U->getAddressByteSize());
DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize());
- return any_of(Expression, [](DWARFExpression::Operation &Op) {
+ bool IsInteresting = any_of(Expression, [](DWARFExpression::Operation &Op) {
return !Op.isError() && (Op.getCode() == DW_OP_addr ||
Op.getCode() == DW_OP_form_tls_address ||
Op.getCode() == DW_OP_GNU_push_tls_address);
});
- };
-
- if (Optional<ArrayRef<uint8_t>> Expr = Location->getAsBlock()) {
- // Inlined location.
- if (ContainsInterestingOperators(*Expr))
+ if (IsInteresting)
return true;
- } else if (Optional<uint64_t> Offset = Location->getAsSectionOffset()) {
- // Location list.
- if (const DWARFDebugLoc *DebugLoc = DCtx.getDebugLoc()) {
- if (const DWARFDebugLoc::LocationList *LocList =
- DebugLoc->getLocationListAtOffset(*Offset)) {
- if (any_of(LocList->Entries, [&](const DWARFDebugLoc::Entry &E) {
- return ContainsInterestingOperators(E.Loc);
- }))
- return true;
- }
- }
}
return false;
}
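
Both verifier paths now share one shape: fetch the expressions with
getLocations, then scan the decoded DWARFExpression for error operations.
The per-expression check in isolation, as a sketch (decodesCleanly is
illustrative):

#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/Support/DataExtractor.h"

using namespace llvm;

// True if every operation in the expression decodes without error.
static bool decodesCleanly(DataExtractor Data, uint16_t Version,
                           uint8_t AddrSize) {
  DWARFExpression Expression(Data, Version, AddrSize);
  return none_of(Expression,
                 [](DWARFExpression::Operation &Op) { return Op.isError(); });
}
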
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index ad022fec9e32..6731a8b27443 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/Support/DataExtractor.h"
@@ -114,7 +115,7 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
llvm::Error err = OptLineTable->encode(O, Range.Start);
if (err)
return std::move(err);
- const off_t Length = O.tell() - StartOffset;
+ const auto Length = O.tell() - StartOffset;
if (Length > UINT32_MAX)
return createStringError(std::errc::invalid_argument,
"LineTable length is greater than UINT32_MAX");
@@ -132,7 +133,7 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
llvm::Error err = Inline->encode(O, Range.Start);
if (err)
return std::move(err);
- const off_t Length = O.tell() - StartOffset;
+ const auto Length = O.tell() - StartOffset;
if (Length > UINT32_MAX)
return createStringError(std::errc::invalid_argument,
"InlineInfo length is greater than UINT32_MAX");
@@ -145,3 +146,104 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
O.writeU32(0);
return FuncInfoOffset;
}
+
+
+llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
+ const GsymReader &GR,
+ uint64_t FuncAddr,
+ uint64_t Addr) {
+ LookupResult LR;
+ LR.LookupAddr = Addr;
+ LR.FuncRange.Start = FuncAddr;
+ uint64_t Offset = 0;
+ LR.FuncRange.End = FuncAddr + Data.getU32(&Offset);
+ uint32_t NameOffset = Data.getU32(&Offset);
+ // The "lookup" functions doesn't report errors as accurately as the "decode"
+ // function as it is meant to be fast. For more accurage errors we could call
+ // "decode".
+ if (!Data.isValidOffset(Offset))
+ return createStringError(std::errc::io_error,
+ "FunctionInfo data is truncated");
+  // This function will be called with the result of a binary search of the
+  // address table; we must still make sure the address does not fall into a
+  // gap between functions or after the last function.
+ if (Addr >= LR.FuncRange.End)
+ return createStringError(std::errc::io_error,
+ "address 0x%" PRIx64 " is not in GSYM", Addr);
+
+ if (NameOffset == 0)
+ return createStringError(std::errc::io_error,
+ "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x00000000",
+ Offset - 4);
+ LR.FuncName = GR.getString(NameOffset);
+ bool Done = false;
+ Optional<LineEntry> LineEntry;
+ Optional<DataExtractor> InlineInfoData;
+ while (!Done) {
+ if (!Data.isValidOffsetForDataOfSize(Offset, 8))
+ return createStringError(std::errc::io_error,
+ "FunctionInfo data is truncated");
+ const uint32_t IT = Data.getU32(&Offset);
+ const uint32_t InfoLength = Data.getU32(&Offset);
+ const StringRef InfoBytes = Data.getData().substr(Offset, InfoLength);
+ if (InfoLength != InfoBytes.size())
+ return createStringError(std::errc::io_error,
+ "FunctionInfo data is truncated");
+ DataExtractor InfoData(InfoBytes, Data.isLittleEndian(),
+ Data.getAddressSize());
+ switch (IT) {
+ case InfoType::EndOfList:
+ Done = true;
+ break;
+
+ case InfoType::LineTableInfo:
+ if (auto ExpectedLE = LineTable::lookup(InfoData, FuncAddr, Addr))
+ LineEntry = ExpectedLE.get();
+ else
+ return ExpectedLE.takeError();
+ break;
+
+ case InfoType::InlineInfo:
+ // We will parse the inline info after our line table, but only if
+ // we have a line entry.
+ InlineInfoData = InfoData;
+ break;
+
+ default:
+ break;
+ }
+ Offset += InfoLength;
+ }
+
+ if (!LineEntry) {
+ // We don't have a valid line entry for our address, fill in our source
+ // location as best we can and return.
+ SourceLocation SrcLoc;
+ SrcLoc.Name = LR.FuncName;
+ LR.Locations.push_back(SrcLoc);
+ return LR;
+ }
+
+ Optional<FileEntry> LineEntryFile = GR.getFile(LineEntry->File);
+ if (!LineEntryFile)
+ return createStringError(std::errc::invalid_argument,
+ "failed to extract file[%" PRIu32 "]",
+ LineEntry->File);
+
+ SourceLocation SrcLoc;
+ SrcLoc.Name = LR.FuncName;
+ SrcLoc.Dir = GR.getString(LineEntryFile->Dir);
+ SrcLoc.Base = GR.getString(LineEntryFile->Base);
+ SrcLoc.Line = LineEntry->Line;
+ LR.Locations.push_back(SrcLoc);
+ // If we don't have inline information, we are done.
+ if (!InlineInfoData)
+ return LR;
+ // We have inline information. Try to augment the lookup result with this
+ // data.
+ llvm::Error Err = InlineInfo::lookup(GR, *InlineInfoData, FuncAddr, Addr,
+ LR.Locations);
+ if (Err)
+ return std::move(Err);
+ return LR;
+}
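The lookup loop above implies the following FunctionInfo framing,
reconstructed from the reads it performs (a sketch, not a format
specification):

    // uint32_t FuncSize;    // FuncRange = [FuncAddr, FuncAddr + FuncSize)
    // uint32_t NameOffset;  // string-table offset; 0 is invalid
    // repeated {
    //   uint32_t InfoType;    // EndOfList, LineTableInfo, InlineInfo, ...
    //   uint32_t InfoLength;  // payload size in bytes
    //   uint8_t  Payload[InfoLength];
    // } until InfoType == InfoType::EndOfList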
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 1b448cf80b70..b4f3f2052ae7 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -1,9 +1,8 @@
//===- GsymReader.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -263,3 +262,18 @@ llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
"failed to extract address[%" PRIu64 "]",
*AddressIndex);
}
+
+llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
+ Expected<uint64_t> AddressIndex = getAddressIndex(Addr);
+ if (!AddressIndex)
+ return AddressIndex.takeError();
+ // Address info offsets size should have been checked in parse().
+ assert(*AddressIndex < AddrInfoOffsets.size());
+ auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex];
+ DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4);
+ if (Optional<uint64_t> OptAddr = getAddress(*AddressIndex))
+ return FunctionInfo::lookup(Data, *this, *OptAddr, Addr);
+ return createStringError(std::errc::invalid_argument,
+ "failed to extract address[%" PRIu64 "]",
+ *AddressIndex);
+}
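A minimal usage sketch for the new entry point, assuming GR is an
already-constructed GsymReader and the address is illustrative:

    uint64_t Addr = 0x401000;
    if (Expected<LookupResult> LR = GR.lookup(Addr))
      outs() << *LR; // printed by the LookupResult operator<< added below
    else
      logAllUnhandledErrors(LR.takeError(), errs(), "lookup: ");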
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
index 32ed2c709575..1b8c974fdcd2 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -1,14 +1,14 @@
//===- InlineInfo.cpp -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/GSYM/FileEntry.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/Support/DataExtractor.h"
#include <algorithm>
@@ -60,6 +60,108 @@ llvm::Optional<InlineInfo::InlineArray> InlineInfo::getInlineStack(uint64_t Addr
return llvm::None;
}
+/// Skip an InlineInfo object in the specified data at the specified offset.
+///
+/// Used during the InlineInfo::lookup() call to quickly skip child InlineInfo
+/// objects whose address ranges (and their children's) don't contain the
+/// lookup address. This avoids allocations by not appending child InlineInfo
+/// objects to the InlineInfo::Children array.
+///
+/// \param Data The binary stream to read the data from.
+///
+/// \param Offset The byte offset within \a Data.
+///
+/// \param SkippedRanges If true, address ranges have already been skipped.
+
+static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges) {
+ if (!SkippedRanges) {
+ if (AddressRanges::skip(Data, Offset) == 0)
+ return false;
+ }
+ bool HasChildren = Data.getU8(&Offset) != 0;
+ Data.getU32(&Offset); // Skip Inline.Name.
+ Data.getULEB128(&Offset); // Skip Inline.CallFile.
+ Data.getULEB128(&Offset); // Skip Inline.CallLine.
+ if (HasChildren) {
+ while (skip(Data, Offset, false /* SkippedRanges */))
+ /* Do nothing */;
+ }
+ // We skipped a valid InlineInfo.
+ return true;
+}
+
+/// A lookup helper function.
+///
+/// Used during the InlineInfo::lookup() call to parse an InlineInfo object
+/// only if the address falls within that object. This avoids
+/// allocations by not appending child InlineInfo objects to the
+/// InlineInfo::Children array and also skips any InlineInfo objects that do
+/// not contain the address we are looking up.
+///
+/// \param Data The binary stream to read the data from.
+///
+/// \param Offset The byte offset within \a Data.
+///
+/// \param BaseAddr The address that the relative address range offsets are
+/// relative to.
+
+static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset,
+ uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs,
+ llvm::Error &Err) {
+ InlineInfo Inline;
+ Inline.Ranges.decode(Data, BaseAddr, Offset);
+ if (Inline.Ranges.empty())
+ return true;
+ // Check if the address is contained within the inline information, and if
+ // not, quickly skip this InlineInfo object and all its children.
+ if (!Inline.Ranges.contains(Addr)) {
+ skip(Data, Offset, true /* SkippedRanges */);
+ return false;
+ }
+
+ // The address range is contained within this InlineInfo, add the source
+ // location for this InlineInfo and any children that contain the address.
+ bool HasChildren = Data.getU8(&Offset) != 0;
+ Inline.Name = Data.getU32(&Offset);
+ Inline.CallFile = (uint32_t)Data.getULEB128(&Offset);
+ Inline.CallLine = (uint32_t)Data.getULEB128(&Offset);
+ if (HasChildren) {
+ // Child address ranges are encoded relative to the first address in the
+ // parent InlineInfo object.
+ const auto ChildBaseAddr = Inline.Ranges[0].Start;
+ bool Done = false;
+ while (!Done)
+ Done = lookup(GR, Data, Offset, ChildBaseAddr, Addr, SrcLocs, Err);
+ }
+
+ Optional<FileEntry> CallFile = GR.getFile(Inline.CallFile);
+ if (!CallFile) {
+ Err = createStringError(std::errc::invalid_argument,
+ "failed to extract file[%" PRIu32 "]",
+ Inline.CallFile);
+ return false;
+ }
+
+ SourceLocation SrcLoc;
+ SrcLoc.Name = SrcLocs.back().Name;
+ SrcLoc.Dir = GR.getString(CallFile->Dir);
+ SrcLoc.Base = GR.getString(CallFile->Base);
+ SrcLoc.Line = Inline.CallLine;
+ SrcLocs.back().Name = GR.getString(Inline.Name);
+ SrcLocs.push_back(SrcLoc);
+ return true;
+}
+
+llvm::Error InlineInfo::lookup(const GsymReader &GR, DataExtractor &Data,
+ uint64_t BaseAddr, uint64_t Addr,
+ SourceLocations &SrcLocs) {
+ // Call our recursive helper function starting at offset zero.
+ uint64_t Offset = 0;
+ llvm::Error Err = Error::success();
+ ::lookup(GR, Data, Offset, BaseAddr, Addr, SrcLocs, Err);
+ return Err;
+}
+
/// Decode an InlineInfo in Data at the specified offset.
///
/// A local helper function to decode InlineInfo objects. This function is
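Reconstructed from the reads in skip() and the lookup helper above (a sketch,
not a format specification), each encoded InlineInfo record looks like:

    // AddressRanges Ranges;  // an empty range list terminates a sibling chain
    // uint8_t  HasChildren;  // non-zero if child records follow
    // uint32_t Name;         // string-table offset
    // ULEB128  CallFile;
    // ULEB128  CallLine;
    // [child records ...]    // offsets encoded relative to Ranges[0].Start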
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LineTable.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LineTable.cpp
index 824c0041be9f..a49a3ba9bf2a 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LineTable.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LineTable.cpp
@@ -262,8 +262,8 @@ llvm::Expected<LineTable> LineTable::decode(DataExtractor &Data,
// Parse the line table on the fly and find the row we are looking for.
// We will need to determine if we need to cache the line table by calling
// LineTable::parseAllEntries(...) or just call this function each time.
-// There is a CPU vs memory tradeoff we will need to determine.
-LineEntry LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) {
+// There is a CPU vs memory tradeoff we will need to determine.
+Expected<LineEntry> LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) {
LineEntry Result;
llvm::Error Err = parse(Data, BaseAddr,
[Addr, &Result](const LineEntry &Row) -> bool {
@@ -277,7 +277,13 @@ LineEntry LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Add
}
return true; // Keep parsing till we find the right row.
});
- return Result;
+ if (Err)
+ return std::move(Err);
+ if (Result.isValid())
+ return Result;
+ return createStringError(std::errc::invalid_argument,
+ "address 0x%" PRIx64 " is not in the line table",
+ Addr);
}
raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LineTable &LT) {
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
new file mode 100644
index 000000000000..c54b166b2887
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
@@ -0,0 +1,69 @@
+//===- LookupResult.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <ciso646>
+
+using namespace llvm;
+using namespace gsym;
+
+std::string LookupResult::getSourceFile(uint32_t Index) const {
+ std::string Fullpath;
+ if (Index < Locations.size()) {
+ if (!Locations[Index].Dir.empty()) {
+ if (Locations[Index].Base.empty()) {
+ Fullpath = Locations[Index].Dir;
+ } else {
+ llvm::SmallString<64> Storage;
+ llvm::sys::path::append(Storage, Locations[Index].Dir,
+ Locations[Index].Base);
+ Fullpath.assign(Storage.begin(), Storage.end());
+ }
+ } else if (!Locations[Index].Base.empty())
+ Fullpath = Locations[Index].Base;
+ }
+ return Fullpath;
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const SourceLocation &SL) {
+ OS << SL.Name << " @ ";
+ if (!SL.Dir.empty()) {
+ OS << SL.Dir;
+ if (SL.Dir.contains('\\') and not SL.Dir.contains('/'))
+ OS << '\\';
+ else
+ OS << '/';
+ }
+ if (SL.Base.empty())
+ OS << "<invalid-file>";
+ else
+ OS << SL.Base;
+ OS << ':' << SL.Line;
+ return OS;
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LookupResult &LR) {
+ OS << HEX64(LR.LookupAddr) << ": ";
+ auto NumLocations = LR.Locations.size();
+ for (size_t I = 0; I < NumLocations; ++I) {
+ if (I > 0) {
+ OS << '\n';
+ OS.indent(20);
+ }
+ const bool IsInlined = I + 1 != NumLocations;
+ OS << LR.Locations[I];
+ if (IsInlined)
+ OS << " [inlined]";
+ }
+ OS << '\n';
+ return OS;
+}
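Given the two operators above, a two-frame lookup prints along these lines
(addresses, names, and paths are illustrative; the address width comes from
the HEX64 formatter):

    // 0x0000000000401000: inlined_callee @ /src/inline.h:12 [inlined]
    //                     caller @ /src/main.c:42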
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/Range.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/Range.cpp
index 19ab700fdd57..f78101e49bf8 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/Range.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/Range.cpp
@@ -100,3 +100,15 @@ void AddressRanges::decode(DataExtractor &Data, uint64_t BaseAddr,
for (auto &Range : Ranges)
Range.decode(Data, BaseAddr, Offset);
}
+
+void AddressRange::skip(DataExtractor &Data, uint64_t &Offset) {
+ Data.getULEB128(&Offset);
+ Data.getULEB128(&Offset);
+}
+
+uint64_t AddressRanges::skip(DataExtractor &Data, uint64_t &Offset) {
+ uint64_t NumRanges = Data.getULEB128(&Offset);
+  for (uint64_t I = 0; I < NumRanges; ++I)
+ AddressRange::skip(Data, Offset);
+ return NumRanges;
+}
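A worked example of what the skip routines walk, assuming each range is
encoded as two ULEB128 values (taken here to be offset and size):

    // NumRanges              -> ULEB128 2
    // Range 0: +0x10, 0x20   -> ULEB128 0x10, ULEB128 0x20
    // Range 1: +0x40, 0x08   -> ULEB128 0x40, ULEB128 0x08
    // AddressRanges::skip() advances Offset past all five values, returns 2.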
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
index 5095efcdee3c..9755f2ca3bdc 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
@@ -18,13 +18,6 @@ using namespace llvm;
using namespace llvm::pdb;
using namespace llvm::support;
-DbiModuleDescriptor::DbiModuleDescriptor() = default;
-
-DbiModuleDescriptor::DbiModuleDescriptor(const DbiModuleDescriptor &Info) =
- default;
-
-DbiModuleDescriptor::~DbiModuleDescriptor() = default;
-
Error DbiModuleDescriptor::initialize(BinaryStreamRef Stream,
DbiModuleDescriptor &Info) {
BinaryStreamReader Reader(Stream);
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
index b1a80cbc4580..2f3a2500c293 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -118,21 +118,32 @@ DIPrinter &DIPrinter::operator<<(const DIGlobal &Global) {
}
DIPrinter &DIPrinter::operator<<(const DILocal &Local) {
- OS << Local.FunctionName << '\n';
- OS << Local.Name << '\n';
+ if (Local.FunctionName.empty())
+ OS << "??\n";
+ else
+ OS << Local.FunctionName << '\n';
+
+ if (Local.Name.empty())
+ OS << "??\n";
+ else
+ OS << Local.Name << '\n';
+
if (Local.DeclFile.empty())
OS << "??";
else
OS << Local.DeclFile;
OS << ':' << Local.DeclLine << '\n';
+
if (Local.FrameOffset)
OS << *Local.FrameOffset << ' ';
else
OS << "?? ";
+
if (Local.Size)
OS << *Local.Size << ' ';
else
OS << "?? ";
+
if (Local.TagOffset)
OS << *Local.TagOffset << '\n';
else
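With the change above, each missing DILocal field prints as "??" rather than
an empty line; an illustrative record (values invented):

    // foo            <- function name, or "??"
    // x              <- variable name, or "??"
    // /src/foo.c:12  <- decl file:line, with "??" for an unknown file
    // -20 4 ??       <- frame offset, size, tag offset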
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 49a78cf572e3..adbed4f78593 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -284,6 +284,79 @@ bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
}
+template <typename ELFT>
+Optional<ArrayRef<uint8_t>> getBuildID(const ELFFile<ELFT> *Obj) {
+ if (!Obj)
+ return {};
+ auto PhdrsOrErr = Obj->program_headers();
+ if (!PhdrsOrErr) {
+ consumeError(PhdrsOrErr.takeError());
+ return {};
+ }
+ for (const auto &P : *PhdrsOrErr) {
+ if (P.p_type != ELF::PT_NOTE)
+ continue;
+ Error Err = Error::success();
+ for (auto N : Obj->notes(P, Err))
+ if (N.getType() == ELF::NT_GNU_BUILD_ID && N.getName() == ELF::ELF_NOTE_GNU)
+ return N.getDesc();
+ }
+ return {};
+}
+
+Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) {
+ Optional<ArrayRef<uint8_t>> BuildID;
+ if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Obj))
+ BuildID = getBuildID(O->getELFFile());
+ else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Obj))
+ BuildID = getBuildID(O->getELFFile());
+ else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Obj))
+ BuildID = getBuildID(O->getELFFile());
+ else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Obj))
+ BuildID = getBuildID(O->getELFFile());
+ else
+ llvm_unreachable("unsupported file format");
+ return BuildID;
+}
+
+bool findDebugBinary(const std::vector<std::string> &DebugFileDirectory,
+ const ArrayRef<uint8_t> BuildID,
+ std::string &Result) {
+ auto getDebugPath = [&](StringRef Directory) {
+ SmallString<128> Path{Directory};
+ sys::path::append(Path, ".build-id",
+ llvm::toHex(BuildID[0], /*LowerCase=*/true),
+ llvm::toHex(BuildID.slice(1), /*LowerCase=*/true));
+ Path += ".debug";
+ return Path;
+ };
+ if (DebugFileDirectory.empty()) {
+ SmallString<128> Path = getDebugPath(
+#if defined(__NetBSD__)
+ // Try /usr/libdata/debug/.build-id/../...
+ "/usr/libdata/debug"
+#else
+ // Try /usr/lib/debug/.build-id/../...
+ "/usr/lib/debug"
+#endif
+ );
+ if (llvm::sys::fs::exists(Path)) {
+ Result = Path.str();
+ return true;
+ }
+ } else {
+ for (const auto &Directory : DebugFileDirectory) {
+ // Try <debug-file-directory>/.build-id/../...
+ SmallString<128> Path = getDebugPath(Directory);
+ if (llvm::sys::fs::exists(Path)) {
+ Result = Path.str();
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
} // end anonymous namespace
ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
@@ -335,6 +408,25 @@ ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
return DbgObjOrErr.get();
}
+ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
+ const ELFObjectFileBase *Obj,
+ const std::string &ArchName) {
+ auto BuildID = getBuildID(Obj);
+ if (!BuildID)
+ return nullptr;
+ if (BuildID->size() < 2)
+ return nullptr;
+ std::string DebugBinaryPath;
+ if (!findDebugBinary(Opts.DebugFileDirectory, *BuildID, DebugBinaryPath))
+ return nullptr;
+ auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
+ if (!DbgObjOrErr) {
+ consumeError(DbgObjOrErr.takeError());
+ return nullptr;
+ }
+ return DbgObjOrErr.get();
+}
+
Expected<LLVMSymbolizer::ObjectPair>
LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
const std::string &ArchName) {
@@ -355,6 +447,8 @@ LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
+ else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj))
+ DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName);
if (!DbgObj)
DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
if (!DbgObj)
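A worked example of the .build-id path findDebugBinary constructs (the
build-id bytes are illustrative):

    // BuildID = {0xab, 0xcd, 0xef, 0x01}
    // default search on non-NetBSD hosts:
    //   /usr/lib/debug/.build-id/ab/cdef01.debug
    // i.e. the first byte as its own directory, the remaining bytes
    // concatenated in lower-case hex, plus a ".debug" suffix.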
diff --git a/contrib/llvm-project/llvm/lib/Demangle/ItaniumDemangle.cpp b/contrib/llvm-project/llvm/lib/Demangle/ItaniumDemangle.cpp
index 760d28b3ab9d..e112d5c5ec77 100644
--- a/contrib/llvm-project/llvm/lib/Demangle/ItaniumDemangle.cpp
+++ b/contrib/llvm-project/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -89,14 +89,6 @@ struct DumpVisitor {
else
printStr("<null>");
}
- void print(NodeOrString NS) {
- if (NS.isNode())
- print(NS.asNode());
- else if (NS.isString())
- print(NS.asString());
- else
- printStr("NodeOrString()");
- }
void print(NodeArray A) {
++Depth;
printStr("{");
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index c741fe2b3778..ff1e8050c7e7 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -183,13 +183,13 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
std::unique_ptr<Module> Mod(unwrap(M));
if (Mod)
- // Set function attribute "no-frame-pointer-elim" based on
+ // Set function attribute "frame-pointer" based on
// NoFramePointerElim.
for (auto &F : *Mod) {
auto Attrs = F.getAttributes();
- StringRef Value(options.NoFramePointerElim ? "true" : "false");
+ StringRef Value = options.NoFramePointerElim ? "all" : "none";
Attrs = Attrs.addAttribute(F.getContext(), AttributeList::FunctionIndex,
- "no-frame-pointer-elim", Value);
+ "frame-pointer", Value);
F.setAttributes(Attrs);
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
index f80b0e7f8909..f1114e92c360 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
@@ -17,174 +17,281 @@
namespace llvm {
namespace jitlink {
-EHFrameBinaryParser::EHFrameBinaryParser(JITTargetAddress EHFrameAddress,
- StringRef EHFrameContent,
- unsigned PointerSize,
- support::endianness Endianness)
- : EHFrameAddress(EHFrameAddress), EHFrameContent(EHFrameContent),
- PointerSize(PointerSize), EHFrameReader(EHFrameContent, Endianness) {}
+EHFrameSplitter::EHFrameSplitter(StringRef EHFrameSectionName)
+ : EHFrameSectionName(EHFrameSectionName) {}
-Error EHFrameBinaryParser::addToGraph() {
- while (!EHFrameReader.empty()) {
- size_t RecordOffset = EHFrameReader.getOffset();
+Error EHFrameSplitter::operator()(LinkGraph &G) {
+ auto *EHFrame = G.findSectionByName(EHFrameSectionName);
+ if (!EHFrame) {
LLVM_DEBUG({
- dbgs() << "Processing eh-frame record at "
- << format("0x%016" PRIx64, EHFrameAddress + RecordOffset)
- << " (offset " << RecordOffset << ")\n";
+ dbgs() << "EHFrameSplitter: No " << EHFrameSectionName
+ << " section. Nothing to do\n";
});
+ return Error::success();
+ }
- size_t RecordLength = 0;
- uint32_t RecordLengthField;
- if (auto Err = EHFrameReader.readInteger(RecordLengthField))
+ LLVM_DEBUG({
+ dbgs() << "EHFrameSplitter: Processing " << EHFrameSectionName << "...\n";
+ });
+
+ DenseMap<Block *, LinkGraph::SplitBlockCache> Caches;
+
+ {
+ // Pre-build the split caches.
+ for (auto *B : EHFrame->blocks())
+ Caches[B] = LinkGraph::SplitBlockCache::value_type();
+ for (auto *Sym : EHFrame->symbols())
+ Caches[&Sym->getBlock()]->push_back(Sym);
+ for (auto *B : EHFrame->blocks())
+ llvm::sort(*Caches[B], [](const Symbol *LHS, const Symbol *RHS) {
+ return LHS->getOffset() > RHS->getOffset();
+ });
+ }
+
+  // Iterate over blocks (we do this by iterating over Caches entries rather
+  // than EHFrame->blocks(), as we will be inserting new blocks along the way,
+  // which would invalidate iterators in the latter sequence).
+ for (auto &KV : Caches) {
+ auto &B = *KV.first;
+ auto &BCache = KV.second;
+ if (auto Err = processBlock(G, B, BCache))
return Err;
+ }
- // Process CIE/FDE length/extended-length fields to build the blocks.
- //
- // The value of these fields describe the length of the *rest* of the CIE
- // (not including data up to the end of the field itself) so we have to
- // bump RecordLength to include the data up to the end of the field: 4 bytes
- // for Length, or 12 bytes (4 bytes + 8 bytes) for ExtendedLength.
- if (RecordLengthField == 0) // Length 0 means end of __eh_frame section.
- break;
+ return Error::success();
+}
- // If the regular length field's value is 0xffffffff, use extended length.
- if (RecordLengthField == 0xffffffff) {
- uint64_t ExtendedLengthField;
- if (auto Err = EHFrameReader.readInteger(ExtendedLengthField))
- return Err;
- if (ExtendedLengthField > EHFrameReader.bytesRemaining())
- return make_error<JITLinkError>("CIE record extends past the end of "
- "the __eh_frame section");
- if (ExtendedLengthField + 12 > std::numeric_limits<size_t>::max())
- return make_error<JITLinkError>("CIE record too large to process");
- RecordLength = ExtendedLengthField + 12;
- } else {
- if (RecordLengthField > EHFrameReader.bytesRemaining())
- return make_error<JITLinkError>("CIE record extends past the end of "
- "the __eh_frame section");
- RecordLength = RecordLengthField + 4;
- }
+Error EHFrameSplitter::processBlock(LinkGraph &G, Block &B,
+ LinkGraph::SplitBlockCache &Cache) {
+ LLVM_DEBUG({
+ dbgs() << " Processing block at " << formatv("{0:x16}", B.getAddress())
+ << "\n";
+ });
- LLVM_DEBUG(dbgs() << " length: " << RecordLength << "\n");
+ // eh-frame should not contain zero-fill blocks.
+ if (B.isZeroFill())
+ return make_error<JITLinkError>("Unexpected zero-fill block in " +
+ EHFrameSectionName + " section");
- // Read the CIE Pointer.
- size_t CIEPointerAddress = EHFrameAddress + EHFrameReader.getOffset();
- uint32_t CIEPointer;
- if (auto Err = EHFrameReader.readInteger(CIEPointer))
- return Err;
+ if (B.getSize() == 0) {
+ LLVM_DEBUG(dbgs() << " Block is empty. Skipping.\n");
+ return Error::success();
+ }
+
+ BinaryStreamReader BlockReader(B.getContent(), G.getEndianness());
- // Based on the CIE pointer value, parse this as a CIE or FDE record.
- if (CIEPointer == 0) {
- if (auto Err = processCIE(RecordOffset, RecordLength))
+ while (true) {
+ uint64_t RecordStartOffset = BlockReader.getOffset();
+
+ LLVM_DEBUG({
+ dbgs() << " Processing CFI record at "
+ << formatv("{0:x16}", B.getAddress()) << "\n";
+ });
+
+ uint32_t Length;
+ if (auto Err = BlockReader.readInteger(Length))
+ return Err;
+ if (Length != 0xffffffff) {
+ if (auto Err = BlockReader.skip(Length))
return Err;
} else {
- if (auto Err = processFDE(RecordOffset, RecordLength, CIEPointerAddress,
- CIEPointer))
+ uint64_t ExtendedLength;
+ if (auto Err = BlockReader.readInteger(ExtendedLength))
+ return Err;
+ if (auto Err = BlockReader.skip(ExtendedLength))
return Err;
}
- EHFrameReader.setOffset(RecordOffset + RecordLength);
+    // If this was the last record then there's nothing left to split.
+ if (BlockReader.empty()) {
+ LLVM_DEBUG(dbgs() << " Extracted " << B << "\n");
+ return Error::success();
+ }
+
+ uint64_t BlockSize = BlockReader.getOffset() - RecordStartOffset;
+ auto &NewBlock = G.splitBlock(B, BlockSize);
+ (void)NewBlock;
+ LLVM_DEBUG(dbgs() << " Extracted " << NewBlock << "\n");
+ }
+}
+
+EHFrameEdgeFixer::EHFrameEdgeFixer(StringRef EHFrameSectionName,
+ Edge::Kind FDEToCIE, Edge::Kind FDEToPCBegin,
+ Edge::Kind FDEToLSDA)
+ : EHFrameSectionName(EHFrameSectionName), FDEToCIE(FDEToCIE),
+ FDEToPCBegin(FDEToPCBegin), FDEToLSDA(FDEToLSDA) {}
+
+Error EHFrameEdgeFixer::operator()(LinkGraph &G) {
+ auto *EHFrame = G.findSectionByName(EHFrameSectionName);
+
+ if (!EHFrame) {
+ LLVM_DEBUG({
+ dbgs() << "EHFrameEdgeFixer: No " << EHFrameSectionName
+ << " section. Nothing to do\n";
+ });
+ return Error::success();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "EHFrameEdgeFixer: Processing " << EHFrameSectionName << "...\n";
+ });
+
+ ParseContext PC(G);
+
+ // Build a map of all blocks and symbols in the text sections. We will use
+ // these for finding / building edge targets when processing FDEs.
+ for (auto &Sec : G.sections()) {
+ PC.AddrToSyms.addSymbols(Sec.symbols());
+ if (auto Err = PC.AddrToBlock.addBlocks(Sec.blocks(),
+ BlockAddressMap::includeNonNull))
+ return Err;
}
+ // Sort eh-frame blocks into address order to ensure we visit CIEs before
+ // their child FDEs.
+ std::vector<Block *> EHFrameBlocks;
+ for (auto *B : EHFrame->blocks())
+ EHFrameBlocks.push_back(B);
+ llvm::sort(EHFrameBlocks, [](const Block *LHS, const Block *RHS) {
+ return LHS->getAddress() < RHS->getAddress();
+ });
+
+ // Loop over the blocks in address order.
+ for (auto *B : EHFrameBlocks)
+ if (auto Err = processBlock(PC, *B))
+ return Err;
+
return Error::success();
}
-void EHFrameBinaryParser::anchor() {}
+Error EHFrameEdgeFixer::processBlock(ParseContext &PC, Block &B) {
-Expected<EHFrameBinaryParser::AugmentationInfo>
-EHFrameBinaryParser::parseAugmentationString() {
- AugmentationInfo AugInfo;
- uint8_t NextChar;
- uint8_t *NextField = &AugInfo.Fields[0];
+ LLVM_DEBUG({
+ dbgs() << " Processing block at " << formatv("{0:x16}", B.getAddress())
+ << "\n";
+ });
- if (auto Err = EHFrameReader.readInteger(NextChar))
- return std::move(Err);
+ // eh-frame should not contain zero-fill blocks.
+ if (B.isZeroFill())
+ return make_error<JITLinkError>("Unexpected zero-fill block in " +
+ EHFrameSectionName + " section");
- while (NextChar != 0) {
- switch (NextChar) {
- case 'z':
- AugInfo.AugmentationDataPresent = true;
- break;
- case 'e':
- if (auto Err = EHFrameReader.readInteger(NextChar))
- return std::move(Err);
- if (NextChar != 'h')
- return make_error<JITLinkError>("Unrecognized substring e" +
- Twine(NextChar) +
- " in augmentation string");
- AugInfo.EHDataFieldPresent = true;
- break;
- case 'L':
- case 'P':
- case 'R':
- *NextField++ = NextChar;
- break;
- default:
- return make_error<JITLinkError>("Unrecognized character " +
- Twine(NextChar) +
- " in augmentation string");
+ if (B.getSize() == 0) {
+ LLVM_DEBUG(dbgs() << " Block is empty. Skipping.\n");
+ return Error::success();
+ }
+
+ // Find the offsets of any existing edges from this block.
+ BlockEdgeMap BlockEdges;
+ for (auto &E : B.edges())
+ if (E.isRelocation()) {
+ if (BlockEdges.count(E.getOffset()))
+ return make_error<JITLinkError>(
+ "Multiple relocations at offset " +
+ formatv("{0:x16}", E.getOffset()) + " in " + EHFrameSectionName +
+ " block at address " + formatv("{0:x16}", B.getAddress()));
+
+ BlockEdges[E.getOffset()] = EdgeTarget(E);
}
- if (auto Err = EHFrameReader.readInteger(NextChar))
- return std::move(Err);
- }
+ CIEInfosMap CIEInfos;
+ BinaryStreamReader BlockReader(B.getContent(), PC.G.getEndianness());
+ while (!BlockReader.empty()) {
+ size_t RecordStartOffset = BlockReader.getOffset();
- return std::move(AugInfo);
-}
+ LLVM_DEBUG({
+ dbgs() << " Processing CFI record at "
+ << formatv("{0:x16}", B.getAddress() + RecordStartOffset) << "\n";
+ });
-Expected<JITTargetAddress> EHFrameBinaryParser::readAbsolutePointer() {
- static_assert(sizeof(JITTargetAddress) == sizeof(uint64_t),
- "Result must be able to hold a uint64_t");
- JITTargetAddress Addr;
- if (PointerSize == 8) {
- if (auto Err = EHFrameReader.readInteger(Addr))
- return std::move(Err);
- } else if (PointerSize == 4) {
- uint32_t Addr32;
- if (auto Err = EHFrameReader.readInteger(Addr32))
- return std::move(Err);
- Addr = Addr32;
- } else
- llvm_unreachable("Pointer size is not 32-bit or 64-bit");
- return Addr;
+ // Get the record length.
+ size_t RecordRemaining;
+ {
+ uint32_t Length;
+ if (auto Err = BlockReader.readInteger(Length))
+ return Err;
+ // If Length < 0xffffffff then use the regular length field, otherwise
+ // read the extended length field.
+ if (Length != 0xffffffff)
+ RecordRemaining = Length;
+ else {
+ uint64_t ExtendedLength;
+ if (auto Err = BlockReader.readInteger(ExtendedLength))
+ return Err;
+ RecordRemaining = ExtendedLength;
+ }
+ }
+
+ if (BlockReader.bytesRemaining() < RecordRemaining)
+ return make_error<JITLinkError>(
+ "Incomplete CFI record at " +
+ formatv("{0:x16}", B.getAddress() + RecordStartOffset));
+
+ // Read the CIE delta for this record.
+ uint64_t CIEDeltaFieldOffset = BlockReader.getOffset() - RecordStartOffset;
+ uint32_t CIEDelta;
+ if (auto Err = BlockReader.readInteger(CIEDelta))
+ return Err;
+
+ if (CIEDelta == 0) {
+ if (auto Err = processCIE(PC, B, RecordStartOffset,
+ CIEDeltaFieldOffset + RecordRemaining,
+ CIEDeltaFieldOffset))
+ return Err;
+ } else {
+ if (auto Err = processFDE(PC, B, RecordStartOffset,
+ CIEDeltaFieldOffset + RecordRemaining,
+ CIEDeltaFieldOffset, CIEDelta, BlockEdges))
+ return Err;
+ }
+
+ // Move to the next record.
+ BlockReader.setOffset(RecordStartOffset + CIEDeltaFieldOffset +
+ RecordRemaining);
+ }
+
+ return Error::success();
}
-Error EHFrameBinaryParser::processCIE(size_t RecordOffset,
- size_t RecordLength) {
- // Use the dwarf namespace for convenient access to pointer encoding
- // constants.
+Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
+ size_t RecordOffset, size_t RecordLength,
+ size_t CIEDeltaFieldOffset) {
using namespace dwarf;
- LLVM_DEBUG(dbgs() << " Record is CIE\n");
+ LLVM_DEBUG(dbgs() << " Record is CIE\n");
- auto &CIESymbol =
- createCIERecord(EHFrameAddress + RecordOffset,
- EHFrameContent.substr(RecordOffset, RecordLength));
+ auto RecordContent = B.getContent().substr(RecordOffset, RecordLength);
+ BinaryStreamReader RecordReader(RecordContent, PC.G.getEndianness());
+
+ // Skip past the CIE delta field: we've already processed this far.
+ RecordReader.setOffset(CIEDeltaFieldOffset + 4);
+ auto &CIESymbol =
+ PC.G.addAnonymousSymbol(B, RecordOffset, RecordLength, false, false);
CIEInformation CIEInfo(CIESymbol);
uint8_t Version = 0;
- if (auto Err = EHFrameReader.readInteger(Version))
+ if (auto Err = RecordReader.readInteger(Version))
return Err;
if (Version != 0x01)
return make_error<JITLinkError>("Bad CIE version " + Twine(Version) +
" (should be 0x01) in eh-frame");
- auto AugInfo = parseAugmentationString();
+ auto AugInfo = parseAugmentationString(RecordReader);
if (!AugInfo)
return AugInfo.takeError();
// Skip the EH Data field if present.
if (AugInfo->EHDataFieldPresent)
- if (auto Err = EHFrameReader.skip(PointerSize))
+ if (auto Err = RecordReader.skip(PC.G.getPointerSize()))
return Err;
// Read and sanity check the code alignment factor.
{
uint64_t CodeAlignmentFactor = 0;
- if (auto Err = EHFrameReader.readULEB128(CodeAlignmentFactor))
+ if (auto Err = RecordReader.readULEB128(CodeAlignmentFactor))
return Err;
if (CodeAlignmentFactor != 1)
return make_error<JITLinkError>("Unsupported CIE code alignment factor " +
@@ -195,7 +302,7 @@ Error EHFrameBinaryParser::processCIE(size_t RecordOffset,
// Read and sanity check the data alignment factor.
{
int64_t DataAlignmentFactor = 0;
- if (auto Err = EHFrameReader.readSLEB128(DataAlignmentFactor))
+ if (auto Err = RecordReader.readSLEB128(DataAlignmentFactor))
return Err;
if (DataAlignmentFactor != -8)
return make_error<JITLinkError>("Unsupported CIE data alignment factor " +
@@ -204,14 +311,14 @@ Error EHFrameBinaryParser::processCIE(size_t RecordOffset,
}
// Skip the return address register field.
- if (auto Err = EHFrameReader.skip(1))
+ if (auto Err = RecordReader.skip(1))
return Err;
uint64_t AugmentationDataLength = 0;
- if (auto Err = EHFrameReader.readULEB128(AugmentationDataLength))
+ if (auto Err = RecordReader.readULEB128(AugmentationDataLength))
return Err;
- uint32_t AugmentationDataStartOffset = EHFrameReader.getOffset();
+ uint32_t AugmentationDataStartOffset = RecordReader.getOffset();
uint8_t *NextField = &AugInfo->Fields[0];
while (uint8_t Field = *NextField++) {
@@ -219,7 +326,7 @@ Error EHFrameBinaryParser::processCIE(size_t RecordOffset,
case 'L': {
CIEInfo.FDEsHaveLSDAField = true;
uint8_t LSDAPointerEncoding;
- if (auto Err = EHFrameReader.readInteger(LSDAPointerEncoding))
+ if (auto Err = RecordReader.readInteger(LSDAPointerEncoding))
return Err;
if (LSDAPointerEncoding != (DW_EH_PE_pcrel | DW_EH_PE_absptr))
return make_error<JITLinkError>(
@@ -230,7 +337,7 @@ Error EHFrameBinaryParser::processCIE(size_t RecordOffset,
}
case 'P': {
uint8_t PersonalityPointerEncoding = 0;
- if (auto Err = EHFrameReader.readInteger(PersonalityPointerEncoding))
+ if (auto Err = RecordReader.readInteger(PersonalityPointerEncoding))
return Err;
if (PersonalityPointerEncoding !=
(DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4))
@@ -240,13 +347,13 @@ Error EHFrameBinaryParser::processCIE(size_t RecordOffset,
formatv("{0:x2}", PersonalityPointerEncoding) + " in CIE at " +
formatv("{0:x16}", CIESymbol.getAddress()));
uint32_t PersonalityPointerAddress;
- if (auto Err = EHFrameReader.readInteger(PersonalityPointerAddress))
+ if (auto Err = RecordReader.readInteger(PersonalityPointerAddress))
return Err;
break;
}
case 'R': {
uint8_t FDEPointerEncoding;
- if (auto Err = EHFrameReader.readInteger(FDEPointerEncoding))
+ if (auto Err = RecordReader.readInteger(FDEPointerEncoding))
return Err;
if (FDEPointerEncoding != (DW_EH_PE_pcrel | DW_EH_PE_absptr))
return make_error<JITLinkError>(
@@ -261,107 +368,265 @@ Error EHFrameBinaryParser::processCIE(size_t RecordOffset,
}
}
- if (EHFrameReader.getOffset() - AugmentationDataStartOffset >
+ if (RecordReader.getOffset() - AugmentationDataStartOffset >
AugmentationDataLength)
return make_error<JITLinkError>("Read past the end of the augmentation "
"data while parsing fields");
- assert(!CIEInfos.count(CIESymbol.getAddress()) &&
+ assert(!PC.CIEInfos.count(CIESymbol.getAddress()) &&
"Multiple CIEs recorded at the same address?");
- CIEInfos[CIESymbol.getAddress()] = std::move(CIEInfo);
+ PC.CIEInfos[CIESymbol.getAddress()] = std::move(CIEInfo);
return Error::success();
}
-Error EHFrameBinaryParser::processFDE(size_t RecordOffset, size_t RecordLength,
- JITTargetAddress CIEPointerAddress,
- uint32_t CIEPointer) {
- LLVM_DEBUG(dbgs() << " Record is FDE\n");
+Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
+ size_t RecordOffset, size_t RecordLength,
+ size_t CIEDeltaFieldOffset,
+ uint32_t CIEDelta,
+ BlockEdgeMap &BlockEdges) {
+ LLVM_DEBUG(dbgs() << " Record is FDE\n");
- LLVM_DEBUG({
- dbgs() << " CIE pointer: "
- << format("0x%016" PRIx64, CIEPointerAddress - CIEPointer) << "\n";
- });
-
- auto CIEInfoItr = CIEInfos.find(CIEPointerAddress - CIEPointer);
- if (CIEInfoItr == CIEInfos.end())
- return make_error<JITLinkError>(
- "FDE at " + formatv("{0:x16}", EHFrameAddress + RecordOffset) +
- " points to non-existant CIE at " +
- formatv("{0:x16}", CIEPointerAddress - CIEPointer));
- auto &CIEInfo = CIEInfoItr->second;
+ JITTargetAddress RecordAddress = B.getAddress() + RecordOffset;
- // Read and sanity check the PC-start pointer and size.
- JITTargetAddress PCBeginAddress = EHFrameAddress + EHFrameReader.getOffset();
+ auto RecordContent = B.getContent().substr(RecordOffset, RecordLength);
+ BinaryStreamReader RecordReader(RecordContent, PC.G.getEndianness());
- auto PCBeginDelta = readAbsolutePointer();
- if (!PCBeginDelta)
- return PCBeginDelta.takeError();
+ // Skip past the CIE delta field: we've already read this far.
+ RecordReader.setOffset(CIEDeltaFieldOffset + 4);
- JITTargetAddress PCBegin = PCBeginAddress + *PCBeginDelta;
- LLVM_DEBUG({
- dbgs() << " PC begin: " << format("0x%016" PRIx64, PCBegin) << "\n";
- });
+ auto &FDESymbol =
+ PC.G.addAnonymousSymbol(B, RecordOffset, RecordLength, false, false);
- auto *TargetSymbol = getSymbolAtAddress(PCBegin);
+ CIEInformation *CIEInfo = nullptr;
- if (!TargetSymbol)
- return make_error<JITLinkError>("FDE PC-begin " +
- formatv("{0:x16}", PCBegin) +
- " does not point at symbol");
+ {
+ // Process the CIE pointer field.
+ auto CIEEdgeItr = BlockEdges.find(RecordOffset + CIEDeltaFieldOffset);
+ JITTargetAddress CIEAddress =
+ RecordAddress + CIEDeltaFieldOffset - CIEDelta;
+ if (CIEEdgeItr == BlockEdges.end()) {
+
+ LLVM_DEBUG({
+ dbgs() << " Adding edge at "
+ << formatv("{0:x16}", RecordAddress + CIEDeltaFieldOffset)
+ << " to CIE at: " << formatv("{0:x16}", CIEAddress) << "\n";
+ });
+ if (auto CIEInfoOrErr = PC.findCIEInfo(CIEAddress))
+ CIEInfo = *CIEInfoOrErr;
+ else
+ return CIEInfoOrErr.takeError();
+ assert(CIEInfo->CIESymbol && "CIEInfo has no CIE symbol set");
+ B.addEdge(FDEToCIE, RecordOffset + CIEDeltaFieldOffset,
+ *CIEInfo->CIESymbol, 0);
+ } else {
+ LLVM_DEBUG({
+ dbgs() << " Already has edge at "
+ << formatv("{0:x16}", RecordAddress + CIEDeltaFieldOffset)
+ << " to CIE at " << formatv("{0:x16}", CIEAddress) << "\n";
+ });
+ auto &EI = CIEEdgeItr->second;
+ if (EI.Addend)
+ return make_error<JITLinkError>(
+ "CIE edge at " +
+ formatv("{0:x16}", RecordAddress + CIEDeltaFieldOffset) +
+ " has non-zero addend");
+ if (auto CIEInfoOrErr = PC.findCIEInfo(EI.Target->getAddress()))
+ CIEInfo = *CIEInfoOrErr;
+ else
+ return CIEInfoOrErr.takeError();
+ }
+ }
- if (TargetSymbol->getAddress() != PCBegin)
- return make_error<JITLinkError>(
- "FDE PC-begin " + formatv("{0:x16}", PCBegin) +
- " does not point to start of symbol at " +
- formatv("{0:x16}", TargetSymbol->getAddress()));
+ {
+ // Process the PC-Begin field.
+ Block *PCBeginBlock = nullptr;
+ JITTargetAddress PCBeginFieldOffset = RecordReader.getOffset();
+ auto PCEdgeItr = BlockEdges.find(RecordOffset + PCBeginFieldOffset);
+ if (PCEdgeItr == BlockEdges.end()) {
+ auto PCBeginDelta = readAbsolutePointer(PC.G, RecordReader);
+ if (!PCBeginDelta)
+ return PCBeginDelta.takeError();
+ JITTargetAddress PCBegin =
+ RecordAddress + PCBeginFieldOffset + *PCBeginDelta;
+ LLVM_DEBUG({
+ dbgs() << " Adding edge at "
+ << formatv("{0:x16}", RecordAddress + PCBeginFieldOffset)
+ << " to PC at " << formatv("{0:x16}", PCBegin) << "\n";
+ });
+ auto PCBeginSym = getOrCreateSymbol(PC, PCBegin);
+ if (!PCBeginSym)
+ return PCBeginSym.takeError();
+ B.addEdge(FDEToPCBegin, RecordOffset + PCBeginFieldOffset, *PCBeginSym,
+ 0);
+ PCBeginBlock = &PCBeginSym->getBlock();
+ } else {
+ auto &EI = PCEdgeItr->second;
+ LLVM_DEBUG({
+ dbgs() << " Already has edge at "
+ << formatv("{0:x16}", RecordAddress + PCBeginFieldOffset)
+ << " to PC at " << formatv("{0:x16}", EI.Target->getAddress());
+ if (EI.Addend)
+ dbgs() << " + " << formatv("{0:x16}", EI.Addend);
+ dbgs() << "\n";
+ });
+
+ // Make sure the existing edge points at a defined block.
+ if (!EI.Target->isDefined()) {
+ auto EdgeAddr = RecordAddress + PCBeginFieldOffset;
+ return make_error<JITLinkError>("FDE edge at " +
+ formatv("{0:x16}", EdgeAddr) +
+ " points at external block");
+ }
+ PCBeginBlock = &EI.Target->getBlock();
+ if (auto Err = RecordReader.skip(PC.G.getPointerSize()))
+ return Err;
+ }
- LLVM_DEBUG(dbgs() << " FDE target: " << *TargetSymbol << "\n");
+ // Add a keep-alive edge from the FDE target to the FDE to ensure that the
+ // FDE is kept alive if its target is.
+ assert(PCBeginBlock && "PC-begin block not recorded");
+ PCBeginBlock->addEdge(Edge::KeepAlive, 0, FDESymbol, 0);
+ }
// Skip over the PC range size field.
- if (auto Err = EHFrameReader.skip(PointerSize))
+ if (auto Err = RecordReader.skip(PC.G.getPointerSize()))
return Err;
- Symbol *LSDASymbol = nullptr;
- JITTargetAddress LSDAAddress = 0;
- if (CIEInfo.FDEsHaveLSDAField) {
+ if (CIEInfo->FDEsHaveLSDAField) {
uint64_t AugmentationDataSize;
- if (auto Err = EHFrameReader.readULEB128(AugmentationDataSize))
+ if (auto Err = RecordReader.readULEB128(AugmentationDataSize))
return Err;
- if (AugmentationDataSize != PointerSize)
+ if (AugmentationDataSize != PC.G.getPointerSize())
return make_error<JITLinkError>(
"Unexpected FDE augmentation data size (expected " +
- Twine(PointerSize) + ", got " + Twine(AugmentationDataSize) +
- ") for FDE at " + formatv("{0:x16}", EHFrameAddress + RecordOffset));
- LSDAAddress = EHFrameAddress + EHFrameReader.getOffset();
- auto LSDADelta = readAbsolutePointer();
- if (!LSDADelta)
- return LSDADelta.takeError();
+ Twine(PC.G.getPointerSize()) + ", got " +
+ Twine(AugmentationDataSize) + ") for FDE at " +
+ formatv("{0:x16}", RecordAddress));
+
+ JITTargetAddress LSDAFieldOffset = RecordReader.getOffset();
+ auto LSDAEdgeItr = BlockEdges.find(RecordOffset + LSDAFieldOffset);
+ if (LSDAEdgeItr == BlockEdges.end()) {
+ auto LSDADelta = readAbsolutePointer(PC.G, RecordReader);
+ if (!LSDADelta)
+ return LSDADelta.takeError();
+ JITTargetAddress LSDA = RecordAddress + LSDAFieldOffset + *LSDADelta;
+ auto LSDASym = getOrCreateSymbol(PC, LSDA);
+ if (!LSDASym)
+ return LSDASym.takeError();
+ LLVM_DEBUG({
+ dbgs() << " Adding edge at "
+ << formatv("{0:x16}", RecordAddress + LSDAFieldOffset)
+ << " to LSDA at " << formatv("{0:x16}", LSDA) << "\n";
+ });
+ B.addEdge(FDEToLSDA, RecordOffset + LSDAFieldOffset, *LSDASym, 0);
+ } else {
+ LLVM_DEBUG({
+ auto &EI = LSDAEdgeItr->second;
+ dbgs() << " Already has edge at "
+ << formatv("{0:x16}", RecordAddress + LSDAFieldOffset)
+ << " to LSDA at " << formatv("{0:x16}", EI.Target->getAddress());
+ if (EI.Addend)
+ dbgs() << " + " << formatv("{0:x16}", EI.Addend);
+ dbgs() << "\n";
+ });
+ if (auto Err = RecordReader.skip(PC.G.getPointerSize()))
+ return Err;
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << " Record does not have LSDA field.\n");
+ }
- JITTargetAddress LSDA = LSDAAddress + *LSDADelta;
+ return Error::success();
+}
- LSDASymbol = getSymbolAtAddress(LSDA);
+Expected<EHFrameEdgeFixer::AugmentationInfo>
+EHFrameEdgeFixer::parseAugmentationString(BinaryStreamReader &RecordReader) {
+ AugmentationInfo AugInfo;
+ uint8_t NextChar;
+ uint8_t *NextField = &AugInfo.Fields[0];
- if (!LSDASymbol)
- return make_error<JITLinkError>("FDE LSDA " + formatv("{0:x16}", LSDA) +
- " does not point at symbol");
+ if (auto Err = RecordReader.readInteger(NextChar))
+ return std::move(Err);
- if (LSDASymbol->getAddress() != LSDA)
- return make_error<JITLinkError>(
- "FDE LSDA " + formatv("{0:x16}", LSDA) +
- " does not point to start of symbol at " +
- formatv("{0:x16}", LSDASymbol->getAddress()));
+ while (NextChar != 0) {
+ switch (NextChar) {
+ case 'z':
+ AugInfo.AugmentationDataPresent = true;
+ break;
+ case 'e':
+ if (auto Err = RecordReader.readInteger(NextChar))
+ return std::move(Err);
+ if (NextChar != 'h')
+ return make_error<JITLinkError>("Unrecognized substring e" +
+ Twine(NextChar) +
+ " in augmentation string");
+ AugInfo.EHDataFieldPresent = true;
+ break;
+ case 'L':
+ case 'P':
+ case 'R':
+ *NextField++ = NextChar;
+ break;
+ default:
+ return make_error<JITLinkError>("Unrecognized character " +
+ Twine(NextChar) +
+ " in augmentation string");
+ }
- LLVM_DEBUG(dbgs() << " FDE LSDA: " << *LSDASymbol << "\n");
+ if (auto Err = RecordReader.readInteger(NextChar))
+ return std::move(Err);
}
- JITTargetAddress RecordAddress = EHFrameAddress + RecordOffset;
- auto FDESymbol = createFDERecord(
- RecordAddress, EHFrameContent.substr(RecordOffset, RecordLength),
- *CIEInfo.CIESymbol, CIEPointerAddress - RecordAddress, *TargetSymbol,
- PCBeginAddress - RecordAddress, LSDASymbol, LSDAAddress - RecordAddress);
+ return std::move(AugInfo);
+}
+
+Expected<JITTargetAddress>
+EHFrameEdgeFixer::readAbsolutePointer(LinkGraph &G,
+ BinaryStreamReader &RecordReader) {
+ static_assert(sizeof(JITTargetAddress) == sizeof(uint64_t),
+ "Result must be able to hold a uint64_t");
+ JITTargetAddress Addr;
+ if (G.getPointerSize() == 8) {
+ if (auto Err = RecordReader.readInteger(Addr))
+ return std::move(Err);
+ } else if (G.getPointerSize() == 4) {
+ uint32_t Addr32;
+ if (auto Err = RecordReader.readInteger(Addr32))
+ return std::move(Err);
+ Addr = Addr32;
+ } else
+ llvm_unreachable("Pointer size is not 32-bit or 64-bit");
+ return Addr;
+}
+
+Expected<Symbol &> EHFrameEdgeFixer::getOrCreateSymbol(ParseContext &PC,
+ JITTargetAddress Addr) {
+ Symbol *CanonicalSym = nullptr;
+
+ auto UpdateCanonicalSym = [&](Symbol *Sym) {
+ if (!CanonicalSym || Sym->getLinkage() < CanonicalSym->getLinkage() ||
+ Sym->getScope() < CanonicalSym->getScope() ||
+ (Sym->hasName() && !CanonicalSym->hasName()) ||
+ Sym->getName() < CanonicalSym->getName())
+ CanonicalSym = Sym;
+ };
+
+ if (auto *SymbolsAtAddr = PC.AddrToSyms.getSymbolsAt(Addr))
+ for (auto *Sym : *SymbolsAtAddr)
+ UpdateCanonicalSym(Sym);
+
+ // If we found an existing symbol at the given address then use it.
+ if (CanonicalSym)
+ return *CanonicalSym;
- return FDESymbol.takeError();
+ // Otherwise search for a block covering the address and create a new symbol.
+ auto *B = PC.AddrToBlock.getBlockCovering(Addr);
+ if (!B)
+ return make_error<JITLinkError>("No symbol or block covering address " +
+ formatv("{0:x16}", Addr));
+
+ return PC.G.addAnonymousSymbol(*B, Addr - B->getAddress(), 0, false, false);
}
// Determine whether we can register EH tables.
@@ -444,9 +709,6 @@ Error walkAppleEHFrameSection(const char *const SectionStart,
else
Size += 4;
uint32_t Offset = *reinterpret_cast<const uint32_t *>(OffsetField);
- if (Offset != 0)
- if (auto Err = HandleFDE(CurCFIRecord))
- return Err;
LLVM_DEBUG({
dbgs() << "Registering eh-frame section:\n";
@@ -456,6 +718,11 @@ Error walkAppleEHFrameSection(const char *const SectionStart,
dbgs() << format(" 0x%02" PRIx8, *(CurCFIRecord + I));
dbgs() << " ]\n";
});
+
+ if (Offset != 0)
+ if (auto Err = HandleFDE(CurCFIRecord))
+ return Err;
+
CurCFIRecord += Size;
Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
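Both the splitter and the edge fixer above parse the standard DWARF CFI
length framing; per the reads in their processBlock implementations, each
record begins:

    // uint32_t Length;            // 0xffffffff selects the extended form
    // [uint64_t ExtendedLength;]  // present only if Length == 0xffffffff
    // uint32_t CIEDelta;          // 0 => CIE; otherwise an FDE whose CIE
    //                             // sits CIEDelta bytes before this field
    // ... record body ...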
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
index 6f9f68ad8382..a8cd32c664dc 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
@@ -21,30 +21,30 @@
namespace llvm {
namespace jitlink {
-/// A generic binary parser for eh-frame sections.
-///
-/// Adds blocks and symbols representing CIE and FDE entries to a JITLink graph.
-///
-/// This parser assumes that the user has already verified that the EH-frame's
-/// address range does not overlap any other section/symbol, so that generated
-/// CIE/FDE records do not overlap other sections/symbols.
-class EHFrameBinaryParser {
+/// A LinkGraph pass that splits blocks in an eh-frame section into sub-blocks
+/// representing individual eh-frames.
+/// EHFrameSplitter should not be run without EHFrameEdgeFixer, which is
+/// responsible for adding FDE-to-CIE edges.
+class EHFrameSplitter {
public:
- EHFrameBinaryParser(JITTargetAddress EHFrameAddress, StringRef EHFrameContent,
- unsigned PointerSize, support::endianness Endianness);
- virtual ~EHFrameBinaryParser() {}
+ EHFrameSplitter(StringRef EHFrameSectionName);
+ Error operator()(LinkGraph &G);
- Error addToGraph();
+private:
+ Error processBlock(LinkGraph &G, Block &B, LinkGraph::SplitBlockCache &Cache);
+
+ StringRef EHFrameSectionName;
+};
+
+/// A LinkGraph pass that adds missing FDE-to-CIE, FDE-to-PC and FDE-to-LSDA
+/// edges.
+class EHFrameEdgeFixer {
+public:
+ EHFrameEdgeFixer(StringRef EHFrameSectionName, Edge::Kind FDEToCIE,
+ Edge::Kind FDEToPCBegin, Edge::Kind FDEToLSDA);
+ Error operator()(LinkGraph &G);
private:
- virtual void anchor();
- virtual Symbol *getSymbolAtAddress(JITTargetAddress Addr) = 0;
- virtual Symbol &createCIERecord(JITTargetAddress RecordAddr,
- StringRef RecordContent) = 0;
- virtual Expected<Symbol &>
- createFDERecord(JITTargetAddress RecordAddr, StringRef RecordContent,
- Symbol &CIE, size_t CIEOffset, Symbol &Func,
- size_t FuncOffset, Symbol *LSDA, size_t LSDAOffset) = 0;
struct AugmentationInfo {
bool AugmentationDataPresent = false;
@@ -52,12 +52,6 @@ private:
uint8_t Fields[4] = {0x0, 0x0, 0x0, 0x0};
};
- Expected<AugmentationInfo> parseAugmentationString();
- Expected<JITTargetAddress> readAbsolutePointer();
- Error processCIE(size_t RecordOffset, size_t RecordLength);
- Error processFDE(size_t RecordOffset, size_t RecordLength,
- JITTargetAddress CIEPointerOffset, uint32_t CIEPointer);
-
struct CIEInformation {
CIEInformation() = default;
CIEInformation(Symbol &CIESymbol) : CIESymbol(&CIESymbol) {}
@@ -65,11 +59,51 @@ private:
bool FDEsHaveLSDAField = false;
};
- JITTargetAddress EHFrameAddress;
- StringRef EHFrameContent;
- unsigned PointerSize;
- BinaryStreamReader EHFrameReader;
- DenseMap<JITTargetAddress, CIEInformation> CIEInfos;
+ struct EdgeTarget {
+ EdgeTarget() = default;
+ EdgeTarget(const Edge &E) : Target(&E.getTarget()), Addend(E.getAddend()) {}
+
+ Symbol *Target = nullptr;
+ Edge::AddendT Addend = 0;
+ };
+
+ using BlockEdgeMap = DenseMap<Edge::OffsetT, EdgeTarget>;
+ using CIEInfosMap = DenseMap<JITTargetAddress, CIEInformation>;
+
+ struct ParseContext {
+ ParseContext(LinkGraph &G) : G(G) {}
+
+ Expected<CIEInformation *> findCIEInfo(JITTargetAddress Address) {
+ auto I = CIEInfos.find(Address);
+ if (I == CIEInfos.end())
+ return make_error<JITLinkError>("No CIE found at address " +
+ formatv("{0:x16}", Address));
+ return &I->second;
+ }
+
+ LinkGraph &G;
+ CIEInfosMap CIEInfos;
+ BlockAddressMap AddrToBlock;
+ SymbolAddressMap AddrToSyms;
+ };
+
+ Error processBlock(ParseContext &PC, Block &B);
+ Error processCIE(ParseContext &PC, Block &B, size_t RecordOffset,
+ size_t RecordLength, size_t CIEDeltaFieldOffset);
+ Error processFDE(ParseContext &PC, Block &B, size_t RecordOffset,
+ size_t RecordLength, size_t CIEDeltaFieldOffset,
+ uint32_t CIEDelta, BlockEdgeMap &BlockEdges);
+
+ Expected<AugmentationInfo>
+ parseAugmentationString(BinaryStreamReader &RecordReader);
+ Expected<JITTargetAddress>
+ readAbsolutePointer(LinkGraph &G, BinaryStreamReader &RecordReader);
+ Expected<Symbol &> getOrCreateSymbol(ParseContext &PC, JITTargetAddress Addr);
+
+ StringRef EHFrameSectionName;
+ Edge::Kind FDEToCIE;
+ Edge::Kind FDEToPCBegin;
+ Edge::Kind FDEToLSDA;
};
} // end namespace jitlink
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
index 1e19038951ac..6c924f889577 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -145,14 +145,89 @@ void printEdge(raw_ostream &OS, const Block &B, const Edge &E,
Section::~Section() {
for (auto *Sym : Symbols)
Sym->~Symbol();
-}
-
-LinkGraph::~LinkGraph() {
- // Destroy blocks.
for (auto *B : Blocks)
B->~Block();
}
+Block &LinkGraph::splitBlock(Block &B, size_t SplitIndex,
+ SplitBlockCache *Cache) {
+
+ assert(SplitIndex > 0 && "splitBlock can not be called with SplitIndex == 0");
+
+ // If the split point covers all of B then just return B.
+ if (SplitIndex == B.getSize())
+ return B;
+
+ assert(SplitIndex < B.getSize() && "SplitIndex out of range");
+
+ // Create the new block covering [ 0, SplitIndex ).
+ auto &NewBlock =
+ B.isZeroFill()
+ ? createZeroFillBlock(B.getSection(), SplitIndex, B.getAddress(),
+ B.getAlignment(), B.getAlignmentOffset())
+ : createContentBlock(
+ B.getSection(), B.getContent().substr(0, SplitIndex),
+ B.getAddress(), B.getAlignment(), B.getAlignmentOffset());
+
+ // Modify B to cover [ SplitIndex, B.size() ).
+ B.setAddress(B.getAddress() + SplitIndex);
+ B.setContent(B.getContent().substr(SplitIndex));
+ B.setAlignmentOffset((B.getAlignmentOffset() + SplitIndex) %
+ B.getAlignment());
+
+ // Handle edge transfer/update.
+ {
+    // Copy edges to NewBlock (recording their iterators so that we can remove
+    // them from B), and update the offsets of the edges remaining on B.
+ std::vector<Block::edge_iterator> EdgesToRemove;
+ for (auto I = B.edges().begin(), E = B.edges().end(); I != E; ++I) {
+ if (I->getOffset() < SplitIndex) {
+ NewBlock.addEdge(*I);
+ EdgesToRemove.push_back(I);
+ } else
+ I->setOffset(I->getOffset() - SplitIndex);
+ }
+
+    // Remove the edges that were transferred to NewBlock from B.
+ while (!EdgesToRemove.empty()) {
+ B.removeEdge(EdgesToRemove.back());
+ EdgesToRemove.pop_back();
+ }
+ }
+
+ // Handle symbol transfer/update.
+ {
+ // Initialize the symbols cache if necessary.
+ SplitBlockCache LocalBlockSymbolsCache;
+ if (!Cache)
+ Cache = &LocalBlockSymbolsCache;
+ if (*Cache == None) {
+ *Cache = SplitBlockCache::value_type();
+ for (auto *Sym : B.getSection().symbols())
+ if (&Sym->getBlock() == &B)
+ (*Cache)->push_back(Sym);
+
+ llvm::sort(**Cache, [](const Symbol *LHS, const Symbol *RHS) {
+ return LHS->getOffset() > RHS->getOffset();
+ });
+ }
+ auto &BlockSymbols = **Cache;
+
+ // Transfer all symbols with offset less than SplitIndex to NewBlock.
+ while (!BlockSymbols.empty() &&
+ BlockSymbols.back()->getOffset() < SplitIndex) {
+ BlockSymbols.back()->setBlock(NewBlock);
+ BlockSymbols.pop_back();
+ }
+
+ // Update offsets for all remaining symbols in B.
+ for (auto *Sym : BlockSymbols)
+ Sym->setOffset(Sym->getOffset() - SplitIndex);
+ }
+
+ return NewBlock;
+}
+
void LinkGraph::dump(raw_ostream &OS,
std::function<StringRef(Edge::Kind)> EdgeKindToName) {
if (!EdgeKindToName)
@@ -191,6 +266,16 @@ void LinkGraph::dump(raw_ostream &OS,
<< "\n";
}
+raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LF) {
+ switch (LF) {
+ case SymbolLookupFlags::RequiredSymbol:
+ return OS << "RequiredSymbol";
+ case SymbolLookupFlags::WeaklyReferencedSymbol:
+ return OS << "WeaklyReferencedSymbol";
+ }
+ llvm_unreachable("Unrecognized lookup flags");
+}
+
void JITLinkAsyncLookupContinuation::anchor() {}
JITLinkContext::~JITLinkContext() {}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
index d4270b5aa796..7b594fd2c0ea 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -151,9 +151,12 @@ JITLinkerBase::SegmentLayoutMap JITLinkerBase::layOutBlocks() {
for (auto &KV : Layout) {
auto CompareBlocks = [](const Block *LHS, const Block *RHS) {
+      // Sort by section, address, and size.
if (LHS->getSection().getOrdinal() != RHS->getSection().getOrdinal())
return LHS->getSection().getOrdinal() < RHS->getSection().getOrdinal();
- return LHS->getOrdinal() < RHS->getOrdinal();
+ if (LHS->getAddress() != RHS->getAddress())
+ return LHS->getAddress() < RHS->getAddress();
+ return LHS->getSize() < RHS->getSize();
};
auto &SegLists = KV.second;
@@ -254,25 +257,35 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) {
return Error::success();
}
-DenseSet<StringRef> JITLinkerBase::getExternalSymbolNames() const {
+JITLinkContext::LookupMap JITLinkerBase::getExternalSymbolNames() const {
// Identify unresolved external symbols.
- DenseSet<StringRef> UnresolvedExternals;
+ JITLinkContext::LookupMap UnresolvedExternals;
for (auto *Sym : G->external_symbols()) {
assert(Sym->getAddress() == 0 &&
"External has already been assigned an address");
assert(Sym->getName() != StringRef() && Sym->getName() != "" &&
"Externals must be named");
- UnresolvedExternals.insert(Sym->getName());
+ SymbolLookupFlags LookupFlags =
+ Sym->getLinkage() == Linkage::Weak
+ ? SymbolLookupFlags::WeaklyReferencedSymbol
+ : SymbolLookupFlags::RequiredSymbol;
+ UnresolvedExternals[Sym->getName()] = LookupFlags;
}
return UnresolvedExternals;
}
void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) {
for (auto *Sym : G->external_symbols()) {
+ assert(Sym->getOffset() == 0 &&
+ "External symbol is not at the start of its addressable block");
assert(Sym->getAddress() == 0 && "Symbol already resolved");
assert(!Sym->isDefined() && "Symbol being resolved is already defined");
- assert(Result.count(Sym->getName()) && "Missing resolution for symbol");
- Sym->getAddressable().setAddress(Result[Sym->getName()].getAddress());
+ auto ResultI = Result.find(Sym->getName());
+ if (ResultI != Result.end())
+ Sym->getAddressable().setAddress(ResultI->second.getAddress());
+ else
+ assert(Sym->getLinkage() == Linkage::Weak &&
+ "Failed to resolve non-weak reference");
}
LLVM_DEBUG({
@@ -282,8 +295,11 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) {
<< formatv("{0:x16}", Sym->getAddress()) << "\n";
});
assert(llvm::all_of(G->external_symbols(),
- [](Symbol *Sym) { return Sym->getAddress() != 0; }) &&
- "All symbols should have been resolved by this point");
+ [](Symbol *Sym) {
+ return Sym->getAddress() != 0 ||
+ Sym->getLinkage() == Linkage::Weak;
+ }) &&
+ "All strong external symbols should have been resolved by now");
}
void JITLinkerBase::deallocateAndBailOut(Error Err) {
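
getExternalSymbolNames now hands the JITLinkContext a LookupMap of per-symbol lookup flags rather than a bare name set, and applyLookupResult tolerates unresolved weak references. A hedged sketch of how a context-side resolver might consume that map (Externals stands for the returned map; resolveInHost is hypothetical and returns 0 for names it cannot find):

// Sketch: required symbols must resolve; weak ones may legitimately be
// absent and are simply left out of the result.
AsyncLookupResult Result;
for (auto &KV : Externals) {
  if (JITTargetAddress Addr = resolveInHost(KV.first))
    Result[KV.first] = JITEvaluatedSymbol(Addr, JITSymbolFlags::Exported);
  else
    assert(KV.second == SymbolLookupFlags::WeaklyReferencedSymbol &&
           "Failed to resolve required symbol");
}
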
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
index 07dee6cee200..d5687b7afc96 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
@@ -106,7 +106,7 @@ private:
SegmentLayoutMap layOutBlocks();
Error allocateSegments(const SegmentLayoutMap &Layout);
- DenseSet<StringRef> getExternalSymbolNames() const;
+ JITLinkContext::LookupMap getExternalSymbolNames() const;
void applyLookupResult(AsyncLookupResult LR);
void deallocateAndBailOut(Error Err);
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
index 7366f53ebf36..701f108a9a21 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
@@ -179,7 +179,9 @@ Error MachOLinkGraphBuilder::createNormalizedSections() {
llvm::sort(Sections,
[](const NormalizedSection *LHS, const NormalizedSection *RHS) {
assert(LHS && RHS && "Null section?");
- return LHS->Address < RHS->Address;
+ if (LHS->Address != RHS->Address)
+ return LHS->Address < RHS->Address;
+ return LHS->Size < RHS->Size;
});
for (unsigned I = 0, E = Sections.size() - 1; I != E; ++I) {
@@ -311,7 +313,7 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
return make_error<JITLinkError>("Anonymous common symbol at index " +
Twine(KV.first));
NSym.GraphSymbol = &G->addCommonSymbol(
- *NSym.Name, NSym.S, getCommonSection(), NSym.Value, 0,
+ *NSym.Name, NSym.S, getCommonSection(), 0, NSym.Value,
1ull << MachO::GET_COMM_ALIGN(NSym.Desc),
NSym.Desc & MachO::N_NO_DEAD_STRIP);
} else {
@@ -319,7 +321,9 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
return make_error<JITLinkError>("Anonymous external symbol at "
"index " +
Twine(KV.first));
- NSym.GraphSymbol = &G->addExternalSymbol(*NSym.Name, 0);
+ NSym.GraphSymbol = &G->addExternalSymbol(
+ *NSym.Name, 0,
+ NSym.Desc & MachO::N_WEAK_REF ? Linkage::Weak : Linkage::Strong);
}
break;
case MachO::N_ABS:
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
index e1123cd11048..91b1d5a22387 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
@@ -30,74 +30,6 @@ public:
Expected<std::unique_ptr<LinkGraph>> buildGraph();
protected:
- class MachOEHFrameBinaryParser : public EHFrameBinaryParser {
- public:
- MachOEHFrameBinaryParser(MachOLinkGraphBuilder &Builder,
- JITTargetAddress EHFrameAddress,
- StringRef EHFrameContent, Section &EHFrameSection,
- uint64_t CIEAlignment, uint64_t FDEAlignment,
- Edge::Kind FDEToCIERelocKind,
- Edge::Kind FDEToTargetRelocKind)
- : EHFrameBinaryParser(EHFrameAddress, EHFrameContent,
- Builder.getGraph().getPointerSize(),
- Builder.getGraph().getEndianness()),
- Builder(Builder), EHFrameSection(EHFrameSection),
- CIEAlignment(CIEAlignment), FDEAlignment(FDEAlignment),
- FDEToCIERelocKind(FDEToCIERelocKind),
- FDEToTargetRelocKind(FDEToTargetRelocKind) {}
-
- Symbol *getSymbolAtAddress(JITTargetAddress Address) override {
- if (auto *Sym = Builder.getSymbolByAddress(Address))
- if (Sym->getAddress() == Address)
- return Sym;
- return nullptr;
- }
-
- Symbol &createCIERecord(JITTargetAddress RecordAddr,
- StringRef RecordContent) override {
- auto &G = Builder.getGraph();
- auto &B = G.createContentBlock(EHFrameSection, RecordContent, RecordAddr,
- CIEAlignment, 0);
- auto &CIESymbol =
- G.addAnonymousSymbol(B, 0, RecordContent.size(), false, false);
- Builder.setCanonicalSymbol(CIESymbol);
- return CIESymbol;
- }
-
- Expected<Symbol &> createFDERecord(JITTargetAddress RecordAddr,
- StringRef RecordContent, Symbol &CIE,
- size_t CIEOffset, Symbol &Func,
- size_t FuncOffset, Symbol *LSDA,
- size_t LSDAOffset) override {
- auto &G = Builder.getGraph();
- auto &B = G.createContentBlock(EHFrameSection, RecordContent, RecordAddr,
- FDEAlignment, 0);
-
- // Add edges to CIE, Func, and (conditionally) LSDA.
- B.addEdge(FDEToCIERelocKind, CIEOffset, CIE, 0);
- B.addEdge(FDEToTargetRelocKind, FuncOffset, Func, 0);
-
- if (LSDA)
- B.addEdge(FDEToTargetRelocKind, LSDAOffset, *LSDA, 0);
-
- auto &FDESymbol =
- G.addAnonymousSymbol(B, 0, RecordContent.size(), false, false);
-
- // Add a keep-alive relocation from the function to the FDE to ensure it
- // is not dead stripped.
- Func.getBlock().addEdge(Edge::KeepAlive, 0, FDESymbol, 0);
-
- return FDESymbol;
- }
-
- private:
- MachOLinkGraphBuilder &Builder;
- Section &EHFrameSection;
- uint64_t CIEAlignment;
- uint64_t FDEAlignment;
- Edge::Kind FDEToCIERelocKind;
- Edge::Kind FDEToTargetRelocKind;
- };
struct NormalizedSymbol {
friend class MachOLinkGraphBuilder;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index 945343bff89d..944767449ce2 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -27,19 +27,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder {
public:
MachOLinkGraphBuilder_arm64(const object::MachOObjectFile &Obj)
: MachOLinkGraphBuilder(Obj),
- NumSymbols(Obj.getSymtabLoadCommand().nsyms) {
- addCustomSectionParser(
- "__eh_frame", [this](NormalizedSection &EHFrameSection) {
- if (!EHFrameSection.Data)
- return make_error<JITLinkError>(
- "__eh_frame section is marked zero-fill");
- return MachOEHFrameBinaryParser(
- *this, EHFrameSection.Address,
- StringRef(EHFrameSection.Data, EHFrameSection.Size),
- *EHFrameSection.GraphSection, 8, 4, NegDelta32, Delta64)
- .addToGraph();
- });
- }
+ NumSymbols(Obj.getSymtabLoadCommand().nsyms) {}
private:
static Expected<MachOARM64RelocationKind>
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index d83787ffd598..69ec72aae292 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -26,19 +26,7 @@ namespace {
class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder {
public:
MachOLinkGraphBuilder_x86_64(const object::MachOObjectFile &Obj)
- : MachOLinkGraphBuilder(Obj) {
- addCustomSectionParser(
- "__eh_frame", [this](NormalizedSection &EHFrameSection) {
- if (!EHFrameSection.Data)
- return make_error<JITLinkError>(
- "__eh_frame section is marked zero-fill");
- return MachOEHFrameBinaryParser(
- *this, EHFrameSection.Address,
- StringRef(EHFrameSection.Data, EHFrameSection.Size),
- *EHFrameSection.GraphSection, 8, 4, NegDelta32, Delta64)
- .addToGraph();
- });
- }
+ : MachOLinkGraphBuilder(Obj) {}
private:
static Expected<MachOX86RelocationKind>
@@ -264,7 +252,7 @@ private:
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
return TargetSymbolOrErr.takeError();
- Addend = *(const ulittle32_t *)FixupContent;
+ Addend = *(const little32_t *)FixupContent;
break;
case Pointer32:
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
@@ -296,12 +284,12 @@ private:
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
return TargetSymbolOrErr.takeError();
- Addend = *(const ulittle32_t *)FixupContent +
+ Addend = *(const little32_t *)FixupContent +
(1 << (*Kind - PCRel32Minus1));
break;
case PCRel32Anon: {
JITTargetAddress TargetAddress =
- FixupAddress + 4 + *(const ulittle32_t *)FixupContent;
+ FixupAddress + 4 + *(const little32_t *)FixupContent;
if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
TargetSymbol = &*TargetSymbolOrErr;
else
@@ -315,7 +303,7 @@ private:
JITTargetAddress Delta =
static_cast<JITTargetAddress>(1ULL << (*Kind - PCRel32Minus1Anon));
JITTargetAddress TargetAddress =
- FixupAddress + 4 + Delta + *(const ulittle32_t *)FixupContent;
+ FixupAddress + 4 + Delta + *(const little32_t *)FixupContent;
if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
TargetSymbol = &*TargetSymbolOrErr;
else
@@ -566,6 +554,11 @@ void jitLink_MachO_x86_64(std::unique_ptr<JITLinkContext> Ctx) {
Triple TT("x86_64-apple-macosx");
if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+    // Add eh-frame passes.
+ Config.PrePrunePasses.push_back(EHFrameSplitter("__eh_frame"));
+ Config.PrePrunePasses.push_back(
+ EHFrameEdgeFixer("__eh_frame", NegDelta32, Delta64, Delta64));
+
// Add a mark-live pass.
if (auto MarkLive = Ctx->getMarkLivePass(TT))
Config.PrePrunePasses.push_back(std::move(MarkLive));
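
The EH-frame work is now done by ordinary graph passes instead of a custom section parser. For reference, a minimal sketch of adding one more pre-prune pass, assuming the Error(LinkGraph &) pass signature that EHFrameSplitter and EHFrameEdgeFixer use:

// Sketch: a trivial extra pre-prune pass that just counts externals.
Config.PrePrunePasses.push_back([](LinkGraph &G) -> Error {
  size_t NumExternals = 0;
  for (auto *Sym : G.external_symbols()) {
    (void)Sym;
    ++NumExternals;
  }
  dbgs() << "pre-prune: " << NumExternals << " external symbols\n";
  return Error::success();
});
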
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index 75ddbc30445d..f26835ff8a08 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -162,7 +162,8 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R,
return;
}
- R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), true));
+ R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables),
+ JITDylibLookupFlags::MatchAllSymbols));
R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(),
std::move(Callables), AliaseeImpls));
}
@@ -171,18 +172,22 @@ CompileOnDemandLayer::PerDylibResources &
CompileOnDemandLayer::getPerDylibResources(JITDylib &TargetD) {
auto I = DylibResources.find(&TargetD);
if (I == DylibResources.end()) {
- auto &ImplD = getExecutionSession().createJITDylib(
- TargetD.getName() + ".impl", false);
- TargetD.withSearchOrderDo([&](const JITDylibSearchList &TargetSearchOrder) {
- auto NewSearchOrder = TargetSearchOrder;
- assert(!NewSearchOrder.empty() &&
- NewSearchOrder.front().first == &TargetD &&
- NewSearchOrder.front().second == true &&
- "TargetD must be at the front of its own search order and match "
- "non-exported symbol");
- NewSearchOrder.insert(std::next(NewSearchOrder.begin()), {&ImplD, true});
- ImplD.setSearchOrder(std::move(NewSearchOrder), false);
- });
+ auto &ImplD =
+ getExecutionSession().createJITDylib(TargetD.getName() + ".impl");
+ TargetD.withSearchOrderDo(
+ [&](const JITDylibSearchOrder &TargetSearchOrder) {
+ auto NewSearchOrder = TargetSearchOrder;
+ assert(
+ !NewSearchOrder.empty() &&
+ NewSearchOrder.front().first == &TargetD &&
+ NewSearchOrder.front().second ==
+ JITDylibLookupFlags::MatchAllSymbols &&
+ "TargetD must be at the front of its own search order and match "
+ "non-exported symbol");
+ NewSearchOrder.insert(std::next(NewSearchOrder.begin()),
+ {&ImplD, JITDylibLookupFlags::MatchAllSymbols});
+ ImplD.setSearchOrder(std::move(NewSearchOrder), false);
+ });
PerDylibResources PDR(ImplD, BuildIndirectStubsManager());
I = DylibResources.insert(std::make_pair(&TargetD, std::move(PDR))).first;
}
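
From the client side the new plumbing looks like the sketch below (ES and MainJD assumed in scope, names illustrative): MatchAllSymbols admits a dylib's non-exported definitions, MatchExportedSymbolsOnly hides them, replacing the old boolean.

// Sketch: explicit JITDylibSearchOrder construction.
auto &UtilsJD = ES.createJITDylib("utils");
MainJD.setSearchOrder(
    {{&MainJD, JITDylibLookupFlags::MatchAllSymbols},
     {&UtilsJD, JITDylibLookupFlags::MatchExportedSymbolsOnly}},
    /*SearchThisJITDylibFirst=*/false);
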
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
index f8251627a4ef..f5671d90420a 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
@@ -43,8 +43,7 @@ SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) {
}
auto ObjBuffer = std::make_unique<SmallVectorMemoryBuffer>(
- std::move(ObjBufferSV),
- "<in memory object compiled from " + M.getModuleIdentifier() + ">");
+ std::move(ObjBufferSV), M.getModuleIdentifier() + "-jitted-objectbuffer");
auto Obj = object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 5c7d888c2d6e..63ef889dae46 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/Core.h"
+
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/IR/Mangler.h"
@@ -77,16 +79,19 @@ bool flagsMatchCLOpts(const JITSymbolFlags &Flags) {
#endif // NDEBUG
}
-// Prints a set of items, filtered by an user-supplied predicate.
-template <typename Set, typename Pred = PrintAll<typename Set::value_type>>
-class SetPrinter {
+// Prints a sequence of items, filtered by a user-supplied predicate.
+template <typename Sequence,
+ typename Pred = PrintAll<typename Sequence::value_type>>
+class SequencePrinter {
public:
- SetPrinter(const Set &S, Pred ShouldPrint = Pred())
- : S(S), ShouldPrint(std::move(ShouldPrint)) {}
+ SequencePrinter(const Sequence &S, char OpenSeq, char CloseSeq,
+ Pred ShouldPrint = Pred())
+ : S(S), OpenSeq(OpenSeq), CloseSeq(CloseSeq),
+ ShouldPrint(std::move(ShouldPrint)) {}
void printTo(llvm::raw_ostream &OS) const {
bool PrintComma = false;
- OS << "{";
+ OS << OpenSeq;
for (auto &E : S) {
if (ShouldPrint(E)) {
if (PrintComma)
@@ -95,23 +100,26 @@ public:
PrintComma = true;
}
}
- OS << " }";
+ OS << ' ' << CloseSeq;
}
private:
- const Set &S;
+ const Sequence &S;
+ char OpenSeq;
+ char CloseSeq;
mutable Pred ShouldPrint;
};
-template <typename Set, typename Pred>
-SetPrinter<Set, Pred> printSet(const Set &S, Pred P = Pred()) {
- return SetPrinter<Set, Pred>(S, std::move(P));
+template <typename Sequence, typename Pred>
+SequencePrinter<Sequence, Pred> printSequence(const Sequence &S, char OpenSeq,
+ char CloseSeq, Pred P = Pred()) {
+ return SequencePrinter<Sequence, Pred>(S, OpenSeq, CloseSeq, std::move(P));
}
-// Render a SetPrinter by delegating to its printTo method.
-template <typename Set, typename Pred>
+// Render a SequencePrinter by delegating to its printTo method.
+template <typename Sequence, typename Pred>
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
- const SetPrinter<Set, Pred> &Printer) {
+ const SequencePrinter<Sequence, Pred> &Printer) {
Printer.printTo(OS);
return OS;
}
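
Usage is unchanged apart from the bracket characters; a minimal sketch, usable inside this file where PrintAll is visible:

// Sketch: prints "[ 1, 2, 3 ]". PrintAll is passed explicitly so the Pred
// template parameter can be deduced.
void dumpVals() {
  std::vector<int> Vals{1, 2, 3};
  dbgs() << printSequence(Vals, '[', ']', PrintAll<int>()) << "\n";
}
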
@@ -147,7 +155,11 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym) {
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols) {
- return OS << printSet(Symbols, PrintAll<SymbolStringPtr>());
+ return OS << printSequence(Symbols, '{', '}', PrintAll<SymbolStringPtr>());
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SymbolNameVector &Symbols) {
+ return OS << printSequence(Symbols, '[', ']', PrintAll<SymbolStringPtr>());
}
raw_ostream &operator<<(raw_ostream &OS, const JITSymbolFlags &Flags) {
@@ -182,11 +194,13 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolMap::value_type &KV) {
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap &SymbolFlags) {
- return OS << printSet(SymbolFlags, PrintSymbolFlagsMapElemsMatchingCLOpts());
+ return OS << printSequence(SymbolFlags, '{', '}',
+ PrintSymbolFlagsMapElemsMatchingCLOpts());
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolMap &Symbols) {
- return OS << printSet(Symbols, PrintSymbolMapElemsMatchingCLOpts());
+ return OS << printSequence(Symbols, '{', '}',
+ PrintSymbolMapElemsMatchingCLOpts());
}
raw_ostream &operator<<(raw_ostream &OS,
@@ -195,7 +209,8 @@ raw_ostream &operator<<(raw_ostream &OS,
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolDependenceMap &Deps) {
- return OS << printSet(Deps, PrintAll<SymbolDependenceMap::value_type>());
+ return OS << printSequence(Deps, '{', '}',
+ PrintAll<SymbolDependenceMap::value_type>());
}
raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU) {
@@ -205,16 +220,59 @@ raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU) {
return OS << ")";
}
-raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs) {
+raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K) {
+ switch (K) {
+ case LookupKind::Static:
+ return OS << "Static";
+ case LookupKind::DLSym:
+ return OS << "DLSym";
+ }
+ llvm_unreachable("Invalid lookup kind");
+}
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const JITDylibLookupFlags &JDLookupFlags) {
+ switch (JDLookupFlags) {
+ case JITDylibLookupFlags::MatchExportedSymbolsOnly:
+ return OS << "MatchExportedSymbolsOnly";
+ case JITDylibLookupFlags::MatchAllSymbols:
+ return OS << "MatchAllSymbols";
+ }
+ llvm_unreachable("Invalid JITDylib lookup flags");
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LookupFlags) {
+ switch (LookupFlags) {
+ case SymbolLookupFlags::RequiredSymbol:
+ return OS << "RequiredSymbol";
+ case SymbolLookupFlags::WeaklyReferencedSymbol:
+ return OS << "WeaklyReferencedSymbol";
+ }
+ llvm_unreachable("Invalid symbol lookup flags");
+}
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const SymbolLookupSet::value_type &KV) {
+ return OS << "(" << KV.first << ", " << KV.second << ")";
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupSet &LookupSet) {
+ return OS << printSequence(LookupSet, '{', '}',
+ PrintAll<SymbolLookupSet::value_type>());
+}
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const JITDylibSearchOrder &SearchOrder) {
OS << "[";
- if (!JDs.empty()) {
- assert(JDs.front().first && "JITDylibList entries must not be null");
- OS << " (\"" << JDs.front().first->getName() << "\", "
- << (JDs.front().second ? "true" : "false") << ")";
- for (auto &KV : make_range(std::next(JDs.begin()), JDs.end())) {
+ if (!SearchOrder.empty()) {
+ assert(SearchOrder.front().first &&
+ "JITDylibList entries must not be null");
+ OS << " (\"" << SearchOrder.front().first->getName() << "\", "
+ << SearchOrder.begin()->second << ")";
+ for (auto &KV :
+ make_range(std::next(SearchOrder.begin(), 1), SearchOrder.end())) {
assert(KV.first && "JITDylibList entries must not be null");
- OS << ", (\"" << KV.first->getName() << "\", "
- << (KV.second ? "true" : "false") << ")";
+ OS << ", (\"" << KV.first->getName() << "\", " << KV.second << ")";
}
}
OS << " ]";
@@ -262,7 +320,13 @@ void FailedToMaterialize::log(raw_ostream &OS) const {
OS << "Failed to materialize symbols: " << *Symbols;
}
-SymbolsNotFound::SymbolsNotFound(SymbolNameSet Symbols)
+SymbolsNotFound::SymbolsNotFound(SymbolNameSet Symbols) {
+ for (auto &Sym : Symbols)
+ this->Symbols.push_back(Sym);
+ assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
+}
+
+SymbolsNotFound::SymbolsNotFound(SymbolNameVector Symbols)
: Symbols(std::move(Symbols)) {
assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
}
@@ -289,7 +353,7 @@ void SymbolsCouldNotBeRemoved::log(raw_ostream &OS) const {
}
AsynchronousSymbolQuery::AsynchronousSymbolQuery(
- const SymbolNameSet &Symbols, SymbolState RequiredState,
+ const SymbolLookupSet &Symbols, SymbolState RequiredState,
SymbolsResolvedCallback NotifyComplete)
: NotifyComplete(std::move(NotifyComplete)), RequiredState(RequiredState) {
assert(RequiredState >= SymbolState::Resolved &&
@@ -298,8 +362,8 @@ AsynchronousSymbolQuery::AsynchronousSymbolQuery(
OutstandingSymbolsCount = Symbols.size();
- for (auto &S : Symbols)
- ResolvedSymbols[S] = nullptr;
+ for (auto &KV : Symbols)
+ ResolvedSymbols[KV.first] = nullptr;
}
void AsynchronousSymbolQuery::notifySymbolMetRequiredState(
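
A quick sketch of building a query's input under the new type; the flag-carrying add overload is assumed from the SymbolLookupFlags plumbing introduced in this patch:

// Sketch: a SymbolLookupSet mixing required and weakly-referenced names.
SymbolLookupSet LS;
LS.add(ES.intern("main"));              // required by default (assumed)
LS.add(ES.intern("__optional_hook"),
       SymbolLookupFlags::WeaklyReferencedSymbol);
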
@@ -511,10 +575,10 @@ AbsoluteSymbolsMaterializationUnit::extractFlags(const SymbolMap &Symbols) {
}
ReExportsMaterializationUnit::ReExportsMaterializationUnit(
- JITDylib *SourceJD, bool MatchNonExported, SymbolAliasMap Aliases,
- VModuleKey K)
+ JITDylib *SourceJD, JITDylibLookupFlags SourceJDLookupFlags,
+ SymbolAliasMap Aliases, VModuleKey K)
: MaterializationUnit(extractFlags(Aliases), std::move(K)),
- SourceJD(SourceJD), MatchNonExported(MatchNonExported),
+ SourceJD(SourceJD), SourceJDLookupFlags(SourceJDLookupFlags),
Aliases(std::move(Aliases)) {}
StringRef ReExportsMaterializationUnit::getName() const {
@@ -551,7 +615,7 @@ void ReExportsMaterializationUnit::materialize(
if (!Aliases.empty()) {
if (SourceJD)
- R.replace(reexports(*SourceJD, std::move(Aliases), MatchNonExported));
+ R.replace(reexports(*SourceJD, std::move(Aliases), SourceJDLookupFlags));
else
R.replace(symbolAliases(std::move(Aliases)));
}
@@ -572,11 +636,11 @@ void ReExportsMaterializationUnit::materialize(
 // be waiting on a symbol that it itself had to resolve. Usually this will just
// involve one round and a single query.
- std::vector<std::pair<SymbolNameSet, std::shared_ptr<OnResolveInfo>>>
+ std::vector<std::pair<SymbolLookupSet, std::shared_ptr<OnResolveInfo>>>
QueryInfos;
while (!RequestedAliases.empty()) {
SymbolNameSet ResponsibilitySymbols;
- SymbolNameSet QuerySymbols;
+ SymbolLookupSet QuerySymbols;
SymbolAliasMap QueryAliases;
// Collect as many aliases as we can without including a chain.
@@ -587,7 +651,7 @@ void ReExportsMaterializationUnit::materialize(
continue;
ResponsibilitySymbols.insert(KV.first);
- QuerySymbols.insert(KV.second.Aliasee);
+ QuerySymbols.add(KV.second.Aliasee);
QueryAliases[KV.first] = std::move(KV.second);
}
@@ -657,8 +721,9 @@ void ReExportsMaterializationUnit::materialize(
}
};
- ES.lookup(JITDylibSearchList({{&SrcJD, MatchNonExported}}), QuerySymbols,
- SymbolState::Resolved, std::move(OnComplete),
+ ES.lookup(LookupKind::Static,
+ JITDylibSearchOrder({{&SrcJD, SourceJDLookupFlags}}),
+ QuerySymbols, SymbolState::Resolved, std::move(OnComplete),
std::move(RegisterDependencies));
}
}
@@ -681,16 +746,16 @@ ReExportsMaterializationUnit::extractFlags(const SymbolAliasMap &Aliases) {
Expected<SymbolAliasMap>
buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) {
- auto Flags = SourceJD.lookupFlags(Symbols);
+ SymbolLookupSet LookupSet(Symbols);
+ auto Flags = SourceJD.lookupFlags(
+ LookupKind::Static, JITDylibLookupFlags::MatchAllSymbols, LookupSet);
if (!Flags)
return Flags.takeError();
- if (Flags->size() != Symbols.size()) {
- SymbolNameSet Unresolved = Symbols;
- for (auto &KV : *Flags)
- Unresolved.erase(KV.first);
- return make_error<SymbolsNotFound>(std::move(Unresolved));
+ if (!LookupSet.empty()) {
+ LookupSet.sortByName();
+ return make_error<SymbolsNotFound>(LookupSet.getSymbolNames());
}
SymbolAliasMap Result;
@@ -703,32 +768,32 @@ buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) {
}
ReexportsGenerator::ReexportsGenerator(JITDylib &SourceJD,
- bool MatchNonExported,
+ JITDylibLookupFlags SourceJDLookupFlags,
SymbolPredicate Allow)
- : SourceJD(SourceJD), MatchNonExported(MatchNonExported),
+ : SourceJD(SourceJD), SourceJDLookupFlags(SourceJDLookupFlags),
Allow(std::move(Allow)) {}
-Expected<SymbolNameSet>
-ReexportsGenerator::tryToGenerate(JITDylib &JD, const SymbolNameSet &Names) {
- orc::SymbolNameSet Added;
- orc::SymbolAliasMap AliasMap;
-
- auto Flags = SourceJD.lookupFlags(Names);
+Error ReexportsGenerator::tryToGenerate(LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags,
+ const SymbolLookupSet &LookupSet) {
+ assert(&JD != &SourceJD && "Cannot re-export from the same dylib");
+ // Use lookupFlags to find the subset of symbols that match our lookup.
+ auto Flags = SourceJD.lookupFlags(K, JDLookupFlags, LookupSet);
if (!Flags)
return Flags.takeError();
- for (auto &KV : *Flags) {
- if (Allow && !Allow(KV.first))
- continue;
- AliasMap[KV.first] = SymbolAliasMapEntry(KV.first, KV.second);
- Added.insert(KV.first);
- }
+ // Create an alias map.
+ orc::SymbolAliasMap AliasMap;
+ for (auto &KV : *Flags)
+ if (!Allow || Allow(KV.first))
+ AliasMap[KV.first] = SymbolAliasMapEntry(KV.first, KV.second);
- if (!Added.empty())
- cantFail(JD.define(reexports(SourceJD, AliasMap, MatchNonExported)));
+ if (AliasMap.empty())
+ return Error::success();
- return Added;
+ // Define the re-exports.
+ return JD.define(reexports(SourceJD, AliasMap, SourceJDLookupFlags));
}
JITDylib::DefinitionGenerator::~DefinitionGenerator() {}
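
Under the new protocol a generator reports what it can supply by defining it in JD and returning an Error, instead of returning the set of generated names. A hedged sketch of a custom generator (all names illustrative):

// Sketch: supply a fixed address for any name starting with "__const_".
class ConstantGenerator : public JITDylib::DefinitionGenerator {
public:
  Error tryToGenerate(LookupKind K, JITDylib &JD,
                      JITDylibLookupFlags JDLookupFlags,
                      const SymbolLookupSet &LookupSet) override {
    SymbolMap NewDefs;
    for (auto &KV : LookupSet)
      if ((*KV.first).startswith("__const_"))
        NewDefs[KV.first] =
            JITEvaluatedSymbol(0x1000, JITSymbolFlags::Exported);
    if (NewDefs.empty())
      return Error::success(); // supplying nothing is not an error
    return JD.define(absoluteSymbols(std::move(NewDefs)));
  }
};
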
@@ -1228,11 +1293,14 @@ void JITDylib::notifyFailed(FailedSymbolsWorklist Worklist) {
MI.UnemittedDependencies.clear();
// Collect queries to be failed for this MII.
+ AsynchronousSymbolQueryList ToDetach;
for (auto &Q : MII->second.pendingQueries()) {
// Add the query to the list to be failed and detach it.
FailedQueries.insert(Q);
- Q->detach();
+ ToDetach.push_back(Q);
}
+ for (auto &Q : ToDetach)
+ Q->detach();
assert(MI.Dependants.empty() &&
"Can not delete MaterializingInfo with dependants still attached");
@@ -1249,41 +1317,41 @@ void JITDylib::notifyFailed(FailedSymbolsWorklist Worklist) {
Q->handleFailed(make_error<FailedToMaterialize>(FailedSymbolsMap));
}
-void JITDylib::setSearchOrder(JITDylibSearchList NewSearchOrder,
- bool SearchThisJITDylibFirst,
- bool MatchNonExportedInThisDylib) {
- if (SearchThisJITDylibFirst) {
- if (NewSearchOrder.empty() || NewSearchOrder.front().first != this)
- NewSearchOrder.insert(NewSearchOrder.begin(),
- {this, MatchNonExportedInThisDylib});
- }
-
- ES.runSessionLocked([&]() { SearchOrder = std::move(NewSearchOrder); });
-}
-
-void JITDylib::addToSearchOrder(JITDylib &JD, bool MatchNonExported) {
+void JITDylib::setSearchOrder(JITDylibSearchOrder NewSearchOrder,
+ bool SearchThisJITDylibFirst) {
ES.runSessionLocked([&]() {
- SearchOrder.push_back({&JD, MatchNonExported});
+ if (SearchThisJITDylibFirst) {
+ SearchOrder.clear();
+ if (NewSearchOrder.empty() || NewSearchOrder.front().first != this)
+ SearchOrder.push_back(
+ std::make_pair(this, JITDylibLookupFlags::MatchAllSymbols));
+ SearchOrder.insert(SearchOrder.end(), NewSearchOrder.begin(),
+ NewSearchOrder.end());
+ } else
+ SearchOrder = std::move(NewSearchOrder);
});
}
+void JITDylib::addToSearchOrder(JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags) {
+ ES.runSessionLocked([&]() { SearchOrder.push_back({&JD, JDLookupFlags}); });
+}
+
void JITDylib::replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD,
- bool MatchNonExported) {
+ JITDylibLookupFlags JDLookupFlags) {
ES.runSessionLocked([&]() {
- auto I = std::find_if(SearchOrder.begin(), SearchOrder.end(),
- [&](const JITDylibSearchList::value_type &KV) {
- return KV.first == &OldJD;
- });
-
- if (I != SearchOrder.end())
- *I = {&NewJD, MatchNonExported};
+ for (auto &KV : SearchOrder)
+ if (KV.first == &OldJD) {
+ KV = {&NewJD, JDLookupFlags};
+ break;
+ }
});
}
void JITDylib::removeFromSearchOrder(JITDylib &JD) {
ES.runSessionLocked([&]() {
auto I = std::find_if(SearchOrder.begin(), SearchOrder.end(),
- [&](const JITDylibSearchList::value_type &KV) {
+ [&](const JITDylibSearchOrder::value_type &KV) {
return KV.first == &JD;
});
if (I != SearchOrder.end())
@@ -1346,63 +1414,54 @@ Error JITDylib::remove(const SymbolNameSet &Names) {
});
}
-Expected<SymbolFlagsMap> JITDylib::lookupFlags(const SymbolNameSet &Names) {
+Expected<SymbolFlagsMap>
+JITDylib::lookupFlags(LookupKind K, JITDylibLookupFlags JDLookupFlags,
+ SymbolLookupSet LookupSet) {
return ES.runSessionLocked([&, this]() -> Expected<SymbolFlagsMap> {
SymbolFlagsMap Result;
- auto Unresolved = lookupFlagsImpl(Result, Names);
- if (!Unresolved)
- return Unresolved.takeError();
+ lookupFlagsImpl(Result, K, JDLookupFlags, LookupSet);
- /// Run any definition generators.
+ // Run any definition generators.
for (auto &DG : DefGenerators) {
- // Bail out early if we've resolved everything.
- if (Unresolved->empty())
+ // Bail out early if we found everything.
+ if (LookupSet.empty())
break;
// Run this generator.
- auto NewDefs = DG->tryToGenerate(*this, *Unresolved);
- if (!NewDefs)
- return NewDefs.takeError();
-
- if (!NewDefs->empty()) {
- auto Unresolved2 = lookupFlagsImpl(Result, *NewDefs);
- if (!Unresolved2)
- return Unresolved2.takeError();
- (void)Unresolved2;
- assert(Unresolved2->empty() &&
- "All fallback defs should have been found by lookupFlagsImpl");
- }
+ if (auto Err = DG->tryToGenerate(K, *this, JDLookupFlags, LookupSet))
+ return std::move(Err);
- for (auto &Name : *NewDefs)
- Unresolved->erase(Name);
+ // Re-try the search.
+ lookupFlagsImpl(Result, K, JDLookupFlags, LookupSet);
}
+
return Result;
});
}
-Expected<SymbolNameSet> JITDylib::lookupFlagsImpl(SymbolFlagsMap &Flags,
- const SymbolNameSet &Names) {
- SymbolNameSet Unresolved;
+void JITDylib::lookupFlagsImpl(SymbolFlagsMap &Result, LookupKind K,
+ JITDylibLookupFlags JDLookupFlags,
+ SymbolLookupSet &LookupSet) {
- for (auto &Name : Names) {
- auto I = Symbols.find(Name);
- if (I != Symbols.end()) {
- assert(!Flags.count(Name) && "Symbol already present in Flags map");
- Flags[Name] = I->second.getFlags();
- } else
- Unresolved.insert(Name);
- }
-
- return Unresolved;
+ LookupSet.forEachWithRemoval(
+ [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) -> bool {
+ auto I = Symbols.find(Name);
+ if (I == Symbols.end())
+ return false;
+ assert(!Result.count(Name) && "Symbol already present in Flags map");
+ Result[Name] = I->second.getFlags();
+ return true;
+ });
}
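
forEachWithRemoval is the workhorse of this rewrite: the callback returns true to drop the visited element from the set and false to keep it, and lodgeQueryImpl below uses an Expected<bool> variant that can also bail out with an error. A minimal sketch of the idiom:

// Sketch: drop every pending name that is already known.
void dropKnown(SymbolLookupSet &Pending, const SymbolNameSet &Known) {
  Pending.forEachWithRemoval(
      [&](const SymbolStringPtr &Name, SymbolLookupFlags) -> bool {
        return Known.count(Name) != 0; // true => remove from Pending
      });
}
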
-Error JITDylib::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, bool MatchNonExported,
- MaterializationUnitList &MUs) {
+Error JITDylib::lodgeQuery(MaterializationUnitList &MUs,
+ std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ LookupKind K, JITDylibLookupFlags JDLookupFlags,
+ SymbolLookupSet &Unresolved) {
assert(Q && "Query can not be null");
- if (auto Err = lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs))
+ if (auto Err = lodgeQueryImpl(MUs, Q, K, JDLookupFlags, Unresolved))
return Err;
// Run any definition generators.
@@ -1413,104 +1472,86 @@ Error JITDylib::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
break;
// Run the generator.
- auto NewDefs = DG->tryToGenerate(*this, Unresolved);
-
- // If the generator returns an error then bail out.
- if (!NewDefs)
- return NewDefs.takeError();
-
- // If the generator was able to generate new definitions for any of the
- // unresolved symbols then lodge the query against them.
- if (!NewDefs->empty()) {
- for (auto &D : *NewDefs)
- Unresolved.erase(D);
-
- // Lodge query. This can not fail as any new definitions were added
- // by the generator under the session locked. Since they can't have
- // started materializing yet the can not have failed.
- cantFail(lodgeQueryImpl(Q, *NewDefs, MatchNonExported, MUs));
+ if (auto Err = DG->tryToGenerate(K, *this, JDLookupFlags, Unresolved))
+ return Err;
- assert(NewDefs->empty() &&
- "All fallback defs should have been found by lookupImpl");
- }
+    // Lodge the query. This cannot fail, as any new definitions were added
+    // by the generator under the session lock. Since they can't have
+    // started materializing yet, they cannot have failed.
+ cantFail(lodgeQueryImpl(MUs, Q, K, JDLookupFlags, Unresolved));
}
return Error::success();
}
-Error JITDylib::lodgeQueryImpl(
- std::shared_ptr<AsynchronousSymbolQuery> &Q, SymbolNameSet &Unresolved,
- bool MatchNonExported,
- std::vector<std::unique_ptr<MaterializationUnit>> &MUs) {
-
- std::vector<SymbolStringPtr> ToRemove;
- for (auto Name : Unresolved) {
-
- // Search for the name in Symbols. Skip it if not found.
- auto SymI = Symbols.find(Name);
- if (SymI == Symbols.end())
- continue;
-
- // If this is a non exported symbol and we're skipping those then skip it.
- if (!SymI->second.getFlags().isExported() && !MatchNonExported)
- continue;
-
- // If we matched against Name in JD, mark it to be removed from the
- // Unresolved set.
- ToRemove.push_back(Name);
-
- // If we matched against this symbol but it is in the error state then
- // bail out and treat it as a failure to materialize.
- if (SymI->second.getFlags().hasError()) {
- auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
- (*FailedSymbolsMap)[this] = {Name};
- return make_error<FailedToMaterialize>(std::move(FailedSymbolsMap));
- }
-
- // If this symbol already meets the required state for then notify the
- // query and continue.
- if (SymI->second.getState() >= Q->getRequiredState()) {
- Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
- continue;
- }
-
- // Otherwise this symbol does not yet meet the required state. Check whether
- // it has a materializer attached, and if so prepare to run it.
- if (SymI->second.hasMaterializerAttached()) {
- assert(SymI->second.getAddress() == 0 &&
- "Symbol not resolved but already has address?");
- auto UMII = UnmaterializedInfos.find(Name);
- assert(UMII != UnmaterializedInfos.end() &&
- "Lazy symbol should have UnmaterializedInfo");
- auto MU = std::move(UMII->second->MU);
- assert(MU != nullptr && "Materializer should not be null");
-
- // Move all symbols associated with this MaterializationUnit into
- // materializing state.
- for (auto &KV : MU->getSymbols()) {
- auto SymK = Symbols.find(KV.first);
- SymK->second.setMaterializerAttached(false);
- SymK->second.setState(SymbolState::Materializing);
- UnmaterializedInfos.erase(KV.first);
- }
+Error JITDylib::lodgeQueryImpl(MaterializationUnitList &MUs,
+ std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ LookupKind K, JITDylibLookupFlags JDLookupFlags,
+ SymbolLookupSet &Unresolved) {
+
+ return Unresolved.forEachWithRemoval(
+ [&](const SymbolStringPtr &Name,
+ SymbolLookupFlags SymLookupFlags) -> Expected<bool> {
+        // Search for the name in Symbols. If it is not found, continue
+        // without removing it from the set.
+ auto SymI = Symbols.find(Name);
+ if (SymI == Symbols.end())
+ return false;
+
+ // If this is a non exported symbol and we're matching exported symbols
+ // only then skip this symbol without removal.
+ if (!SymI->second.getFlags().isExported() &&
+ JDLookupFlags == JITDylibLookupFlags::MatchExportedSymbolsOnly)
+ return false;
+
+ // If we matched against this symbol but it is in the error state then
+ // bail out and treat it as a failure to materialize.
+ if (SymI->second.getFlags().hasError()) {
+ auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
+ (*FailedSymbolsMap)[this] = {Name};
+ return make_error<FailedToMaterialize>(std::move(FailedSymbolsMap));
+ }
- // Add MU to the list of MaterializationUnits to be materialized.
- MUs.push_back(std::move(MU));
- }
+        // If this symbol already meets the required state, notify the
+        // query, then remove the symbol and continue.
+ if (SymI->second.getState() >= Q->getRequiredState()) {
+ Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
+ return true;
+ }
- // Add the query to the PendingQueries list.
- assert(SymI->second.isInMaterializationPhase() &&
- "By this line the symbol should be materializing");
- auto &MI = MaterializingInfos[Name];
- MI.addQuery(Q);
- Q->addQueryDependence(*this, Name);
- }
+ // Otherwise this symbol does not yet meet the required state. Check
+ // whether it has a materializer attached, and if so prepare to run it.
+ if (SymI->second.hasMaterializerAttached()) {
+ assert(SymI->second.getAddress() == 0 &&
+ "Symbol not resolved but already has address?");
+ auto UMII = UnmaterializedInfos.find(Name);
+ assert(UMII != UnmaterializedInfos.end() &&
+ "Lazy symbol should have UnmaterializedInfo");
+ auto MU = std::move(UMII->second->MU);
+ assert(MU != nullptr && "Materializer should not be null");
+
+ // Move all symbols associated with this MaterializationUnit into
+ // materializing state.
+ for (auto &KV : MU->getSymbols()) {
+ auto SymK = Symbols.find(KV.first);
+ SymK->second.setMaterializerAttached(false);
+ SymK->second.setState(SymbolState::Materializing);
+ UnmaterializedInfos.erase(KV.first);
+ }
- // Remove any symbols that we found.
- for (auto &Name : ToRemove)
- Unresolved.erase(Name);
+ // Add MU to the list of MaterializationUnits to be materialized.
+ MUs.push_back(std::move(MU));
+ }
- return Error::success();
+ // Add the query to the PendingQueries list and continue, deleting the
+ // element.
+ assert(SymI->second.isInMaterializationPhase() &&
+ "By this line the symbol should be materializing");
+ auto &MI = MaterializingInfos[Name];
+ MI.addQuery(Q);
+ Q->addQueryDependence(*this, Name);
+ return true;
+ });
}
Expected<SymbolNameSet>
@@ -1523,7 +1564,7 @@ JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
bool QueryComplete = false;
std::vector<std::unique_ptr<MaterializationUnit>> MUs;
- SymbolNameSet Unresolved = std::move(Names);
+ SymbolLookupSet Unresolved(Names);
auto Err = ES.runSessionLocked([&, this]() -> Error {
QueryComplete = lookupImpl(Q, MUs, Unresolved);
@@ -1535,16 +1576,13 @@ JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
break;
assert(!QueryComplete && "query complete but unresolved symbols remain?");
- auto NewDefs = DG->tryToGenerate(*this, Unresolved);
- if (!NewDefs)
- return NewDefs.takeError();
- if (!NewDefs->empty()) {
- for (auto &D : *NewDefs)
- Unresolved.erase(D);
- QueryComplete = lookupImpl(Q, MUs, *NewDefs);
- assert(NewDefs->empty() &&
- "All fallback defs should have been found by lookupImpl");
- }
+ if (auto Err = DG->tryToGenerate(LookupKind::Static, *this,
+ JITDylibLookupFlags::MatchAllSymbols,
+ Unresolved))
+ return Err;
+
+ if (!Unresolved.empty())
+ QueryComplete = lookupImpl(Q, MUs, Unresolved);
}
return Error::success();
});
@@ -1572,68 +1610,68 @@ JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
// for (auto &MU : MUs)
// ES.dispatchMaterialization(*this, std::move(MU));
- return Unresolved;
+ SymbolNameSet RemainingSymbols;
+ for (auto &KV : Unresolved)
+ RemainingSymbols.insert(KV.first);
+
+ return RemainingSymbols;
}
bool JITDylib::lookupImpl(
std::shared_ptr<AsynchronousSymbolQuery> &Q,
std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
- SymbolNameSet &Unresolved) {
+ SymbolLookupSet &Unresolved) {
bool QueryComplete = false;
std::vector<SymbolStringPtr> ToRemove;
- for (auto Name : Unresolved) {
-
- // Search for the name in Symbols. Skip it if not found.
- auto SymI = Symbols.find(Name);
- if (SymI == Symbols.end())
- continue;
-
- // If we found Name, mark it to be removed from the Unresolved set.
- ToRemove.push_back(Name);
-
- if (SymI->second.getState() >= Q->getRequiredState()) {
- Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
- if (Q->isComplete())
- QueryComplete = true;
- continue;
- }
-
- // If the symbol is lazy, get the MaterialiaztionUnit for it.
- if (SymI->second.hasMaterializerAttached()) {
- assert(SymI->second.getAddress() == 0 &&
- "Lazy symbol should not have a resolved address");
- auto UMII = UnmaterializedInfos.find(Name);
- assert(UMII != UnmaterializedInfos.end() &&
- "Lazy symbol should have UnmaterializedInfo");
- auto MU = std::move(UMII->second->MU);
- assert(MU != nullptr && "Materializer should not be null");
-
- // Kick all symbols associated with this MaterializationUnit into
- // materializing state.
- for (auto &KV : MU->getSymbols()) {
- auto SymK = Symbols.find(KV.first);
- assert(SymK != Symbols.end() && "Missing symbol table entry");
- SymK->second.setState(SymbolState::Materializing);
- SymK->second.setMaterializerAttached(false);
- UnmaterializedInfos.erase(KV.first);
- }
+ Unresolved.forEachWithRemoval(
+ [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) -> bool {
+ // Search for the name in Symbols. Skip without removing if not found.
+ auto SymI = Symbols.find(Name);
+ if (SymI == Symbols.end())
+ return false;
+
+ // If the symbol is already in the required state then notify the query
+ // and remove.
+ if (SymI->second.getState() >= Q->getRequiredState()) {
+ Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
+ if (Q->isComplete())
+ QueryComplete = true;
+ return true;
+ }
- // Add MU to the list of MaterializationUnits to be materialized.
- MUs.push_back(std::move(MU));
- }
+          // If the symbol is lazy, get the MaterializationUnit for it.
+ if (SymI->second.hasMaterializerAttached()) {
+ assert(SymI->second.getAddress() == 0 &&
+ "Lazy symbol should not have a resolved address");
+ auto UMII = UnmaterializedInfos.find(Name);
+ assert(UMII != UnmaterializedInfos.end() &&
+ "Lazy symbol should have UnmaterializedInfo");
+ auto MU = std::move(UMII->second->MU);
+ assert(MU != nullptr && "Materializer should not be null");
+
+ // Kick all symbols associated with this MaterializationUnit into
+ // materializing state.
+ for (auto &KV : MU->getSymbols()) {
+ auto SymK = Symbols.find(KV.first);
+ assert(SymK != Symbols.end() && "Missing symbol table entry");
+ SymK->second.setState(SymbolState::Materializing);
+ SymK->second.setMaterializerAttached(false);
+ UnmaterializedInfos.erase(KV.first);
+ }
- // Add the query to the PendingQueries list.
- assert(SymI->second.isInMaterializationPhase() &&
- "By this line the symbol should be materializing");
- auto &MI = MaterializingInfos[Name];
- MI.addQuery(Q);
- Q->addQueryDependence(*this, Name);
- }
+ // Add MU to the list of MaterializationUnits to be materialized.
+ MUs.push_back(std::move(MU));
+ }
- // Remove any marked symbols from the Unresolved set.
- for (auto &Name : ToRemove)
- Unresolved.erase(Name);
+ // Add the query to the PendingQueries list.
+ assert(SymI->second.isInMaterializationPhase() &&
+ "By this line the symbol should be materializing");
+ auto &MI = MaterializingInfos[Name];
+ MI.addQuery(Q);
+ Q->addQueryDependence(*this, Name);
+ return true;
+ });
return QueryComplete;
}
@@ -1642,11 +1680,7 @@ void JITDylib::dump(raw_ostream &OS) {
ES.runSessionLocked([&, this]() {
OS << "JITDylib \"" << JITDylibName << "\" (ES: "
<< format("0x%016" PRIx64, reinterpret_cast<uintptr_t>(&ES)) << "):\n"
- << "Search order: [";
- for (auto &KV : SearchOrder)
- OS << " (\"" << KV.first->getName() << "\", "
- << (KV.second ? "all" : "exported only") << ")";
- OS << " ]\n"
+ << "Search order: " << SearchOrder << "\n"
<< "Symbol table:\n";
for (auto &KV : Symbols) {
@@ -1727,7 +1761,7 @@ JITDylib::MaterializingInfo::takeQueriesMeeting(SymbolState RequiredState) {
JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
: ES(ES), JITDylibName(std::move(Name)) {
- SearchOrder.push_back({this, true});
+ SearchOrder.push_back({this, JITDylibLookupFlags::MatchAllSymbols});
}
Error JITDylib::defineImpl(MaterializationUnit &MU) {
@@ -1820,12 +1854,6 @@ void JITDylib::transferEmittedNodeDependencies(
ExecutionSession::ExecutionSession(std::shared_ptr<SymbolStringPool> SSP)
: SSP(SSP ? std::move(SSP) : std::make_shared<SymbolStringPool>()) {
- // Construct the main dylib.
- JDs.push_back(std::unique_ptr<JITDylib>(new JITDylib(*this, "<main>")));
-}
-
-JITDylib &ExecutionSession::getMainJITDylib() {
- return runSessionLocked([this]() -> JITDylib & { return *JDs.front(); });
}
JITDylib *ExecutionSession::getJITDylibByName(StringRef Name) {
@@ -1837,14 +1865,11 @@ JITDylib *ExecutionSession::getJITDylibByName(StringRef Name) {
});
}
-JITDylib &ExecutionSession::createJITDylib(std::string Name,
- bool AddToMainDylibSearchOrder) {
+JITDylib &ExecutionSession::createJITDylib(std::string Name) {
assert(!getJITDylibByName(Name) && "JITDylib with that name already exists");
return runSessionLocked([&, this]() -> JITDylib & {
JDs.push_back(
std::unique_ptr<JITDylib>(new JITDylib(*this, std::move(Name))));
- if (AddToMainDylibSearchOrder)
- JDs.front()->addToSearchOrder(*JDs.back());
return *JDs.back();
});
}
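
With the implicit "<main>" dylib and the AddToMainDylibSearchOrder flag gone, clients create and wire up dylibs explicitly. A minimal sketch (names illustrative):

// Sketch: explicit dylib creation and linking.
auto &MainJD = ES.createJITDylib("main");
auto &HelpersJD = ES.createJITDylib("helpers");
MainJD.addToSearchOrder(HelpersJD,
                        JITDylibLookupFlags::MatchExportedSymbolsOnly);
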
@@ -1895,7 +1920,7 @@ Expected<SymbolMap> ExecutionSession::legacyLookup(
#endif
auto Query = std::make_shared<AsynchronousSymbolQuery>(
- Names, RequiredState, std::move(NotifyComplete));
+ SymbolLookupSet(Names), RequiredState, std::move(NotifyComplete));
// FIXME: This should be run session locked along with the registration code
// and error reporting below.
SymbolNameSet UnresolvedSymbols = AsyncLookup(Query, std::move(Names));
@@ -1932,8 +1957,9 @@ Expected<SymbolMap> ExecutionSession::legacyLookup(
}
void ExecutionSession::lookup(
- const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols,
- SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete,
+ LookupKind K, const JITDylibSearchOrder &SearchOrder,
+ SymbolLookupSet Symbols, SymbolState RequiredState,
+ SymbolsResolvedCallback NotifyComplete,
RegisterDependenciesFunction RegisterDependencies) {
LLVM_DEBUG({
@@ -1962,14 +1988,24 @@ void ExecutionSession::lookup(
"JITDylibList should not contain duplicate entries");
auto &JD = *KV.first;
- auto MatchNonExported = KV.second;
- if (auto Err = JD.lodgeQuery(Q, Unresolved, MatchNonExported,
- CollectedMUsMap[&JD]))
+ auto JDLookupFlags = KV.second;
+ if (auto Err = JD.lodgeQuery(CollectedMUsMap[&JD], Q, K, JDLookupFlags,
+ Unresolved))
return Err;
}
+ // Strip any weakly referenced symbols that were not found.
+ Unresolved.forEachWithRemoval(
+ [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) {
+ if (Flags == SymbolLookupFlags::WeaklyReferencedSymbol) {
+ Q->dropSymbol(Name);
+ return true;
+ }
+ return false;
+ });
+
if (!Unresolved.empty())
- return make_error<SymbolsNotFound>(std::move(Unresolved));
+ return make_error<SymbolsNotFound>(Unresolved.getSymbolNames());
return Error::success();
};
@@ -2023,8 +2059,8 @@ void ExecutionSession::lookup(
}
Expected<SymbolMap>
-ExecutionSession::lookup(const JITDylibSearchList &SearchOrder,
- const SymbolNameSet &Symbols,
+ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder,
+ const SymbolLookupSet &Symbols, LookupKind K,
SymbolState RequiredState,
RegisterDependenciesFunction RegisterDependencies) {
#if LLVM_ENABLE_THREADS
@@ -2056,7 +2092,7 @@ ExecutionSession::lookup(const JITDylibSearchList &SearchOrder,
#endif
// Perform the asynchronous lookup.
- lookup(SearchOrder, Symbols, RequiredState, NotifyComplete,
+ lookup(K, SearchOrder, Symbols, RequiredState, NotifyComplete,
RegisterDependencies);
#if LLVM_ENABLE_THREADS
@@ -2077,12 +2113,12 @@ ExecutionSession::lookup(const JITDylibSearchList &SearchOrder,
}
Expected<JITEvaluatedSymbol>
-ExecutionSession::lookup(const JITDylibSearchList &SearchOrder,
+ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder,
SymbolStringPtr Name) {
- SymbolNameSet Names({Name});
+ SymbolLookupSet Names({Name});
- if (auto ResultMap = lookup(SearchOrder, std::move(Names), SymbolState::Ready,
- NoDependenciesToRegister)) {
+ if (auto ResultMap = lookup(SearchOrder, std::move(Names), LookupKind::Static,
+ SymbolState::Ready, NoDependenciesToRegister)) {
assert(ResultMap->size() == 1 && "Unexpected number of results");
assert(ResultMap->count(Name) && "Missing result for symbol");
return std::move(ResultMap->begin()->second);
@@ -2093,14 +2129,7 @@ ExecutionSession::lookup(const JITDylibSearchList &SearchOrder,
Expected<JITEvaluatedSymbol>
ExecutionSession::lookup(ArrayRef<JITDylib *> SearchOrder,
SymbolStringPtr Name) {
- SymbolNameSet Names({Name});
-
- JITDylibSearchList FullSearchOrder;
- FullSearchOrder.reserve(SearchOrder.size());
- for (auto *JD : SearchOrder)
- FullSearchOrder.push_back({JD, false});
-
- return lookup(FullSearchOrder, Name);
+ return lookup(makeJITDylibSearchOrder(SearchOrder), Name);
}
Expected<JITEvaluatedSymbol>
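
A sketch of a blocking lookup under the new argument order; a weakly referenced name that cannot be found is dropped from the result rather than turned into a SymbolsNotFound error:

// Sketch: look up one required symbol, blocking until it is Ready.
SymbolLookupSet Names({ES.intern("foo")});
auto Result = ES.lookup(
    makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols),
    std::move(Names), LookupKind::Static, SymbolState::Ready,
    NoDependenciesToRegister);
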
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp
new file mode 100644
index 000000000000..c9e87ff737fc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp
@@ -0,0 +1,68 @@
+//===---------- DebugUtils.cpp - Utilities for debugging ORC JITs ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+
+DumpObjects::DumpObjects(std::string DumpDir, std::string IdentifierOverride)
+ : DumpDir(std::move(DumpDir)),
+ IdentifierOverride(std::move(IdentifierOverride)) {
+
+  // Discard any trailing separators.
+ while (!this->DumpDir.empty() &&
+ sys::path::is_separator(this->DumpDir.back()))
+ this->DumpDir.pop_back();
+}
+
+Expected<std::unique_ptr<MemoryBuffer>>
+DumpObjects::operator()(std::unique_ptr<MemoryBuffer> Obj) {
+ size_t Idx = 1;
+
+ std::string DumpPathStem;
+ raw_string_ostream(DumpPathStem)
+ << DumpDir << (DumpDir.empty() ? "" : "/") << getBufferIdentifier(*Obj);
+
+ std::string DumpPath = DumpPathStem + ".o";
+ while (sys::fs::exists(DumpPath)) {
+ DumpPath.clear();
+ raw_string_ostream(DumpPath) << DumpPathStem << "." << (++Idx) << ".o";
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Dumping object buffer [ " << (const void *)Obj->getBufferStart()
+ << " -- " << (const void *)(Obj->getBufferEnd() - 1) << " ] to "
+ << DumpPath << "\n";
+ });
+
+ std::error_code EC;
+ raw_fd_ostream DumpStream(DumpPath, EC);
+ if (EC)
+ return errorCodeToError(EC);
+ DumpStream.write(Obj->getBufferStart(), Obj->getBufferSize());
+
+ return std::move(Obj);
+}
+
+StringRef DumpObjects::getBufferIdentifier(MemoryBuffer &B) {
+ if (!IdentifierOverride.empty())
+ return IdentifierOverride;
+ StringRef Identifier = B.getBufferIdentifier();
+ Identifier.consume_back(".o");
+ return Identifier;
+}
+
+} // End namespace orc.
+} // End namespace llvm.
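
A hedged sketch of wiring DumpObjects in as a buffer transform so every emitted object is written out before being passed along; the hook shape here is assumed, while DumpObjects itself supplies the collision-free file naming:

// Sketch: dump each object buffer to /tmp, then forward it unchanged.
DumpObjects Dumper("/tmp", "");
auto DumpAndForward = [&](std::unique_ptr<MemoryBuffer> Obj)
    -> Expected<std::unique_ptr<MemoryBuffer>> {
  return Dumper(std::move(Obj));
};
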
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index 4a886ac0597c..3d97fe9eeab1 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -19,6 +19,32 @@
namespace llvm {
namespace orc {
+int runAsMain(int (*Main)(int, char *[]), ArrayRef<std::string> Args,
+ Optional<StringRef> ProgramName) {
+ std::vector<std::unique_ptr<char[]>> ArgVStorage;
+ std::vector<char *> ArgV;
+
+ ArgVStorage.reserve(Args.size() + (ProgramName ? 1 : 0));
+ ArgV.reserve(Args.size() + 1 + (ProgramName ? 1 : 0));
+
+ if (ProgramName) {
+ ArgVStorage.push_back(std::make_unique<char[]>(ProgramName->size() + 1));
+ llvm::copy(*ProgramName, &ArgVStorage.back()[0]);
+ ArgVStorage.back()[ProgramName->size()] = '\0';
+ ArgV.push_back(ArgVStorage.back().get());
+ }
+
+ for (auto &Arg : Args) {
+ ArgVStorage.push_back(std::make_unique<char[]>(Arg.size() + 1));
+ llvm::copy(Arg, &ArgVStorage.back()[0]);
+ ArgVStorage.back()[Arg.size()] = '\0';
+ ArgV.push_back(ArgVStorage.back().get());
+ }
+ ArgV.push_back(nullptr);
+
+ return Main(Args.size() + !!ProgramName, ArgV.data());
+}
+
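
A minimal usage sketch, assuming the target main has already been looked up in the JIT:

// Sketch: with ProgramName supplied, the callee sees argc == 3 and
// argv == { "prog", "input.txt", "-v", nullptr }.
int callJITMain(int (*MainFn)(int, char *[])) {
  return runAsMain(MainFn, {"input.txt", "-v"}, StringRef("prog"));
}
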
CtorDtorIterator::CtorDtorIterator(const GlobalVariable *GV, bool End)
: InitList(
GV ? dyn_cast_or_null<ConstantArray>(GV->getInitializer()) : nullptr),
@@ -95,7 +121,7 @@ void CtorDtorRunner::add(iterator_range<CtorDtorIterator> CtorDtors) {
JD.getExecutionSession(),
(*CtorDtors.begin()).Func->getParent()->getDataLayout());
- for (const auto &CtorDtor : CtorDtors) {
+ for (auto CtorDtor : CtorDtors) {
assert(CtorDtor.Func && CtorDtor.Func->hasName() &&
"Ctor/Dtor function must be named to be runnable under the JIT");
@@ -118,19 +144,17 @@ void CtorDtorRunner::add(iterator_range<CtorDtorIterator> CtorDtors) {
Error CtorDtorRunner::run() {
using CtorDtorTy = void (*)();
- SymbolNameSet Names;
-
- for (auto &KV : CtorDtorsByPriority) {
- for (auto &Name : KV.second) {
- auto Added = Names.insert(Name).second;
- (void)Added;
- assert(Added && "Ctor/Dtor names clashed");
- }
- }
+ SymbolLookupSet LookupSet;
+ for (auto &KV : CtorDtorsByPriority)
+ for (auto &Name : KV.second)
+ LookupSet.add(Name);
+ assert(!LookupSet.containsDuplicates() &&
+ "Ctor/Dtor list contains duplicates");
auto &ES = JD.getExecutionSession();
- if (auto CtorDtorMap =
- ES.lookup(JITDylibSearchList({{&JD, true}}), std::move(Names))) {
+ if (auto CtorDtorMap = ES.lookup(
+ makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols),
+ std::move(LookupSet))) {
for (auto &KV : CtorDtorsByPriority) {
for (auto &Name : KV.second) {
assert(CtorDtorMap->count(Name) && "No entry for Name");
@@ -190,15 +214,16 @@ DynamicLibrarySearchGenerator::Load(const char *FileName, char GlobalPrefix,
std::move(Lib), GlobalPrefix, std::move(Allow));
}
-Expected<SymbolNameSet>
-DynamicLibrarySearchGenerator::tryToGenerate(JITDylib &JD,
- const SymbolNameSet &Names) {
- orc::SymbolNameSet Added;
+Error DynamicLibrarySearchGenerator::tryToGenerate(
+ LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags,
+ const SymbolLookupSet &Symbols) {
orc::SymbolMap NewSymbols;
bool HasGlobalPrefix = (GlobalPrefix != '\0');
- for (auto &Name : Names) {
+ for (auto &KV : Symbols) {
+ auto &Name = KV.first;
+
if ((*Name).empty())
continue;
@@ -211,20 +236,16 @@ DynamicLibrarySearchGenerator::tryToGenerate(JITDylib &JD,
std::string Tmp((*Name).data() + HasGlobalPrefix,
(*Name).size() - HasGlobalPrefix);
if (void *Addr = Dylib.getAddressOfSymbol(Tmp.c_str())) {
- Added.insert(Name);
NewSymbols[Name] = JITEvaluatedSymbol(
static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(Addr)),
JITSymbolFlags::Exported);
}
}
- // Add any new symbols to JD. Since the generator is only called for symbols
- // that are not already defined, this will never trigger a duplicate
- // definition error, so we can wrap this call in a 'cantFail'.
- if (!NewSymbols.empty())
- cantFail(JD.define(absoluteSymbols(std::move(NewSymbols))));
+ if (NewSymbols.empty())
+ return Error::success();
- return Added;
+ return JD.define(absoluteSymbols(std::move(NewSymbols)));
}
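With this change tryToGenerate defines any symbols it finds directly and reports failure through the returned Error, instead of handing back the set of names it added. Client code normally never calls it; the generator is attached to a JITDylib so that lookups which miss fall through to the host process. A sketch, assuming a JITDylib JD and DataLayout DL are in scope:

// Fall back to symbols in the host process (e.g. libc) when a lookup
// misses in JD. The global prefix comes from the target's DataLayout.
JD.addGenerator(cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(
    DL.getGlobalPrefix())));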
Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
@@ -251,15 +272,24 @@ StaticLibraryDefinitionGenerator::Create(
return std::move(ADG);
}
-Expected<SymbolNameSet>
-StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD,
- const SymbolNameSet &Names) {
+Error StaticLibraryDefinitionGenerator::tryToGenerate(
+ LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags,
+ const SymbolLookupSet &Symbols) {
+
+ // Don't materialize symbols from static archives unless this is a static
+ // lookup.
+ if (K != LookupKind::Static)
+ return Error::success();
+
+ // Bail out early if we've already freed the archive.
+ if (!Archive)
+ return Error::success();
DenseSet<std::pair<StringRef, StringRef>> ChildBufferInfos;
- SymbolNameSet NewDefs;
- for (const auto &Name : Names) {
- auto Child = Archive.findSym(*Name);
+ for (const auto &KV : Symbols) {
+ const auto &Name = KV.first;
+ auto Child = Archive->findSym(*Name);
if (!Child)
return Child.takeError();
if (*Child == None)
@@ -269,7 +299,6 @@ StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD,
return ChildBuffer.takeError();
ChildBufferInfos.insert(
{ChildBuffer->getBuffer(), ChildBuffer->getBufferIdentifier()});
- NewDefs.insert(Name);
}
for (auto ChildBufferInfo : ChildBufferInfos) {
@@ -278,31 +307,16 @@ StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD,
if (auto Err =
L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef), VModuleKey()))
- return std::move(Err);
-
- --UnrealizedObjects;
+ return Err;
}
- return NewDefs;
+ return Error::success();
}
StaticLibraryDefinitionGenerator::StaticLibraryDefinitionGenerator(
ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer, Error &Err)
: L(L), ArchiveBuffer(std::move(ArchiveBuffer)),
- Archive(*this->ArchiveBuffer, Err) {
-
- if (Err)
- return;
-
- Error Err2 = Error::success();
- for (auto _ : Archive.children(Err2)) {
- (void)_;
- ++UnrealizedObjects;
- }
-
- // No need to check this: We will leave it to the caller.
- Err = std::move(Err2);
-}
+ Archive(std::make_unique<object::Archive>(*this->ArchiveBuffer, Err)) {}
} // End namespace orc.
} // End namespace llvm.
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 0295db7633dd..1ac9a58aeaef 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -101,7 +101,10 @@ JITTargetAddress JITCompileCallbackManager::executeCompileCallback(
Name = I->second;
}
- if (auto Sym = ES.lookup(JITDylibSearchList({{&CallbacksJD, true}}), Name))
+ if (auto Sym =
+ ES.lookup(makeJITDylibSearchOrder(
+ &CallbacksJD, JITDylibLookupFlags::MatchAllSymbols),
+ Name))
return Sym->getAddress();
else {
llvm::dbgs() << "Didn't find callback.\n";
@@ -199,7 +202,7 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcMips64>>();
};
-
+
case Triple::x86_64:
if (T.getOS() == Triple::OSType::Win32) {
return [](){
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
index 1d3e6db913e2..114e81e41771 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -28,14 +28,12 @@ Expected<JITTargetMachineBuilder> JITTargetMachineBuilder::detectHost() {
// Retrieve host CPU name and sub-target features and add them to builder.
// Relocation model, code model and codegen opt level are kept to default
// values.
- llvm::SubtargetFeatures SubtargetFeatures;
llvm::StringMap<bool> FeatureMap;
llvm::sys::getHostCPUFeatures(FeatureMap);
for (auto &Feature : FeatureMap)
- SubtargetFeatures.AddFeature(Feature.first(), Feature.second);
+ TMBuilder.getFeatures().AddFeature(Feature.first(), Feature.second);
TMBuilder.setCPU(llvm::sys::getHostCPUName());
- TMBuilder.addFeatures(SubtargetFeatures.getFeatures());
return TMBuilder;
}
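The rewrite streams host features straight into the builder's own SubtargetFeatures instead of staging them in a local copy and re-adding them. Typical use, sketched against this patch's headers (the feature name is an arbitrary example):

// Detect the host, optionally tweak a feature, then build a TargetMachine.
auto JTMB = cantFail(orc::JITTargetMachineBuilder::detectHost());
JTMB.getFeatures().AddFeature("crc", /*Enabled=*/true); // hypothetical tweak
auto TM = cantFail(JTMB.createTargetMachine());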
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index a80f78afe80f..54473ab46423 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
+#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
@@ -24,6 +26,26 @@ Error LLJITBuilderState::prepareForConstruction() {
return JTMBOrErr.takeError();
}
+ // If the client didn't configure any linker options then auto-configure the
+ // JIT linker.
+ if (!CreateObjectLinkingLayer && JTMB->getCodeModel() == None &&
+ JTMB->getRelocationModel() == None) {
+
+ auto &TT = JTMB->getTargetTriple();
+ if (TT.isOSBinFormatMachO() &&
+ (TT.getArch() == Triple::aarch64 || TT.getArch() == Triple::x86_64)) {
+
+ JTMB->setRelocationModel(Reloc::PIC_);
+ JTMB->setCodeModel(CodeModel::Small);
+ CreateObjectLinkingLayer =
+ [](ExecutionSession &ES,
+ const Triple &) -> std::unique_ptr<ObjectLayer> {
+ return std::make_unique<ObjectLinkingLayer>(
+ ES, std::make_unique<jitlink::InProcessMemoryManager>());
+ };
+ }
+ }
+
return Error::success();
}
@@ -51,12 +73,14 @@ Error LLJIT::addIRModule(JITDylib &JD, ThreadSafeModule TSM) {
Error LLJIT::addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj) {
assert(Obj && "Can not add null object");
- return ObjLinkingLayer->add(JD, std::move(Obj), ES->allocateVModule());
+ return ObjTransformLayer.add(JD, std::move(Obj), ES->allocateVModule());
}
Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
StringRef Name) {
- return ES->lookup(JITDylibSearchList({{&JD, true}}), ES->intern(Name));
+ return ES->lookup(
+ makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols),
+ ES->intern(Name));
}
std::unique_ptr<ObjectLayer>
@@ -103,13 +127,13 @@ LLJIT::createCompileFunction(LLJITBuilderState &S,
LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
: ES(S.ES ? std::move(S.ES) : std::make_unique<ExecutionSession>()),
- Main(this->ES->getMainJITDylib()), DL(""), CtorRunner(Main),
+ Main(this->ES->createJITDylib("<main>")), DL(""),
+ ObjLinkingLayer(createObjectLinkingLayer(S, *ES)),
+ ObjTransformLayer(*this->ES, *ObjLinkingLayer), CtorRunner(Main),
DtorRunner(Main) {
ErrorAsOutParameter _(&Err);
- ObjLinkingLayer = createObjectLinkingLayer(S, *ES);
-
if (auto DLOrErr = S.JTMB->getDefaultDataLayoutForTarget())
DL = std::move(*DLOrErr);
else {
@@ -124,7 +148,7 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
return;
}
CompileLayer = std::make_unique<IRCompileLayer>(
- *ES, *ObjLinkingLayer, std::move(*CompileFunction));
+ *ES, ObjTransformLayer, std::move(*CompileFunction));
}
if (S.NumCompileThreads > 0) {
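LLJIT now creates its main JITDylib explicitly and routes objects through an ObjectTransformLayer sitting on top of the link layer; the client-facing flow is unchanged. A minimal round trip, sketched against this patch's LLJIT interface (TSM is assumed to be a ThreadSafeModule built elsewhere):

auto J = cantFail(orc::LLJITBuilder().create());
cantFail(J->addIRModule(std::move(TSM)));
auto MainSym = cantFail(J->lookup("main")); // mangled lookup in the main dylib
auto *Entry =
    jitTargetAddressToPointer<int (*)(int, char **)>(MainSym.getAddress());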
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
index 93aabd817d60..aab490feb8ea 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -50,8 +50,10 @@ LazyCallThroughManager::callThroughToSymbol(JITTargetAddress TrampolineAddr) {
SourceJD = I->second.first;
SymbolName = I->second.second;
}
- auto LookupResult =
- ES.lookup(JITDylibSearchList({{SourceJD, true}}), SymbolName);
+
+ auto LookupResult = ES.lookup(
+ makeJITDylibSearchOrder(SourceJD, JITDylibLookupFlags::MatchAllSymbols),
+ SymbolName);
if (!LookupResult) {
ES.reportError(LookupResult.takeError());
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Legacy.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Legacy.cpp
index 9f9a6730b2c3..67b804c37287 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Legacy.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Legacy.cpp
@@ -37,7 +37,8 @@ void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols,
};
auto Q = std::make_shared<AsynchronousSymbolQuery>(
- InternedSymbols, SymbolState::Resolved, std::move(OnResolvedWithUnwrap));
+ SymbolLookupSet(InternedSymbols), SymbolState::Resolved,
+ std::move(OnResolvedWithUnwrap));
auto Unresolved = R.lookup(Q, InternedSymbols);
if (Unresolved.empty()) {
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
index 874decb2ade0..2572b7f4878d 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
@@ -36,7 +36,7 @@ public:
Layer.ReturnObjectBuffer(std::move(ObjBuffer));
}
- JITLinkMemoryManager &getMemoryManager() override { return Layer.MemMgr; }
+ JITLinkMemoryManager &getMemoryManager() override { return *Layer.MemMgr; }
MemoryBufferRef getObjectBuffer() const override {
return ObjBuffer->getMemBufferRef();
@@ -47,18 +47,28 @@ public:
MR.failMaterialization();
}
- void lookup(const DenseSet<StringRef> &Symbols,
+ void lookup(const LookupMap &Symbols,
std::unique_ptr<JITLinkAsyncLookupContinuation> LC) override {
- JITDylibSearchList SearchOrder;
+ JITDylibSearchOrder SearchOrder;
MR.getTargetJITDylib().withSearchOrderDo(
- [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; });
+ [&](const JITDylibSearchOrder &O) { SearchOrder = O; });
auto &ES = Layer.getExecutionSession();
- SymbolNameSet InternedSymbols;
- for (auto &S : Symbols)
- InternedSymbols.insert(ES.intern(S));
+ SymbolLookupSet LookupSet;
+ for (auto &KV : Symbols) {
+ orc::SymbolLookupFlags LookupFlags;
+ switch (KV.second) {
+ case jitlink::SymbolLookupFlags::RequiredSymbol:
+ LookupFlags = orc::SymbolLookupFlags::RequiredSymbol;
+ break;
+ case jitlink::SymbolLookupFlags::WeaklyReferencedSymbol:
+ LookupFlags = orc::SymbolLookupFlags::WeaklyReferencedSymbol;
+ break;
+ }
+ LookupSet.add(ES.intern(KV.first), LookupFlags);
+ }
// OnResolve -- De-intern the symbols and pass the result to the linker.
auto OnResolve = [this, LookupContinuation = std::move(LC)](
@@ -74,8 +84,9 @@ public:
}
};
- ES.lookup(SearchOrder, std::move(InternedSymbols), SymbolState::Resolved,
- std::move(OnResolve), [this](const SymbolDependenceMap &Deps) {
+ ES.lookup(LookupKind::Static, SearchOrder, std::move(LookupSet),
+ SymbolState::Resolved, std::move(OnResolve),
+ [this](const SymbolDependenceMap &Deps) {
registerDependencies(Deps);
});
}
@@ -317,9 +328,9 @@ private:
ObjectLinkingLayer::Plugin::~Plugin() {}
-ObjectLinkingLayer::ObjectLinkingLayer(ExecutionSession &ES,
- JITLinkMemoryManager &MemMgr)
- : ObjectLayer(ES), MemMgr(MemMgr) {}
+ObjectLinkingLayer::ObjectLinkingLayer(
+ ExecutionSession &ES, std::unique_ptr<JITLinkMemoryManager> MemMgr)
+ : ObjectLayer(ES), MemMgr(std::move(MemMgr)) {}
ObjectLinkingLayer::~ObjectLinkingLayer() {
if (auto Err = removeAllModules())
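ObjectLinkingLayer now owns its JITLinkMemoryManager outright, tying the manager's lifetime to the layer rather than to whoever happened to construct it. Construction becomes (sketch, assuming an ExecutionSession ES):

auto OLL = std::make_unique<orc::ObjectLinkingLayer>(
    ES, std::make_unique<jitlink::InProcessMemoryManager>());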
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
index 815517321b76..d18eb38a4142 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
@@ -21,12 +21,18 @@ void ObjectTransformLayer::emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) {
assert(O && "Module must not be null");
- if (auto TransformedObj = Transform(std::move(O)))
- BaseLayer.emit(std::move(R), std::move(*TransformedObj));
- else {
- R.failMaterialization();
- getExecutionSession().reportError(TransformedObj.takeError());
+ // If there is a transform set then apply it.
+ if (Transform) {
+ if (auto TransformedObj = Transform(std::move(O)))
+ O = std::move(*TransformedObj);
+ else {
+ R.failMaterialization();
+ getExecutionSession().reportError(TransformedObj.takeError());
+ return;
+ }
}
+
+ BaseLayer.emit(std::move(R), std::move(O));
}
} // End namespace orc.
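Because emit now forwards the object untouched when no transform is installed, the transform is genuinely optional. A typical optional transform only observes the buffer, e.g. for logging, and passes it through unchanged (sketch; assumes an ObjectTransformLayer OTL and that a setTransform accessor is available):

OTL.setTransform([](std::unique_ptr<MemoryBuffer> Obj)
                     -> Expected<std::unique_ptr<MemoryBuffer>> {
  dbgs() << "emitting object: " << Obj->getBufferIdentifier() << "\n";
  return std::move(Obj); // pass through unchanged
});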
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index 939cd539d1fb..a92264c0be14 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -19,11 +19,11 @@ public:
void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) {
auto &ES = MR.getTargetJITDylib().getExecutionSession();
- SymbolNameSet InternedSymbols;
+ SymbolLookupSet InternedSymbols;
// Intern the requested symbols: lookup takes interned strings.
for (auto &S : Symbols)
- InternedSymbols.insert(ES.intern(S));
+ InternedSymbols.add(ES.intern(S));
// Build an OnResolve callback to unwrap the interned strings and pass them
// to the OnResolved callback.
@@ -46,11 +46,12 @@ public:
MR.addDependenciesForAll(Deps);
};
- JITDylibSearchList SearchOrder;
+ JITDylibSearchOrder SearchOrder;
MR.getTargetJITDylib().withSearchOrderDo(
- [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; });
- ES.lookup(SearchOrder, InternedSymbols, SymbolState::Resolved,
- std::move(OnResolvedWithUnwrap), RegisterDependencies);
+ [&](const JITDylibSearchOrder &JDs) { SearchOrder = JDs; });
+ ES.lookup(LookupKind::Static, SearchOrder, InternedSymbols,
+ SymbolState::Resolved, std::move(OnResolvedWithUnwrap),
+ RegisterDependencies);
}
Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) {
@@ -77,6 +78,12 @@ RTDyldObjectLinkingLayer::RTDyldObjectLinkingLayer(
ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager)
: ObjectLayer(ES), GetMemoryManager(GetMemoryManager) {}
+RTDyldObjectLinkingLayer::~RTDyldObjectLinkingLayer() {
+ std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
+ for (auto &MemMgr : MemMgrs)
+ MemMgr->deregisterEHFrames();
+}
+
void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) {
assert(O && "Object must not be null");
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcError.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/OrcError.cpp
index e6e9a095319c..5eab246d4b48 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcError.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/OrcError.cpp
@@ -14,6 +14,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+#include <type_traits>
+
using namespace llvm;
using namespace llvm::orc;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RPCUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/RPCError.cpp
index 367b3639f841..3cf78fd9f7ba 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/RPCUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/OrcError/RPCError.cpp
@@ -1,4 +1,4 @@
-//===--------------- RPCUtils.cpp - RPCUtils implementation ---------------===//
+//===--------------- RPCError.cpp - RPCError implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,11 +6,16 @@
//
//===----------------------------------------------------------------------===//
//
-// RPCUtils implementation.
+// RPC Error type implementations.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ExecutionEngine/Orc/RPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/RPC/RPCUtils.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <system_error>
+#include <string>
char llvm::orc::rpc::RPCFatalError::ID = 0;
char llvm::orc::rpc::ConnectionClosed::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
index 184388dc4d7a..cc196df3b2fa 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errno.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Path.h"
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 27a7690db34f..6e3cd7cd2cfc 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "RuntimeDyldCOFF.h"
+#include "Targets/RuntimeDyldCOFFAArch64.h"
#include "Targets/RuntimeDyldCOFFI386.h"
#include "Targets/RuntimeDyldCOFFThumb.h"
#include "Targets/RuntimeDyldCOFFX86_64.h"
@@ -55,6 +56,8 @@ llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch,
return std::make_unique<RuntimeDyldCOFFThumb>(MemMgr, Resolver);
case Triple::x86_64:
return std::make_unique<RuntimeDyldCOFFX86_64>(MemMgr, Resolver);
+ case Triple::aarch64:
+ return std::make_unique<RuntimeDyldCOFFAArch64>(MemMgr, Resolver);
}
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index b9c5a12e08d8..2ac0586ff324 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -667,7 +667,7 @@ private:
ArrayRef<uint8_t> SymbolBytes(SymbolMem.bytes_begin(), SymbolMem.size());
MCDisassembler::DecodeStatus S =
- Dis->getInstruction(Inst, Size, SymbolBytes, 0, nulls(), nulls());
+ Dis->getInstruction(Inst, Size, SymbolBytes, 0, nulls());
return (S == MCDisassembler::Success);
}
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h
new file mode 100644
index 000000000000..a94f54f50ac4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h
@@ -0,0 +1,365 @@
+//===-- RuntimeDyldCOFFAArch64.h - COFF/AArch64 specific code ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// COFF AArch64 support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFAARCH64_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFAARCH64_H
+
+#include "../RuntimeDyldCOFF.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Endian.h"
+
+#define DEBUG_TYPE "dyld"
+
+using namespace llvm::support::endian;
+
+namespace llvm {
+
+// This relocation type is used for handling a long branch instruction
+// routed through the stub.
+enum InternalRelocationType : unsigned {
+ INTERNAL_REL_ARM64_LONG_BRANCH26 = 0x111,
+};
+
+static void add16(uint8_t *p, int16_t v) { write16le(p, read16le(p) + v); }
+static void or32le(void *P, int32_t V) { write32le(P, read32le(P) | V); }
+
+static void write32AArch64Imm(uint8_t *T, uint64_t imm, uint32_t rangeLimit) {
+ uint32_t orig = read32le(T);
+ orig &= ~(0xFFF << 10);
+ write32le(T, orig | ((imm & (0xFFF >> rangeLimit)) << 10));
+}
+
+static void write32AArch64Ldr(uint8_t *T, uint64_t imm) {
+ uint32_t orig = read32le(T);
+ uint32_t size = orig >> 30;
+ // 0x04000000 indicates SIMD/FP registers
+ // 0x00800000 indicates 128 bit
+ if ((orig & 0x04800000) == 0x04800000)
+ size += 4;
+ if ((imm & ((1 << size) - 1)) != 0)
+ assert(0 && "misaligned ldr/str offset");
+ write32AArch64Imm(T, imm >> size, size);
+}
+
+static void write32AArch64Addr(void *T, uint64_t s, uint64_t p, int shift) {
+ uint64_t Imm = (s >> shift) - (p >> shift);
+ uint32_t ImmLo = (Imm & 0x3) << 29;
+ uint32_t ImmHi = (Imm & 0x1FFFFC) << 3;
+ uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3);
+ write32le(T, (read32le(T) & ~Mask) | ImmLo | ImmHi);
+}
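write32AArch64Addr implements the split immediate of ADR/ADRP: the low two bits of the (page) delta land in immlo (bits 30:29) and the next nineteen in immhi (bits 23:5). A standalone restatement of the packing, useful for checking the masks above:

#include <cstdint>

// Pack a 21-bit (page) delta into the split ADR/ADRP immediate fields,
// mirroring write32AArch64Addr (standalone sketch).
uint32_t packAdrpImm(uint32_t Insn, uint64_t S, uint64_t P, int Shift) {
  uint64_t Imm = (S >> Shift) - (P >> Shift);
  uint32_t ImmLo = static_cast<uint32_t>((Imm & 0x3) << 29);     // bits 1:0 -> 30:29
  uint32_t ImmHi = static_cast<uint32_t>((Imm & 0x1FFFFC) << 3); // bits 20:2 -> 23:5
  uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3);
  return static_cast<uint32_t>((Insn & ~Mask) | ImmLo | ImmHi);
}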
+
+class RuntimeDyldCOFFAArch64 : public RuntimeDyldCOFF {
+
+private:
+ // When a module is loaded we save the SectionID of the unwind
+ // sections in a table until we receive a request to register all
+ // unregistered EH frame sections with the memory manager.
+ SmallVector<SID, 2> UnregisteredEHFrameSections;
+ SmallVector<SID, 2> RegisteredEHFrameSections;
+ uint64_t ImageBase;
+
+ // Fake an __ImageBase pointer by returning the section with the lowest address
+ uint64_t getImageBase() {
+ if (!ImageBase) {
+ ImageBase = std::numeric_limits<uint64_t>::max();
+ for (const SectionEntry &Section : Sections)
+ // The Sections list may contain sections that weren't loaded for
+ // whatever reason: they may be debug sections, and ProcessAllSections
+ // is false, or they may be sections that contain 0 bytes. If the
+ // section isn't loaded, the load address will be 0, and it should not
+ // be included in the ImageBase calculation.
+ if (Section.getLoadAddress() != 0)
+ ImageBase = std::min(ImageBase, Section.getLoadAddress());
+ }
+ return ImageBase;
+ }
+
+public:
+ RuntimeDyldCOFFAArch64(RuntimeDyld::MemoryManager &MM,
+ JITSymbolResolver &Resolver)
+ : RuntimeDyldCOFF(MM, Resolver), ImageBase(0) {}
+
+ unsigned getStubAlignment() override { return 8; }
+
+ unsigned getMaxStubSize() const override { return 20; }
+
+ std::tuple<uint64_t, uint64_t, uint64_t>
+ generateRelocationStub(unsigned SectionID, StringRef TargetName,
+ uint64_t Offset, uint64_t RelType, uint64_t Addend,
+ StubMap &Stubs) {
+ uintptr_t StubOffset;
+ SectionEntry &Section = Sections[SectionID];
+
+ RelocationValueRef OriginalRelValueRef;
+ OriginalRelValueRef.SectionID = SectionID;
+ OriginalRelValueRef.Offset = Offset;
+ OriginalRelValueRef.Addend = Addend;
+ OriginalRelValueRef.SymbolName = TargetName.data();
+
+ auto Stub = Stubs.find(OriginalRelValueRef);
+ if (Stub == Stubs.end()) {
+ LLVM_DEBUG(dbgs() << " Create a new stub function for "
+ << TargetName.data() << "\n");
+
+ StubOffset = Section.getStubOffset();
+ Stubs[OriginalRelValueRef] = StubOffset;
+ createStubFunction(Section.getAddressWithOffset(StubOffset));
+ Section.advanceStubOffset(getMaxStubSize());
+ } else {
+ LLVM_DEBUG(dbgs() << " Stub function found for " << TargetName.data()
+ << "\n");
+ StubOffset = Stub->second;
+ }
+
+ // Resolve original relocation to stub function.
+ const RelocationEntry RE(SectionID, Offset, RelType, Addend);
+ resolveRelocation(RE, Section.getLoadAddressWithOffset(StubOffset));
+
+ // Adjust the relocation info so that resolution writes to the stub
+ // function. An internal relocation type is used here to resolve the
+ // long branch via the stub.
+ Addend = 0;
+ Offset = StubOffset;
+ RelType = INTERNAL_REL_ARM64_LONG_BRANCH26;
+
+ return std::make_tuple(Offset, RelType, Addend);
+ }
+
+ Expected<object::relocation_iterator>
+ processRelocationRef(unsigned SectionID, object::relocation_iterator RelI,
+ const object::ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ StubMap &Stubs) override {
+
+ auto Symbol = RelI->getSymbol();
+ if (Symbol == Obj.symbol_end())
+ report_fatal_error("Unknown symbol in relocation");
+
+ Expected<StringRef> TargetNameOrErr = Symbol->getName();
+ if (!TargetNameOrErr)
+ return TargetNameOrErr.takeError();
+ StringRef TargetName = *TargetNameOrErr;
+
+ auto SectionOrErr = Symbol->getSection();
+ if (!SectionOrErr)
+ return SectionOrErr.takeError();
+ auto Section = *SectionOrErr;
+
+ uint64_t RelType = RelI->getType();
+ uint64_t Offset = RelI->getOffset();
+
+ // If there is no section, this must be an external reference.
+ const bool IsExtern = Section == Obj.section_end();
+
+ // Determine the Addend used to adjust the relocation value.
+ uint64_t Addend = 0;
+ SectionEntry &AddendSection = Sections[SectionID];
+ uintptr_t ObjTarget = AddendSection.getObjAddress() + Offset;
+ uint8_t *Displacement = (uint8_t *)ObjTarget;
+
+ switch (RelType) {
+ case COFF::IMAGE_REL_ARM64_ADDR32:
+ case COFF::IMAGE_REL_ARM64_ADDR32NB:
+ case COFF::IMAGE_REL_ARM64_REL32:
+ case COFF::IMAGE_REL_ARM64_SECREL:
+ Addend = read32le(Displacement);
+ break;
+ case COFF::IMAGE_REL_ARM64_BRANCH26: {
+ uint32_t orig = read32le(Displacement);
+ Addend = (orig & 0x03FFFFFF) << 2;
+
+ if (IsExtern)
+ std::tie(Offset, RelType, Addend) = generateRelocationStub(
+ SectionID, TargetName, Offset, RelType, Addend, Stubs);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_BRANCH19: {
+ uint32_t orig = read32le(Displacement);
+ Addend = (orig & 0x00FFFFE0) >> 3;
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_BRANCH14: {
+ uint32_t orig = read32le(Displacement);
+ Addend = (orig & 0x000FFFE0) >> 3;
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_REL21:
+ case COFF::IMAGE_REL_ARM64_PAGEBASE_REL21: {
+ uint32_t orig = read32le(Displacement);
+ Addend = ((orig >> 29) & 0x3) | ((orig >> 3) & 0x1FFFFC);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_PAGEOFFSET_12L:
+ case COFF::IMAGE_REL_ARM64_PAGEOFFSET_12A: {
+ uint32_t orig = read32le(Displacement);
+ Addend = ((orig >> 10) & 0xFFF);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_ADDR64: {
+ Addend = read64le(Displacement);
+ break;
+ }
+ default:
+ break;
+ }
+
+#if !defined(NDEBUG)
+ SmallString<32> RelTypeName;
+ RelI->getTypeName(RelTypeName);
+
+ LLVM_DEBUG(dbgs() << "\t\tIn Section " << SectionID << " Offset " << Offset
+ << " RelType: " << RelTypeName << " TargetName: "
+ << TargetName << " Addend " << Addend << "\n");
+#endif
+
+ unsigned TargetSectionID = -1;
+ if (IsExtern) {
+ RelocationEntry RE(SectionID, Offset, RelType, Addend);
+ addRelocationForSymbol(RE, TargetName);
+ } else {
+ if (auto TargetSectionIDOrErr = findOrEmitSection(
+ Obj, *Section, Section->isText(), ObjSectionToID)) {
+ TargetSectionID = *TargetSectionIDOrErr;
+ } else
+ return TargetSectionIDOrErr.takeError();
+
+ uint64_t TargetOffset = getSymbolOffset(*Symbol);
+ RelocationEntry RE(SectionID, Offset, RelType, TargetOffset + Addend);
+ addRelocationForSection(RE, TargetSectionID);
+ }
+ return ++RelI;
+ }
+
+ void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
+ const auto Section = Sections[RE.SectionID];
+ uint8_t *Target = Section.getAddressWithOffset(RE.Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset);
+
+ switch (RE.RelType) {
+ default:
+ llvm_unreachable("unsupported relocation type");
+ case COFF::IMAGE_REL_ARM64_ABSOLUTE: {
+ // This relocation is ignored.
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_PAGEBASE_REL21: {
+ // The page base of the target, for ADRP instruction.
+ Value += RE.Addend;
+ write32AArch64Addr(Target, Value, FinalAddress, 12);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_REL21: {
+ // The 21-bit relative displacement to the target, for the ADR instruction.
+ Value += RE.Addend;
+ write32AArch64Addr(Target, Value, FinalAddress, 0);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_PAGEOFFSET_12A: {
+ // The 12-bit page offset of the target,
+ // for instructions ADD/ADDS (immediate) with zero shift.
+ Value += RE.Addend;
+ write32AArch64Imm(Target, Value & 0xFFF, 0);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_PAGEOFFSET_12L: {
+ // The 12-bit page offset of the target,
+ // for instruction LDR (indexed, unsigned immediate).
+ Value += RE.Addend;
+ write32AArch64Ldr(Target, Value & 0xFFF);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_ADDR32: {
+ // The 32-bit VA of the target.
+ uint32_t VA = Value + RE.Addend;
+ write32le(Target, VA);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_ADDR32NB: {
+ // The target's 32-bit RVA.
+ uint64_t RVA = Value + RE.Addend - getImageBase();
+ write32le(Target, RVA);
+ break;
+ }
+ case INTERNAL_REL_ARM64_LONG_BRANCH26: {
+ // Encode the immediate value into the generated stub instructions (MOVZ/MOVK).
+ or32le(Target + 12, ((Value + RE.Addend) & 0xFFFF) << 5);
+ or32le(Target + 8, ((Value + RE.Addend) & 0xFFFF0000) >> 11);
+ or32le(Target + 4, ((Value + RE.Addend) & 0xFFFF00000000) >> 27);
+ or32le(Target + 0, ((Value + RE.Addend) & 0xFFFF000000000000) >> 43);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_BRANCH26: {
+ // The 26-bit relative displacement to the target, for B and BL
+ // instructions.
+ uint64_t PCRelVal = Value + RE.Addend - FinalAddress;
+ assert(isInt<28>(PCRelVal) && "Branch target is out of range.");
+ write32le(Target, (read32le(Target) & ~(0x03FFFFFF)) |
+ (PCRelVal & 0x0FFFFFFC) >> 2);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_BRANCH19: {
+ // The 19-bit offset to the relocation target,
+ // for conditional B instruction.
+ uint64_t PCRelVal = Value + RE.Addend - FinalAddress;
+ assert(isInt<21>(PCRelVal) && "Branch target is out of range.");
+ write32le(Target, (read32le(Target) & ~(0x00FFFFE0)) |
+ (PCRelVal & 0x001FFFFC) << 3);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_BRANCH14: {
+ // The 14-bit offset to the relocation target,
+ // for instructions TBZ and TBNZ.
+ uint64_t PCRelVal = Value + RE.Addend - FinalAddress;
+ assert(isInt<16>(PCRelVal) && "Branch target is out of range.");
+ write32le(Target, (read32le(Target) & ~(0x000FFFE0)) |
+ (PCRelVal & 0x0000FFFC) << 3);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_ADDR64: {
+ // The 64-bit VA of the relocation target.
+ write64le(Target, Value + RE.Addend);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_SECTION: {
+ // 16-bit section index of the section that contains the target.
+ assert(static_cast<uint32_t>(RE.SectionID) <= UINT16_MAX &&
+ "relocation overflow");
+ add16(Target, RE.SectionID);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_SECREL: {
+ // 32-bit offset of the target from the beginning of its section.
+ assert(static_cast<int64_t>(RE.Addend) <= INT32_MAX &&
+ "Relocation overflow");
+ assert(static_cast<int64_t>(RE.Addend) >= INT32_MIN &&
+ "Relocation underflow");
+ write32le(Target, RE.Addend);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_REL32: {
+ // The 32-bit relative address from the byte following the relocation.
+ uint64_t Result = Value - FinalAddress - 4;
+ write32le(Target, Result + RE.Addend);
+ break;
+ }
+ }
+ }
+
+ void registerEHFrames() override {}
+};
+
+} // End namespace llvm
+
+#endif
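The INTERNAL_REL_ARM64_LONG_BRANCH26 case above scatters the 64-bit target across the imm16 fields (bits 20:5) of the stub's MOVZ/MOVK sequence; each combined mask-and-shift, e.g. `& 0xFFFF0000) >> 11`, is shorthand for "take 16 bits, shift them into bit 5". An equivalent, more explicit restatement (standalone sketch):

#include <cstdint>

// Compute the imm16 field contents (bits 20:5) for the four MOVZ/MOVK
// stub instructions. Instruction 0 receives the top 16 bits of the
// target, instruction 3 the bottom 16, matching the or32le calls above.
void encodeLongBranchImms(uint64_t Target, uint32_t Field[4]) {
  Field[0] = static_cast<uint32_t>(((Target >> 48) & 0xFFFF) << 5);
  Field[1] = static_cast<uint32_t>(((Target >> 32) & 0xFFFF) << 5);
  Field[2] = static_cast<uint32_t>(((Target >> 16) & 0xFFFF) << 5);
  Field[3] = static_cast<uint32_t>((Target & 0xFFFF) << 5);
}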
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/TargetSelect.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/TargetSelect.cpp
index 0d9c6cfa0908..28ea04be1a5e 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/TargetSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/TargetSelect.cpp
@@ -83,13 +83,6 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
FeaturesStr = Features.getString();
}
- // FIXME: non-iOS ARM FastISel is broken with MCJIT.
- if (TheTriple.getArch() == Triple::arm &&
- !TheTriple.isiOS() &&
- OptLevel == CodeGenOpt::None) {
- OptLevel = CodeGenOpt::Less;
- }
-
// Allocate a target...
TargetMachine *Target =
TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr,
diff --git a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPConstants.cpp b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPConstants.cpp
new file mode 100644
index 000000000000..ec0733903e99
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPConstants.cpp
@@ -0,0 +1,87 @@
+//===- OMPConstants.cpp - Helpers related to OpenMP code generation ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+
+using namespace llvm;
+using namespace omp;
+using namespace types;
+
+Directive llvm::omp::getOpenMPDirectiveKind(StringRef Str) {
+ return llvm::StringSwitch<Directive>(Str)
+#define OMP_DIRECTIVE(Enum, Str) .Case(Str, Enum)
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ .Default(OMPD_unknown);
+}
+
+StringRef llvm::omp::getOpenMPDirectiveName(Directive Kind) {
+ switch (Kind) {
+#define OMP_DIRECTIVE(Enum, Str) \
+ case Enum: \
+ return Str;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ }
+ llvm_unreachable("Invalid OpenMP directive kind");
+}
+
+/// Declarations for LLVM-IR types (simple, function and structure) are
+/// generated below. Their names are defined and used in OMPKinds.def. Here
+/// we provide the declarations, the initializeTypes function will provide the
+/// values.
+///
+///{
+
+#define OMP_TYPE(VarName, InitValue) Type *llvm::omp::types::VarName = nullptr;
+#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
+ FunctionType *llvm::omp::types::VarName = nullptr; \
+ PointerType *llvm::omp::types::VarName##Ptr = nullptr;
+#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
+ StructType *llvm::omp::types::VarName = nullptr; \
+ PointerType *llvm::omp::types::VarName##Ptr = nullptr;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
+///}
+
+void llvm::omp::types::initializeTypes(Module &M) {
+ if (Void)
+ return;
+
+ LLVMContext &Ctx = M.getContext();
+ // Create all simple and struct types exposed by the runtime and remember
+ // the llvm::PointerTypes of them for easy access later.
+ StructType *T;
+#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
+#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
+ VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
+ VarName##Ptr = PointerType::getUnqual(VarName);
+#define OMP_STRUCT_TYPE(VarName, StructName, ...) \
+ T = M.getTypeByName(StructName); \
+ if (!T) \
+ T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
+ VarName = T; \
+ VarName##Ptr = PointerType::getUnqual(T);
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+}
+
+void llvm::omp::types::uninitializeTypes() {
+#define OMP_TYPE(VarName, InitValue) VarName = nullptr;
+#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
+ VarName = nullptr; \
+ VarName##Ptr = nullptr;
+#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
+ VarName = nullptr; \
+ VarName##Ptr = nullptr;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+}
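The functions above lean on the X-macro pattern: OMPKinds.def is included repeatedly, each time with a different definition of the OMP_* macros, so a single list of entries generates enums, declarations, and initializers in lockstep. The technique in miniature, with a hypothetical Colors.def containing lines like `COLOR(Red, "red")`:

// Colors.def (hypothetical):
//   COLOR(Red, "red")
//   COLOR(Green, "green")
//   #undef COLOR

enum Color {
#define COLOR(Enum, Str) Enum,
#include "Colors.def" // expands to: Red, Green,
};

const char *getColorName(Color C) {
  switch (C) {
#define COLOR(Enum, Str) case Enum: return Str;
#include "Colors.def" // expands to the case labels
  }
  return "unknown";
}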
diff --git a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
new file mode 100644
index 000000000000..739c2998baa8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -0,0 +1,632 @@
+//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the OpenMPIRBuilder class, which is used as a
+/// convenient way to create LLVM instructions for OpenMP directives.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+
+#include <sstream>
+
+#define DEBUG_TYPE "openmp-ir-builder"
+
+using namespace llvm;
+using namespace omp;
+using namespace types;
+
+static cl::opt<bool>
+ OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
+ cl::desc("Use optimistic attributes describing "
+ "'as-if' properties of runtime calls."),
+ cl::init(false));
+
+void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
+ LLVMContext &Ctx = Fn.getContext();
+
+#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
+ // Add attributes to the new declaration.
+ switch (FnID) {
+#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
+ case Enum: \
+ Fn.setAttributes( \
+ AttributeList::get(Ctx, FnAttrSet, RetAttrSet, ArgAttrSets)); \
+ break;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ default:
+ // Attributes are optional.
+ break;
+ }
+}
+
+Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) {
+ Function *Fn = nullptr;
+
+ // Try to find the declaration in the module first.
+ switch (FnID) {
+#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
+ case Enum: \
+ Fn = M.getFunction(Str); \
+ break;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ }
+
+ if (!Fn) {
+ // Create a new declaration if we need one.
+ switch (FnID) {
+#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
+ case Enum: \
+ Fn = Function::Create(FunctionType::get(ReturnType, \
+ ArrayRef<Type *>{__VA_ARGS__}, \
+ IsVarArg), \
+ GlobalValue::ExternalLinkage, Str, M); \
+ break;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ }
+
+ addAttributes(FnID, *Fn);
+ }
+
+ assert(Fn && "Failed to create OpenMP runtime function");
+ return Fn;
+}
+
+void OpenMPIRBuilder::initialize() { initializeTypes(M); }
+
+Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
+ IdentFlag LocFlags) {
+ // Enable "C-mode".
+ LocFlags |= OMP_IDENT_FLAG_KMPC;
+
+ GlobalVariable *&DefaultIdent = IdentMap[{SrcLocStr, uint64_t(LocFlags)}];
+ if (!DefaultIdent) {
+ Constant *I32Null = ConstantInt::getNullValue(Int32);
+ Constant *IdentData[] = {I32Null,
+ ConstantInt::get(Int32, uint64_t(LocFlags)),
+ I32Null, I32Null, SrcLocStr};
+ Constant *Initializer = ConstantStruct::get(
+ cast<StructType>(IdentPtr->getPointerElementType()), IdentData);
+
+ // Look for existing encoding of the location + flags, not needed but
+ // minimizes the difference to the existing solution while we transition.
+ for (GlobalVariable &GV : M.getGlobalList())
+ if (GV.getType() == IdentPtr && GV.hasInitializer())
+ if (GV.getInitializer() == Initializer)
+ return DefaultIdent = &GV;
+
+ DefaultIdent = new GlobalVariable(M, IdentPtr->getPointerElementType(),
+ /* isConstant = */ false,
+ GlobalValue::PrivateLinkage, Initializer);
+ DefaultIdent->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ DefaultIdent->setAlignment(Align(8));
+ }
+ return DefaultIdent;
+}
+
+Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
+ Constant *&SrcLocStr = SrcLocStrMap[LocStr];
+ if (!SrcLocStr) {
+ Constant *Initializer =
+ ConstantDataArray::getString(M.getContext(), LocStr);
+
+ // Look for existing encoding of the location, not needed but minimizes the
+ // difference to the existing solution while we transition.
+ for (GlobalVariable &GV : M.getGlobalList())
+ if (GV.isConstant() && GV.hasInitializer() &&
+ GV.getInitializer() == Initializer)
+ return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
+
+ SrcLocStr = Builder.CreateGlobalStringPtr(LocStr);
+ }
+ return SrcLocStr;
+}
+
+Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
+ return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
+}
+
+Constant *
+OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
+ DILocation *DIL = Loc.DL.get();
+ if (!DIL)
+ return getOrCreateDefaultSrcLocStr();
+ StringRef Filename =
+ !DIL->getFilename().empty() ? DIL->getFilename() : M.getName();
+ StringRef Function = DIL->getScope()->getSubprogram()->getName();
+ Function =
+ !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
+ std::string LineStr = std::to_string(DIL->getLine());
+ std::string ColumnStr = std::to_string(DIL->getColumn());
+ std::stringstream SrcLocStr;
+ SrcLocStr << ";" << Filename.data() << ";" << Function.data() << ";"
+ << LineStr << ";" << ColumnStr << ";;";
+ return getOrCreateSrcLocStr(SrcLocStr.str());
+}
+
+Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
+ return Builder.CreateCall(
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num), Ident,
+ "omp_global_thread_num");
+}
+
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::CreateBarrier(const LocationDescription &Loc, Directive DK,
+ bool ForceSimpleCall, bool CheckCancelFlag) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+ return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
+}
+
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
+ bool ForceSimpleCall, bool CheckCancelFlag) {
+ // Build call __kmpc_cancel_barrier(loc, thread_id) or
+ // __kmpc_barrier(loc, thread_id);
+
+ IdentFlag BarrierLocFlags;
+ switch (Kind) {
+ case OMPD_for:
+ BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
+ break;
+ case OMPD_sections:
+ BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
+ break;
+ case OMPD_single:
+ BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
+ break;
+ case OMPD_barrier:
+ BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
+ break;
+ default:
+ BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
+ break;
+ }
+
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
+ getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
+
+ // If we are in a cancellable parallel region, barriers are cancellation
+ // points.
+ // TODO: Check why we would force simple calls or to ignore the cancel flag.
+ bool UseCancelBarrier =
+ !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
+
+ Value *Result = Builder.CreateCall(
+ getOrCreateRuntimeFunction(UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
+ : OMPRTL___kmpc_barrier),
+ Args);
+
+ if (UseCancelBarrier && CheckCancelFlag)
+ emitCancelationCheckImpl(Result, OMPD_parallel);
+
+ return Builder.saveIP();
+}
+
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::CreateCancel(const LocationDescription &Loc,
+ Value *IfCondition,
+ omp::Directive CanceledDirective) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ // LLVM utilities like blocks with terminators.
+ auto *UI = Builder.CreateUnreachable();
+
+ Instruction *ThenTI = UI, *ElseTI = nullptr;
+ if (IfCondition)
+ SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
+ Builder.SetInsertPoint(ThenTI);
+
+ Value *CancelKind = nullptr;
+ switch (CanceledDirective) {
+#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
+ case DirectiveEnum: \
+ CancelKind = Builder.getInt32(Value); \
+ break;
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ default:
+ llvm_unreachable("Unknown cancel kind!");
+ }
+
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
+ Value *Result = Builder.CreateCall(
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_cancel), Args);
+
+ // The actual cancel logic is shared with others, e.g., cancel_barriers.
+ emitCancelationCheckImpl(Result, CanceledDirective);
+
+ // Update the insertion point and remove the terminator we introduced.
+ Builder.SetInsertPoint(UI->getParent());
+ UI->eraseFromParent();
+
+ return Builder.saveIP();
+}
+
+void OpenMPIRBuilder::emitCancelationCheckImpl(
+ Value *CancelFlag, omp::Directive CanceledDirective) {
+ assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
+ "Unexpected cancellation!");
+
+ // For a cancel barrier we create two new blocks.
+ BasicBlock *BB = Builder.GetInsertBlock();
+ BasicBlock *NonCancellationBlock;
+ if (Builder.GetInsertPoint() == BB->end()) {
+ // TODO: This branch will not be needed once we moved to the
+ // OpenMPIRBuilder codegen completely.
+ NonCancellationBlock = BasicBlock::Create(
+ BB->getContext(), BB->getName() + ".cont", BB->getParent());
+ } else {
+ NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
+ BB->getTerminator()->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ }
+ BasicBlock *CancellationBlock = BasicBlock::Create(
+ BB->getContext(), BB->getName() + ".cncl", BB->getParent());
+
+ // Jump to them based on the return value.
+ Value *Cmp = Builder.CreateIsNull(CancelFlag);
+ Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
+ /* TODO weight */ nullptr, nullptr);
+
+ // From the cancellation block we finalize all variables and go to the
+ // post finalization block that is known to the FiniCB callback.
+ Builder.SetInsertPoint(CancellationBlock);
+ auto &FI = FinalizationStack.back();
+ FI.FiniCB(Builder.saveIP());
+
+ // The continuation block is where code generation continues.
+ Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
+}
+
+IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
+ const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
+ PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
+ Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ Value *ThreadID = getOrCreateThreadID(Ident);
+
+ if (NumThreads) {
+ // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
+ Value *Args[] = {
+ Ident, ThreadID,
+ Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
+ Builder.CreateCall(
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args);
+ }
+
+ if (ProcBind != OMP_PROC_BIND_default) {
+ // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
+ Value *Args[] = {
+ Ident, ThreadID,
+ ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
+ Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind),
+ Args);
+ }
+
+ BasicBlock *InsertBB = Builder.GetInsertBlock();
+ Function *OuterFn = InsertBB->getParent();
+
+ // Vector to remember instructions we used only during the modeling but which
+ // we want to delete at the end.
+ SmallVector<Instruction *, 4> ToBeDeleted;
+
+ Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
+ AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
+ AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
+
+ // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
+ // program, otherwise we only need them for modeling purposes to get the
+ // associated arguments in the outlined function. In the former case,
+ // initialize the allocas properly, in the latter case, delete them later.
+ if (IfCondition) {
+ Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
+ Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
+ } else {
+ ToBeDeleted.push_back(TIDAddr);
+ ToBeDeleted.push_back(ZeroAddr);
+ }
+
+ // Create an artificial insertion point that will also ensure the blocks we
+ // are about to split are not degenerated.
+ auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
+
+ Instruction *ThenTI = UI, *ElseTI = nullptr;
+ if (IfCondition)
+ SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
+
+ BasicBlock *ThenBB = ThenTI->getParent();
+ BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
+ BasicBlock *PRegBodyBB =
+ PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
+ BasicBlock *PRegPreFiniBB =
+ PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
+ BasicBlock *PRegExitBB =
+ PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
+
+ auto FiniCBWrapper = [&](InsertPointTy IP) {
+ // Hide "open-ended" blocks from the given FiniCB by setting the right jump
+ // target to the region exit block.
+ if (IP.getBlock()->end() == IP.getPoint()) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ Builder.restoreIP(IP);
+ Instruction *I = Builder.CreateBr(PRegExitBB);
+ IP = InsertPointTy(I->getParent(), I->getIterator());
+ }
+ assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
+ IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
+ "Unexpected insertion point for finalization call!");
+ return FiniCB(IP);
+ };
+
+ FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
+
+ // Generate the privatization allocas in the block that will become the entry
+ // of the outlined function.
+ InsertPointTy AllocaIP(PRegEntryBB,
+ PRegEntryBB->getTerminator()->getIterator());
+ Builder.restoreIP(AllocaIP);
+ AllocaInst *PrivTIDAddr =
+ Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
+ Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
+
+ // Add some fake uses for OpenMP provided arguments.
+ ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
+ ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use"));
+
+ // ThenBB
+ // |
+ // V
+ // PRegionEntryBB <- Privatization allocas are placed here.
+ // |
+ // V
+ // PRegionBodyBB <- BodyGen is invoked here.
+ // |
+ // V
+ // PRegPreFiniBB <- The block we will start finalization from.
+ // |
+ // V
+ // PRegionExitBB <- A common exit to simplify block collection.
+ //
+
+ LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");
+
+ // Let the caller create the body.
+ assert(BodyGenCB && "Expected body generation callback!");
+ InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
+ BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
+
+ LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n");
+
+ SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
+ SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
+ ParallelRegionBlockSet.insert(PRegEntryBB);
+ ParallelRegionBlockSet.insert(PRegExitBB);
+
+ // Collect all blocks in-between PRegEntryBB and PRegExitBB.
+ Worklist.push_back(PRegEntryBB);
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+ ParallelRegionBlocks.push_back(BB);
+ for (BasicBlock *SuccBB : successors(BB))
+ if (ParallelRegionBlockSet.insert(SuccBB).second)
+ Worklist.push_back(SuccBB);
+ }
+
+ CodeExtractorAnalysisCache CEAC(*OuterFn);
+ CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
+ /* AggregateArgs */ false,
+ /* BlockFrequencyInfo */ nullptr,
+ /* BranchProbabilityInfo */ nullptr,
+ /* AssumptionCache */ nullptr,
+ /* AllowVarArgs */ true,
+ /* AllowAlloca */ true,
+ /* Suffix */ ".omp_par");
+
+ // Find inputs to, outputs from the code region.
+ BasicBlock *CommonExit = nullptr;
+ SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
+ Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
+ Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
+
+ LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");
+
+ FunctionCallee TIDRTLFn =
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);
+
+ auto PrivHelper = [&](Value &V) {
+ if (&V == TIDAddr || &V == ZeroAddr)
+ return;
+
+ SmallVector<Use *, 8> Uses;
+ for (Use &U : V.uses())
+ if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
+ if (ParallelRegionBlockSet.count(UserI->getParent()))
+ Uses.push_back(&U);
+
+ Value *ReplacementValue = nullptr;
+ CallInst *CI = dyn_cast<CallInst>(&V);
+ if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
+ ReplacementValue = PrivTID;
+ } else {
+ Builder.restoreIP(
+ PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
+ assert(ReplacementValue &&
+ "Expected copy/create callback to set replacement value!");
+ if (ReplacementValue == &V)
+ return;
+ }
+
+ for (Use *UPtr : Uses)
+ UPtr->set(ReplacementValue);
+ };
+
+ for (Value *Input : Inputs) {
+ LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
+ PrivHelper(*Input);
+ }
+ for (Value *Output : Outputs) {
+ LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
+ PrivHelper(*Output);
+ }
+
+ LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n");
+ LLVM_DEBUG({
+ for (auto *BB : ParallelRegionBlocks)
+ dbgs() << " PBR: " << BB->getName() << "\n";
+ });
+
+ // Add some known attributes to the outlined function.
+ Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
+ OutlinedFn->addParamAttr(0, Attribute::NoAlias);
+ OutlinedFn->addParamAttr(1, Attribute::NoAlias);
+ OutlinedFn->addFnAttr(Attribute::NoUnwind);
+ OutlinedFn->addFnAttr(Attribute::NoRecurse);
+
+ LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n");
+ LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
+
+ // For compatibility with the clang code generator we move the outlined
+ // function after the one containing the parallel region.
+ OutlinedFn->removeFromParent();
+ M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
+
+ // Remove the artificial entry introduced by the extractor right away; we
+ // made our own entry block after all.
+ {
+ BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
+ assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
+ assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
+ PRegEntryBB->moveBefore(&ArtificialEntry);
+ ArtificialEntry.eraseFromParent();
+ }
+ LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
+ assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);
+
+ assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
+ assert(OutlinedFn->arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+ unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;
+
+ CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
+ CI->getParent()->setName("omp_parallel");
+ Builder.SetInsertPoint(CI);
+
+ // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
+ Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
+ Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};
+
+ SmallVector<Value *, 16> RealArgs;
+ RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
+ RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
+
+ FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
+ if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
+ if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
+ llvm::LLVMContext &Ctx = F->getContext();
+ MDBuilder MDB(Ctx);
+ // Annotate the callback behavior of the __kmpc_fork_call:
+ // - The callback callee is argument number 2 (microtask).
+ // - The first two arguments of the callback callee are unknown (-1).
+ // - All variadic arguments to the __kmpc_fork_call are passed to the
+ // callback callee.
+ F->addMetadata(
+ llvm::LLVMContext::MD_callback,
+ *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
+ 2, {-1, -1},
+ /* VarArgsArePassed */ true)}));
+ }
+ }
+
+ Builder.CreateCall(RTLFn, RealArgs);
+
+ LLVM_DEBUG(dbgs() << "With fork_call placed: "
+ << *Builder.GetInsertBlock()->getParent() << "\n");
+
+ InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
+ InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
+ UI->eraseFromParent();
+
+ // Initialize the local TID stack location with the argument value.
+ Builder.SetInsertPoint(PrivTID);
+ Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
+ Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
+
+ // If no "if" clause was present we do not need the call created during
+ // outlining, otherwise we reuse it in the serialized parallel region.
+ if (!ElseTI) {
+ CI->eraseFromParent();
+ } else {
+
+ // If an "if" clause was present we are now generating the serialized
+ // version into the "else" branch.
+ Builder.SetInsertPoint(ElseTI);
+
+ // Build calls __kmpc_serialized_parallel(&Ident, GTid);
+ Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
+ Builder.CreateCall(
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
+ SerializedParallelCallArgs);
+
+ // OutlinedFn(&GTid, &zero, CapturedStruct);
+ CI->removeFromParent();
+ Builder.Insert(CI);
+
+ // __kmpc_end_serialized_parallel(&Ident, GTid);
+ Value *EndArgs[] = {Ident, ThreadID};
+ Builder.CreateCall(
+ getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
+ EndArgs);
+
+ LLVM_DEBUG(dbgs() << "With serialized parallel region: "
+ << *Builder.GetInsertBlock()->getParent() << "\n");
+ }
+
+ // Adjust the finalization stack, verify the adjustment, and call the
+ // finalize function one last time to finalize values between the pre-fini
+ // block and the exit block if we left the parallel region "the normal way".
+ auto FiniInfo = FinalizationStack.pop_back_val();
+ (void)FiniInfo;
+ assert(FiniInfo.DK == OMPD_parallel &&
+ "Unexpected finalization stack state!");
+
+ Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
+ assert(PreFiniTI->getNumSuccessors() == 1 &&
+ PreFiniTI->getSuccessor(0)->size() == 1 &&
+ isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
+ "Unexpected CFG structure!");
+
+ InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
+ FiniCB(PreFiniIP);
+
+ for (Instruction *I : ToBeDeleted)
+ I->eraseFromParent();
+
+ return AfterIP;
+}
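For reference, the callback annotation emitted for __kmpc_fork_call above can be attached to any broker function with the same MDBuilder API; a minimal sketch (the helper name annotateBroker is illustrative, not part of the patch):

#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"

static void annotateBroker(llvm::Function &Broker) {
  using namespace llvm;
  if (Broker.hasMetadata(LLVMContext::MD_callback))
    return; // Already annotated.
  LLVMContext &Ctx = Broker.getContext();
  MDBuilder MDB(Ctx);
  // Callee is argument number 2; its first two parameters are unknown (-1);
  // all variadic arguments of the broker are forwarded to the callee.
  Broker.addMetadata(LLVMContext::MD_callback,
                     *MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                           2, {-1, -1},
                                           /* VarArgsArePassed */ true)}));
}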
diff --git a/contrib/llvm-project/llvm/lib/IR/AbstractCallSite.cpp b/contrib/llvm-project/llvm/lib/IR/AbstractCallSite.cpp
index b7a81030f41c..19b35665c3fa 100644
--- a/contrib/llvm-project/llvm/lib/IR/AbstractCallSite.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/AbstractCallSite.cpp
@@ -33,6 +33,25 @@ STATISTIC(NumInvalidAbstractCallSitesUnknownCallee,
STATISTIC(NumInvalidAbstractCallSitesNoCallback,
"Number of invalid abstract call sites created (no callback)");
+void AbstractCallSite::getCallbackUses(ImmutableCallSite ICS,
+ SmallVectorImpl<const Use *> &CBUses) {
+ const Function *Callee = ICS.getCalledFunction();
+ if (!Callee)
+ return;
+
+ MDNode *CallbackMD = Callee->getMetadata(LLVMContext::MD_callback);
+ if (!CallbackMD)
+ return;
+
+ for (const MDOperand &Op : CallbackMD->operands()) {
+ MDNode *OpMD = cast<MDNode>(Op.get());
+ auto *CBCalleeIdxAsCM = cast<ConstantAsMetadata>(OpMD->getOperand(0));
+ uint64_t CBCalleeIdx =
+ cast<ConstantInt>(CBCalleeIdxAsCM->getValue())->getZExtValue();
+ CBUses.push_back(ICS.arg_begin() + CBCalleeIdx);
+ }
+}
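A minimal usage sketch for the new helper, assuming the CallSite.h of this LLVM snapshot (where ImmutableCallSite and AbstractCallSite are declared); printCallbackCallees is an illustrative name:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"

static void printCallbackCallees(const llvm::CallInst &CI) {
  llvm::ImmutableCallSite ICS(&CI);
  llvm::SmallVector<const llvm::Use *, 4> CBUses;
  llvm::AbstractCallSite::getCallbackUses(ICS, CBUses);
  // Each collected use is the broker-call operand acting as a callback
  // callee, e.g. the microtask passed to __kmpc_fork_call.
  for (const llvm::Use *U : CBUses)
    if (auto *F = llvm::dyn_cast<llvm::Function>(U->get()->stripPointerCasts()))
      llvm::errs() << "callback callee: " << F->getName() << "\n";
}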
+
/// Create an abstract call site from a use.
AbstractCallSite::AbstractCallSite(const Use *U) : CS(U->getUser()) {
diff --git a/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
index b0c26e0ecaf5..acf0e4afef27 100644
--- a/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
@@ -353,6 +353,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break;
case CallingConv::GHC: Out << "ghccc"; break;
case CallingConv::Tail: Out << "tailcc"; break;
+ case CallingConv::CFGuard_Check: Out << "cfguard_checkcc"; break;
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
@@ -363,6 +364,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
case CallingConv::AArch64_VectorCall: Out << "aarch64_vector_pcs"; break;
+ case CallingConv::AArch64_SVE_VectorCall:
+ Out << "aarch64_sve_vector_pcs";
+ break;
case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break;
case CallingConv::AVR_INTR: Out << "avr_intrcc "; break;
case CallingConv::AVR_SIGNAL: Out << "avr_signalcc "; break;
@@ -2053,7 +2057,7 @@ static void writeDIModule(raw_ostream &Out, const DIModule *N,
Printer.printString("name", N->getName());
Printer.printString("configMacros", N->getConfigurationMacros());
Printer.printString("includePath", N->getIncludePath());
- Printer.printString("isysroot", N->getISysRoot());
+ Printer.printString("sysroot", N->getSysRoot());
Out << ")";
}
@@ -2395,6 +2399,8 @@ public:
void writeAllMDNodes();
void writeMDNode(unsigned Slot, const MDNode *Node);
+ void writeAttribute(const Attribute &Attr, bool InAttrGroup = false);
+ void writeAttributeSet(const AttributeSet &AttrSet, bool InAttrGroup = false);
void writeAllAttributeGroups();
void printTypeIdentities();
@@ -2527,8 +2533,10 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
// Print the type
TypePrinter.print(Operand->getType(), Out);
// Print parameter attributes list
- if (Attrs.hasAttributes())
- Out << ' ' << Attrs.getAsString();
+ if (Attrs.hasAttributes()) {
+ Out << ' ';
+ writeAttributeSet(Attrs);
+ }
Out << ' ';
// Print the operand
WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
@@ -2951,13 +2959,14 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
FunctionSummary::FFlags FFlags = FS->fflags();
if (FFlags.ReadNone | FFlags.ReadOnly | FFlags.NoRecurse |
- FFlags.ReturnDoesNotAlias | FFlags.NoInline) {
+ FFlags.ReturnDoesNotAlias | FFlags.NoInline | FFlags.AlwaysInline) {
Out << ", funcFlags: (";
Out << "readNone: " << FFlags.ReadNone;
Out << ", readOnly: " << FFlags.ReadOnly;
Out << ", noRecurse: " << FFlags.NoRecurse;
Out << ", returnDoesNotAlias: " << FFlags.ReturnDoesNotAlias;
Out << ", noInline: " << FFlags.NoInline;
+ Out << ", alwaysInline: " << FFlags.AlwaysInline;
Out << ")";
}
if (!FS->calls().empty()) {
@@ -3395,9 +3404,6 @@ void AssemblyWriter::printTypeIdentities() {
/// printFunction - Print all aspects of a function.
void AssemblyWriter::printFunction(const Function *F) {
- // Print out the return type and name.
- Out << '\n';
-
if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out);
if (F->isMaterializable())
@@ -3460,8 +3466,10 @@ void AssemblyWriter::printFunction(const Function *F) {
TypePrinter.print(FT->getParamType(I), Out);
AttributeSet ArgAttrs = Attrs.getParamAttributes(I);
- if (ArgAttrs.hasAttributes())
- Out << ' ' << ArgAttrs.getAsString();
+ if (ArgAttrs.hasAttributes()) {
+ Out << ' ';
+ writeAttributeSet(ArgAttrs);
+ }
}
} else {
// The arguments are meaningful here, print them in detail.
@@ -3547,8 +3555,10 @@ void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) {
TypePrinter.print(Arg->getType(), Out);
// Output parameter attributes list
- if (Attrs.hasAttributes())
- Out << ' ' << Attrs.getAsString();
+ if (Attrs.hasAttributes()) {
+ Out << ' ';
+ writeAttributeSet(Attrs);
+ }
// Output name, if available...
if (Arg->hasName()) {
@@ -3563,6 +3573,7 @@ void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) {
/// printBasicBlock - This member is called for each basic block in a method.
void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
+ assert(BB && BB->getParent() && "block without parent!");
bool IsEntryBlock = BB == &BB->getParent()->getEntryBlock();
if (BB->hasName()) { // Print out the label if it exists...
Out << "\n";
@@ -3577,10 +3588,7 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
Out << "<badref>:";
}
- if (!BB->getParent()) {
- Out.PadToColumn(50);
- Out << "; Error: Block without parent!";
- } else if (!IsEntryBlock) {
+ if (!IsEntryBlock) {
// Output predecessors for the block.
Out.PadToColumn(50);
Out << ";";
@@ -4126,6 +4134,33 @@ void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
}
+void AssemblyWriter::writeAttribute(const Attribute &Attr, bool InAttrGroup) {
+ if (!Attr.isTypeAttribute()) {
+ Out << Attr.getAsString(InAttrGroup);
+ return;
+ }
+
+ assert(Attr.hasAttribute(Attribute::ByVal) && "unexpected type attr");
+
+ Out << "byval";
+ if (Type *Ty = Attr.getValueAsType()) {
+ Out << '(';
+ TypePrinter.print(Ty, Out);
+ Out << ')';
+ }
+}
+
+void AssemblyWriter::writeAttributeSet(const AttributeSet &AttrSet,
+ bool InAttrGroup) {
+ bool FirstAttr = true;
+ for (const auto &Attr : AttrSet) {
+ if (!FirstAttr)
+ Out << ' ';
+ writeAttribute(Attr, InAttrGroup);
+ FirstAttr = false;
+ }
+}
+
void AssemblyWriter::writeAllAttributeGroups() {
std::vector<std::pair<AttributeSet, unsigned>> asVec;
asVec.resize(Machine.as_size());
diff --git a/contrib/llvm-project/llvm/lib/IR/Attributes.cpp b/contrib/llvm-project/llvm/lib/IR/Attributes.cpp
index cc370e628e9a..5ca99c981739 100644
--- a/contrib/llvm-project/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Attributes.cpp
@@ -618,7 +618,7 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C,
return *this;
AttrBuilder B(AS);
- for (const auto I : *this)
+ for (const auto &I : *this)
B.addAttribute(I);
return get(C, B);
@@ -725,7 +725,7 @@ AttributeSetNode::AttributeSetNode(ArrayRef<Attribute> Attrs)
sizeof(AvailableAttrs) * CHAR_BIT,
"Too many attributes");
- for (const auto I : *this) {
+ for (const auto &I : *this) {
if (!I.isStringAttribute()) {
Attribute::AttrKind Kind = I.getKindAsEnum();
AvailableAttrs[Kind / 8] |= 1ULL << (Kind % 8);
@@ -745,7 +745,7 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
SmallVector<Attribute, 8> SortedAttrs(Attrs.begin(), Attrs.end());
llvm::sort(SortedAttrs);
- for (const auto Attr : SortedAttrs)
+ for (const auto &Attr : SortedAttrs)
Attr.Profile(ID);
void *InsertPoint;
@@ -813,7 +813,7 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
}
bool AttributeSetNode::hasAttribute(StringRef Kind) const {
- for (const auto I : *this)
+ for (const auto &I : *this)
if (I.hasAttribute(Kind))
return true;
return false;
@@ -821,7 +821,7 @@ bool AttributeSetNode::hasAttribute(StringRef Kind) const {
Attribute AttributeSetNode::getAttribute(Attribute::AttrKind Kind) const {
if (hasAttribute(Kind)) {
- for (const auto I : *this)
+ for (const auto &I : *this)
if (I.hasAttribute(Kind))
return I;
}
@@ -829,42 +829,42 @@ Attribute AttributeSetNode::getAttribute(Attribute::AttrKind Kind) const {
}
Attribute AttributeSetNode::getAttribute(StringRef Kind) const {
- for (const auto I : *this)
+ for (const auto &I : *this)
if (I.hasAttribute(Kind))
return I;
return {};
}
MaybeAlign AttributeSetNode::getAlignment() const {
- for (const auto I : *this)
+ for (const auto &I : *this)
if (I.hasAttribute(Attribute::Alignment))
return I.getAlignment();
return None;
}
MaybeAlign AttributeSetNode::getStackAlignment() const {
- for (const auto I : *this)
+ for (const auto &I : *this)
if (I.hasAttribute(Attribute::StackAlignment))
return I.getStackAlignment();
return None;
}
Type *AttributeSetNode::getByValType() const {
- for (const auto I : *this)
+ for (const auto &I : *this)
if (I.hasAttribute(Attribute::ByVal))
return I.getValueAsType();
return 0;
}
uint64_t AttributeSetNode::getDereferenceableBytes() const {
- for (const auto I : *this)
+ for (const auto &I : *this)
if (I.hasAttribute(Attribute::Dereferenceable))
return I.getDereferenceableBytes();
return 0;
}
uint64_t AttributeSetNode::getDereferenceableOrNullBytes() const {
- for (const auto I : *this)
+ for (const auto &I : *this)
if (I.hasAttribute(Attribute::DereferenceableOrNull))
return I.getDereferenceableOrNullBytes();
return 0;
@@ -872,7 +872,7 @@ uint64_t AttributeSetNode::getDereferenceableOrNullBytes() const {
std::pair<unsigned, Optional<unsigned>>
AttributeSetNode::getAllocSizeArgs() const {
- for (const auto I : *this)
+ for (const auto &I : *this)
if (I.hasAttribute(Attribute::AllocSize))
return I.getAllocSizeArgs();
return std::make_pair(0, 0);
@@ -914,7 +914,7 @@ AttributeListImpl::AttributeListImpl(LLVMContext &C,
"Too many attributes");
static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0U,
"function should be stored in slot 0");
- for (const auto I : Sets[0]) {
+ for (const auto &I : Sets[0]) {
if (!I.isStringAttribute()) {
Attribute::AttrKind Kind = I.getKindAsEnum();
AvailableFunctionAttrs[Kind / 8] |= 1ULL << (Kind % 8);
@@ -1030,7 +1030,7 @@ AttributeList::get(LLVMContext &C,
MaxIndex = Attrs[Attrs.size() - 2].first;
SmallVector<AttributeSet, 4> AttrVec(attrIdxToArrayIdx(MaxIndex) + 1);
- for (const auto Pair : Attrs)
+ for (const auto &Pair : Attrs)
AttrVec[attrIdxToArrayIdx(Pair.first)] = Pair.second;
return getImpl(C, AttrVec);
@@ -1098,7 +1098,7 @@ AttributeList AttributeList::get(LLVMContext &C, unsigned Index,
AttributeList AttributeList::get(LLVMContext &C, unsigned Index,
ArrayRef<StringRef> Kinds) {
SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
- for (const auto K : Kinds)
+ for (const auto &K : Kinds)
Attrs.emplace_back(Index, Attribute::get(C, K));
return get(C, Attrs);
}
@@ -1111,7 +1111,7 @@ AttributeList AttributeList::get(LLVMContext &C,
return Attrs[0];
unsigned MaxSize = 0;
- for (const auto List : Attrs)
+ for (const auto &List : Attrs)
MaxSize = std::max(MaxSize, List.getNumAttrSets());
// If every list was empty, there is no point in merging the lists.
@@ -1121,7 +1121,7 @@ AttributeList AttributeList::get(LLVMContext &C,
SmallVector<AttributeSet, 8> NewAttrSets(MaxSize);
for (unsigned I = 0; I < MaxSize; ++I) {
AttrBuilder CurBuilder;
- for (const auto List : Attrs)
+ for (const auto &List : Attrs)
CurBuilder.merge(List.getAttributes(I - 1));
NewAttrSets[I] = AttributeSet::get(C, CurBuilder);
}
@@ -1659,7 +1659,7 @@ bool AttrBuilder::hasAttributes() const {
bool AttrBuilder::hasAttributes(AttributeList AL, uint64_t Index) const {
AttributeSet AS = AL.getAttributes(Index);
- for (const auto Attr : AS) {
+ for (const auto &Attr : AS) {
if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
if (contains(Attr.getKindAsEnum()))
return true;
diff --git a/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
index 79f580d0e14d..6e2beeb839b6 100644
--- a/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
@@ -22,6 +22,9 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
@@ -559,14 +562,33 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
+ if (Name.startswith("arm.neon.vqadds.")) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
+ F->arg_begin()->getType());
+ return true;
+ }
+ if (Name.startswith("arm.neon.vqaddu.")) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
+ F->arg_begin()->getType());
+ return true;
+ }
+ if (Name.startswith("arm.neon.vqsubs.")) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
+ F->arg_begin()->getType());
+ return true;
+ }
+ if (Name.startswith("arm.neon.vqsubu.")) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
+ F->arg_begin()->getType());
+ return true;
+ }
if (Name.startswith("aarch64.neon.addp")) {
if (F->arg_size() != 2)
break; // Invalid IR.
- auto fArgs = F->getFunctionType()->params();
- VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
- if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
+ VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
+ if (Ty && Ty->getElementType()->isFloatingPointTy()) {
NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::aarch64_neon_faddp, fArgs);
+ Intrinsic::aarch64_neon_faddp, Ty);
return true;
}
}
@@ -3877,15 +3899,36 @@ void llvm::UpgradeARCRuntime(Module &M) {
FunctionType *NewFuncTy = NewFn->getFunctionType();
SmallVector<Value *, 2> Args;
+ // Don't upgrade the intrinsic if it's not valid to bitcast the return
+ // value to the return type of the old function.
+ if (NewFuncTy->getReturnType() != CI->getType() &&
+ !CastInst::castIsValid(Instruction::BitCast, CI,
+ NewFuncTy->getReturnType()))
+ continue;
+
+ bool InvalidCast = false;
+
for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
Value *Arg = CI->getArgOperand(I);
+
// Bitcast argument to the parameter type of the new function if it's
// not a variadic argument.
- if (I < NewFuncTy->getNumParams())
+ if (I < NewFuncTy->getNumParams()) {
+ // Don't upgrade the intrinsic if it's not valid to bitcast the argument
+ // to the parameter type of the new function.
+ if (!CastInst::castIsValid(Instruction::BitCast, Arg,
+ NewFuncTy->getParamType(I))) {
+ InvalidCast = true;
+ break;
+ }
Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
+ }
Args.push_back(Arg);
}
+ if (InvalidCast)
+ continue;
+
// Create a call instruction that calls the new function.
CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
@@ -4119,9 +4162,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
// If X86, and the datalayout matches the expected format, add pointer size
// address spaces to the datalayout.
- Triple::ArchType Arch = Triple(TT).getArch();
- if ((Arch != llvm::Triple::x86 && Arch != llvm::Triple::x86_64) ||
- DL.contains(AddrSpaces))
+ if (!Triple(TT).isX86() || DL.contains(AddrSpaces))
return DL;
SmallVector<StringRef, 4> Groups;
@@ -4133,3 +4174,23 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str();
return Res;
}
+
+void llvm::UpgradeFramePointerAttributes(AttrBuilder &B) {
+ StringRef FramePointer;
+ if (B.contains("no-frame-pointer-elim")) {
+ // The value can be "true" or "false".
+ for (const auto &I : B.td_attrs())
+ if (I.first == "no-frame-pointer-elim")
+ FramePointer = I.second == "true" ? "all" : "none";
+ B.removeAttribute("no-frame-pointer-elim");
+ }
+ if (B.contains("no-frame-pointer-elim-non-leaf")) {
+ // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
+ if (FramePointer != "all")
+ FramePointer = "non-leaf";
+ B.removeAttribute("no-frame-pointer-elim-non-leaf");
+ }
+
+ if (!FramePointer.empty())
+ B.addAttribute("frame-pointer", FramePointer);
+}
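A short sketch of the effect on a legacy attribute builder, assuming the new function is exposed via llvm/IR/AutoUpgrade.h like the other upgrade hooks:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/AutoUpgrade.h"

void upgradeFramePointerExample() {
  llvm::AttrBuilder B;
  B.addAttribute("no-frame-pointer-elim", "true");
  B.addAttribute("no-frame-pointer-elim-non-leaf", "");
  llvm::UpgradeFramePointerAttributes(B);
  // Both legacy keys are removed; "true" takes priority over the non-leaf
  // hint, so the builder now carries "frame-pointer"="all".
}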
diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
index 71fa795ec294..6e24f03c4cfd 100644
--- a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
@@ -792,13 +792,36 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
if (isa<UndefValue>(Val) || isa<UndefValue>(Idx))
return UndefValue::get(Val->getType()->getVectorElementType());
- if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
- // ee({w,x,y,z}, wrong_value) -> undef
- if (CIdx->uge(Val->getType()->getVectorNumElements()))
- return UndefValue::get(Val->getType()->getVectorElementType());
- return Val->getAggregateElement(CIdx->getZExtValue());
+ auto *CIdx = dyn_cast<ConstantInt>(Idx);
+ if (!CIdx)
+ return nullptr;
+
+ // ee({w,x,y,z}, wrong_value) -> undef
+ if (CIdx->uge(Val->getType()->getVectorNumElements()))
+ return UndefValue::get(Val->getType()->getVectorElementType());
+
+ // ee (gep (ptr, idx0, ...), idx) -> gep (ee (ptr, idx), ee (idx0, idx), ...)
+ if (auto *CE = dyn_cast<ConstantExpr>(Val)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ SmallVector<Constant *, 8> Ops;
+ Ops.reserve(CE->getNumOperands());
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+ Constant *Op = CE->getOperand(i);
+ if (Op->getType()->isVectorTy()) {
+ Constant *ScalarOp = ConstantExpr::getExtractElement(Op, Idx);
+ if (!ScalarOp)
+ return nullptr;
+ Ops.push_back(ScalarOp);
+ } else
+ Ops.push_back(Op);
+ }
+ return CE->getWithOperands(Ops, CE->getType()->getVectorElementType(),
+ false,
+ Ops[0]->getType()->getPointerElementType());
+ }
}
- return nullptr;
+
+ return Val->getAggregateElement(CIdx);
}
Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
@@ -810,6 +833,12 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
if (!CIdx) return nullptr;
+ // Do not iterate over a scalable vector; the number of elements is unknown
+ // at compile time.
+ VectorType *ValTy = cast<VectorType>(Val->getType());
+ if (ValTy->isScalable())
+ return nullptr;
+
unsigned NumElts = Val->getType()->getVectorNumElements();
if (CIdx->uge(NumElts))
return UndefValue::get(Val->getType());
@@ -844,6 +873,12 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
// Don't break the bitcode reader hack.
if (isa<ConstantExpr>(Mask)) return nullptr;
+ // Do not iterate over a scalable vector; the number of elements is unknown
+ // at compile time.
+ VectorType *ValTy = cast<VectorType>(V1->getType());
+ if (ValTy->isScalable())
+ return nullptr;
+
unsigned SrcNumElts = V1->getType()->getVectorNumElements();
// Loop over the shuffle mask, evaluating each element.
@@ -965,6 +1000,19 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
Constant *C2) {
assert(Instruction::isBinaryOp(Opcode) && "Non-binary instruction detected");
+ // Simplify BinOps with their identity values first. They are no-ops and we
+ // can always return the other value, including undef or poison values.
+ // FIXME: remove unnecessary duplicated identity patterns below.
+ // FIXME: Use AllowRHSConstant with getBinOpIdentity to handle additional ops,
+ // like X << 0 = X.
+ Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, C1->getType());
+ if (Identity) {
+ if (C1 == Identity)
+ return C2;
+ if (C2 == Identity)
+ return C1;
+ }
+
// Handle scalar UndefValue. Vectors are always evaluated per element.
bool HasScalarUndef = !C1->getType()->isVectorTy() &&
(isa<UndefValue>(C1) || isa<UndefValue>(C2));
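The early identity folding added above can be observed through ConstantExpr; a small sketch, assuming a live LLVMContext:

#include <cassert>
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"

void identityFoldExample(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Type *I32 = Type::getInt32Ty(Ctx);
  Constant *U = UndefValue::get(I32);
  Constant *Zero = ConstantInt::get(I32, 0);
  // 0 is the identity of 'or', so the fold returns the other operand even
  // when it is undef, instead of evaluating the operation bitwise.
  Constant *R = ConstantExpr::getOr(U, Zero);
  assert(R == U);
}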
diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp b/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp
index 642bf0f39342..3d25cb5bfbdf 100644
--- a/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ConstantRange.cpp
@@ -64,11 +64,11 @@ ConstantRange ConstantRange::fromKnownBits(const KnownBits &Known,
// For unsigned ranges, or signed ranges with known sign bit, create a simple
// range between the smallest and largest possible value.
if (!IsSigned || Known.isNegative() || Known.isNonNegative())
- return ConstantRange(Known.One, ~Known.Zero + 1);
+ return ConstantRange(Known.getMinValue(), Known.getMaxValue() + 1);
// If we don't know the sign bit, pick the lower bound as a negative number
// and the upper bound as a non-negative one.
- APInt Lower = Known.One, Upper = ~Known.Zero;
+ APInt Lower = Known.getMinValue(), Upper = Known.getMaxValue();
Lower.setSignBit();
Upper.clearSignBit();
return ConstantRange(Lower, Upper + 1);
@@ -641,7 +641,7 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
if (getBitWidth() == ResultBitWidth)
return *this;
else
- return getFull();
+ return getFull(ResultBitWidth);
case Instruction::UIToFP: {
// TODO: use input range if available
auto BW = getBitWidth();
@@ -662,7 +662,7 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
case Instruction::PtrToInt:
case Instruction::AddrSpaceCast:
// Conservatively return getFull set.
- return getFull();
+ return getFull(ResultBitWidth);
};
}
@@ -816,6 +816,23 @@ ConstantRange ConstantRange::binaryOp(Instruction::BinaryOps BinOp,
}
}
+ConstantRange ConstantRange::overflowingBinaryOp(Instruction::BinaryOps BinOp,
+ const ConstantRange &Other,
+ unsigned NoWrapKind) const {
+ assert(Instruction::isBinaryOp(BinOp) && "Binary operators only!");
+
+ switch (BinOp) {
+ case Instruction::Add:
+ return addWithNoWrap(Other, NoWrapKind);
+ case Instruction::Sub:
+ return subWithNoWrap(Other, NoWrapKind);
+ default:
+ // Don't know about this Overflowing Binary Operation.
+ // Conservatively fall back to plain binop handling.
+ return binaryOp(BinOp, Other);
+ }
+}
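A worked sketch of the new dispatch; the 8-bit ranges are chosen so that every value pair overflows under 'add nuw':

#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"

void addNuwExample() {
  using namespace llvm;
  using OBO = OverflowingBinaryOperator;
  ConstantRange X(APInt(8, 100), APInt(8, 121)); // [100, 121)
  ConstantRange Y(APInt(8, 200), APInt(8, 201)); // {200}
  ConstantRange R =
      X.overflowingBinaryOp(Instruction::Add, Y, OBO::NoUnsignedWrap);
  // Every pair overflows in 8 bits, so R is the empty set; opcodes without
  // a specialized handler fall back to plain binaryOp().
  (void)R;
}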
+
ConstantRange
ConstantRange::add(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
@@ -849,41 +866,17 @@ ConstantRange ConstantRange::addWithNoWrap(const ConstantRange &Other,
using OBO = OverflowingBinaryOperator;
ConstantRange Result = add(Other);
- auto addWithNoUnsignedWrap = [this](const ConstantRange &Other) {
- APInt LMin = getUnsignedMin(), LMax = getUnsignedMax();
- APInt RMin = Other.getUnsignedMin(), RMax = Other.getUnsignedMax();
- bool Overflow;
- APInt NewMin = LMin.uadd_ov(RMin, Overflow);
- if (Overflow)
- return getEmpty();
- APInt NewMax = LMax.uadd_sat(RMax);
- return getNonEmpty(std::move(NewMin), std::move(NewMax) + 1);
- };
-
- auto addWithNoSignedWrap = [this](const ConstantRange &Other) {
- APInt LMin = getSignedMin(), LMax = getSignedMax();
- APInt RMin = Other.getSignedMin(), RMax = Other.getSignedMax();
- if (LMin.isNonNegative()) {
- bool Overflow;
- APInt Temp = LMin.sadd_ov(RMin, Overflow);
- if (Overflow)
- return getEmpty();
- }
- if (LMax.isNegative()) {
- bool Overflow;
- APInt Temp = LMax.sadd_ov(RMax, Overflow);
- if (Overflow)
- return getEmpty();
- }
- APInt NewMin = LMin.sadd_sat(RMin);
- APInt NewMax = LMax.sadd_sat(RMax);
- return getNonEmpty(std::move(NewMin), std::move(NewMax) + 1);
- };
+ // If an overflow happens for every value pair in these two constant ranges,
+ // we must return the empty set. Here we get that for free, because the
+ // intersection of add() with uadd_sat()/sadd_sat() happens to be empty in
+ // exactly that case.
if (NoWrapKind & OBO::NoSignedWrap)
- Result = Result.intersectWith(addWithNoSignedWrap(Other), RangeType);
+ Result = Result.intersectWith(sadd_sat(Other), RangeType);
+
if (NoWrapKind & OBO::NoUnsignedWrap)
- Result = Result.intersectWith(addWithNoUnsignedWrap(Other), RangeType);
+ Result = Result.intersectWith(uadd_sat(Other), RangeType);
+
return Result;
}
@@ -907,6 +900,36 @@ ConstantRange::sub(const ConstantRange &Other) const {
return X;
}
+ConstantRange ConstantRange::subWithNoWrap(const ConstantRange &Other,
+ unsigned NoWrapKind,
+ PreferredRangeType RangeType) const {
+ // Calculate the range for "X - Y", which is guaranteed not to wrap (overflow).
+ // (X is from this, and Y is from Other)
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+ if (isFullSet() && Other.isFullSet())
+ return getFull();
+
+ using OBO = OverflowingBinaryOperator;
+ ConstantRange Result = sub(Other);
+
+ // If an overflow happens for every value pair in these two constant ranges,
+ // we must return the empty set. In the signed case we get that for free,
+ // because the intersection of sub() with ssub_sat() is empty exactly then;
+ // in the unsigned case we must perform the overflow check manually.
+
+ if (NoWrapKind & OBO::NoSignedWrap)
+ Result = Result.intersectWith(ssub_sat(Other), RangeType);
+
+ if (NoWrapKind & OBO::NoUnsignedWrap) {
+ if (getUnsignedMax().ult(Other.getUnsignedMin()))
+ return getEmpty(); // Always overflows.
+ Result = Result.intersectWith(usub_sat(Other), RangeType);
+ }
+
+ return Result;
+}
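A small example of the manual unsigned overflow check, assuming the header gives the preferred range type its usual default:

#include <cassert>
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Operator.h"

void subNuwExample() {
  using namespace llvm;
  ConstantRange A(APInt(8, 0), APInt(8, 10));  // [0, 10)
  ConstantRange B(APInt(8, 50), APInt(8, 60)); // [50, 60)
  // A.getUnsignedMax() (9) is below B.getUnsignedMin() (50), so every pair
  // overflows and the result is the empty set.
  ConstantRange R =
      A.subWithNoWrap(B, OverflowingBinaryOperator::NoUnsignedWrap);
  assert(R.isEmptySet());
}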
+
ConstantRange
ConstantRange::multiply(const ConstantRange &Other) const {
// TODO: If either operand is a single element and the multiply is known to
@@ -1310,6 +1333,61 @@ ConstantRange ConstantRange::ssub_sat(const ConstantRange &Other) const {
return getNonEmpty(std::move(NewL), std::move(NewU));
}
+ConstantRange ConstantRange::umul_sat(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+
+ APInt NewL = getUnsignedMin().umul_sat(Other.getUnsignedMin());
+ APInt NewU = getUnsignedMax().umul_sat(Other.getUnsignedMax()) + 1;
+ return getNonEmpty(std::move(NewL), std::move(NewU));
+}
+
+ConstantRange ConstantRange::smul_sat(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+
+ // Because we could be dealing with negative numbers here, the lower bound is
+ // the smallest product over the cartesian product of each range's signed
+ // bounds; for example:
+ // [-1,4) * [-2,3) = min(-1*-2, -1*2, 3*-2, 3*2) = -6.
+ // Similarly for the upper bound, swapping min for max.
+
+ APInt this_min = getSignedMin().sext(getBitWidth() * 2);
+ APInt this_max = getSignedMax().sext(getBitWidth() * 2);
+ APInt Other_min = Other.getSignedMin().sext(getBitWidth() * 2);
+ APInt Other_max = Other.getSignedMax().sext(getBitWidth() * 2);
+
+ auto L = {this_min * Other_min, this_min * Other_max, this_max * Other_min,
+ this_max * Other_max};
+ auto Compare = [](const APInt &A, const APInt &B) { return A.slt(B); };
+
+ // Note that we wanted to perform signed saturating multiplication; since we
+ // performed plain multiplication in twice the bitwidth, we need to finish
+ // with a signed saturating truncation.
+ return getNonEmpty(std::min(L, Compare).truncSSat(getBitWidth()),
+ std::max(L, Compare).truncSSat(getBitWidth()) + 1);
+}
+
+ConstantRange ConstantRange::ushl_sat(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+
+ APInt NewL = getUnsignedMin().ushl_sat(Other.getUnsignedMin());
+ APInt NewU = getUnsignedMax().ushl_sat(Other.getUnsignedMax()) + 1;
+ return getNonEmpty(std::move(NewL), std::move(NewU));
+}
+
+ConstantRange ConstantRange::sshl_sat(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+
+ APInt Min = getSignedMin(), Max = getSignedMax();
+ APInt ShAmtMin = Other.getUnsignedMin(), ShAmtMax = Other.getUnsignedMax();
+ APInt NewL = Min.sshl_sat(Min.isNonNegative() ? ShAmtMin : ShAmtMax);
+ APInt NewU = Max.sshl_sat(Max.isNegative() ? ShAmtMin : ShAmtMax) + 1;
+ return getNonEmpty(std::move(NewL), std::move(NewU));
+}
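A short sketch of the bound selection in sshl_sat over 8-bit ranges:

#include <cassert>
#include "llvm/IR/ConstantRange.h"

void sshlSatExample() {
  using namespace llvm;
  ConstantRange V(APInt(8, 1), APInt(8, 3)); // values {1, 2}
  ConstantRange S(APInt(8, 0), APInt(8, 2)); // shift amounts {0, 1}
  ConstantRange R = V.sshl_sat(S);
  // Min (1) is non-negative, so the lower bound uses the minimum shift:
  // 1 << 0 = 1. The upper bound uses the maximum shift: 2 << 1 = 4.
  assert(R == ConstantRange(APInt(8, 1), APInt(8, 5))); // [1, 5)
}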
+
ConstantRange ConstantRange::inverse() const {
if (isFullSet())
return getEmpty();
diff --git a/contrib/llvm-project/llvm/lib/IR/Constants.cpp b/contrib/llvm-project/llvm/lib/IR/Constants.cpp
index f792f01efc1a..054375aab6c3 100644
--- a/contrib/llvm-project/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Constants.cpp
@@ -31,6 +31,7 @@
#include <algorithm>
using namespace llvm;
+using namespace PatternMatch;
//===----------------------------------------------------------------------===//
// Constant Class
@@ -149,6 +150,30 @@ bool Constant::isOneValue() const {
return false;
}
+bool Constant::isNotOneValue() const {
+ // Check for 1 integers
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return !CI->isOneValue();
+
+ // Check for FP values which are bitcast from the integer 1
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return !CFP->getValueAPF().bitcastToAPInt().isOneValue();
+
+ // Check that vectors don't contain 1
+ if (this->getType()->isVectorTy()) {
+ unsigned NumElts = this->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = this->getAggregateElement(i);
+ if (!Elt || !Elt->isNotOneValue())
+ return false;
+ }
+ return true;
+ }
+
+ // It *may* contain 1; we can't tell.
+ return false;
+}
+
bool Constant::isMinSignedValue() const {
// Check for INT_MIN integers
if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
@@ -255,14 +280,18 @@ bool Constant::isElementWiseEqual(Value *Y) const {
// Are they fully identical?
if (this == Y)
return true;
- // They may still be identical element-wise (if they have `undef`s).
- auto *Cy = dyn_cast<Constant>(Y);
- if (!Cy)
+
+ // The input value must be a vector constant with the same type.
+ Type *Ty = getType();
+ if (!isa<Constant>(Y) || !Ty->isVectorTy() || Ty != Y->getType())
return false;
- return PatternMatch::match(ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_EQ,
- const_cast<Constant *>(this),
- Cy),
- PatternMatch::m_One());
+
+ // They may still be identical element-wise (if they have `undef`s).
+ // FIXME: This crashes on FP vector constants.
+ return match(ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_EQ,
+ const_cast<Constant *>(this),
+ cast<Constant>(Y)),
+ m_One());
}
bool Constant::containsUndefElement() const {
@@ -595,6 +624,28 @@ void Constant::removeDeadConstantUsers() const {
}
}
+Constant *Constant::replaceUndefsWith(Constant *C, Constant *Replacement) {
+ assert(C && Replacement && "Expected non-nullptr constant arguments");
+ Type *Ty = C->getType();
+ if (match(C, m_Undef())) {
+ assert(Ty == Replacement->getType() && "Expected matching types");
+ return Replacement;
+ }
+
+ // Don't know how to deal with this constant.
+ if (!Ty->isVectorTy())
+ return C;
+
+ unsigned NumElts = Ty->getVectorNumElements();
+ SmallVector<Constant *, 32> NewC(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *EltC = C->getAggregateElement(i);
+ assert((!EltC || EltC->getType() == Replacement->getType()) &&
+ "Expected matching types");
+ NewC[i] = EltC && match(EltC, m_Undef()) ? Replacement : EltC;
+ }
+ return ConstantVector::get(NewC);
+}
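A usage sketch for the new helper, assuming a live LLVMContext:

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"

void replaceUndefsExample(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Type *I32 = Type::getInt32Ty(Ctx);
  Constant *Elts[] = {ConstantInt::get(I32, 7), UndefValue::get(I32)};
  Constant *V = ConstantVector::get(Elts);
  Constant *Z = Constant::replaceUndefsWith(V, ConstantInt::get(I32, 0));
  // Z is <i32 7, i32 0>: only the undef lane was substituted.
  (void)Z;
}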
//===----------------------------------------------------------------------===//
@@ -1396,24 +1447,41 @@ void ConstantVector::destroyConstantImpl() {
getType()->getContext().pImpl->VectorConstants.remove(this);
}
-Constant *Constant::getSplatValue() const {
+Constant *Constant::getSplatValue(bool AllowUndefs) const {
assert(this->getType()->isVectorTy() && "Only valid for vectors!");
if (isa<ConstantAggregateZero>(this))
return getNullValue(this->getType()->getVectorElementType());
if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
return CV->getSplatValue();
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
- return CV->getSplatValue();
+ return CV->getSplatValue(AllowUndefs);
return nullptr;
}
-Constant *ConstantVector::getSplatValue() const {
+Constant *ConstantVector::getSplatValue(bool AllowUndefs) const {
// Check out first element.
Constant *Elt = getOperand(0);
// Then make sure all remaining elements point to the same value.
- for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
- if (getOperand(I) != Elt)
+ for (unsigned I = 1, E = getNumOperands(); I < E; ++I) {
+ Constant *OpC = getOperand(I);
+ if (OpC == Elt)
+ continue;
+
+ // Strict mode: any mismatch is not a splat.
+ if (!AllowUndefs)
return nullptr;
+
+ // Allow undefs mode: ignore undefined elements.
+ if (isa<UndefValue>(OpC))
+ continue;
+
+ // If we do not have a defined element yet, use the current operand.
+ if (isa<UndefValue>(Elt))
+ Elt = OpC;
+
+ if (OpC != Elt)
+ return nullptr;
+ }
return Elt;
}
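The relaxed matching shows up on a vector with one undefined lane; a sketch, again assuming a live LLVMContext:

#include <cassert>
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"

void splatUndefExample(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Type *I32 = Type::getInt32Ty(Ctx);
  Constant *Elts[] = {ConstantInt::get(I32, 1), UndefValue::get(I32),
                      ConstantInt::get(I32, 1)};
  Constant *V = ConstantVector::get(Elts);
  Constant *Strict = V->getSplatValue();                      // nullptr
  Constant *Relaxed = V->getSplatValue(/*AllowUndefs=*/true); // i32 1
  assert(!Strict && Relaxed == ConstantInt::get(I32, 1));
}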
@@ -2165,7 +2233,7 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
return FC; // Fold a few common cases.
- unsigned NElts = Mask->getType()->getVectorNumElements();
+ ElementCount NElts = Mask->getType()->getVectorElementCount();
Type *EltTy = V1->getType()->getVectorElementType();
Type *ShufTy = VectorType::get(EltTy, NElts);
@@ -2982,7 +3050,7 @@ Value *ConstantExpr::handleOperandChangeImpl(Value *From, Value *ToV) {
NewOps, this, From, To, NumUpdated, OperandNo);
}
-Instruction *ConstantExpr::getAsInstruction() {
+Instruction *ConstantExpr::getAsInstruction() const {
SmallVector<Value *, 4> ValueOperands(op_begin(), op_end());
ArrayRef<Value*> Ops(ValueOperands);
diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantsContext.h b/contrib/llvm-project/llvm/lib/IR/ConstantsContext.h
index 1ec9087551f8..f5e2481f3903 100644
--- a/contrib/llvm-project/llvm/lib/IR/ConstantsContext.h
+++ b/contrib/llvm-project/llvm/lib/IR/ConstantsContext.h
@@ -149,7 +149,7 @@ public:
ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
: ConstantExpr(VectorType::get(
cast<VectorType>(C1->getType())->getElementType(),
- cast<VectorType>(C3->getType())->getNumElements()),
+ cast<VectorType>(C3->getType())->getElementCount()),
Instruction::ShuffleVector,
&Op<0>(), 3) {
Op<0>() = C1;
diff --git a/contrib/llvm-project/llvm/lib/IR/Core.cpp b/contrib/llvm-project/llvm/lib/IR/Core.cpp
index a5f46b16e600..04e34a90a9bc 100644
--- a/contrib/llvm-project/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Core.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -47,7 +48,6 @@ void llvm::initializeCore(PassRegistry &Registry) {
initializeDominatorTreeWrapperPassPass(Registry);
initializePrintModulePassWrapperPass(Registry);
initializePrintFunctionPassWrapperPass(Registry);
- initializePrintBasicBlockPassPass(Registry);
initializeSafepointIRVerifierPass(Registry);
initializeVerifierLegacyPassPass(Registry);
}
@@ -147,9 +147,9 @@ LLVMAttributeRef LLVMCreateEnumAttribute(LLVMContextRef C, unsigned KindID,
// After r362128, byval attributes need to have a type attribute. Provide a
// NULL one until a proper API is added for this.
return wrap(Attribute::getWithByValType(Ctx, NULL));
- } else {
- return wrap(Attribute::get(Ctx, AttrKind, Val));
}
+
+ return wrap(Attribute::get(Ctx, AttrKind, Val));
}
unsigned LLVMGetEnumAttributeKind(LLVMAttributeRef A) {
@@ -3442,15 +3442,16 @@ LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
LLVMValueRef LLVMBuildMemSet(LLVMBuilderRef B, LLVMValueRef Ptr,
LLVMValueRef Val, LLVMValueRef Len,
unsigned Align) {
- return wrap(unwrap(B)->CreateMemSet(unwrap(Ptr), unwrap(Val), unwrap(Len), Align));
+ return wrap(unwrap(B)->CreateMemSet(unwrap(Ptr), unwrap(Val), unwrap(Len),
+ MaybeAlign(Align)));
}
LLVMValueRef LLVMBuildMemCpy(LLVMBuilderRef B,
LLVMValueRef Dst, unsigned DstAlign,
LLVMValueRef Src, unsigned SrcAlign,
LLVMValueRef Size) {
- return wrap(unwrap(B)->CreateMemCpy(unwrap(Dst), DstAlign,
- unwrap(Src), SrcAlign,
+ return wrap(unwrap(B)->CreateMemCpy(unwrap(Dst), MaybeAlign(DstAlign),
+ unwrap(Src), MaybeAlign(SrcAlign),
unwrap(Size)));
}
@@ -3458,8 +3459,8 @@ LLVMValueRef LLVMBuildMemMove(LLVMBuilderRef B,
LLVMValueRef Dst, unsigned DstAlign,
LLVMValueRef Src, unsigned SrcAlign,
LLVMValueRef Size) {
- return wrap(unwrap(B)->CreateMemMove(unwrap(Dst), DstAlign,
- unwrap(Src), SrcAlign,
+ return wrap(unwrap(B)->CreateMemMove(unwrap(Dst), MaybeAlign(DstAlign),
+ unwrap(Src), MaybeAlign(SrcAlign),
unwrap(Size)));
}
@@ -3898,6 +3899,11 @@ LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef B, LLVMValueRef AggVal,
Index, Name));
}
+LLVMValueRef LLVMBuildFreeze(LLVMBuilderRef B, LLVMValueRef Val,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFreeze(unwrap(Val), Name));
+}
+
LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef B, LLVMValueRef Val,
const char *Name) {
return wrap(unwrap(B)->CreateIsNull(unwrap(Val), Name));
diff --git a/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
index 5d5671227430..c89f404e4296 100644
--- a/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
@@ -11,15 +11,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/IRBuilder.h"
#include "LLVMContextImpl.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -305,10 +306,11 @@ DIDerivedType *DIBuilder::createReferenceType(
DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name,
DIFile *File, unsigned LineNo,
- DIScope *Context) {
+ DIScope *Context,
+ uint32_t AlignInBits) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File,
- LineNo, getNonCompileUnitScope(Context), Ty, 0, 0,
- 0, None, DINode::FlagZero);
+ LineNo, getNonCompileUnitScope(Context), Ty, 0,
+ AlignInBits, 0, None, DINode::FlagZero);
}
DIDerivedType *DIBuilder::createFriend(DIType *Ty, DIType *FriendTy) {
@@ -638,14 +640,15 @@ static void checkGlobalVariableScope(DIScope *Context) {
DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
- unsigned LineNumber, DIType *Ty, bool isLocalToUnit, DIExpression *Expr,
- MDNode *Decl, MDTuple *templateParams, uint32_t AlignInBits) {
+ unsigned LineNumber, DIType *Ty, bool IsLocalToUnit,
+ bool isDefined, DIExpression *Expr,
+ MDNode *Decl, MDTuple *TemplateParams, uint32_t AlignInBits) {
checkGlobalVariableScope(Context);
auto *GV = DIGlobalVariable::getDistinct(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
- LineNumber, Ty, isLocalToUnit, true, cast_or_null<DIDerivedType>(Decl),
- templateParams, AlignInBits);
+ LineNumber, Ty, IsLocalToUnit, isDefined, cast_or_null<DIDerivedType>(Decl),
+ TemplateParams, AlignInBits);
if (!Expr)
Expr = createExpression();
auto *N = DIGlobalVariableExpression::get(VMContext, GV, Expr);
@@ -655,14 +658,14 @@ DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression(
DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
- unsigned LineNumber, DIType *Ty, bool isLocalToUnit, MDNode *Decl,
- MDTuple *templateParams, uint32_t AlignInBits) {
+ unsigned LineNumber, DIType *Ty, bool IsLocalToUnit, MDNode *Decl,
+ MDTuple *TemplateParams, uint32_t AlignInBits) {
checkGlobalVariableScope(Context);
return DIGlobalVariable::getTemporary(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
- LineNumber, Ty, isLocalToUnit, false,
- cast_or_null<DIDerivedType>(Decl), templateParams, AlignInBits)
+ LineNumber, Ty, IsLocalToUnit, false,
+ cast_or_null<DIDerivedType>(Decl), TemplateParams, AlignInBits)
.release();
}
@@ -827,9 +830,9 @@ DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name,
DIModule *DIBuilder::createModule(DIScope *Scope, StringRef Name,
StringRef ConfigurationMacros,
StringRef IncludePath,
- StringRef ISysRoot) {
+ StringRef SysRoot) {
return DIModule::get(VMContext, getNonCompileUnitScope(Scope), Name,
- ConfigurationMacros, IncludePath, ISysRoot);
+ ConfigurationMacros, IncludePath, SysRoot);
}
DILexicalBlockFile *DIBuilder::createLexicalBlockFile(DIScope *Scope,
diff --git a/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp b/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp
index 5fe7a2e94b6a..94e0740663cc 100644
--- a/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DataLayout.cpp
@@ -768,13 +768,13 @@ unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const {
IntegerType *DataLayout::getIntPtrType(LLVMContext &C,
unsigned AddressSpace) const {
- return IntegerType::get(C, getIndexSizeInBits(AddressSpace));
+ return IntegerType::get(C, getPointerSizeInBits(AddressSpace));
}
Type *DataLayout::getIntPtrType(Type *Ty) const {
assert(Ty->isPtrOrPtrVectorTy() &&
"Expected a pointer or pointer vector type.");
- unsigned NumBits = getIndexTypeSizeInBits(Ty);
+ unsigned NumBits = getPointerTypeSizeInBits(Ty);
IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits);
if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
return VectorType::get(IntTy, VecTy->getNumElements());
diff --git a/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
index 1bbe6b85d260..fe8311923109 100644
--- a/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
@@ -782,12 +782,12 @@ LLVMDIBuilderCreateModule(LLVMDIBuilderRef Builder, LLVMMetadataRef ParentScope,
const char *Name, size_t NameLen,
const char *ConfigMacros, size_t ConfigMacrosLen,
const char *IncludePath, size_t IncludePathLen,
- const char *ISysRoot, size_t ISysRootLen) {
+ const char *SysRoot, size_t SysRootLen) {
return wrap(unwrap(Builder)->createModule(
unwrapDI<DIScope>(ParentScope), StringRef(Name, NameLen),
StringRef(ConfigMacros, ConfigMacrosLen),
StringRef(IncludePath, IncludePathLen),
- StringRef(ISysRoot, ISysRootLen)));
+ StringRef(SysRoot, SysRootLen)));
}
LLVMMetadataRef LLVMDIBuilderCreateNameSpace(LLVMDIBuilderRef Builder,
@@ -1108,11 +1108,10 @@ LLVMMetadataRef
LLVMDIBuilderCreateTypedef(LLVMDIBuilderRef Builder, LLVMMetadataRef Type,
const char *Name, size_t NameLen,
LLVMMetadataRef File, unsigned LineNo,
- LLVMMetadataRef Scope) {
+ LLVMMetadataRef Scope, uint32_t AlignInBits) {
return wrap(unwrap(Builder)->createTypedef(
- unwrapDI<DIType>(Type), {Name, NameLen},
- unwrapDI<DIFile>(File), LineNo,
- unwrapDI<DIScope>(Scope)));
+ unwrapDI<DIType>(Type), {Name, NameLen}, unwrapDI<DIFile>(File), LineNo,
+ unwrapDI<DIScope>(Scope), AlignInBits));
}
LLVMMetadataRef
@@ -1290,7 +1289,7 @@ LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression(
return wrap(unwrap(Builder)->createGlobalVariableExpression(
unwrapDI<DIScope>(Scope), {Name, NameLen}, {Linkage, LinkLen},
unwrapDI<DIFile>(File), LineNo, unwrapDI<DIType>(Ty), LocalToUnit,
- unwrap<DIExpression>(Expr), unwrapDI<MDNode>(Decl),
+ true, unwrap<DIExpression>(Expr), unwrapDI<MDNode>(Decl),
nullptr, AlignInBits));
}
diff --git a/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp b/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp
index 94ec3abfa7a2..d3ecd9b0e03d 100644
--- a/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -23,6 +23,9 @@
using namespace llvm;
+const DIExpression::FragmentInfo DebugVariable::DefaultFragment = {
+ std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()};
+
DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
unsigned Column, ArrayRef<Metadata *> MDs,
bool ImplicitCode)
@@ -712,12 +715,12 @@ DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope,
DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *Scope,
MDString *Name, MDString *ConfigurationMacros,
- MDString *IncludePath, MDString *ISysRoot,
+ MDString *IncludePath, MDString *SysRoot,
StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(
- DIModule, (Scope, Name, ConfigurationMacros, IncludePath, ISysRoot));
- Metadata *Ops[] = {Scope, Name, ConfigurationMacros, IncludePath, ISysRoot};
+ DIModule, (Scope, Name, ConfigurationMacros, IncludePath, SysRoot));
+ Metadata *Ops[] = {Scope, Name, ConfigurationMacros, IncludePath, SysRoot};
DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(DIModule, Ops);
}
@@ -929,17 +932,22 @@ bool DIExpression::isValid() const {
}
bool DIExpression::isImplicit() const {
- unsigned N = getNumElements();
- if (isValid() && N > 0) {
- switch (getElement(N-1)) {
- case dwarf::DW_OP_stack_value:
- case dwarf::DW_OP_LLVM_tag_offset:
- return true;
- case dwarf::DW_OP_LLVM_fragment:
- return N > 1 && getElement(N-2) == dwarf::DW_OP_stack_value;
- default: break;
+ if (!isValid())
+ return false;
+
+ if (getNumElements() == 0)
+ return false;
+
+ for (const auto &It : expr_ops()) {
+ switch (It.getOp()) {
+ default:
+ break;
+ case dwarf::DW_OP_stack_value:
+ case dwarf::DW_OP_LLVM_tag_offset:
+ return true;
}
}
+
return false;
}
@@ -1013,6 +1021,8 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const {
const DIExpression *DIExpression::extractAddressClass(const DIExpression *Expr,
unsigned &AddrClass) {
+ // FIXME: This seems fragile. Nothing verifies that these elements
+ // actually map to ops and not operands.
const unsigned PatternSize = 4;
if (Expr->Elements.size() >= PatternSize &&
Expr->Elements[PatternSize - 4] == dwarf::DW_OP_constu &&
@@ -1141,10 +1151,14 @@ Optional<DIExpression *> DIExpression::createFragmentExpression(
for (auto Op : Expr->expr_ops()) {
switch (Op.getOp()) {
default: break;
+ case dwarf::DW_OP_shr:
+ case dwarf::DW_OP_shra:
+ case dwarf::DW_OP_shl:
case dwarf::DW_OP_plus:
+ case dwarf::DW_OP_plus_uconst:
case dwarf::DW_OP_minus:
- // We can't safely split arithmetic into multiple fragments because we
- // can't express carry-over between fragments.
+ // We can't safely split arithmetic or shift operations into multiple
+ // fragments because we can't express carry-over between fragments.
//
// FIXME: We *could* preserve the lowest fragment of a constant offset
// operation if the offset fits into SizeInBits.
@@ -1182,6 +1196,20 @@ bool DIExpression::isConstant() const {
return true;
}
+DIExpression::ExtOps DIExpression::getExtOps(unsigned FromSize, unsigned ToSize,
+ bool Signed) {
+ dwarf::TypeKind TK = Signed ? dwarf::DW_ATE_signed : dwarf::DW_ATE_unsigned;
+ DIExpression::ExtOps Ops{{dwarf::DW_OP_LLVM_convert, FromSize, TK,
+ dwarf::DW_OP_LLVM_convert, ToSize, TK}};
+ return Ops;
+}
+
+DIExpression *DIExpression::appendExt(const DIExpression *Expr,
+ unsigned FromSize, unsigned ToSize,
+ bool Signed) {
+ return appendToStack(Expr, getExtOps(FromSize, ToSize, Signed));
+}
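A minimal sketch of the new extension helper; Expr stands for any existing DIExpression:

#include "llvm/IR/DebugInfoMetadata.h"

llvm::DIExpression *widen(llvm::DIExpression *Expr) {
  // Appends DW_OP_LLVM_convert 32 DW_ATE_signed, DW_OP_LLVM_convert 64
  // DW_ATE_signed to the expression stack, as built by getExtOps() above.
  return llvm::DIExpression::appendExt(Expr, /*FromSize=*/32,
                                       /*ToSize=*/64, /*Signed=*/true);
}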
+
DIGlobalVariableExpression *
DIGlobalVariableExpression::getImpl(LLVMContext &Context, Metadata *Variable,
Metadata *Expression, StorageType Storage,
diff --git a/contrib/llvm-project/llvm/lib/IR/Dominators.cpp b/contrib/llvm-project/llvm/lib/IR/Dominators.cpp
index 910a41050b94..03dc4da273a3 100644
--- a/contrib/llvm-project/llvm/lib/IR/Dominators.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Dominators.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GenericDomTreeConstruction.h"
@@ -357,6 +358,11 @@ PreservedAnalyses DominatorTreeVerifierPass::run(Function &F,
//===----------------------------------------------------------------------===//
char DominatorTreeWrapperPass::ID = 0;
+
+DominatorTreeWrapperPass::DominatorTreeWrapperPass() : FunctionPass(ID) {
+ initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
INITIALIZE_PASS(DominatorTreeWrapperPass, "domtree",
"Dominator Tree Construction", true, true)
diff --git a/contrib/llvm-project/llvm/lib/IR/FPEnv.cpp b/contrib/llvm-project/llvm/lib/IR/FPEnv.cpp
new file mode 100644
index 000000000000..008852658232
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/IR/FPEnv.cpp
@@ -0,0 +1,78 @@
+//===-- FPEnv.cpp ---- FP Environment -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// This file contains the implementations of entities that describe the
+/// floating-point environment.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/FPEnv.h"
+
+namespace llvm {
+
+Optional<fp::RoundingMode> StrToRoundingMode(StringRef RoundingArg) {
+ // For dynamic rounding mode, we use round to nearest but we will set the
+ // 'exact' SDNodeFlag so that the value will not be rounded.
+ return StringSwitch<Optional<fp::RoundingMode>>(RoundingArg)
+ .Case("round.dynamic", fp::rmDynamic)
+ .Case("round.tonearest", fp::rmToNearest)
+ .Case("round.downward", fp::rmDownward)
+ .Case("round.upward", fp::rmUpward)
+ .Case("round.towardzero", fp::rmTowardZero)
+ .Default(None);
+}
+
+Optional<StringRef> RoundingModeToStr(fp::RoundingMode UseRounding) {
+ Optional<StringRef> RoundingStr = None;
+ switch (UseRounding) {
+ case fp::rmDynamic:
+ RoundingStr = "round.dynamic";
+ break;
+ case fp::rmToNearest:
+ RoundingStr = "round.tonearest";
+ break;
+ case fp::rmDownward:
+ RoundingStr = "round.downward";
+ break;
+ case fp::rmUpward:
+ RoundingStr = "round.upward";
+ break;
+ case fp::rmTowardZero:
+ RoundingStr = "round.towardzero";
+ break;
+ }
+ return RoundingStr;
+}
+
+Optional<fp::ExceptionBehavior> StrToExceptionBehavior(StringRef ExceptionArg) {
+ return StringSwitch<Optional<fp::ExceptionBehavior>>(ExceptionArg)
+ .Case("fpexcept.ignore", fp::ebIgnore)
+ .Case("fpexcept.maytrap", fp::ebMayTrap)
+ .Case("fpexcept.strict", fp::ebStrict)
+ .Default(None);
+}
+
+Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) {
+ Optional<StringRef> ExceptStr = None;
+ switch (UseExcept) {
+ case fp::ebStrict:
+ ExceptStr = "fpexcept.strict";
+ break;
+ case fp::ebIgnore:
+ ExceptStr = "fpexcept.ignore";
+ break;
+ case fp::ebMayTrap:
+ ExceptStr = "fpexcept.maytrap";
+ break;
+ }
+ return ExceptStr;
+}
+
+}
\ No newline at end of file
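A minimal round-trip through the new helpers, assuming they are exposed via llvm/IR/FPEnv.h (the header this file implements):

#include <cassert>
#include "llvm/IR/FPEnv.h"

void roundTrip() {
  llvm::Optional<llvm::fp::RoundingMode> RM =
      llvm::StrToRoundingMode("round.tonearest");
  assert(RM && *RM == llvm::fp::rmToNearest);
  llvm::Optional<llvm::StringRef> Str = llvm::RoundingModeToStr(*RM);
  assert(Str && *Str == "round.tonearest");
}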
diff --git a/contrib/llvm-project/llvm/lib/IR/Function.cpp b/contrib/llvm-project/llvm/lib/IR/Function.cpp
index 3f70d2c904e5..54612250b0d6 100644
--- a/contrib/llvm-project/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Function.cpp
@@ -30,8 +30,21 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsBPF.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/IntrinsicsMips.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
+#include "llvm/IR/IntrinsicsS390.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
+#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/IntrinsicsXCore.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
@@ -113,6 +126,11 @@ unsigned Argument::getParamAlignment() const {
return getParent()->getParamAlignment(getArgNo());
}
+MaybeAlign Argument::getParamAlign() const {
+ assert(getType()->isPointerTy() && "Only pointers have alignments");
+ return getParent()->getParamAlign(getArgNo());
+}
+
Type *Argument::getParamByValType() const {
assert(getType()->isPointerTy() && "Only pointers have byval types");
return getParent()->getParamByValType(getArgNo());
@@ -560,7 +578,8 @@ Intrinsic::ID Function::lookupIntrinsicID(StringRef Name) {
const auto MatchSize = strlen(NameTable[Idx]);
assert(Name.size() >= MatchSize && "Expected either exact or prefix match");
bool IsExactMatch = Name.size() == MatchSize;
- return IsExactMatch || isOverloaded(ID) ? ID : Intrinsic::not_intrinsic;
+ return IsExactMatch || Intrinsic::isOverloaded(ID) ? ID
+ : Intrinsic::not_intrinsic;
}
void Function::recalculateIntrinsicID() {
@@ -639,7 +658,7 @@ static std::string getMangledTypeStr(Type* Ty) {
StringRef Intrinsic::getName(ID id) {
assert(id < num_intrinsics && "Invalid intrinsic ID!");
- assert(!isOverloaded(id) &&
+ assert(!Intrinsic::isOverloaded(id) &&
"This version of getName does not support overloading");
return IntrinsicNameTable[id];
}
@@ -1533,6 +1552,11 @@ void Function::setEntryCount(ProfileCount Count,
auto PrevCount = getEntryCount();
assert(!PrevCount.hasValue() || PrevCount.getType() == Count.getType());
#endif
+
+ auto ImportGUIDs = getImportGUIDs();
+ if (S == nullptr && ImportGUIDs.size())
+ S = &ImportGUIDs;
+
MDBuilder MDB(getContext());
setMetadata(
LLVMContext::MD_prof,
diff --git a/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp b/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp
index b782012e9731..30b558a655cb 100644
--- a/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/IRBuilder.cpp
@@ -96,10 +96,10 @@ static InvokeInst *createInvokeHelper(Function *Invokee, BasicBlock *NormalDest,
return II;
}
-CallInst *IRBuilderBase::
-CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
- bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
- MDNode *NoAliasTag) {
+CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size,
+ MaybeAlign Align, bool isVolatile,
+ MDNode *TBAATag, MDNode *ScopeTag,
+ MDNode *NoAliasTag) {
Ptr = getCastedInt8PtrValue(Ptr);
Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)};
Type *Tys[] = { Ptr->getType(), Size->getType() };
@@ -108,8 +108,8 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
CallInst *CI = createCallHelper(TheFn, Ops, this);
- if (Align > 0)
- cast<MemSetInst>(CI)->setDestAlignment(Align);
+ if (Align)
+ cast<MemSetInst>(CI)->setDestAlignment(Align->value());
// Set the TBAA info if present.
if (TBAATag)
@@ -125,10 +125,8 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
}
CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet(
- Value *Ptr, Value *Val, Value *Size, unsigned Align, uint32_t ElementSize,
+ Value *Ptr, Value *Val, Value *Size, Align Alignment, uint32_t ElementSize,
MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) {
- assert(Align >= ElementSize &&
- "Pointer alignment must be at least element size.");
Ptr = getCastedInt8PtrValue(Ptr);
Value *Ops[] = {Ptr, Val, Size, getInt32(ElementSize)};
@@ -139,7 +137,7 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet(
CallInst *CI = createCallHelper(TheFn, Ops, this);
- cast<AtomicMemSetInst>(CI)->setDestAlignment(Align);
+ cast<AtomicMemSetInst>(CI)->setDestAlignment(Alignment);
// Set the TBAA info if present.
if (TBAATag)
@@ -154,12 +152,21 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet(
return CI;
}
-CallInst *IRBuilderBase::
-CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
- Value *Size, bool isVolatile, MDNode *TBAATag,
- MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) {
- assert((DstAlign == 0 || isPowerOf2_32(DstAlign)) && "Must be 0 or a power of 2");
- assert((SrcAlign == 0 || isPowerOf2_32(SrcAlign)) && "Must be 0 or a power of 2");
+CallInst *IRBuilderBase::CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src,
+ unsigned SrcAlign, Value *Size,
+ bool isVolatile, MDNode *TBAATag,
+ MDNode *TBAAStructTag, MDNode *ScopeTag,
+ MDNode *NoAliasTag) {
+ return CreateMemCpy(Dst, MaybeAlign(DstAlign), Src, MaybeAlign(SrcAlign),
+ Size, isVolatile, TBAATag, TBAAStructTag, ScopeTag,
+ NoAliasTag);
+}
+
+CallInst *IRBuilderBase::CreateMemCpy(Value *Dst, MaybeAlign DstAlign,
+ Value *Src, MaybeAlign SrcAlign,
+ Value *Size, bool isVolatile,
+ MDNode *TBAATag, MDNode *TBAAStructTag,
+ MDNode *ScopeTag, MDNode *NoAliasTag) {
Dst = getCastedInt8PtrValue(Dst);
Src = getCastedInt8PtrValue(Src);
@@ -171,10 +178,10 @@ CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
CallInst *CI = createCallHelper(TheFn, Ops, this);
auto* MCI = cast<MemCpyInst>(CI);
- if (DstAlign > 0)
- MCI->setDestAlignment(DstAlign);
- if (SrcAlign > 0)
- MCI->setSourceAlignment(SrcAlign);
+ if (DstAlign)
+ MCI->setDestAlignment(*DstAlign);
+ if (SrcAlign)
+ MCI->setSourceAlignment(*SrcAlign);
// Set the TBAA info if present.
if (TBAATag)
@@ -234,12 +241,11 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
return CI;
}
-CallInst *IRBuilderBase::
-CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
- Value *Size, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
- MDNode *NoAliasTag) {
- assert((DstAlign == 0 || isPowerOf2_32(DstAlign)) && "Must be 0 or a power of 2");
- assert((SrcAlign == 0 || isPowerOf2_32(SrcAlign)) && "Must be 0 or a power of 2");
+CallInst *IRBuilderBase::CreateMemMove(Value *Dst, MaybeAlign DstAlign,
+ Value *Src, MaybeAlign SrcAlign,
+ Value *Size, bool isVolatile,
+ MDNode *TBAATag, MDNode *ScopeTag,
+ MDNode *NoAliasTag) {
Dst = getCastedInt8PtrValue(Dst);
Src = getCastedInt8PtrValue(Src);
@@ -251,10 +257,10 @@ CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
CallInst *CI = createCallHelper(TheFn, Ops, this);
auto *MMI = cast<MemMoveInst>(CI);
- if (DstAlign > 0)
- MMI->setDestAlignment(DstAlign);
- if (SrcAlign > 0)
- MMI->setSourceAlignment(SrcAlign);
+ if (DstAlign)
+ MMI->setDestAlignment(*DstAlign);
+ if (SrcAlign)
+ MMI->setSourceAlignment(*SrcAlign);
// Set the TBAA info if present.
if (TBAATag)
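The memcpy/memmove builders now thread MaybeAlign through instead of a raw unsigned, with an empty MaybeAlign meaning "unknown". A hedged sketch of a caller against the new overload; the builder and operands are assumed to exist and 16 is an arbitrary example alignment:

    #include "llvm/IR/IRBuilder.h"

    llvm::CallInst *emitCopy(llvm::IRBuilder<> &Builder, llvm::Value *Dst,
                             llvm::Value *Src, llvm::Value *Size) {
      // Destination alignment known (16 bytes); source alignment unknown.
      return Builder.CreateMemCpy(Dst, llvm::MaybeAlign(16), Src,
                                  llvm::MaybeAlign(), Size,
                                  /*isVolatile=*/false);
    }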
diff --git a/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp b/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp
index 953cf9410162..03657ff8d9d4 100644
--- a/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/IRPrintingPasses.cpp
@@ -14,6 +14,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -56,7 +57,7 @@ PreservedAnalyses PrintFunctionPass::run(Function &F,
if (forcePrintModuleIR())
OS << Banner << " (function: " << F.getName() << ")\n" << *F.getParent();
else
- OS << Banner << static_cast<Value &>(F);
+ OS << Banner << '\n' << static_cast<Value &>(F);
}
return PreservedAnalyses::all();
}
@@ -109,28 +110,6 @@ public:
StringRef getPassName() const override { return "Print Function IR"; }
};
-class PrintBasicBlockPass : public BasicBlockPass {
- raw_ostream &Out;
- std::string Banner;
-
-public:
- static char ID;
- PrintBasicBlockPass() : BasicBlockPass(ID), Out(dbgs()) {}
- PrintBasicBlockPass(raw_ostream &Out, const std::string &Banner)
- : BasicBlockPass(ID), Out(Out), Banner(Banner) {}
-
- bool runOnBasicBlock(BasicBlock &BB) override {
- Out << Banner << BB;
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-
- StringRef getPassName() const override { return "Print BasicBlock IR"; }
-};
-
}
char PrintModulePassWrapper::ID = 0;
@@ -139,9 +118,6 @@ INITIALIZE_PASS(PrintModulePassWrapper, "print-module",
char PrintFunctionPassWrapper::ID = 0;
INITIALIZE_PASS(PrintFunctionPassWrapper, "print-function",
"Print function to stderr", false, true)
-char PrintBasicBlockPass::ID = 0;
-INITIALIZE_PASS(PrintBasicBlockPass, "print-bb", "Print BB to stderr", false,
- true)
ModulePass *llvm::createPrintModulePass(llvm::raw_ostream &OS,
const std::string &Banner,
@@ -154,15 +130,9 @@ FunctionPass *llvm::createPrintFunctionPass(llvm::raw_ostream &OS,
return new PrintFunctionPassWrapper(OS, Banner);
}
-BasicBlockPass *llvm::createPrintBasicBlockPass(llvm::raw_ostream &OS,
- const std::string &Banner) {
- return new PrintBasicBlockPass(OS, Banner);
-}
-
bool llvm::isIRPrintingPass(Pass *P) {
const char *PID = (const char*)P->getPassID();
- return (PID == &PrintModulePassWrapper::ID)
- || (PID == &PrintFunctionPassWrapper::ID)
- || (PID == &PrintBasicBlockPass::ID);
+ return (PID == &PrintModulePassWrapper::ID) ||
+ (PID == &PrintFunctionPassWrapper::ID);
}
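With print-bb and BasicBlockPass removed, a single block can still be dumped through the Value printing interface that the deleted pass wrapped. A small sketch, assuming the helper name:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/Support/raw_ostream.h"

    void printBlock(const llvm::BasicBlock &BB) {
      llvm::errs() << "; basic block: " << BB.getName() << "\n";
      BB.print(llvm::errs()); // what the removed pass's operator<< called
    }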
diff --git a/contrib/llvm-project/llvm/lib/IR/Instruction.cpp b/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
index b157c7bb34bf..7da169712896 100644
--- a/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
@@ -368,6 +368,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case InsertValue: return "insertvalue";
case LandingPad: return "landingpad";
case CleanupPad: return "cleanuppad";
+ case Freeze: return "freeze";
default: return "<Invalid operator> ";
}
diff --git a/contrib/llvm-project/llvm/lib/IR/Instructions.cpp b/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
index 245c7628b08e..c264277fa53c 100644
--- a/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
@@ -1217,30 +1217,31 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, const Twine &Name,
AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
const Twine &Name, Instruction *InsertBefore)
- : AllocaInst(Ty, AddrSpace, ArraySize, /*Align=*/0, Name, InsertBefore) {}
+ : AllocaInst(Ty, AddrSpace, ArraySize, /*Align=*/None, Name, InsertBefore) {
+}
AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
const Twine &Name, BasicBlock *InsertAtEnd)
- : AllocaInst(Ty, AddrSpace, ArraySize, /*Align=*/0, Name, InsertAtEnd) {}
+ : AllocaInst(Ty, AddrSpace, ArraySize, /*Align=*/None, Name, InsertAtEnd) {}
AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
- unsigned Align, const Twine &Name,
+ MaybeAlign Align, const Twine &Name,
Instruction *InsertBefore)
- : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertBefore),
- AllocatedType(Ty) {
+ : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore),
+ AllocatedType(Ty) {
setAlignment(MaybeAlign(Align));
assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
}
AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
- unsigned Align, const Twine &Name,
+ MaybeAlign Align, const Twine &Name,
BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertAtEnd),
+ : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd),
AllocatedType(Ty) {
- setAlignment(MaybeAlign(Align));
+ setAlignment(Align);
assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
}
@@ -1341,11 +1342,7 @@ void LoadInst::setAlignment(MaybeAlign Align) {
"Alignment is greater than MaximumAlignment!");
setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
(encode(Align) << 1));
- if (Align)
- assert(getAlignment() == Align->value() &&
- "Alignment representation error!");
- else
- assert(getAlignment() == 0 && "Alignment representation error!");
+ assert(getAlign() == Align && "Alignment representation error!");
}
//===----------------------------------------------------------------------===//
@@ -1415,16 +1412,12 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align,
AssertOK();
}
-void StoreInst::setAlignment(MaybeAlign Align) {
- assert((!Align || *Align <= MaximumAlignment) &&
+void StoreInst::setAlignment(MaybeAlign Alignment) {
+ assert((!Alignment || *Alignment <= MaximumAlignment) &&
"Alignment is greater than MaximumAlignment!");
setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
- (encode(Align) << 1));
- if (Align)
- assert(getAlignment() == Align->value() &&
- "Alignment representation error!");
- else
- assert(getAlignment() == 0 && "Alignment representation error!");
+ (encode(Alignment) << 1));
+ assert(getAlign() == Alignment && "Alignment representation error!");
}
//===----------------------------------------------------------------------===//
@@ -2047,7 +2040,7 @@ bool ShuffleVectorInst::isExtractSubvectorMask(ArrayRef<int> Mask,
SubIndex = Offset;
}
- if (0 <= SubIndex) {
+ if (0 <= SubIndex && SubIndex + (int)Mask.size() <= NumSrcElts) {
Index = SubIndex;
return true;
}
@@ -4091,6 +4084,22 @@ void IndirectBrInst::removeDestination(unsigned idx) {
}
//===----------------------------------------------------------------------===//
+// FreezeInst Implementation
+//===----------------------------------------------------------------------===//
+
+FreezeInst::FreezeInst(Value *S,
+ const Twine &Name, Instruction *InsertBefore)
+ : UnaryInstruction(S->getType(), Freeze, S, InsertBefore) {
+ setName(Name);
+}
+
+FreezeInst::FreezeInst(Value *S,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(S->getType(), Freeze, S, InsertAtEnd) {
+ setName(Name);
+}
+
+//===----------------------------------------------------------------------===//
// cloneImpl() implementations
//===----------------------------------------------------------------------===//
@@ -4126,9 +4135,9 @@ InsertValueInst *InsertValueInst::cloneImpl() const {
}
AllocaInst *AllocaInst::cloneImpl() const {
- AllocaInst *Result = new AllocaInst(getAllocatedType(),
- getType()->getAddressSpace(),
- (Value *)getOperand(0), getAlignment());
+ AllocaInst *Result =
+ new AllocaInst(getAllocatedType(), getType()->getAddressSpace(),
+ (Value *)getOperand(0), MaybeAlign(getAlignment()));
Result->setUsedWithInAlloca(isUsedWithInAlloca());
Result->setSwiftError(isSwiftError());
return Result;
@@ -4306,3 +4315,7 @@ UnreachableInst *UnreachableInst::cloneImpl() const {
LLVMContext &Context = getContext();
return new UnreachableInst(Context);
}
+
+FreezeInst *FreezeInst::cloneImpl() const {
+ return new FreezeInst(getOperand(0));
+}
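The new freeze instruction stops poison/undef propagation by pinning its operand to an arbitrary but fixed value. A sketch of creating one through IRBuilder, assuming CreateFreeze is available in this tree and that Builder and Cond come from surrounding code:

    #include "llvm/IR/IRBuilder.h"

    llvm::Value *freezeForBranch(llvm::IRBuilder<> &Builder,
                                 llvm::Value *Cond) {
      // Branching on a frozen value is well defined even if Cond was poison.
      return Builder.CreateFreeze(Cond, "cond.fr");
    }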
diff --git a/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp b/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
index 26ed46a9cd91..b23742b83c12 100644
--- a/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
@@ -102,8 +102,7 @@ Value *InstrProfIncrementInst::getStep() const {
return ConstantInt::get(Type::getInt64Ty(Context), 1);
}
-Optional<ConstrainedFPIntrinsic::RoundingMode>
-ConstrainedFPIntrinsic::getRoundingMode() const {
+Optional<fp::RoundingMode> ConstrainedFPIntrinsic::getRoundingMode() const {
unsigned NumOperands = getNumArgOperands();
Metadata *MD =
cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
@@ -112,43 +111,7 @@ ConstrainedFPIntrinsic::getRoundingMode() const {
return StrToRoundingMode(cast<MDString>(MD)->getString());
}
-Optional<ConstrainedFPIntrinsic::RoundingMode>
-ConstrainedFPIntrinsic::StrToRoundingMode(StringRef RoundingArg) {
- // For dynamic rounding mode, we use round to nearest but we will set the
- // 'exact' SDNodeFlag so that the value will not be rounded.
- return StringSwitch<Optional<RoundingMode>>(RoundingArg)
- .Case("round.dynamic", rmDynamic)
- .Case("round.tonearest", rmToNearest)
- .Case("round.downward", rmDownward)
- .Case("round.upward", rmUpward)
- .Case("round.towardzero", rmTowardZero)
- .Default(None);
-}
-
-Optional<StringRef>
-ConstrainedFPIntrinsic::RoundingModeToStr(RoundingMode UseRounding) {
- Optional<StringRef> RoundingStr = None;
- switch (UseRounding) {
- case ConstrainedFPIntrinsic::rmDynamic:
- RoundingStr = "round.dynamic";
- break;
- case ConstrainedFPIntrinsic::rmToNearest:
- RoundingStr = "round.tonearest";
- break;
- case ConstrainedFPIntrinsic::rmDownward:
- RoundingStr = "round.downward";
- break;
- case ConstrainedFPIntrinsic::rmUpward:
- RoundingStr = "round.upward";
- break;
- case ConstrainedFPIntrinsic::rmTowardZero:
- RoundingStr = "round.towardzero";
- break;
- }
- return RoundingStr;
-}
-
-Optional<ConstrainedFPIntrinsic::ExceptionBehavior>
+Optional<fp::ExceptionBehavior>
ConstrainedFPIntrinsic::getExceptionBehavior() const {
unsigned NumOperands = getNumArgOperands();
Metadata *MD =
@@ -158,59 +121,38 @@ ConstrainedFPIntrinsic::getExceptionBehavior() const {
return StrToExceptionBehavior(cast<MDString>(MD)->getString());
}
-Optional<ConstrainedFPIntrinsic::ExceptionBehavior>
-ConstrainedFPIntrinsic::StrToExceptionBehavior(StringRef ExceptionArg) {
- return StringSwitch<Optional<ExceptionBehavior>>(ExceptionArg)
- .Case("fpexcept.ignore", ebIgnore)
- .Case("fpexcept.maytrap", ebMayTrap)
- .Case("fpexcept.strict", ebStrict)
- .Default(None);
-}
-
-Optional<StringRef>
-ConstrainedFPIntrinsic::ExceptionBehaviorToStr(ExceptionBehavior UseExcept) {
- Optional<StringRef> ExceptStr = None;
- switch (UseExcept) {
- case ConstrainedFPIntrinsic::ebStrict:
- ExceptStr = "fpexcept.strict";
- break;
- case ConstrainedFPIntrinsic::ebIgnore:
- ExceptStr = "fpexcept.ignore";
- break;
- case ConstrainedFPIntrinsic::ebMayTrap:
- ExceptStr = "fpexcept.maytrap";
- break;
- }
- return ExceptStr;
+FCmpInst::Predicate
+ConstrainedFPCmpIntrinsic::getPredicate() const {
+ Metadata *MD =
+ cast<MetadataAsValue>(getArgOperand(2))->getMetadata();
+ if (!MD || !isa<MDString>(MD))
+ return FCmpInst::BAD_FCMP_PREDICATE;
+ return StringSwitch<FCmpInst::Predicate>(cast<MDString>(MD)->getString())
+ .Case("oeq", FCmpInst::FCMP_OEQ)
+ .Case("ogt", FCmpInst::FCMP_OGT)
+ .Case("oge", FCmpInst::FCMP_OGE)
+ .Case("olt", FCmpInst::FCMP_OLT)
+ .Case("ole", FCmpInst::FCMP_OLE)
+ .Case("one", FCmpInst::FCMP_ONE)
+ .Case("ord", FCmpInst::FCMP_ORD)
+ .Case("uno", FCmpInst::FCMP_UNO)
+ .Case("ueq", FCmpInst::FCMP_UEQ)
+ .Case("ugt", FCmpInst::FCMP_UGT)
+ .Case("uge", FCmpInst::FCMP_UGE)
+ .Case("ult", FCmpInst::FCMP_ULT)
+ .Case("ule", FCmpInst::FCMP_ULE)
+ .Case("une", FCmpInst::FCMP_UNE)
+ .Default(FCmpInst::BAD_FCMP_PREDICATE);
}
bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
default:
return false;
- case Intrinsic::experimental_constrained_fptosi:
- case Intrinsic::experimental_constrained_fptoui:
- case Intrinsic::experimental_constrained_fptrunc:
- case Intrinsic::experimental_constrained_fpext:
- case Intrinsic::experimental_constrained_sqrt:
- case Intrinsic::experimental_constrained_sin:
- case Intrinsic::experimental_constrained_cos:
- case Intrinsic::experimental_constrained_exp:
- case Intrinsic::experimental_constrained_exp2:
- case Intrinsic::experimental_constrained_log:
- case Intrinsic::experimental_constrained_log10:
- case Intrinsic::experimental_constrained_log2:
- case Intrinsic::experimental_constrained_lrint:
- case Intrinsic::experimental_constrained_llrint:
- case Intrinsic::experimental_constrained_rint:
- case Intrinsic::experimental_constrained_nearbyint:
- case Intrinsic::experimental_constrained_ceil:
- case Intrinsic::experimental_constrained_floor:
- case Intrinsic::experimental_constrained_lround:
- case Intrinsic::experimental_constrained_llround:
- case Intrinsic::experimental_constrained_round:
- case Intrinsic::experimental_constrained_trunc:
- return true;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC: \
+ return NARG == 1;
+#include "llvm/IR/ConstrainedOps.def"
}
}
@@ -218,8 +160,21 @@ bool ConstrainedFPIntrinsic::isTernaryOp() const {
switch (getIntrinsicID()) {
default:
return false;
- case Intrinsic::experimental_constrained_fma:
- return true;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC: \
+ return NARG == 3;
+#include "llvm/IR/ConstrainedOps.def"
+ }
+}
+
+bool ConstrainedFPIntrinsic::classof(const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+#define INSTRUCTION(NAME, NARGS, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC:
+#include "llvm/IR/ConstrainedOps.def"
+ return true;
+ default:
+ return false;
}
}
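The ConstrainedOps.def includes above use the classic X-macro pattern: one list of rows, expanded under different INSTRUCTION definitions to generate each switch. A self-contained sketch of the technique, with a hypothetical op list inlined as a macro for brevity:

    #define MY_OPS \
      MY_OP(Add, 2) \
      MY_OP(Fma, 3)

    enum class OpKind {
    #define MY_OP(NAME, NARG) NAME,
      MY_OPS
    #undef MY_OP
    };

    // Each expansion site redefines MY_OP for its own purpose.
    inline int arity(OpKind K) {
      switch (K) {
    #define MY_OP(NAME, NARG) case OpKind::NAME: return NARG;
      MY_OPS
    #undef MY_OP
      }
      return -1; // unreachable for valid enumerators
    }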
diff --git a/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp
index 5e8772186a2a..cb13b27aa50f 100644
--- a/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/LLVMContext.cpp
@@ -62,6 +62,11 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
"gc-transition operand bundle id drifted!");
(void)GCTransitionEntry;
+ auto *CFGuardTargetEntry = pImpl->getOrInsertBundleTag("cfguardtarget");
+ assert(CFGuardTargetEntry->second == LLVMContext::OB_cfguardtarget &&
+ "cfguardtarget operand bundle id drifted!");
+ (void)CFGuardTargetEntry;
+
SyncScope::ID SingleThreadSSID =
pImpl->getOrInsertSyncScopeID("singlethread");
assert(SingleThreadSSID == SyncScope::SingleThread &&
diff --git a/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h
index 78cf707e0e74..6f5d5752b38d 100644
--- a/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h
+++ b/contrib/llvm-project/llvm/lib/IR/LLVMContextImpl.h
@@ -819,27 +819,27 @@ template <> struct MDNodeKeyImpl<DIModule> {
MDString *Name;
MDString *ConfigurationMacros;
MDString *IncludePath;
- MDString *ISysRoot;
+ MDString *SysRoot;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *ConfigurationMacros,
- MDString *IncludePath, MDString *ISysRoot)
+ MDString *IncludePath, MDString *SysRoot)
: Scope(Scope), Name(Name), ConfigurationMacros(ConfigurationMacros),
- IncludePath(IncludePath), ISysRoot(ISysRoot) {}
+ IncludePath(IncludePath), SysRoot(SysRoot) {}
MDNodeKeyImpl(const DIModule *N)
: Scope(N->getRawScope()), Name(N->getRawName()),
ConfigurationMacros(N->getRawConfigurationMacros()),
- IncludePath(N->getRawIncludePath()), ISysRoot(N->getRawISysRoot()) {}
+ IncludePath(N->getRawIncludePath()), SysRoot(N->getRawSysRoot()) {}
bool isKeyOf(const DIModule *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
ConfigurationMacros == RHS->getRawConfigurationMacros() &&
IncludePath == RHS->getRawIncludePath() &&
- ISysRoot == RHS->getRawISysRoot();
+ SysRoot == RHS->getRawSysRoot();
}
unsigned getHashValue() const {
return hash_combine(Scope, Name,
- ConfigurationMacros, IncludePath, ISysRoot);
+ ConfigurationMacros, IncludePath, SysRoot);
}
};
diff --git a/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
index 3a03c493100b..90239bb76298 100644
--- a/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
@@ -314,64 +314,6 @@ void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
OS << "'\n";
}
-
-namespace {
-//===----------------------------------------------------------------------===//
-// BBPassManager
-//
-/// BBPassManager manages BasicBlockPass. It batches all the
-/// pass together and sequence them to process one basic block before
-/// processing next basic block.
-class BBPassManager : public PMDataManager, public FunctionPass {
-
-public:
- static char ID;
- explicit BBPassManager()
- : PMDataManager(), FunctionPass(ID) {}
-
- /// Execute all of the passes scheduled for execution. Keep track of
- /// whether any of the passes modifies the function, and if so, return true.
- bool runOnFunction(Function &F) override;
-
- /// Pass Manager itself does not invalidate any analysis info.
- void getAnalysisUsage(AnalysisUsage &Info) const override {
- Info.setPreservesAll();
- }
-
- bool doInitialization(Module &M) override;
- bool doInitialization(Function &F);
- bool doFinalization(Module &M) override;
- bool doFinalization(Function &F);
-
- PMDataManager *getAsPMDataManager() override { return this; }
- Pass *getAsPass() override { return this; }
-
- StringRef getPassName() const override { return "BasicBlock Pass Manager"; }
-
- // Print passes managed by this manager
- void dumpPassStructure(unsigned Offset) override {
- dbgs().indent(Offset*2) << "BasicBlockPass Manager\n";
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- BP->dumpPassStructure(Offset + 1);
- dumpLastUses(BP, Offset+1);
- }
- }
-
- BasicBlockPass *getContainedPass(unsigned N) {
- assert(N < PassVector.size() && "Pass number out of range!");
- BasicBlockPass *BP = static_cast<BasicBlockPass *>(PassVector[N]);
- return BP;
- }
-
- PassManagerType getPassManagerType() const override {
- return PMT_BasicBlockPassManager;
- }
-};
-
-char BBPassManager::ID = 0;
-} // End anonymous namespace
-
namespace llvm {
namespace legacy {
//===----------------------------------------------------------------------===//
@@ -434,6 +376,11 @@ public:
FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]);
return FP;
}
+
+ void dumpPassStructure(unsigned Offset) override {
+ for (unsigned I = 0; I < getNumContainedManagers(); ++I)
+ getContainedManager(I)->dumpPassStructure(Offset);
+ }
};
void FunctionPassManagerImpl::anchor() {}
@@ -1249,9 +1196,6 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
break;
}
switch (S2) {
- case ON_BASICBLOCK_MSG:
- dbgs() << "' on BasicBlock '" << Msg << "'...\n";
- break;
case ON_FUNCTION_MSG:
dbgs() << "' on Function '" << Msg << "'...\n";
break;
@@ -1368,117 +1312,6 @@ Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
}
//===----------------------------------------------------------------------===//
-// BBPassManager implementation
-
-/// Execute all of the passes scheduled for execution by invoking
-/// runOnBasicBlock method. Keep track of whether any of the passes modifies
-/// the function, and if so, return true.
-bool BBPassManager::runOnFunction(Function &F) {
- if (F.isDeclaration())
- return false;
-
- bool Changed = doInitialization(F);
- Module &M = *F.getParent();
-
- unsigned InstrCount, BBSize = 0;
- StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
- bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
- if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
-
- for (BasicBlock &BB : F) {
- // Collect the initial size of the basic block.
- if (EmitICRemark)
- BBSize = BB.size();
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- bool LocalChanged = false;
-
- dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, BB.getName());
- dumpRequiredSet(BP);
-
- initializeAnalysisImpl(BP);
-
- {
- // If the pass crashes, remember this.
- PassManagerPrettyStackEntry X(BP, BB);
- TimeRegion PassTimer(getPassTimer(BP));
- LocalChanged |= BP->runOnBasicBlock(BB);
- if (EmitICRemark) {
- unsigned NewSize = BB.size();
- // Update the size of the basic block, emit a remark, and update the
- // size of the module.
- if (NewSize != BBSize) {
- int64_t Delta =
- static_cast<int64_t>(NewSize) - static_cast<int64_t>(BBSize);
- emitInstrCountChangedRemark(BP, M, Delta, InstrCount,
- FunctionToInstrCount, &F);
- InstrCount = static_cast<int64_t>(InstrCount) + Delta;
- BBSize = NewSize;
- }
- }
- }
-
- Changed |= LocalChanged;
- if (LocalChanged)
- dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
- BB.getName());
- dumpPreservedSet(BP);
- dumpUsedSet(BP);
-
- verifyPreservedAnalysis(BP);
- removeNotPreservedAnalysis(BP);
- recordAvailableAnalysis(BP);
- removeDeadPasses(BP, BB.getName(), ON_BASICBLOCK_MSG);
- }
- }
-
- return doFinalization(F) || Changed;
-}
-
-// Implement doInitialization and doFinalization
-bool BBPassManager::doInitialization(Module &M) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
- Changed |= getContainedPass(Index)->doInitialization(M);
-
- return Changed;
-}
-
-bool BBPassManager::doFinalization(Module &M) {
- bool Changed = false;
-
- for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
- Changed |= getContainedPass(Index)->doFinalization(M);
-
- return Changed;
-}
-
-bool BBPassManager::doInitialization(Function &F) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- Changed |= BP->doInitialization(F);
- }
-
- return Changed;
-}
-
-bool BBPassManager::doFinalization(Function &F) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- Changed |= BP->doFinalization(F);
- }
-
- return Changed;
-}
-
-
-//===----------------------------------------------------------------------===//
// FunctionPassManager implementation
/// Create new Function pass manager
@@ -1793,13 +1626,12 @@ MPPassManager::runOnModule(Module &M) {
/// RequiredPass is run on the fly by Pass Manager when P requests it
/// through getAnalysis interface.
void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ assert(RequiredPass && "No required pass?");
assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
"Unable to handle Pass that requires lower level Analysis pass");
assert((P->getPotentialPassManagerType() <
RequiredPass->getPotentialPassManagerType()) &&
"Unable to handle Pass that requires lower level Analysis pass");
- if (!RequiredPass)
- return;
FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
if (!FPP) {
@@ -1944,114 +1776,42 @@ LLVM_DUMP_METHOD void PMStack::dump() const {
void ModulePass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
// Find Module Pass Manager
- while (!PMS.empty()) {
- PassManagerType TopPMType = PMS.top()->getPassManagerType();
- if (TopPMType == PreferredType)
- break; // We found desired pass manager
- else if (TopPMType > PMT_ModulePassManager)
- PMS.pop(); // Pop children pass managers
- else
- break;
- }
- assert(!PMS.empty() && "Unable to find appropriate Pass Manager");
+ PassManagerType T;
+ while ((T = PMS.top()->getPassManagerType()) > PMT_ModulePassManager &&
+ T != PreferredType)
+ PMS.pop();
PMS.top()->add(this);
}
/// Find appropriate Function Pass Manager or Call Graph Pass Manager
/// in the PM Stack and add self into that manager.
void FunctionPass::assignPassManager(PMStack &PMS,
- PassManagerType PreferredType) {
-
+ PassManagerType /*PreferredType*/) {
// Find Function Pass Manager
- while (!PMS.empty()) {
- if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
- PMS.pop();
- else
- break;
- }
+ PMDataManager *PM;
+ while (PM = PMS.top(), PM->getPassManagerType() > PMT_FunctionPassManager)
+ PMS.pop();
// Create new Function Pass Manager if needed.
- FPPassManager *FPP;
- if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) {
- FPP = (FPPassManager *)PMS.top();
- } else {
- assert(!PMS.empty() && "Unable to create Function Pass Manager");
- PMDataManager *PMD = PMS.top();
-
+ if (PM->getPassManagerType() != PMT_FunctionPassManager) {
// [1] Create new Function Pass Manager
- FPP = new FPPassManager();
+ auto *FPP = new FPPassManager;
FPP->populateInheritedAnalysis(PMS);
// [2] Set up new manager's top level manager
- PMTopLevelManager *TPM = PMD->getTopLevelManager();
- TPM->addIndirectPassManager(FPP);
+ PM->getTopLevelManager()->addIndirectPassManager(FPP);
// [3] Assign manager to manage this new manager. This may create
// and push new managers into PMS
- FPP->assignPassManager(PMS, PMD->getPassManagerType());
+ FPP->assignPassManager(PMS, PM->getPassManagerType());
// [4] Push new manager into PMS
PMS.push(FPP);
+ PM = FPP;
}
// Assign FPP as the manager of this pass.
- FPP->add(this);
-}
-
-void BasicBlockPass::preparePassManager(PMStack &PMS) {
- // Find BBPassManager
- while (!PMS.empty() &&
- PMS.top()->getPassManagerType() > PMT_BasicBlockPassManager)
- PMS.pop();
-
- // If this pass is destroying high level information that is used
- // by other passes that are managed by BBPM then do not insert
- // this pass in current BBPM. Use new BBPassManager.
- if (PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager &&
- !PMS.top()->preserveHigherLevelAnalysis(this))
- PMS.pop();
-}
-
-/// Find appropriate Basic Pass Manager or Call Graph Pass Manager
-/// in the PM Stack and add self into that manager.
-void BasicBlockPass::assignPassManager(PMStack &PMS,
- PassManagerType PreferredType) {
- while (!PMS.empty() &&
- PMS.top()->getPassManagerType() > PMT_BasicBlockPassManager)
- PMS.pop();
-
- BBPassManager *BBP;
-
- // Basic Pass Manager is a leaf pass manager. It does not handle
- // any other pass manager.
- if (!PMS.empty() &&
- PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
- BBP = (BBPassManager *)PMS.top();
- } else {
- // If leaf manager is not Basic Block Pass manager then create new
- // basic Block Pass manager.
- assert(!PMS.empty() && "Unable to create BasicBlock Pass Manager");
- PMDataManager *PMD = PMS.top();
-
- // [1] Create new Basic Block Manager
- BBP = new BBPassManager();
- BBP->populateInheritedAnalysis(PMS);
-
- // [2] Set up new manager's top level manager
- // Basic Block Pass Manager does not live by itself
- PMTopLevelManager *TPM = PMD->getTopLevelManager();
- TPM->addIndirectPassManager(BBP);
-
- // [3] Assign manager to manage this new manager. This may create
- // and push new managers into PMS
- BBP->assignPassManager(PMS, PreferredType);
-
- // [4] Push new manager into PMS
- PMS.push(BBP);
- }
-
- // Assign BBP as the manager of this pass.
- BBP->add(this);
+ PM->add(this);
}
PassManagerBase::~PassManagerBase() {}
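Former BasicBlockPass users migrate to a FunctionPass that iterates blocks itself. A hedged sketch of the replacement shape; the class name and per-block hook are assumptions:

    #include "llvm/IR/Function.h"
    #include "llvm/Pass.h"

    struct BlockVisitor : llvm::FunctionPass {
      static char ID;
      BlockVisitor() : llvm::FunctionPass(ID) {}
      bool runOnFunction(llvm::Function &F) override {
        bool Changed = false;
        for (llvm::BasicBlock &BB : F)
          Changed |= visitBlock(BB); // work formerly in runOnBasicBlock
        return Changed;
      }
      bool visitBlock(llvm::BasicBlock &BB) { (void)BB; return false; }
    };
    char BlockVisitor::ID = 0;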
diff --git a/contrib/llvm-project/llvm/lib/IR/Metadata.cpp b/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
index 62c2aa86f3b0..de092894d30c 100644
--- a/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Metadata.cpp
@@ -489,7 +489,9 @@ void *MDNode::operator new(size_t Size, unsigned NumOps) {
return Ptr;
}
-void MDNode::operator delete(void *Mem) {
+// Repress memory sanitization, due to use-after-destroy by operator
+// delete. Bug report 24578 identifies this issue.
+LLVM_NO_SANITIZE_MEMORY_ATTRIBUTE void MDNode::operator delete(void *Mem) {
MDNode *N = static_cast<MDNode *>(Mem);
size_t OpSize = N->NumOperands * sizeof(MDOperand);
OpSize = alignTo(OpSize, alignof(uint64_t));
@@ -1260,6 +1262,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
void Instruction::setAAMetadata(const AAMDNodes &N) {
setMetadata(LLVMContext::MD_tbaa, N.TBAA);
+ setMetadata(LLVMContext::MD_tbaa_struct, N.TBAAStruct);
setMetadata(LLVMContext::MD_alias_scope, N.Scope);
setMetadata(LLVMContext::MD_noalias, N.NoAlias);
}
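setAAMetadata now carries tbaa.struct alongside the other alias-analysis nodes, so cloning AA metadata between instructions remains a two-call affair. A sketch, assuming the helper name:

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Metadata.h"

    void copyAAMetadata(const llvm::Instruction &From, llvm::Instruction &To) {
      llvm::AAMDNodes N;
      From.getAAMetadata(N); // fills TBAA, TBAAStruct, Scope, NoAlias
      To.setAAMetadata(N);
    }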
diff --git a/contrib/llvm-project/llvm/lib/IR/Module.cpp b/contrib/llvm-project/llvm/lib/IR/Module.cpp
index 25efd009194f..271ae126d722 100644
--- a/contrib/llvm-project/llvm/lib/IR/Module.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Module.cpp
@@ -381,6 +381,22 @@ void Module::debug_compile_units_iterator::SkipNoDebugCUs() {
++Idx;
}
+iterator_range<Module::global_object_iterator> Module::global_objects() {
+ return concat<GlobalObject>(functions(), globals());
+}
+iterator_range<Module::const_global_object_iterator>
+Module::global_objects() const {
+ return concat<const GlobalObject>(functions(), globals());
+}
+
+iterator_range<Module::global_value_iterator> Module::global_values() {
+ return concat<GlobalValue>(functions(), globals(), aliases(), ifuncs());
+}
+iterator_range<Module::const_global_value_iterator>
+Module::global_values() const {
+ return concat<const GlobalValue>(functions(), globals(), aliases(), ifuncs());
+}
+
//===----------------------------------------------------------------------===//
// Methods to control the materialization of GlobalValues in the Module.
//
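The new range helpers splice the underlying lists with concat, so clients can walk every global object or global value in one loop. A sketch, with the counting helper assumed:

    #include "llvm/IR/Module.h"

    unsigned countGlobalValues(llvm::Module &M) {
      unsigned N = 0;
      // functions, globals, aliases and ifuncs in a single range
      for (llvm::GlobalValue &GV : M.global_values()) {
        (void)GV;
        ++N;
      }
      return N;
    }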
diff --git a/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp b/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp
index 9f347d8da01d..180f96269a13 100644
--- a/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -26,6 +27,10 @@ STATISTIC(ReadOnlyLiveGVars,
STATISTIC(WriteOnlyLiveGVars,
"Number of live global variables marked write only");
+static cl::opt<bool> PropagateAttrs("propagate-attrs", cl::init(true),
+ cl::Hidden,
+ cl::desc("Propagate attributes in index"));
+
FunctionSummary FunctionSummary::ExternalNode =
FunctionSummary::makeDummyFunctionSummary({});
@@ -61,6 +66,8 @@ std::pair<unsigned, unsigned> FunctionSummary::specialRefCounts() const {
return {RORefCnt, WORefCnt};
}
+constexpr uint64_t ModuleSummaryIndex::BitcodeSummaryVersion;
+
// Collect for the given module the list of function it defines
// (GUID -> Summary).
void ModuleSummaryIndex::collectDefinedFunctionsForModule(
@@ -155,6 +162,8 @@ static void propagateAttributesToRefs(GlobalValueSummary *S) {
// See internalizeGVsAfterImport.
void ModuleSummaryIndex::propagateAttributes(
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ if (!PropagateAttrs)
+ return;
for (auto &P : *this)
for (auto &S : P.second.SummaryList) {
if (!isGlobalValueLive(S.get()))
@@ -172,14 +181,16 @@ void ModuleSummaryIndex::propagateAttributes(
// assembly leading it to be in the @llvm.*used).
if (auto *GVS = dyn_cast<GlobalVarSummary>(S->getBaseObject()))
// Here we intentionally pass S.get() not GVS, because S could be
- // an alias.
- if (!canImportGlobalVar(S.get()) ||
+ // an alias. We don't analyze references here, because we have to
+ // know exactly if GV is readonly to do so.
+ if (!canImportGlobalVar(S.get(), /* AnalyzeRefs */ false) ||
GUIDPreservedSymbols.count(P.first)) {
GVS->setReadOnly(false);
GVS->setWriteOnly(false);
}
propagateAttributesToRefs(S.get());
}
+ setWithAttributePropagation();
if (llvm::AreStatisticsEnabled())
for (auto &P : *this)
if (P.second.SummaryList.size())
@@ -193,6 +204,37 @@ void ModuleSummaryIndex::propagateAttributes(
}
}
+bool ModuleSummaryIndex::canImportGlobalVar(GlobalValueSummary *S,
+ bool AnalyzeRefs) const {
+ auto HasRefsPreventingImport = [this](const GlobalVarSummary *GVS) {
+ // We don't analyze GV references during attribute propagation, so
+ // GV with non-trivial initializer can be marked either read or
+ // write-only.
+ // Importing the definition of a readonly GV with a non-trivial initializer
+ // allows us to do some extra optimizations (like converting indirect
+ // calls to direct).
+ // Definition of writeonly GV with non-trivial initializer should also
+ // be imported. Not doing so will result in:
+ // a) GV internalization in source module (because it's writeonly)
+ // b) Importing of GV declaration to destination module as a result
+ // of promotion.
+ // c) Link error (external declaration with internal definition).
+ // However, we do not promote objects referenced by a writeonly GV's
+ // initializer; that initializer is instead converted to 'zeroinitializer'.
+ return !isReadOnly(GVS) && !isWriteOnly(GVS) && GVS->refs().size();
+ };
+ auto *GVS = cast<GlobalVarSummary>(S->getBaseObject());
+
+ // Global variable with non-trivial initializer can be imported
+ // if it's readonly. This gives us extra opportunities for constant
+ // folding and converting indirect calls to direct calls. We don't
+ // analyze GV references during attribute propagation, because we
+ // don't know yet if it is readonly or not.
+ return !GlobalValue::isInterposableLinkage(S->linkage()) &&
+ !S->notEligibleToImport() &&
+ (!AnalyzeRefs || !HasRefsPreventingImport(GVS));
+}
+
// TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot)
// then delete this function and update its tests
LLVM_DUMP_METHOD
@@ -202,7 +244,7 @@ void ModuleSummaryIndex::dumpSCCs(raw_ostream &O) {
!I.isAtEnd(); ++I) {
O << "SCC (" << utostr(I->size()) << " node" << (I->size() == 1 ? "" : "s")
<< ") {\n";
- for (const ValueInfo V : *I) {
+ for (const ValueInfo &V : *I) {
FunctionSummary *F = nullptr;
if (V.getSummaryList().size())
F = cast<FunctionSummary>(V.getSummaryList().front().get());
@@ -298,7 +340,7 @@ static std::string fflagsToString(FunctionSummary::FFlags F) {
auto FlagValue = [](unsigned V) { return V ? '1' : '0'; };
char FlagRep[] = {FlagValue(F.ReadNone), FlagValue(F.ReadOnly),
FlagValue(F.NoRecurse), FlagValue(F.ReturnDoesNotAlias),
- FlagValue(F.NoInline), 0};
+ FlagValue(F.NoInline), FlagValue(F.AlwaysInline), 0};
return FlagRep;
}
@@ -363,7 +405,9 @@ static bool hasWriteOnlyFlag(const GlobalValueSummary *S) {
return false;
}
-void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
+void ModuleSummaryIndex::exportToDot(
+ raw_ostream &OS,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) const {
std::vector<Edge> CrossModuleEdges;
DenseMap<GlobalValue::GUID, std::vector<uint64_t>> NodeMap;
using GVSOrderedMapTy = std::map<GlobalValue::GUID, GlobalValueSummary *>;
@@ -443,6 +487,8 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
A.addComment("dsoLocal");
if (Flags.CanAutoHide)
A.addComment("canAutoHide");
+ if (GUIDPreservedSymbols.count(SummaryIt.first))
+ A.addComment("preserved");
auto VI = getValueInfo(SummaryIt.first);
A.add("label", getNodeLabel(VI, SummaryIt.second));
diff --git a/contrib/llvm-project/llvm/lib/IR/Pass.cpp b/contrib/llvm-project/llvm/lib/IR/Pass.cpp
index 699a7e17c0cb..dbdbbf4cf35e 100644
--- a/contrib/llvm-project/llvm/lib/IR/Pass.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Pass.cpp
@@ -176,51 +176,6 @@ bool FunctionPass::skipFunction(const Function &F) const {
return false;
}
-//===----------------------------------------------------------------------===//
-// BasicBlockPass Implementation
-//
-
-Pass *BasicBlockPass::createPrinterPass(raw_ostream &OS,
- const std::string &Banner) const {
- return createPrintBasicBlockPass(OS, Banner);
-}
-
-bool BasicBlockPass::doInitialization(Function &) {
- // By default, don't do anything.
- return false;
-}
-
-bool BasicBlockPass::doFinalization(Function &) {
- // By default, don't do anything.
- return false;
-}
-
-static std::string getDescription(const BasicBlock &BB) {
- return "basic block (" + BB.getName().str() + ") in function (" +
- BB.getParent()->getName().str() + ")";
-}
-
-bool BasicBlockPass::skipBasicBlock(const BasicBlock &BB) const {
- const Function *F = BB.getParent();
- if (!F)
- return false;
- OptPassGate &Gate = F->getContext().getOptPassGate();
- if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(BB)))
- return true;
- if (F->hasOptNone()) {
- // Report this only once per function.
- if (&BB == &F->getEntryBlock())
- LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName()
- << "' on function " << F->getName() << "\n");
- return true;
- }
- return false;
-}
-
-PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
- return PMT_BasicBlockPassManager;
-}
-
const PassInfo *Pass::lookupPassInfo(const void *TI) {
return PassRegistry::getPassRegistry()->getPassInfo(TI);
}
diff --git a/contrib/llvm-project/llvm/lib/IR/RemarkStreamer.cpp b/contrib/llvm-project/llvm/lib/IR/RemarkStreamer.cpp
index 0fcc06b961f3..cdbcc4f456c5 100644
--- a/contrib/llvm-project/llvm/lib/IR/RemarkStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/RemarkStreamer.cpp
@@ -18,9 +18,17 @@
#include "llvm/Remarks/BitstreamRemarkSerializer.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<cl::boolOrDefault> EnableRemarksSection(
+ "remarks-section",
+ cl::desc(
+ "Emit a section containing remark diagnostics metadata. By default, "
+ "this is enabled for the following formats: yaml-strtab, bitstream."),
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
RemarkStreamer::RemarkStreamer(
std::unique_ptr<remarks::RemarkSerializer> RemarkSerializer,
Optional<StringRef> FilenameIn)
@@ -104,6 +112,31 @@ void RemarkStreamer::emit(const DiagnosticInfoOptimizationBase &Diag) {
RemarkSerializer->emit(R);
}
+bool RemarkStreamer::needsSection() const {
+ if (EnableRemarksSection == cl::BOU_TRUE)
+ return true;
+
+ if (EnableRemarksSection == cl::BOU_FALSE)
+ return false;
+
+ assert(EnableRemarksSection == cl::BOU_UNSET);
+
+ // We only need a section if we're in separate mode.
+ if (RemarkSerializer->Mode != remarks::SerializerMode::Separate)
+ return false;
+
+ // Only some formats need a section:
+ // * bitstream
+ // * yaml-strtab
+ switch (RemarkSerializer->SerializerFormat) {
+ case remarks::Format::YAMLStrTab:
+ case remarks::Format::Bitstream:
+ return true;
+ default:
+ return false;
+ }
+}
+
char RemarkSetupFileError::ID = 0;
char RemarkSetupPatternError::ID = 0;
char RemarkSetupFormatError::ID = 0;
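needsSection keys off cl::boolOrDefault, a tri-state distinguishing "user said yes", "user said no", and "unset, pick a default". A sketch of the resolve idiom, helper name assumed:

    #include "llvm/Support/CommandLine.h"

    bool resolveOrDefault(llvm::cl::boolOrDefault V, bool Default) {
      switch (V) {
      case llvm::cl::BOU_TRUE:  return true;    // forced on
      case llvm::cl::BOU_FALSE: return false;   // forced off
      case llvm::cl::BOU_UNSET: return Default; // format-dependent above
      }
      return Default;
    }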
diff --git a/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp b/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp
index c90347ec48fd..f9578394a827 100644
--- a/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -30,6 +30,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/IR/SafepointIRVerifier.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetOperations.h"
@@ -38,14 +39,14 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/SafepointIRVerifier.h"
#include "llvm/IR/Statepoint.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "safepoint-ir-verifier"
diff --git a/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp b/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp
index 2e2c194860cd..403ae45756a1 100644
--- a/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp
@@ -77,7 +77,7 @@ void TypeFinder::run(const Module &M, bool onlyNamed) {
}
for (const auto &NMD : M.named_metadata())
- for (const auto &MDOp : NMD.operands())
+ for (const auto *MDOp : NMD.operands())
incorporateMDNode(MDOp);
}
diff --git a/contrib/llvm-project/llvm/lib/IR/User.cpp b/contrib/llvm-project/llvm/lib/IR/User.cpp
index 33a3686c94a1..4a3eba9e8cf7 100644
--- a/contrib/llvm-project/llvm/lib/IR/User.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/User.cpp
@@ -162,7 +162,9 @@ void *User::operator new(size_t Size) {
// User operator delete Implementation
//===----------------------------------------------------------------------===//
-void User::operator delete(void *Usr) {
+// Repress memory sanitization, due to use-after-destroy by operator
+// delete. Bug report 24578 identifies this issue.
+LLVM_NO_SANITIZE_MEMORY_ATTRIBUTE void User::operator delete(void *Usr) {
// Hung off uses use a single Use* before the User, while other subclasses
// use a Use[] allocated prior to the user.
User *Obj = static_cast<User *>(Usr);
diff --git a/contrib/llvm-project/llvm/lib/IR/Value.cpp b/contrib/llvm-project/llvm/lib/IR/Value.cpp
index 3c8a5b536695..cf9d08f6fc02 100644
--- a/contrib/llvm-project/llvm/lib/IR/Value.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Value.cpp
@@ -13,8 +13,8 @@
#include "llvm/IR/Value.h"
#include "LLVMContextImpl.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -29,6 +29,7 @@
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
diff --git a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
index b17fc433ed74..d15b70d71b47 100644
--- a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
@@ -86,6 +86,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -96,6 +97,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
@@ -1506,7 +1508,6 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
case Attribute::NoCfCheck:
case Attribute::NoUnwind:
case Attribute::NoInline:
- case Attribute::NoFree:
case Attribute::AlwaysInline:
case Attribute::OptimizeForSize:
case Attribute::StackProtect:
@@ -1555,7 +1556,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
/// arguments.
static bool isFuncOrArgAttr(Attribute::AttrKind Kind) {
return Kind == Attribute::ReadOnly || Kind == Attribute::WriteOnly ||
- Kind == Attribute::ReadNone;
+ Kind == Attribute::ReadNone || Kind == Attribute::NoFree;
}
void Verifier::verifyAttributeTypes(AttributeSet Attrs, bool IsFunction,
@@ -1702,11 +1703,12 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
!RetAttrs.hasAttribute(Attribute::Nest) &&
!RetAttrs.hasAttribute(Attribute::StructRet) &&
!RetAttrs.hasAttribute(Attribute::NoCapture) &&
+ !RetAttrs.hasAttribute(Attribute::NoFree) &&
!RetAttrs.hasAttribute(Attribute::Returned) &&
!RetAttrs.hasAttribute(Attribute::InAlloca) &&
!RetAttrs.hasAttribute(Attribute::SwiftSelf) &&
!RetAttrs.hasAttribute(Attribute::SwiftError)),
- "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', "
+ "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', 'nofree'"
"'returned', 'swiftself', and 'swifterror' do not apply to return "
"values!",
V);
@@ -1842,6 +1844,25 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
if (Args.second && !CheckParam("number of elements", *Args.second))
return;
}
+
+ if (Attrs.hasFnAttribute("frame-pointer")) {
+ StringRef FP = Attrs.getAttribute(AttributeList::FunctionIndex,
+ "frame-pointer").getValueAsString();
+ if (FP != "all" && FP != "non-leaf" && FP != "none")
+ CheckFailed("invalid value for 'frame-pointer' attribute: " + FP, V);
+ }
+
+ if (Attrs.hasFnAttribute("patchable-function-entry")) {
+ StringRef S0 = Attrs
+ .getAttribute(AttributeList::FunctionIndex,
+ "patchable-function-entry")
+ .getValueAsString();
+ StringRef S = S0;
+ unsigned N;
+ if (S.getAsInteger(10, N))
+ CheckFailed(
+ "\"patchable-function-entry\" takes an unsigned integer: " + S0, V);
+ }
}
void Verifier::verifyFunctionMetadata(
@@ -2975,10 +2996,10 @@ void Verifier::visitCallBase(CallBase &Call) {
if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
visitIntrinsicCall(ID, Call);
- // Verify that a callsite has at most one "deopt", at most one "funclet" and
- // at most one "gc-transition" operand bundle.
+ // Verify that a callsite has at most one "deopt", at most one "funclet", at
+ // most one "gc-transition", and at most one "cfguardtarget" operand bundle.
bool FoundDeoptBundle = false, FoundFuncletBundle = false,
- FoundGCTransitionBundle = false;
+ FoundGCTransitionBundle = false, FoundCFGuardTargetBundle = false;
for (unsigned i = 0, e = Call.getNumOperandBundles(); i < e; ++i) {
OperandBundleUse BU = Call.getOperandBundleAt(i);
uint32_t Tag = BU.getTagID();
@@ -2997,6 +3018,12 @@ void Verifier::visitCallBase(CallBase &Call) {
Assert(isa<FuncletPadInst>(BU.Inputs.front()),
"Funclet bundle operands should correspond to a FuncletPadInst",
Call);
+ } else if (Tag == LLVMContext::OB_cfguardtarget) {
+ Assert(!FoundCFGuardTargetBundle,
+ "Multiple CFGuardTarget operand bundles", Call);
+ FoundCFGuardTargetBundle = true;
+ Assert(BU.Inputs.size() == 1,
+ "Expected exactly one cfguardtarget bundle operand", Call);
}
}
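The verifier now insists on at most one cfguardtarget bundle with exactly one input. A sketch of building a conforming call; the helper name is an assumption and error handling is elided:

    #include "llvm/IR/IRBuilder.h"

    llvm::CallInst *callWithGuardTarget(llvm::IRBuilder<> &Builder,
                                        llvm::FunctionCallee Callee,
                                        llvm::Value *Target) {
      // Exactly one bundle input, matching the new verifier rule.
      llvm::OperandBundleDef Bundle("cfguardtarget",
                                    llvm::ArrayRef<llvm::Value *>(Target));
      return Builder.CreateCall(Callee, llvm::None, {Bundle});
    }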
@@ -4291,38 +4318,9 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"an array");
break;
}
- case Intrinsic::experimental_constrained_fadd:
- case Intrinsic::experimental_constrained_fsub:
- case Intrinsic::experimental_constrained_fmul:
- case Intrinsic::experimental_constrained_fdiv:
- case Intrinsic::experimental_constrained_frem:
- case Intrinsic::experimental_constrained_fma:
- case Intrinsic::experimental_constrained_fptosi:
- case Intrinsic::experimental_constrained_fptoui:
- case Intrinsic::experimental_constrained_fptrunc:
- case Intrinsic::experimental_constrained_fpext:
- case Intrinsic::experimental_constrained_sqrt:
- case Intrinsic::experimental_constrained_pow:
- case Intrinsic::experimental_constrained_powi:
- case Intrinsic::experimental_constrained_sin:
- case Intrinsic::experimental_constrained_cos:
- case Intrinsic::experimental_constrained_exp:
- case Intrinsic::experimental_constrained_exp2:
- case Intrinsic::experimental_constrained_log:
- case Intrinsic::experimental_constrained_log10:
- case Intrinsic::experimental_constrained_log2:
- case Intrinsic::experimental_constrained_lrint:
- case Intrinsic::experimental_constrained_llrint:
- case Intrinsic::experimental_constrained_rint:
- case Intrinsic::experimental_constrained_nearbyint:
- case Intrinsic::experimental_constrained_maxnum:
- case Intrinsic::experimental_constrained_minnum:
- case Intrinsic::experimental_constrained_ceil:
- case Intrinsic::experimental_constrained_floor:
- case Intrinsic::experimental_constrained_lround:
- case Intrinsic::experimental_constrained_llround:
- case Intrinsic::experimental_constrained_round:
- case Intrinsic::experimental_constrained_trunc:
+#define INSTRUCTION(NAME, NARGS, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC:
+#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(Call));
break;
case Intrinsic::dbg_declare: // llvm.dbg.declare
@@ -4691,28 +4689,32 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix:
- case Intrinsic::umul_fix_sat: {
+ case Intrinsic::umul_fix_sat:
+ case Intrinsic::sdiv_fix:
+ case Intrinsic::udiv_fix: {
Value *Op1 = Call.getArgOperand(0);
Value *Op2 = Call.getArgOperand(1);
Assert(Op1->getType()->isIntOrIntVectorTy(),
- "first operand of [us]mul_fix[_sat] must be an int type or vector "
- "of ints");
+ "first operand of [us][mul|div]_fix[_sat] must be an int type or "
+ "vector of ints");
Assert(Op2->getType()->isIntOrIntVectorTy(),
- "second operand of [us]mul_fix_[sat] must be an int type or vector "
- "of ints");
+ "second operand of [us][mul|div]_fix[_sat] must be an int type or "
+ "vector of ints");
auto *Op3 = cast<ConstantInt>(Call.getArgOperand(2));
Assert(Op3->getType()->getBitWidth() <= 32,
- "third argument of [us]mul_fix[_sat] must fit within 32 bits");
+ "third argument of [us][mul|div]_fix[_sat] must fit within 32 bits");
- if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat) {
+ if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat ||
+ ID == Intrinsic::sdiv_fix) {
Assert(
Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
- "the scale of smul_fix[_sat] must be less than the width of the operands");
+ "the scale of s[mul|div]_fix[_sat] must be less than the width of "
+ "the operands");
} else {
Assert(Op3->getZExtValue() <= Op1->getType()->getScalarSizeInBits(),
- "the scale of umul_fix[_sat] must be less than or equal to the width of "
- "the operands");
+ "the scale of u[mul|div]_fix[_sat] must be less than or equal "
+ "to the width of the operands");
}
break;
}
@@ -4749,83 +4751,54 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {
}
void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
- unsigned NumOperands = FPI.getNumArgOperands();
- bool HasExceptionMD = false;
- bool HasRoundingMD = false;
+ unsigned NumOperands;
+ bool HasRoundingMD;
switch (FPI.getIntrinsicID()) {
- case Intrinsic::experimental_constrained_sqrt:
- case Intrinsic::experimental_constrained_sin:
- case Intrinsic::experimental_constrained_cos:
- case Intrinsic::experimental_constrained_exp:
- case Intrinsic::experimental_constrained_exp2:
- case Intrinsic::experimental_constrained_log:
- case Intrinsic::experimental_constrained_log10:
- case Intrinsic::experimental_constrained_log2:
- case Intrinsic::experimental_constrained_rint:
- case Intrinsic::experimental_constrained_nearbyint:
- case Intrinsic::experimental_constrained_ceil:
- case Intrinsic::experimental_constrained_floor:
- case Intrinsic::experimental_constrained_round:
- case Intrinsic::experimental_constrained_trunc:
- Assert((NumOperands == 3), "invalid arguments for constrained FP intrinsic",
- &FPI);
- HasExceptionMD = true;
- HasRoundingMD = true;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC: \
+ NumOperands = NARG; \
+ HasRoundingMD = ROUND_MODE; \
break;
+#include "llvm/IR/ConstrainedOps.def"
+ default:
+ llvm_unreachable("Invalid constrained FP intrinsic!");
+ }
+ NumOperands += (1 + HasRoundingMD);
+ // Compare intrinsics carry an extra predicate metadata operand.
+ if (isa<ConstrainedFPCmpIntrinsic>(FPI))
+ NumOperands += 1;
+ Assert((FPI.getNumArgOperands() == NumOperands),
+ "invalid arguments for constrained FP intrinsic", &FPI);
+ switch (FPI.getIntrinsicID()) {
case Intrinsic::experimental_constrained_lrint:
case Intrinsic::experimental_constrained_llrint: {
- Assert((NumOperands == 3), "invalid arguments for constrained FP intrinsic",
- &FPI);
Type *ValTy = FPI.getArgOperand(0)->getType();
Type *ResultTy = FPI.getType();
Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
"Intrinsic does not support vectors", &FPI);
- HasExceptionMD = true;
- HasRoundingMD = true;
}
break;
case Intrinsic::experimental_constrained_lround:
case Intrinsic::experimental_constrained_llround: {
- Assert((NumOperands == 2), "invalid arguments for constrained FP intrinsic",
- &FPI);
Type *ValTy = FPI.getArgOperand(0)->getType();
Type *ResultTy = FPI.getType();
Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
"Intrinsic does not support vectors", &FPI);
- HasExceptionMD = true;
break;
}
- case Intrinsic::experimental_constrained_fma:
- Assert((NumOperands == 5), "invalid arguments for constrained FP intrinsic",
- &FPI);
- HasExceptionMD = true;
- HasRoundingMD = true;
- break;
-
- case Intrinsic::experimental_constrained_fadd:
- case Intrinsic::experimental_constrained_fsub:
- case Intrinsic::experimental_constrained_fmul:
- case Intrinsic::experimental_constrained_fdiv:
- case Intrinsic::experimental_constrained_frem:
- case Intrinsic::experimental_constrained_pow:
- case Intrinsic::experimental_constrained_powi:
- case Intrinsic::experimental_constrained_maxnum:
- case Intrinsic::experimental_constrained_minnum:
- Assert((NumOperands == 4), "invalid arguments for constrained FP intrinsic",
- &FPI);
- HasExceptionMD = true;
- HasRoundingMD = true;
+ case Intrinsic::experimental_constrained_fcmp:
+ case Intrinsic::experimental_constrained_fcmps: {
+ auto Pred = cast<ConstrainedFPCmpIntrinsic>(&FPI)->getPredicate();
+ Assert(CmpInst::isFPPredicate(Pred),
+ "invalid predicate for constrained FP comparison intrinsic", &FPI);
break;
+ }
case Intrinsic::experimental_constrained_fptosi:
case Intrinsic::experimental_constrained_fptoui: {
- Assert((NumOperands == 2),
- "invalid arguments for constrained FP intrinsic", &FPI);
- HasExceptionMD = true;
-
Value *Operand = FPI.getArgOperand(0);
uint64_t NumSrcElem = 0;
Assert(Operand->getType()->isFPOrFPVectorTy(),
@@ -4847,18 +4820,30 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
}
break;
- case Intrinsic::experimental_constrained_fptrunc:
- case Intrinsic::experimental_constrained_fpext: {
- if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
- Assert((NumOperands == 3),
- "invalid arguments for constrained FP intrinsic", &FPI);
- HasRoundingMD = true;
- } else {
- Assert((NumOperands == 2),
- "invalid arguments for constrained FP intrinsic", &FPI);
+ case Intrinsic::experimental_constrained_sitofp:
+ case Intrinsic::experimental_constrained_uitofp: {
+ Value *Operand = FPI.getArgOperand(0);
+ uint64_t NumSrcElem = 0;
+ Assert(Operand->getType()->isIntOrIntVectorTy(),
+ "Intrinsic first argument must be integer", &FPI);
+ if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
+ NumSrcElem = OperandT->getNumElements();
+ }
+
+ Operand = &FPI;
+ Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
+ "Intrinsic first argument and result disagree on vector use", &FPI);
+ Assert(Operand->getType()->isFPOrFPVectorTy(),
+ "Intrinsic result must be a floating point", &FPI);
+ if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
+ Assert(NumSrcElem == OperandT->getNumElements(),
+ "Intrinsic first argument and result vector lengths must be equal",
+ &FPI);
}
- HasExceptionMD = true;
+ } break;
+ case Intrinsic::experimental_constrained_fptrunc:
+ case Intrinsic::experimental_constrained_fpext: {
Value *Operand = FPI.getArgOperand(0);
Type *OperandTy = Operand->getType();
Value *Result = &FPI;
@@ -4889,7 +4874,7 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
break;
default:
- llvm_unreachable("Invalid constrained FP intrinsic!");
+ break;
}
// If a non-metadata argument is passed in a metadata slot then the
@@ -4897,10 +4882,8 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
// match the specification in the intrinsic call table. Thus, no
// argument type check is needed here.
- if (HasExceptionMD) {
- Assert(FPI.getExceptionBehavior().hasValue(),
- "invalid exception behavior argument", &FPI);
- }
+ Assert(FPI.getExceptionBehavior().hasValue(),
+ "invalid exception behavior argument", &FPI);
if (HasRoundingMD) {
Assert(FPI.getRoundingMode().hasValue(),
"invalid rounding mode argument", &FPI);
diff --git a/contrib/llvm-project/llvm/lib/LTO/LTO.cpp b/contrib/llvm-project/llvm/lib/LTO/LTO.cpp
index 1e345e7dd89e..297b11de17a9 100644
--- a/contrib/llvm-project/llvm/lib/LTO/LTO.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/LTO.cpp
@@ -29,6 +29,7 @@
#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -146,9 +147,11 @@ void llvm::computeLTOCacheKey(
// Include the hash for the current module
auto ModHash = Index.getModuleHash(ModuleID);
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
- for (auto F : ExportList)
+ for (const auto &VI : ExportList) {
+ auto GUID = VI.getGUID();
// The export list can impact the internalization, be conservative here
- Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
+ Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
+ }
// Include the hash for every module we import functions from. The set of
// imported symbols for each module may affect code generation and is
@@ -383,12 +386,11 @@ static bool isWeakObjectWithRWAccess(GlobalValueSummary *GVS) {
}
static void thinLTOInternalizeAndPromoteGUID(
- GlobalValueSummaryList &GVSummaryList, GlobalValue::GUID GUID,
- function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+ ValueInfo VI, function_ref<bool(StringRef, ValueInfo)> isExported,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing) {
- for (auto &S : GVSummaryList) {
- if (isExported(S->modulePath(), GUID)) {
+ for (auto &S : VI.getSummaryList()) {
+ if (isExported(S->modulePath(), VI)) {
if (GlobalValue::isLocalLinkage(S->linkage()))
S->setLinkage(GlobalValue::ExternalLinkage);
} else if (EnableLTOInternalization &&
@@ -396,7 +398,7 @@ static void thinLTOInternalizeAndPromoteGUID(
// doesn't resolve them.
!GlobalValue::isLocalLinkage(S->linkage()) &&
(!GlobalValue::isInterposableLinkage(S->linkage()) ||
- isPrevailing(GUID, S.get())) &&
+ isPrevailing(VI.getGUID(), S.get())) &&
S->linkage() != GlobalValue::AppendingLinkage &&
// We can't internalize available_externally globals because this
// can break function pointer equality.
@@ -415,11 +417,11 @@ static void thinLTOInternalizeAndPromoteGUID(
// as external and non-exported values as internal.
void llvm::thinLTOInternalizeAndPromoteInIndex(
ModuleSummaryIndex &Index,
- function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+ function_ref<bool(StringRef, ValueInfo)> isExported,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing) {
for (auto &I : Index)
- thinLTOInternalizeAndPromoteGUID(I.second.SummaryList, I.first, isExported,
+ thinLTOInternalizeAndPromoteGUID(Index.getValueInfo(I), isExported,
isPrevailing);
}
@@ -465,7 +467,7 @@ BitcodeModule &InputFile::getSingleBitcodeModule() {
}
LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
- Config &Conf)
+ const Config &Conf)
: ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel),
Ctx(Conf), CombinedModule(std::make_unique<Module>("ld-temp.o", Ctx)),
Mover(std::make_unique<IRMover>(*CombinedModule)) {}
@@ -1027,12 +1029,12 @@ ArrayRef<const char*> LTO::getRuntimeLibcallSymbols() {
/// This class defines the interface to the ThinLTO backend.
class lto::ThinBackendProc {
protected:
- Config &Conf;
+ const Config &Conf;
ModuleSummaryIndex &CombinedIndex;
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries;
public:
- ThinBackendProc(Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ ThinBackendProc(const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries)
: Conf(Conf), CombinedIndex(CombinedIndex),
ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries) {}
@@ -1060,7 +1062,7 @@ class InProcessThinBackend : public ThinBackendProc {
public:
InProcessThinBackend(
- Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ const Config &Conf, ModuleSummaryIndex &CombinedIndex,
unsigned ThinLTOParallelismLevel,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, NativeObjectCache Cache)
@@ -1158,7 +1160,7 @@ public:
} // end anonymous namespace
ThinBackend lto::createInProcessThinBackend(unsigned ParallelismLevel) {
- return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, NativeObjectCache Cache) {
return std::make_unique<InProcessThinBackend>(
@@ -1196,7 +1198,7 @@ class WriteIndexesThinBackend : public ThinBackendProc {
public:
WriteIndexesThinBackend(
- Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
std::string OldPrefix, std::string NewPrefix, bool ShouldEmitImportsFiles,
raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite)
@@ -1248,7 +1250,7 @@ public:
ThinBackend lto::createWriteIndexesThinBackend(
std::string OldPrefix, std::string NewPrefix, bool ShouldEmitImportsFiles,
raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite) {
- return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, NativeObjectCache Cache) {
return std::make_unique<WriteIndexesThinBackend>(
@@ -1262,7 +1264,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
if (ThinLTO.ModuleMap.empty())
return Error::success();
- if (Conf.CombinedIndexHook && !Conf.CombinedIndexHook(ThinLTO.CombinedIndex))
+ if (Conf.CombinedIndexHook &&
+ !Conf.CombinedIndexHook(ThinLTO.CombinedIndex, GUIDPreservedSymbols))
return Error::success();
// Collect for each module the list of function it defines (GUID ->
@@ -1330,11 +1333,10 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
ExportedGUIDs.insert(
GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Def)));
- auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
+ auto isExported = [&](StringRef ModuleIdentifier, ValueInfo VI) {
const auto &ExportList = ExportLists.find(ModuleIdentifier);
- return (ExportList != ExportLists.end() &&
- ExportList->second.count(GUID)) ||
- ExportedGUIDs.count(GUID);
+ return (ExportList != ExportLists.end() && ExportList->second.count(VI)) ||
+ ExportedGUIDs.count(VI.getGUID());
};
// Update local devirtualized targets that were exported by cross-module
@@ -1380,8 +1382,12 @@ lto::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
StringRef RemarksPasses, StringRef RemarksFormat,
bool RemarksWithHotness, int Count) {
std::string Filename = RemarksFilename;
+ // For ThinLTO, file.opt.<format> becomes
+ // file.opt.<format>.thin.<num>.<format>.
if (!Filename.empty() && Count != -1)
- Filename += ".thin." + llvm::utostr(Count) + ".yaml";
+ Filename =
+ (Twine(Filename) + ".thin." + llvm::utostr(Count) + "." + RemarksFormat)
+ .str();
auto ResultOrErr = llvm::setupOptimizationRemarks(
Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness);
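The remarks change at the end of this file derives the per-task ThinLTO filename from the requested format instead of hard-coding ".yaml". A standalone sketch of that naming rule (the file names are examples):

    #include <cstdio>
    #include <string>

    // file.opt.<format> becomes file.opt.<format>.thin.<num>.<format>.
    static std::string thinRemarksFile(std::string Filename,
                                       const std::string &Format, int Count) {
      if (!Filename.empty() && Count != -1)
        Filename += ".thin." + std::to_string(Count) + "." + Format;
      return Filename;
    }

    int main() {
      // Prints: file.opt.yaml.thin.3.yaml
      std::printf("%s\n", thinRemarksFile("file.opt.yaml", "yaml", 3).c_str());
    }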
diff --git a/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp b/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp
index 2761f8367b0d..dcde7277b820 100644
--- a/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/LTOBackend.cpp
@@ -34,6 +34,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
@@ -101,23 +102,25 @@ Error Config::addSaveTemps(std::string OutputFileName,
setHook("4.opt", PostOptModuleHook);
setHook("5.precodegen", PreCodeGenModuleHook);
- CombinedIndexHook = [=](const ModuleSummaryIndex &Index) {
- std::string Path = OutputFileName + "index.bc";
- std::error_code EC;
- raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
- // Because -save-temps is a debugging feature, we report the error
- // directly and exit.
- if (EC)
- reportOpenError(Path, EC.message());
- WriteIndexToFile(Index, OS);
-
- Path = OutputFileName + "index.dot";
- raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_None);
- if (EC)
- reportOpenError(Path, EC.message());
- Index.exportToDot(OSDot);
- return true;
- };
+ CombinedIndexHook =
+ [=](const ModuleSummaryIndex &Index,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ std::string Path = OutputFileName + "index.bc";
+ std::error_code EC;
+ raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
+ // Because -save-temps is a debugging feature, we report the error
+ // directly and exit.
+ if (EC)
+ reportOpenError(Path, EC.message());
+ WriteIndexToFile(Index, OS);
+
+ Path = OutputFileName + "index.dot";
+ raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_None);
+ if (EC)
+ reportOpenError(Path, EC.message());
+ Index.exportToDot(OSDot, GUIDPreservedSymbols);
+ return true;
+ };
return Error::success();
}
@@ -125,7 +128,7 @@ Error Config::addSaveTemps(std::string OutputFileName,
namespace {
std::unique_ptr<TargetMachine>
-createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) {
+createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) {
StringRef TheTriple = M.getTargetTriple();
SubtargetFeatures Features;
Features.getDefaultSubtargetFeatures(Triple(TheTriple));
@@ -150,7 +153,7 @@ createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) {
CodeModel, Conf.CGOptLevel));
}
-static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
+static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
unsigned OptLevel, bool IsThinLTO,
ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary) {
@@ -169,7 +172,7 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
PassInstrumentationCallbacks PIC;
StandardInstrumentations SI;
SI.registerCallbacks(PIC);
- PassBuilder PB(TM, PipelineTuningOptions(),PGOOpt, &PIC);
+ PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC);
AAManager AA;
// Parse a custom AA pipeline if asked to.
@@ -266,7 +269,7 @@ static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
MPM.run(Mod, MAM);
}
-static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
+static void runOldPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary) {
legacy::PassManager passes;
@@ -297,7 +300,7 @@ static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
passes.run(Mod);
}
-bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
+bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary) {
// FIXME: Plumb the combined index into the new pass manager.
@@ -312,11 +315,30 @@ bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
}
-void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
+static cl::opt<bool> EmbedBitcode(
+ "lto-embed-bitcode", cl::init(false),
+ cl::desc("Embed LLVM bitcode in object files produced by LTO"));
+
+static void EmitBitcodeSection(Module &M, const Config &Conf) {
+ if (!EmbedBitcode)
+ return;
+ SmallVector<char, 0> Buffer;
+ raw_svector_ostream OS(Buffer);
+ WriteBitcodeToFile(M, OS);
+
+ std::unique_ptr<MemoryBuffer> Buf(
+ new SmallVectorMemoryBuffer(std::move(Buffer)));
+ llvm::EmbedBitcodeInModule(M, Buf->getMemBufferRef(), /*EmbedBitcode*/ true,
+ /*EmbedMarker*/ false, /*CmdArgs*/ nullptr);
+}
+
+void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
unsigned Task, Module &Mod) {
if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
return;
+ EmitBitcodeSection(Mod, Conf);
+
std::unique_ptr<ToolOutputFile> DwoOut;
SmallString<1024> DwoFile(Conf.SplitDwarfOutput);
if (!Conf.DwoDir.empty()) {
@@ -350,7 +372,7 @@ void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
DwoOut->keep();
}
-void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
+void splitCodeGen(const Config &C, TargetMachine *TM, AddStreamFn AddStream,
unsigned ParallelCodeGenParallelismLevel,
std::unique_ptr<Module> Mod) {
ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel);
@@ -398,7 +420,7 @@ void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
CodegenThreadPool.wait();
}
-Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
+Expected<const Target *> initAndLookupTarget(const Config &C, Module &Mod) {
if (!C.OverrideTriple.empty())
Mod.setTargetTriple(C.OverrideTriple);
else if (Mod.getTargetTriple().empty())
@@ -410,7 +432,6 @@ Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
return make_error<StringError>(Msg, inconvertibleErrorCode());
return T;
}
-
}
static Error
@@ -424,7 +445,7 @@ finalizeOptimizationRemarks(std::unique_ptr<ToolOutputFile> DiagOutputFile) {
return Error::success();
}
-Error lto::backend(Config &C, AddStreamFn AddStream,
+Error lto::backend(const Config &C, AddStreamFn AddStream,
unsigned ParallelCodeGenParallelismLevel,
std::unique_ptr<Module> Mod,
ModuleSummaryIndex &CombinedIndex) {
@@ -478,7 +499,7 @@ static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals,
}
}
-Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
+Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
Module &Mod, const ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
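EmitBitcodeSection above serializes the module to an in-memory buffer before handing it to llvm::EmbedBitcodeInModule. A minimal sketch of that serialization step, assuming LLVM headers and libraries of this vintage are available:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Bitcode/BitcodeWriter.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/SmallVectorMemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"
    #include <memory>

    // Write M's bitcode into a growable in-memory buffer, then wrap it in a
    // MemoryBuffer so the embedding helper can consume it.
    std::unique_ptr<llvm::MemoryBuffer> moduleToBuffer(llvm::Module &M) {
      llvm::SmallVector<char, 0> Buffer;
      llvm::raw_svector_ostream OS(Buffer);
      llvm::WriteBitcodeToFile(M, OS);
      return std::make_unique<llvm::SmallVectorMemoryBuffer>(std::move(Buffer));
    }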
diff --git a/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp
index 882192892867..5fef14230a9b 100644
--- a/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -259,7 +259,7 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) {
int FD;
StringRef Extension
- (FileType == TargetMachine::CGFT_AssemblyFile ? "s" : "o");
+ (FileType == CGFT_AssemblyFile ? "s" : "o");
std::error_code EC =
sys::fs::createTemporaryFile("lto-llvm", Extension, FD, Filename);
@@ -622,12 +622,9 @@ bool LTOCodeGenerator::compileOptimized(ArrayRef<raw_pwrite_stream *> Out) {
return true;
}
-/// setCodeGenDebugOptions - Set codegen debugging options to aid in debugging
-/// LTO problems.
-void LTOCodeGenerator::setCodeGenDebugOptions(StringRef Options) {
- for (std::pair<StringRef, StringRef> o = getToken(Options); !o.first.empty();
- o = getToken(o.second))
- CodegenOptions.push_back(o.first);
+void LTOCodeGenerator::setCodeGenDebugOptions(ArrayRef<const char *> Options) {
+ for (StringRef Option : Options)
+ CodegenOptions.push_back(Option);
}
void LTOCodeGenerator::parseCodeGenDebugOptions() {
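setCodeGenDebugOptions now takes pre-split, argv-style options rather than a whitespace-separated string it must tokenize itself. A hypothetical call site (the option string is an example only):

    #include "llvm/LTO/legacy/LTOCodeGenerator.h"

    // Options arrive already tokenized, so the old getToken() loop is gone.
    void applyDebugOpts(llvm::LTOCodeGenerator &CG) {
      static const char *Opts[] = {"-debug-pass=Structure"};
      CG.setCodeGenDebugOptions(Opts);
    }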
diff --git a/contrib/llvm-project/llvm/lib/LTO/SummaryBasedOptimizations.cpp b/contrib/llvm-project/llvm/lib/LTO/SummaryBasedOptimizations.cpp
index 6db495de003b..d4dbefb231e8 100644
--- a/contrib/llvm-project/llvm/lib/LTO/SummaryBasedOptimizations.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/SummaryBasedOptimizations.cpp
@@ -15,6 +15,7 @@
#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index d151de17896f..a9e27832917c 100644
--- a/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/contrib/llvm-project/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/LTO/legacy/ThinLTOCodeGenerator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -292,7 +293,7 @@ std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
PM.add(createObjCARCContractPass());
// Setup the codegen now.
- if (TM.addPassesToEmitFile(PM, OS, nullptr, TargetMachine::CGFT_ObjectFile,
+ if (TM.addPassesToEmitFile(PM, OS, nullptr, CGFT_ObjectFile,
/* DisableVerify */ true))
report_fatal_error("Failed to setup codegen");
@@ -580,6 +581,7 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
return CombinedIndex;
}
+namespace {
struct IsExported {
const StringMap<FunctionImporter::ExportSetTy> &ExportLists;
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols;
@@ -588,11 +590,10 @@ struct IsExported {
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols)
: ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {}
- bool operator()(StringRef ModuleIdentifier, GlobalValue::GUID GUID) const {
+ bool operator()(StringRef ModuleIdentifier, ValueInfo VI) const {
const auto &ExportList = ExportLists.find(ModuleIdentifier);
- return (ExportList != ExportLists.end() &&
- ExportList->second.count(GUID)) ||
- GUIDPreservedSymbols.count(GUID);
+ return (ExportList != ExportLists.end() && ExportList->second.count(VI)) ||
+ GUIDPreservedSymbols.count(VI.getGUID());
}
};
@@ -610,6 +611,7 @@ struct IsPrevailing {
return Prevailing->second == S;
};
};
+} // namespace
static void computeDeadSymbolsInIndex(
ModuleSummaryIndex &Index,
diff --git a/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp b/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp
index 6784d81595e5..e13656ed1c10 100644
--- a/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp
+++ b/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp
@@ -628,7 +628,7 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
SGVar->isConstant(), GlobalValue::ExternalLinkage,
/*init*/ nullptr, SGVar->getName(),
/*insertbefore*/ nullptr, SGVar->getThreadLocalMode(),
- SGVar->getType()->getAddressSpace());
+ SGVar->getAddressSpace());
NewDGV->setAlignment(MaybeAlign(SGVar->getAlignment()));
NewDGV->copyAttributesFrom(SGVar);
return NewDGV;
@@ -654,9 +654,9 @@ AttributeList IRLinker::mapAttributeTypes(LLVMContext &C, AttributeList Attrs) {
Function *IRLinker::copyFunctionProto(const Function *SF) {
// If there is no linkage to be performed or we are linking from the source,
// bring SF over.
- auto *F =
- Function::Create(TypeMap.get(SF->getFunctionType()),
- GlobalValue::ExternalLinkage, SF->getName(), &DstM);
+ auto *F = Function::Create(TypeMap.get(SF->getFunctionType()),
+ GlobalValue::ExternalLinkage,
+ SF->getAddressSpace(), SF->getName(), &DstM);
F->copyAttributesFrom(SF);
F->setAttributes(mapAttributeTypes(F->getContext(), F->getAttributes()));
return F;
@@ -671,11 +671,11 @@ IRLinker::copyGlobalIndirectSymbolProto(const GlobalIndirectSymbol *SGIS) {
auto *Ty = TypeMap.get(SGIS->getValueType());
GlobalIndirectSymbol *GIS;
if (isa<GlobalAlias>(SGIS))
- GIS = GlobalAlias::create(Ty, SGIS->getType()->getPointerAddressSpace(),
+ GIS = GlobalAlias::create(Ty, SGIS->getAddressSpace(),
GlobalValue::ExternalLinkage, SGIS->getName(),
&DstM);
else
- GIS = GlobalIFunc::create(Ty, SGIS->getType()->getPointerAddressSpace(),
+ GIS = GlobalIFunc::create(Ty, SGIS->getAddressSpace(),
GlobalValue::ExternalLinkage, SGIS->getName(),
nullptr, &DstM);
GIS->copyAttributesFrom(SGIS);
@@ -695,14 +695,15 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
else if (SGV->getValueType()->isFunctionTy())
NewGV =
Function::Create(cast<FunctionType>(TypeMap.get(SGV->getValueType())),
- GlobalValue::ExternalLinkage, SGV->getName(), &DstM);
+ GlobalValue::ExternalLinkage, SGV->getAddressSpace(),
+ SGV->getName(), &DstM);
else
- NewGV = new GlobalVariable(
- DstM, TypeMap.get(SGV->getValueType()),
- /*isConstant*/ false, GlobalValue::ExternalLinkage,
- /*init*/ nullptr, SGV->getName(),
- /*insertbefore*/ nullptr, SGV->getThreadLocalMode(),
- SGV->getType()->getAddressSpace());
+ NewGV =
+ new GlobalVariable(DstM, TypeMap.get(SGV->getValueType()),
+ /*isConstant*/ false, GlobalValue::ExternalLinkage,
+ /*init*/ nullptr, SGV->getName(),
+ /*insertbefore*/ nullptr,
+ SGV->getThreadLocalMode(), SGV->getAddressSpace());
}
if (ForDefinition)
@@ -918,7 +919,7 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
GlobalVariable *NG = new GlobalVariable(
DstM, NewType, SrcGV->isConstant(), SrcGV->getLinkage(),
/*init*/ nullptr, /*name*/ "", DstGV, SrcGV->getThreadLocalMode(),
- SrcGV->getType()->getAddressSpace());
+ SrcGV->getAddressSpace());
NG->copyAttributesFrom(SrcGV);
forceRenaming(NG, SrcGV->getName());
@@ -1098,7 +1099,7 @@ Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
}
void IRLinker::flushRAUWWorklist() {
- for (const auto Elem : RAUWWorklist) {
+ for (const auto &Elem : RAUWWorklist) {
GlobalValue *Old;
Value *New;
std::tie(Old, New) = Elem;
diff --git a/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
index 6f160e491cea..6b4b45eb8eff 100644
--- a/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
@@ -569,26 +569,6 @@ void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex,
IsReserved);
}
-// True if the assembler knows nothing about the final value of the symbol.
-// This doesn't cover the comdat issues, since in those cases the assembler
-// can at least know that all symbols in the section will move together.
-static bool isWeak(const MCSymbolELF &Sym) {
- if (Sym.getType() == ELF::STT_GNU_IFUNC)
- return true;
-
- switch (Sym.getBinding()) {
- default:
- llvm_unreachable("Unknown binding");
- case ELF::STB_LOCAL:
- return false;
- case ELF::STB_GLOBAL:
- return false;
- case ELF::STB_WEAK:
- case ELF::STB_GNU_UNIQUE:
- return true;
- }
-}
-
bool ELFWriter::isInSymtab(const MCAsmLayout &Layout, const MCSymbolELF &Symbol,
bool Used, bool Renamed) {
if (Symbol.isVariable()) {
@@ -615,9 +595,6 @@ bool ELFWriter::isInSymtab(const MCAsmLayout &Layout, const MCSymbolELF &Symbol,
return false;
}
- if (Symbol.isUndefined() && !Symbol.isBindingSet())
- return false;
-
if (Symbol.isTemporary())
return false;
@@ -1537,7 +1514,8 @@ bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
const auto &SymA = cast<MCSymbolELF>(SA);
if (IsPCRel) {
assert(!InSet);
- if (isWeak(SymA))
+ if (SymA.getBinding() != ELF::STB_LOCAL ||
+ SymA.getType() == ELF::STT_GNU_IFUNC)
return false;
}
return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
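With isWeak() folded into its only caller, a PC-relative difference is now treated as fully resolvable only when the symbol is local and not an ifunc; unlike the deleted helper, this also rejects STB_GLOBAL, since a global definition may be interposed at link time. A standalone sketch of the new predicate:

    #include <cstdio>

    enum Binding { STB_LOCAL, STB_GLOBAL, STB_WEAK, STB_GNU_UNIQUE };

    // The deleted isWeak() blocked only weak/gnu_unique bindings and ifuncs;
    // the inlined condition above is stricter.
    static bool pcRelFullyResolvable(Binding B, bool IsIFunc) {
      return B == STB_LOCAL && !IsIFunc;
    }

    int main() {
      std::printf("%d\n", pcRelFullyResolvable(STB_GLOBAL, false)); // 0 (was 1)
    }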
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp
index b800e9caee22..cf110345df3d 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmBackend.cpp
@@ -9,7 +9,6 @@
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCCodePadder.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
@@ -23,8 +22,7 @@
using namespace llvm;
-MCAsmBackend::MCAsmBackend(support::endianness Endian)
- : CodePadder(new MCCodePadder()), Endian(Endian) {}
+MCAsmBackend::MCAsmBackend(support::endianness Endian) : Endian(Endian) {}
MCAsmBackend::~MCAsmBackend() = default;
@@ -113,25 +111,3 @@ bool MCAsmBackend::fixupNeedsRelaxationAdvanced(
return true;
return fixupNeedsRelaxation(Fixup, Value, DF, Layout);
}
-
-void MCAsmBackend::handleCodePaddingBasicBlockStart(
- MCObjectStreamer *OS, const MCCodePaddingContext &Context) {
- CodePadder->handleBasicBlockStart(OS, Context);
-}
-
-void MCAsmBackend::handleCodePaddingBasicBlockEnd(
- const MCCodePaddingContext &Context) {
- CodePadder->handleBasicBlockEnd(Context);
-}
-
-void MCAsmBackend::handleCodePaddingInstructionBegin(const MCInst &Inst) {
- CodePadder->handleInstructionBegin(Inst);
-}
-
-void MCAsmBackend::handleCodePaddingInstructionEnd(const MCInst &Inst) {
- CodePadder->handleInstructionEnd(Inst);
-}
-
-bool MCAsmBackend::relaxFragment(MCPaddingFragment *PF, MCAsmLayout &Layout) {
- return CodePadder->relaxFragment(PF, Layout);
-}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp
index 71e51e320f8b..420dbaa80ae9 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmInfo.cpp
@@ -100,7 +100,7 @@ MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
return MCBinaryExpr::createSub(Res, PC, Context);
}
-static bool isAcceptableChar(char C) {
+bool MCAsmInfo::isAcceptableChar(char C) const {
return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
(C >= '0' && C <= '9') || C == '_' || C == '$' || C == '.' || C == '@';
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmInfoELF.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmInfoELF.cpp
index a5e8aff7f129..9b8b8db794f0 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmInfoELF.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmInfoELF.cpp
@@ -21,8 +21,6 @@ using namespace llvm;
void MCAsmInfoELF::anchor() {}
MCSection *MCAsmInfoELF::getNonexecutableStackSection(MCContext &Ctx) const {
- if (!UsesNonexecutableStackSection)
- return nullptr;
return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0);
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmInfoXCOFF.cpp
index 65fe8848e20f..c51cdff59fa0 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmInfoXCOFF.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmInfoXCOFF.cpp
@@ -26,10 +26,11 @@ MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
SupportsQuotedNames = false;
}
-bool MCAsmInfoXCOFF::isValidUnquotedName(StringRef Name) const {
- // FIXME: Remove this function when we stop using "TOC[TC0]" as a symbol name.
- if (Name.equals("TOC[TC0]"))
+bool MCAsmInfoXCOFF::isAcceptableChar(char C) const {
+ // QualName is allowed for a MCSymbolXCOFF, and
+ // QualName contains '[' and ']'.
+ if (C == '[' || C == ']')
return true;
- return MCAsmInfo::isValidUnquotedName(Name);
+ return MCAsmInfo::isAcceptableChar(C);
}
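Making isAcceptableChar a virtual hook lets the XCOFF subclass admit the brackets that csect QualNames carry, instead of whitelisting the single name "TOC[TC0]". A standalone sketch of the two predicates:

    #include <cstdio>

    // Base character set, matching MCAsmInfo::isAcceptableChar above.
    static bool baseAcceptable(char C) {
      return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
             (C >= '0' && C <= '9') || C == '_' || C == '$' || C == '.' ||
             C == '@';
    }

    // XCOFF override: QualNames such as "foo[RO]" contain '[' and ']'.
    static bool xcoffAcceptable(char C) {
      return C == '[' || C == ']' || baseAcceptable(C);
    }

    int main() {
      std::printf("%d %d\n", baseAcceptable('['), xcoffAcceptable('[')); // 0 1
    }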
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
index 2d9c2cb21255..5d369503995b 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
@@ -77,6 +77,8 @@ public:
assert(InstPrinter);
if (IsVerboseAsm)
InstPrinter->setCommentStream(CommentStream);
+ if (Assembler->getBackendPtr())
+ setAllowAutoPadding(Assembler->getBackend().allowAutoPadding());
}
MCAssembler &getAssembler() { return *Assembler; }
@@ -164,7 +166,8 @@ public:
void EmitCOFFSectionIndex(MCSymbol const *Symbol) override;
void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
- void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ void EmitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size,
+ MCSymbol *CsectSym,
unsigned ByteAlign) override;
void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -765,16 +768,18 @@ void MCAsmStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {
// We need an XCOFF-specific version of this directive as the AIX syntax
// requires a QualName argument identifying the csect name and storage mapping
// class to appear before the alignment if we are specifying it.
-void MCAsmStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+void MCAsmStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *LabelSym,
+ uint64_t Size,
+ MCSymbol *CsectSym,
unsigned ByteAlignment) {
assert(MAI->getLCOMMDirectiveAlignmentType() == LCOMM::Log2Alignment &&
"We only support writing log base-2 alignment format with XCOFF.");
assert(isPowerOf2_32(ByteAlignment) && "Alignment must be a power of 2.");
OS << "\t.lcomm\t";
- Symbol->print(OS, MAI);
- OS << ',' << Size;
- OS << ',' << Symbol->getName();
+ LabelSym->print(OS, MAI);
+ OS << ',' << Size << ',';
+ CsectSym->print(OS, MAI);
OS << ',' << Log2_32(ByteAlignment);
EmitEOL();
@@ -1941,9 +1946,9 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
}
if(getTargetStreamer())
- getTargetStreamer()->prettyPrintAsm(*InstPrinter, OS, Inst, STI);
+ getTargetStreamer()->prettyPrintAsm(*InstPrinter, 0, Inst, STI, OS);
else
- InstPrinter->printInst(&Inst, OS, "", STI);
+ InstPrinter->printInst(&Inst, 0, "", STI, OS);
StringRef Comments = CommentToEmit;
if (Comments.size() && Comments.back() != '\n')
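The .lcomm hunk above prints the csect symbol between the size and the log2 alignment, as the AIX syntax requires. A sketch that renders the resulting directive for illustrative operands (the symbol names are hypothetical):

    #include <cstdio>

    int main() {
      const char *Label = "a", *Csect = "a[BS]"; // hypothetical symbols
      unsigned Size = 4, ByteAlignment = 8;      // alignment must be a power of 2
      // Emits: .lcomm  a,4,a[BS],3
      std::printf("\t.lcomm\t%s,%u,%s,%u\n", Label, Size, Csect,
                  (unsigned)__builtin_ctz(ByteAlignment)); // GCC/Clang builtin
    }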
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp
index cf42fe85b8e5..b30137aafb8d 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAssembler.cpp
@@ -68,10 +68,6 @@ STATISTIC(FragmentLayouts, "Number of fragment layouts");
STATISTIC(ObjectBytes, "Number of emitted object file bytes");
STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
-STATISTIC(PaddingFragmentsRelaxations,
- "Number of Padding Fragments relaxations");
-STATISTIC(PaddingFragmentsBytes,
- "Total size of all padding from adding Fragments");
} // end namespace stats
} // end anonymous namespace
@@ -167,10 +163,6 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
if (!Symbol.isTemporary())
return true;
- // Absolute temporary labels are never visible.
- if (!Symbol.isInSection())
- return false;
-
if (Symbol.isUsedInReloc())
return true;
@@ -313,8 +305,8 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
case MCFragment::FT_LEB:
return cast<MCLEBFragment>(F).getContents().size();
- case MCFragment::FT_Padding:
- return cast<MCPaddingFragment>(F).getSize();
+ case MCFragment::FT_BoundaryAlign:
+ return cast<MCBoundaryAlignFragment>(F).getSize();
case MCFragment::FT_SymbolId:
return 4;
@@ -580,6 +572,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
unsigned VSize = FF.getValueSize();
const unsigned MaxChunkSize = 16;
char Data[MaxChunkSize];
+ assert(0 < VSize && VSize <= MaxChunkSize && "Illegal fragment fill size");
// Duplicate V into Data as byte vector to reduce number of
// writes done. As such, do endian conversion here.
for (unsigned I = 0; I != VSize; ++I) {
@@ -612,7 +605,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
break;
}
- case MCFragment::FT_Padding: {
+ case MCFragment::FT_BoundaryAlign: {
if (!Asm.getBackend().writeNopData(OS, FragmentSize))
report_fatal_error("unable to write nop sequence of " +
Twine(FragmentSize) + " bytes");
@@ -935,20 +928,6 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
return true;
}
-bool MCAssembler::relaxPaddingFragment(MCAsmLayout &Layout,
- MCPaddingFragment &PF) {
- assert(getBackendPtr() && "Expected assembler backend");
- uint64_t OldSize = PF.getSize();
- if (!getBackend().relaxFragment(&PF, Layout))
- return false;
- uint64_t NewSize = PF.getSize();
-
- ++stats::PaddingFragmentsRelaxations;
- stats::PaddingFragmentsBytes += NewSize;
- stats::PaddingFragmentsBytes -= OldSize;
- return true;
-}
-
bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
uint64_t OldSize = LF.getContents().size();
int64_t Value;
@@ -969,6 +948,72 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
return OldSize != LF.getContents().size();
}
+/// Check if the branch crosses the boundary.
+///
+/// \param StartAddr start address of the fused/unfused branch.
+/// \param Size size of the fused/unfused branch.
+/// \param BoundaryAlignment alignment requirement of the branch.
+/// \returns true if the branch crosses the boundary.
+static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size,
+ Align BoundaryAlignment) {
+ uint64_t EndAddr = StartAddr + Size;
+ return (StartAddr >> Log2(BoundaryAlignment)) !=
+ ((EndAddr - 1) >> Log2(BoundaryAlignment));
+}
+
+/// Check if the branch is against the boundary.
+///
+/// \param StartAddr start address of the fused/unfused branch.
+/// \param Size size of the fused/unfused branch.
+/// \param BoundaryAlignment alignment requirement of the branch.
+/// \returns true if the branch is against the boundary.
+static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size,
+ Align BoundaryAlignment) {
+ uint64_t EndAddr = StartAddr + Size;
+ return (EndAddr & (BoundaryAlignment.value() - 1)) == 0;
+}
+
+/// Check if the branch needs padding.
+///
+/// \param StartAddr start address of the fused/unfused branch.
+/// \param Size size of the fused/unfused branch.
+/// \param BoundaryAlignment alignment requirement of the branch.
+/// \returns true if the branch needs padding.
+static bool needPadding(uint64_t StartAddr, uint64_t Size,
+ Align BoundaryAlignment) {
+ return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) ||
+ isAgainstBoundary(StartAddr, Size, BoundaryAlignment);
+}
+
+bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout,
+ MCBoundaryAlignFragment &BF) {
+ // The MCBoundaryAlignFragment that doesn't emit NOP should not be relaxed.
+ if (!BF.canEmitNops())
+ return false;
+
+ uint64_t AlignedOffset = Layout.getFragmentOffset(BF.getNextNode());
+ uint64_t AlignedSize = 0;
+ const MCFragment *F = BF.getNextNode();
+ // If the branch is unfused, it is emitted into one fragment; otherwise it
+ // is emitted into at most two fragments, and the next
+ // MCBoundaryAlignFragment (if it exists) also marks the end of the branch.
+ for (auto i = 0, N = BF.isFused() ? 2 : 1;
+ i != N && !isa<MCBoundaryAlignFragment>(F); ++i, F = F->getNextNode()) {
+ AlignedSize += computeFragmentSize(Layout, *F);
+ }
+ uint64_t OldSize = BF.getSize();
+ AlignedOffset -= OldSize;
+ Align BoundaryAlignment = BF.getAlignment();
+ uint64_t NewSize = needPadding(AlignedOffset, AlignedSize, BoundaryAlignment)
+ ? offsetToAlignment(AlignedOffset, BoundaryAlignment)
+ : 0U;
+ if (NewSize == OldSize)
+ return false;
+ BF.setSize(NewSize);
+ Layout.invalidateFragmentsFrom(&BF);
+ return true;
+}
+
bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
MCDwarfLineAddrFragment &DF) {
MCContext &Context = Layout.getAssembler().getContext();
@@ -1085,8 +1130,9 @@ bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSection &Sec) {
case MCFragment::FT_LEB:
RelaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(I));
break;
- case MCFragment::FT_Padding:
- RelaxedFrag = relaxPaddingFragment(Layout, *cast<MCPaddingFragment>(I));
+ case MCFragment::FT_BoundaryAlign:
+ RelaxedFrag =
+ relaxBoundaryAlign(Layout, *cast<MCBoundaryAlignFragment>(I));
break;
case MCFragment::FT_CVInlineLines:
RelaxedFrag =
@@ -1124,8 +1170,8 @@ void MCAssembler::finishLayout(MCAsmLayout &Layout) {
// The layout is done. Mark every fragment as valid.
for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
MCSection &Section = *Layout.getSectionOrder()[i];
- Layout.getFragmentOffset(&*Section.rbegin());
- computeFragmentSize(Layout, *Section.rbegin());
+ Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
+ computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
}
getBackend().finishLayout(*this, Layout);
}
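The needPadding() logic above can be exercised in isolation: a branch is padded when it would cross the alignment boundary or end exactly against it. A standalone sketch with a 32-byte boundary:

    #include <cstdint>
    #include <cstdio>

    static bool mayCross(uint64_t Start, uint64_t Size, uint64_t Align) {
      return (Start / Align) != ((Start + Size - 1) / Align);
    }
    static bool against(uint64_t Start, uint64_t Size, uint64_t Align) {
      return (Start + Size) % Align == 0;
    }
    static bool needsPad(uint64_t Start, uint64_t Size, uint64_t Align) {
      return mayCross(Start, Size, Align) || against(Start, Size, Align);
    }

    int main() {
      // A 4-byte branch at 30 crosses 32; at 28 it ends against 32; at 8 it
      // needs no padding. Prints: 1 1 0
      std::printf("%d %d %d\n", (int)needsPad(30, 4, 32),
                  (int)needsPad(28, 4, 32), (int)needsPad(8, 4, 32));
    }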
diff --git a/contrib/llvm-project/llvm/lib/MC/MCCodePadder.cpp b/contrib/llvm-project/llvm/lib/MC/MCCodePadder.cpp
deleted file mode 100644
index 27a62f95a529..000000000000
--- a/contrib/llvm-project/llvm/lib/MC/MCCodePadder.cpp
+++ /dev/null
@@ -1,370 +0,0 @@
-//===- MCCodePadder.cpp - Target MC Code Padder ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCCodePadder.h"
-#include "llvm/MC/MCObjectStreamer.h"
-#include <algorithm>
-#include <limits>
-#include <numeric>
-
-using namespace llvm;
-
-//---------------------------------------------------------------------------
-// MCCodePadder
-//
-
-MCCodePadder::~MCCodePadder() {
- for (auto *Policy : CodePaddingPolicies)
- delete Policy;
-}
-
-bool MCCodePadder::addPolicy(MCCodePaddingPolicy *Policy) {
- assert(Policy && "Policy must be valid");
- return CodePaddingPolicies.insert(Policy).second;
-}
-
-void MCCodePadder::handleBasicBlockStart(MCObjectStreamer *OS,
- const MCCodePaddingContext &Context) {
- assert(OS != nullptr && "OS must be valid");
- assert(this->OS == nullptr && "Still handling another basic block");
- this->OS = OS;
-
- ArePoliciesActive = usePoliciesForBasicBlock(Context);
-
- bool InsertionPoint = basicBlockRequiresInsertionPoint(Context);
- assert((!InsertionPoint ||
- OS->getCurrentFragment()->getKind() != MCFragment::FT_Align) &&
- "Cannot insert padding nops right after an alignment fragment as it "
- "will ruin the alignment");
-
- uint64_t PoliciesMask = MCPaddingFragment::PFK_None;
- if (ArePoliciesActive) {
- PoliciesMask = std::accumulate(
- CodePaddingPolicies.begin(), CodePaddingPolicies.end(),
- MCPaddingFragment::PFK_None,
- [&Context](uint64_t Mask,
- const MCCodePaddingPolicy *Policy) -> uint64_t {
- return Policy->basicBlockRequiresPaddingFragment(Context)
- ? (Mask | Policy->getKindMask())
- : Mask;
- });
- }
-
- if (InsertionPoint || PoliciesMask != MCPaddingFragment::PFK_None) {
- MCPaddingFragment *PaddingFragment = OS->getOrCreatePaddingFragment();
- if (InsertionPoint)
- PaddingFragment->setAsInsertionPoint();
- PaddingFragment->setPaddingPoliciesMask(
- PaddingFragment->getPaddingPoliciesMask() | PoliciesMask);
- }
-}
-
-void MCCodePadder::handleBasicBlockEnd(const MCCodePaddingContext &Context) {
- assert(this->OS != nullptr && "Not handling a basic block");
- OS = nullptr;
-}
-
-void MCCodePadder::handleInstructionBegin(const MCInst &Inst) {
- if (!OS)
- return; // instruction was emitted outside a function
-
- assert(CurrHandledInstFragment == nullptr && "Can't start handling an "
- "instruction while still "
- "handling another instruction");
-
- bool InsertionPoint = instructionRequiresInsertionPoint(Inst);
- assert((!InsertionPoint ||
- OS->getCurrentFragment()->getKind() != MCFragment::FT_Align) &&
- "Cannot insert padding nops right after an alignment fragment as it "
- "will ruin the alignment");
-
- uint64_t PoliciesMask = MCPaddingFragment::PFK_None;
- if (ArePoliciesActive) {
- PoliciesMask = std::accumulate(
- CodePaddingPolicies.begin(), CodePaddingPolicies.end(),
- MCPaddingFragment::PFK_None,
- [&Inst](uint64_t Mask, const MCCodePaddingPolicy *Policy) -> uint64_t {
- return Policy->instructionRequiresPaddingFragment(Inst)
- ? (Mask | Policy->getKindMask())
- : Mask;
- });
- }
- MCFragment *CurrFragment = OS->getCurrentFragment();
- // CurrFragment can be a previously created MCPaddingFragment. If so, let's
- // update it with the information we have, such as the instruction that it
- // should point to.
- bool needToUpdateCurrFragment =
- CurrFragment != nullptr &&
- CurrFragment->getKind() == MCFragment::FT_Padding;
- if (InsertionPoint || PoliciesMask != MCPaddingFragment::PFK_None ||
- needToUpdateCurrFragment) {
- // temporarily holding the fragment as CurrHandledInstFragment, to be
- // updated after the instruction will be written
- CurrHandledInstFragment = OS->getOrCreatePaddingFragment();
- if (InsertionPoint)
- CurrHandledInstFragment->setAsInsertionPoint();
- CurrHandledInstFragment->setPaddingPoliciesMask(
- CurrHandledInstFragment->getPaddingPoliciesMask() | PoliciesMask);
- }
-}
-
-void MCCodePadder::handleInstructionEnd(const MCInst &Inst) {
- if (!OS)
- return; // instruction was emitted outside a function
- if (CurrHandledInstFragment == nullptr)
- return;
-
- MCFragment *InstFragment = OS->getCurrentFragment();
- if (MCDataFragment *InstDataFragment =
- dyn_cast_or_null<MCDataFragment>(InstFragment))
- // Inst is a fixed size instruction and was encoded into a MCDataFragment.
- // Let the fragment hold it and its size. Its size is the current size of
- // the data fragment, as the padding fragment was inserted right before it
- // and nothing was written yet except Inst
- CurrHandledInstFragment->setInstAndInstSize(
- Inst, InstDataFragment->getContents().size());
- else if (MCRelaxableFragment *InstRelaxableFragment =
- dyn_cast_or_null<MCRelaxableFragment>(InstFragment))
- // Inst may be relaxed and its size may vary.
- // Let the fragment hold the instruction and the MCRelaxableFragment
- // that's holding it.
- CurrHandledInstFragment->setInstAndInstFragment(Inst,
- InstRelaxableFragment);
- else
- llvm_unreachable("After encoding an instruction current fragment must be "
- "either a MCDataFragment or a MCRelaxableFragment");
-
- CurrHandledInstFragment = nullptr;
-}
-
-MCPFRange &MCCodePadder::getJurisdiction(MCPaddingFragment *Fragment,
- MCAsmLayout &Layout) {
- auto JurisdictionLocation = FragmentToJurisdiction.find(Fragment);
- if (JurisdictionLocation != FragmentToJurisdiction.end())
- return JurisdictionLocation->second;
-
- MCPFRange Jurisdiction;
-
- // Forward scanning the fragments in this section, starting from the given
- // fragments, and adding relevant MCPaddingFragments to the Jurisdiction
- for (MCFragment *CurrFragment = Fragment; CurrFragment != nullptr;
- CurrFragment = CurrFragment->getNextNode()) {
-
- MCPaddingFragment *CurrPaddingFragment =
- dyn_cast<MCPaddingFragment>(CurrFragment);
- if (CurrPaddingFragment == nullptr)
- continue;
-
- if (CurrPaddingFragment != Fragment &&
- CurrPaddingFragment->isInsertionPoint())
- // Found next insertion point Fragment. From now on it's its jurisdiction.
- break;
- for (const auto *Policy : CodePaddingPolicies) {
- if (CurrPaddingFragment->hasPaddingPolicy(Policy->getKindMask())) {
- Jurisdiction.push_back(CurrPaddingFragment);
- break;
- }
- }
- }
-
- auto InsertionResult =
- FragmentToJurisdiction.insert(std::make_pair(Fragment, Jurisdiction));
- assert(InsertionResult.second &&
- "Insertion to FragmentToJurisdiction failed");
- return InsertionResult.first->second;
-}
-
-uint64_t MCCodePadder::getMaxWindowSize(MCPaddingFragment *Fragment,
- MCAsmLayout &Layout) {
- auto MaxFragmentSizeLocation = FragmentToMaxWindowSize.find(Fragment);
- if (MaxFragmentSizeLocation != FragmentToMaxWindowSize.end())
- return MaxFragmentSizeLocation->second;
-
- MCPFRange &Jurisdiction = getJurisdiction(Fragment, Layout);
- uint64_t JurisdictionMask = MCPaddingFragment::PFK_None;
- for (const auto *Protege : Jurisdiction)
- JurisdictionMask |= Protege->getPaddingPoliciesMask();
-
- uint64_t MaxFragmentSize = UINT64_C(0);
- for (const auto *Policy : CodePaddingPolicies)
- if ((JurisdictionMask & Policy->getKindMask()) !=
- MCPaddingFragment::PFK_None)
- MaxFragmentSize = std::max(MaxFragmentSize, Policy->getWindowSize());
-
- auto InsertionResult =
- FragmentToMaxWindowSize.insert(std::make_pair(Fragment, MaxFragmentSize));
- assert(InsertionResult.second &&
- "Insertion to FragmentToMaxWindowSize failed");
- return InsertionResult.first->second;
-}
-
-bool MCCodePadder::relaxFragment(MCPaddingFragment *Fragment,
- MCAsmLayout &Layout) {
- if (!Fragment->isInsertionPoint())
- return false;
- uint64_t OldSize = Fragment->getSize();
-
- uint64_t MaxWindowSize = getMaxWindowSize(Fragment, Layout);
- if (MaxWindowSize == UINT64_C(0))
- return false;
- assert(isPowerOf2_64(MaxWindowSize) &&
- "MaxWindowSize must be an integer power of 2");
- uint64_t SectionAlignment = Fragment->getParent()->getAlignment();
- assert(isPowerOf2_64(SectionAlignment) &&
- "SectionAlignment must be an integer power of 2");
-
- MCPFRange &Jurisdiction = getJurisdiction(Fragment, Layout);
- uint64_t OptimalSize = UINT64_C(0);
- double OptimalWeight = std::numeric_limits<double>::max();
- uint64_t MaxFragmentSize = MaxWindowSize - UINT16_C(1);
- for (uint64_t Size = UINT64_C(0); Size <= MaxFragmentSize; ++Size) {
- Fragment->setSize(Size);
- Layout.invalidateFragmentsFrom(Fragment);
- double SizeWeight = 0.0;
- // The section is guaranteed to be aligned to SectionAlignment, but that
- // doesn't guarantee the exact section offset w.r.t. the policies window
- // size.
- // As a concrete example, the section could be aligned to 16B, but a
- // policy's window size can be 32B. That means that the section actual start
- // address can either be 0mod32 or 16mod32. The said policy will act
- // differently for each case, so we need to take both into consideration.
- for (uint64_t Offset = UINT64_C(0); Offset < MaxWindowSize;
- Offset += SectionAlignment) {
- double OffsetWeight = std::accumulate(
- CodePaddingPolicies.begin(), CodePaddingPolicies.end(), 0.0,
- [&Jurisdiction, &Offset, &Layout](
- double Weight, const MCCodePaddingPolicy *Policy) -> double {
- double PolicyWeight =
- Policy->computeRangePenaltyWeight(Jurisdiction, Offset, Layout);
- assert(PolicyWeight >= 0.0 && "A penalty weight must be positive");
- return Weight + PolicyWeight;
- });
- SizeWeight = std::max(SizeWeight, OffsetWeight);
- }
- if (SizeWeight < OptimalWeight) {
- OptimalWeight = SizeWeight;
- OptimalSize = Size;
- }
- if (OptimalWeight == 0.0)
- break;
- }
-
- Fragment->setSize(OptimalSize);
- Layout.invalidateFragmentsFrom(Fragment);
- return OldSize != OptimalSize;
-}
-
-//---------------------------------------------------------------------------
-// MCCodePaddingPolicy
-//
-
-uint64_t MCCodePaddingPolicy::getNextFragmentOffset(const MCFragment *Fragment,
- const MCAsmLayout &Layout) {
- assert(Fragment != nullptr && "Fragment cannot be null");
- MCFragment const *NextFragment = Fragment->getNextNode();
- return NextFragment == nullptr
- ? Layout.getSectionAddressSize(Fragment->getParent())
- : Layout.getFragmentOffset(NextFragment);
-}
-
-uint64_t
-MCCodePaddingPolicy::getFragmentInstByte(const MCPaddingFragment *Fragment,
- MCAsmLayout &Layout) const {
- uint64_t InstByte = getNextFragmentOffset(Fragment, Layout);
- if (InstByteIsLastByte)
- InstByte += Fragment->getInstSize() - UINT64_C(1);
- return InstByte;
-}
-
-uint64_t
-MCCodePaddingPolicy::computeWindowEndAddress(const MCPaddingFragment *Fragment,
- uint64_t Offset,
- MCAsmLayout &Layout) const {
- uint64_t InstByte = getFragmentInstByte(Fragment, Layout);
- return alignTo(InstByte + UINT64_C(1) + Offset, WindowSize) - Offset;
-}
-
-double MCCodePaddingPolicy::computeRangePenaltyWeight(
- const MCPFRange &Range, uint64_t Offset, MCAsmLayout &Layout) const {
-
- SmallVector<MCPFRange, 8> Windows;
- SmallVector<MCPFRange, 8>::iterator CurrWindowLocation = Windows.end();
- for (const MCPaddingFragment *Fragment : Range) {
- if (!Fragment->hasPaddingPolicy(getKindMask()))
- continue;
- uint64_t FragmentWindowEndAddress =
- computeWindowEndAddress(Fragment, Offset, Layout);
- if (CurrWindowLocation == Windows.end() ||
- FragmentWindowEndAddress !=
- computeWindowEndAddress(*CurrWindowLocation->begin(), Offset,
- Layout)) {
- // next window is starting
- Windows.push_back(MCPFRange());
- CurrWindowLocation = Windows.end() - 1;
- }
- CurrWindowLocation->push_back(Fragment);
- }
-
- if (Windows.empty())
- return 0.0;
-
- double RangeWeight = 0.0;
- SmallVector<MCPFRange, 8>::iterator I = Windows.begin();
- RangeWeight += computeFirstWindowPenaltyWeight(*I, Offset, Layout);
- ++I;
- RangeWeight += std::accumulate(
- I, Windows.end(), 0.0,
- [this, &Layout, &Offset](double Weight, MCPFRange &Window) -> double {
- return Weight += computeWindowPenaltyWeight(Window, Offset, Layout);
- });
- return RangeWeight;
-}
-
-double MCCodePaddingPolicy::computeFirstWindowPenaltyWeight(
- const MCPFRange &Window, uint64_t Offset, MCAsmLayout &Layout) const {
- if (Window.empty())
- return 0.0;
- uint64_t WindowEndAddress =
- computeWindowEndAddress(*Window.begin(), Offset, Layout);
-
- MCPFRange FullWindowFirstPart; // will hold all the fragments that are in the
- // same window as the fragments in the given
- // window but their penalty weight should not
- // be added
- for (const MCFragment *Fragment = (*Window.begin())->getPrevNode();
- Fragment != nullptr; Fragment = Fragment->getPrevNode()) {
- const MCPaddingFragment *PaddingNopFragment =
- dyn_cast<MCPaddingFragment>(Fragment);
- if (PaddingNopFragment == nullptr ||
- !PaddingNopFragment->hasPaddingPolicy(getKindMask()))
- continue;
- if (WindowEndAddress !=
- computeWindowEndAddress(PaddingNopFragment, Offset, Layout))
- break;
-
- FullWindowFirstPart.push_back(PaddingNopFragment);
- }
-
- std::reverse(FullWindowFirstPart.begin(), FullWindowFirstPart.end());
- double FullWindowFirstPartWeight =
- computeWindowPenaltyWeight(FullWindowFirstPart, Offset, Layout);
-
- MCPFRange FullWindow(
- FullWindowFirstPart); // will hold all the fragments that are in the
- // same window as the fragments in the given
- // window, whether their weight should be added
- // or not
- FullWindow.append(Window.begin(), Window.end());
- double FullWindowWeight =
- computeWindowPenaltyWeight(FullWindow, Offset, Layout);
-
- assert(FullWindowWeight >= FullWindowFirstPartWeight &&
- "More fragments necessarily means bigger weight");
- return FullWindowWeight - FullWindowFirstPartWeight;
-}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
index a69ee19e1a1a..a6417113fd38 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeView.h"
#include "llvm/MC/MCDwarf.h"
@@ -550,13 +551,15 @@ MCSectionXCOFF *MCContext::getXCOFFSection(StringRef Section,
// Otherwise, return a new section.
StringRef CachedName = Entry.first.SectionName;
+ MCSymbol *QualName = getOrCreateSymbol(
+ CachedName + "[" + XCOFF::getMappingClassString(SMC) + "]");
MCSymbol *Begin = nullptr;
if (BeginSymName)
Begin = createTempSymbol(BeginSymName, false);
- MCSectionXCOFF *Result = new (XCOFFAllocator.Allocate())
- MCSectionXCOFF(CachedName, SMC, Type, SC, Kind, Begin);
+ MCSectionXCOFF *Result = new (XCOFFAllocator.Allocate()) MCSectionXCOFF(
+ CachedName, SMC, Type, SC, Kind, cast<MCSymbolXCOFF>(QualName), Begin);
Entry.second = Result;
auto *F = new MCDataFragment();
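The QualName built above is just the section name qualified by its storage mapping class. A standalone sketch of the scheme (the SMC strings here are examples):

    #include <cstdio>
    #include <string>

    // e.g. ".text" with SMC "PR" yields ".text[PR]".
    static std::string qualName(const std::string &Name, const std::string &SMC) {
      return Name + "[" + SMC + "]";
    }

    int main() {
      std::printf("%s\n", qualName(".rodata", "RO").c_str()); // .rodata[RO]
    }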
diff --git a/contrib/llvm-project/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/contrib/llvm-project/llvm/lib/MC/MCDisassembler/Disassembler.cpp
index 21bdc2eaea3e..ff56695e8cc4 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -56,8 +57,10 @@ LLVMCreateDisasmCPUFeatures(const char *TT, const char *CPU,
if (!MRI)
return nullptr;
+ MCTargetOptions MCOptions;
// Get the assembler info needed to setup the MCContext.
- std::unique_ptr<const MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TT));
+ std::unique_ptr<const MCAsmInfo> MAI(
+ TheTarget->createMCAsmInfo(*MRI, TT, MCOptions));
if (!MAI)
return nullptr;
@@ -260,8 +263,7 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
MCDisassembler::DecodeStatus S;
SmallVector<char, 64> InsnStr;
raw_svector_ostream Annotations(InsnStr);
- S = DisAsm->getInstruction(Inst, Size, Data, PC,
- /*REMOVE*/ nulls(), Annotations);
+ S = DisAsm->getInstruction(Inst, Size, Data, PC, Annotations);
switch (S) {
case MCDisassembler::Fail:
case MCDisassembler::SoftFail:
@@ -274,7 +276,8 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
SmallVector<char, 64> InsnStr;
raw_svector_ostream OS(InsnStr);
formatted_raw_ostream FormattedOS(OS);
- IP->printInst(&Inst, FormattedOS, AnnotationsStr, *DC->getSubtargetInfo());
+ IP->printInst(&Inst, PC, AnnotationsStr, *DC->getSubtargetInfo(),
+ FormattedOS);
if (DC->getOptions() & LLVMDisassembler_Option_PrintLatency)
emitLatency(DC, Inst);
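printInst (and the target streamer's prettyPrintAsm) now receives the instruction's address, which printers can use for branch-target comments, and the output stream moves to the last parameter. A hypothetical call site, assuming LLVM headers of this vintage:

    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstPrinter.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>

    // The address parameter is new; the stream moved from second to last.
    void printOne(llvm::MCInstPrinter &IP, const llvm::MCInst &Inst,
                  uint64_t PC, const llvm::MCSubtargetInfo &STI) {
      IP.printInst(&Inst, PC, /*Annot=*/"", STI, llvm::outs());
    }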
diff --git a/contrib/llvm-project/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/contrib/llvm-project/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
index 063f7e706024..373916fbed78 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -16,9 +16,10 @@ using namespace llvm;
MCDisassembler::~MCDisassembler() = default;
-MCDisassembler::DecodeStatus MCDisassembler::onSymbolStart(
- StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream, raw_ostream &CStream) const {
+MCDisassembler::DecodeStatus
+MCDisassembler::onSymbolStart(StringRef Name, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &CStream) const {
Size = 0;
return MCDisassembler::Success;
}
@@ -27,18 +28,16 @@ bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
uint64_t Address, bool IsBranch,
uint64_t Offset,
uint64_t InstSize) const {
- raw_ostream &cStream = CommentStream ? *CommentStream : nulls();
if (Symbolizer)
- return Symbolizer->tryAddingSymbolicOperand(Inst, cStream, Value, Address,
- IsBranch, Offset, InstSize);
+ return Symbolizer->tryAddingSymbolicOperand(
+ Inst, *CommentStream, Value, Address, IsBranch, Offset, InstSize);
return false;
}
void MCDisassembler::tryAddingPcLoadReferenceComment(int64_t Value,
uint64_t Address) const {
- raw_ostream &cStream = CommentStream ? *CommentStream : nulls();
if (Symbolizer)
- Symbolizer->tryAddingPcLoadReferenceComment(cStream, Value, Address);
+ Symbolizer->tryAddingPcLoadReferenceComment(*CommentStream, Value, Address);
}
void MCDisassembler::setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer) {
diff --git a/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp
index bcc7c45afc01..b4b3c9956cc2 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCDwarf.cpp
@@ -1701,7 +1701,8 @@ void FrameEmitterImpl::EmitFDE(const MCSymbol &cieStart,
MakeStartMinusEndExpr(Streamer, SectionStart, cieStart, 0);
emitAbsValue(Streamer, offset, 4);
} else {
- Streamer.EmitSymbolValue(&cieStart, 4);
+ Streamer.EmitSymbolValue(&cieStart, 4,
+ asmInfo->needsDwarfSectionOffsetDirective());
}
// PC Begin
diff --git a/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
index fa2133078bfe..0a0c30df9c07 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
@@ -106,9 +106,10 @@ void MCELFStreamer::EmitLabel(MCSymbol *S, SMLoc Loc) {
Symbol->setType(ELF::STT_TLS);
}
-void MCELFStreamer::EmitLabel(MCSymbol *S, SMLoc Loc, MCFragment *F) {
+void MCELFStreamer::EmitLabelAtPos(MCSymbol *S, SMLoc Loc, MCFragment *F,
+ uint64_t Offset) {
auto *Symbol = cast<MCSymbolELF>(S);
- MCObjectStreamer::EmitLabel(Symbol, Loc, F);
+ MCObjectStreamer::EmitLabelAtPos(Symbol, Loc, F, Offset);
const MCSectionELF &Section =
static_cast<const MCSectionELF &>(*getCurrentSectionOnly());
@@ -307,10 +308,6 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
EmitLabel(Symbol);
EmitZeros(Size);
- // Update the maximum alignment of the section if necessary.
- if (ByteAlignment > Section.getAlignment())
- Section.setAlignment(Align(ByteAlignment));
-
SwitchSection(P.first, P.second);
} else {
if(Symbol->declareCommon(Size, ByteAlignment))
diff --git a/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp b/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
index 813c00f6f3bb..7f25fd4e90a7 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCExpr.cpp
@@ -601,7 +601,7 @@ static bool canFold(const MCAssembler *Asm, const MCSymbolRefExpr *A,
/// and
/// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
///
-/// This routine attempts to aggresively fold the operands such that the result
+/// This routine attempts to aggressively fold the operands such that the result
/// is representable in an MCValue, but may not always succeed.
///
/// \returns True on success, false if the result is not representable in an
diff --git a/contrib/llvm-project/llvm/lib/MC/MCFragment.cpp b/contrib/llvm-project/llvm/lib/MC/MCFragment.cpp
index ae5bd65507bc..a96b8e86aed3 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCFragment.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCFragment.cpp
@@ -232,13 +232,11 @@ uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
void ilist_alloc_traits<MCFragment>::deleteNode(MCFragment *V) { V->destroy(); }
-MCFragment::~MCFragment() = default;
-
MCFragment::MCFragment(FragmentType Kind, bool HasInstructions,
MCSection *Parent)
- : Kind(Kind), HasInstructions(HasInstructions), LayoutOrder(0),
- Parent(Parent), Atom(nullptr), Offset(~UINT64_C(0)) {
- if (Parent && !isDummy())
+ : Parent(Parent), Atom(nullptr), Offset(~UINT64_C(0)), LayoutOrder(0),
+ Kind(Kind), HasInstructions(HasInstructions) {
+ if (Parent && !isa<MCDummyFragment>(*this))
Parent->getFragmentList().push_back(this);
}
@@ -277,8 +275,8 @@ void MCFragment::destroy() {
case FT_LEB:
delete cast<MCLEBFragment>(this);
return;
- case FT_Padding:
- delete cast<MCPaddingFragment>(this);
+ case FT_BoundaryAlign:
+ delete cast<MCBoundaryAlignFragment>(this);
return;
case FT_SymbolId:
delete cast<MCSymbolIdFragment>(this);
@@ -324,7 +322,7 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
case MCFragment::FT_LEB: OS << "MCLEBFragment"; break;
- case MCFragment::FT_Padding: OS << "MCPaddingFragment"; break;
+ case MCFragment::FT_BoundaryAlign: OS << "MCBoundaryAlignFragment"; break;
case MCFragment::FT_SymbolId: OS << "MCSymbolIdFragment"; break;
case MCFragment::FT_CVInlineLines: OS << "MCCVInlineLineTableFragment"; break;
case MCFragment::FT_CVDefRange: OS << "MCCVDefRangeTableFragment"; break;
@@ -333,13 +331,13 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
OS << "<MCFragment " << (const void *)this << " LayoutOrder:" << LayoutOrder
<< " Offset:" << Offset << " HasInstructions:" << hasInstructions();
- if (const MCEncodedFragment *EF = dyn_cast<MCEncodedFragment>(this))
+ if (const auto *EF = dyn_cast<MCEncodedFragment>(this))
OS << " BundlePadding:" << static_cast<unsigned>(EF->getBundlePadding());
OS << ">";
switch (getKind()) {
case MCFragment::FT_Align: {
- const MCAlignFragment *AF = cast<MCAlignFragment>(this);
+ const auto *AF = cast<MCAlignFragment>(this);
if (AF->hasEmitNops())
OS << " (emit nops)";
OS << "\n ";
@@ -349,7 +347,7 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
break;
}
case MCFragment::FT_Data: {
- const MCDataFragment *DF = cast<MCDataFragment>(this);
+ const auto *DF = cast<MCDataFragment>(this);
OS << "\n ";
OS << " Contents:[";
const SmallVectorImpl<char> &Contents = DF->getContents();
@@ -372,7 +370,7 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
break;
}
case MCFragment::FT_CompactEncodedInst: {
- const MCCompactEncodedInstFragment *CEIF =
+ const auto *CEIF =
cast<MCCompactEncodedInstFragment>(this);
OS << "\n ";
OS << " Contents:[";
@@ -385,60 +383,60 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
break;
}
case MCFragment::FT_Fill: {
- const MCFillFragment *FF = cast<MCFillFragment>(this);
+ const auto *FF = cast<MCFillFragment>(this);
OS << " Value:" << static_cast<unsigned>(FF->getValue())
<< " ValueSize:" << static_cast<unsigned>(FF->getValueSize())
<< " NumValues:" << FF->getNumValues();
break;
}
case MCFragment::FT_Relaxable: {
- const MCRelaxableFragment *F = cast<MCRelaxableFragment>(this);
+ const auto *F = cast<MCRelaxableFragment>(this);
OS << "\n ";
OS << " Inst:";
F->getInst().dump_pretty(OS);
break;
}
case MCFragment::FT_Org: {
- const MCOrgFragment *OF = cast<MCOrgFragment>(this);
+ const auto *OF = cast<MCOrgFragment>(this);
OS << "\n ";
OS << " Offset:" << OF->getOffset()
<< " Value:" << static_cast<unsigned>(OF->getValue());
break;
}
case MCFragment::FT_Dwarf: {
- const MCDwarfLineAddrFragment *OF = cast<MCDwarfLineAddrFragment>(this);
+ const auto *OF = cast<MCDwarfLineAddrFragment>(this);
OS << "\n ";
OS << " AddrDelta:" << OF->getAddrDelta()
<< " LineDelta:" << OF->getLineDelta();
break;
}
case MCFragment::FT_DwarfFrame: {
- const MCDwarfCallFrameFragment *CF = cast<MCDwarfCallFrameFragment>(this);
+ const auto *CF = cast<MCDwarfCallFrameFragment>(this);
OS << "\n ";
OS << " AddrDelta:" << CF->getAddrDelta();
break;
}
case MCFragment::FT_LEB: {
- const MCLEBFragment *LF = cast<MCLEBFragment>(this);
+ const auto *LF = cast<MCLEBFragment>(this);
OS << "\n ";
OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned();
break;
}
- case MCFragment::FT_Padding: {
- const MCPaddingFragment *F = cast<MCPaddingFragment>(this);
- OS << "\n ";
- OS << " PaddingPoliciesMask:" << F->getPaddingPoliciesMask()
- << " IsInsertionPoint:" << F->isInsertionPoint()
- << " Size:" << F->getSize();
- OS << "\n ";
- OS << " Inst:";
- F->getInst().dump_pretty(OS);
- OS << " InstSize:" << F->getInstSize();
+ case MCFragment::FT_BoundaryAlign: {
+ const auto *BF = cast<MCBoundaryAlignFragment>(this);
+ if (BF->canEmitNops())
+ OS << " (can emit nops to align";
+ if (BF->isFused())
+ OS << " fused branch)";
+ else
+ OS << " unfused branch)";
OS << "\n ";
+ OS << " BoundarySize:" << BF->getAlignment().value()
+ << " Size:" << BF->getSize();
break;
}
case MCFragment::FT_SymbolId: {
- const MCSymbolIdFragment *F = cast<MCSymbolIdFragment>(this);
+ const auto *F = cast<MCSymbolIdFragment>(this);
OS << "\n ";
OS << " Sym:" << F->getSymbol();
break;
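In MCFragment, FT_Padding and MCPaddingFragment are replaced by FT_BoundaryAlign, whose fragment emits nops so that a branch does not cross an alignment boundary. The padding arithmetic, as a sketch assuming the x86 use case of 32-byte windows (the offsets below are invented):

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Boundary = 32;
  uint64_t Offset = 30, Size = 4; // bytes 30..33 would straddle offset 32
  uint64_t Pad = 0;
  // Pad up to the boundary only if the instruction crosses it.
  if (Offset / Boundary != (Offset + Size - 1) / Boundary)
    Pad = Boundary - (Offset % Boundary);
  std::printf("padding = %llu\n", (unsigned long long)Pad); // prints 2
  return 0;
}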
diff --git a/contrib/llvm-project/llvm/lib/MC/MCInstPrinter.cpp b/contrib/llvm-project/llvm/lib/MC/MCInstPrinter.cpp
index c5c06f323e68..8bf699279ada 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCInstPrinter.cpp
@@ -10,7 +10,9 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -57,6 +59,94 @@ void MCInstPrinter::printAnnotation(raw_ostream &OS, StringRef Annot) {
}
}
+static bool matchAliasCondition(const MCInst &MI, const MCSubtargetInfo *STI,
+ const MCRegisterInfo &MRI, unsigned &OpIdx,
+ const AliasMatchingData &M,
+ const AliasPatternCond &C) {
+ // Feature tests are special, they don't consume operands.
+ if (C.Kind == AliasPatternCond::K_Feature)
+ return STI->getFeatureBits().test(C.Value);
+ if (C.Kind == AliasPatternCond::K_NegFeature)
+ return !STI->getFeatureBits().test(C.Value);
+
+ // Get and consume an operand.
+ const MCOperand &Opnd = MI.getOperand(OpIdx);
+ ++OpIdx;
+
+ // Check the specific condition for the operand.
+ switch (C.Kind) {
+ case AliasPatternCond::K_Imm:
+ // Operand must be a specific immediate.
+ return Opnd.isImm() && Opnd.getImm() == int32_t(C.Value);
+ case AliasPatternCond::K_Reg:
+ // Operand must be a specific register.
+ return Opnd.isReg() && Opnd.getReg() == C.Value;
+ case AliasPatternCond::K_TiedReg:
+ // Operand must match the register of another operand.
+ return Opnd.isReg() && Opnd.getReg() == MI.getOperand(C.Value).getReg();
+ case AliasPatternCond::K_RegClass:
+ // Operand must be a register in this class. Value is a register class id.
+ return Opnd.isReg() && MRI.getRegClass(C.Value).contains(Opnd.getReg());
+ case AliasPatternCond::K_Custom:
+ // Operand must match some custom criteria.
+ return M.ValidateMCOperand(Opnd, *STI, C.Value);
+ case AliasPatternCond::K_Ignore:
+ // Operand can be anything.
+ return true;
+ case AliasPatternCond::K_Feature:
+ case AliasPatternCond::K_NegFeature:
+ llvm_unreachable("handled earlier");
+ }
+ llvm_unreachable("invalid kind");
+}
+
+const char *MCInstPrinter::matchAliasPatterns(const MCInst *MI,
+ const MCSubtargetInfo *STI,
+ const AliasMatchingData &M) {
+ // Binary search by opcode. Return false if there are no aliases for this
+ // opcode.
+ auto It = lower_bound(M.OpToPatterns, MI->getOpcode(),
+ [](const PatternsForOpcode &L, unsigned Opcode) {
+ return L.Opcode < Opcode;
+ });
+ if (It == M.OpToPatterns.end() || It->Opcode != MI->getOpcode())
+ return nullptr;
+
+ // Try all patterns for this opcode.
+ uint32_t AsmStrOffset = ~0U;
+ ArrayRef<AliasPattern> Patterns =
+ M.Patterns.slice(It->PatternStart, It->NumPatterns);
+ for (const AliasPattern &P : Patterns) {
+ // Check operand count first.
+ if (MI->getNumOperands() != P.NumOperands)
+ return nullptr;
+
+ // Test all conditions for this pattern.
+ ArrayRef<AliasPatternCond> Conds =
+ M.PatternConds.slice(P.AliasCondStart, P.NumConds);
+ unsigned OpIdx = 0;
+ if (llvm::all_of(Conds, [&](const AliasPatternCond &C) {
+ return matchAliasCondition(*MI, STI, MRI, OpIdx, M, C);
+ })) {
+ // If all conditions matched, use this asm string.
+ AsmStrOffset = P.AsmStrOffset;
+ break;
+ }
+ }
+
+ // If no alias matched, don't print an alias.
+ if (AsmStrOffset == ~0U)
+ return nullptr;
+
+ // Go to offset AsmStrOffset and use the null terminated string there. The
+ // offset should point to the beginning of an alias string, so it should
+ // either be zero or be preceded by a null byte.
+ assert(AsmStrOffset < M.AsmStrings.size() &&
+ (AsmStrOffset == 0 || M.AsmStrings[AsmStrOffset - 1] == '\0') &&
+ "bad asm string offset");
+ return M.AsmStrings.data() + AsmStrOffset;
+}
+
/// Utility functions to make adding mark ups simpler.
StringRef MCInstPrinter::markup(StringRef s) const {
if (getUseMarkup())
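matchAliasPatterns above binary-searches a table sorted by opcode, then tries each candidate pattern in order, taking the first whose operand conditions all hold. A standalone sketch of that lookup shape; the struct names mirror the LLVM ones, but these tables are invented rather than TableGen output:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct PatternsForOpcode { uint32_t Opcode, PatternStart, NumPatterns; };
struct AliasPattern { uint32_t AsmStrOffset, AliasCondStart, NumConds, NumOperands; };

int main() {
  // Sorted by opcode, as the generated table would be.
  std::vector<PatternsForOpcode> OpToPatterns = {{7, 0, 1}, {9, 1, 2}};
  std::vector<AliasPattern> Patterns = {{0, 0, 2, 2}, {12, 2, 1, 1}, {20, 3, 1, 1}};

  uint32_t Opcode = 9;
  auto It = std::lower_bound(OpToPatterns.begin(), OpToPatterns.end(), Opcode,
                             [](const PatternsForOpcode &L, uint32_t Op) {
                               return L.Opcode < Op;
                             });
  if (It == OpToPatterns.end() || It->Opcode != Opcode) {
    std::puts("no aliases for this opcode");
    return 0;
  }
  // In the real code, each pattern's conditions are tested here and the
  // first full match supplies the alias asm string.
  for (uint32_t I = It->PatternStart; I != It->PatternStart + It->NumPatterns; ++I)
    std::printf("candidate alias at asm-string offset %u\n",
                Patterns[I].AsmStrOffset);
  return 0;
}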
diff --git a/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp
index 3fa7cb48fadc..d567cc14a830 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -40,8 +40,7 @@ static bool useCompactUnwind(const Triple &T) {
return true;
// And the iOS simulator.
- if (T.isiOS() &&
- (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86))
+ if (T.isiOS() && T.isX86())
return true;
return false;
@@ -192,7 +191,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
Ctx->getMachOSection("__LD", "__compact_unwind", MachO::S_ATTR_DEBUG,
SectionKind::getReadOnly());
- if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
+ if (T.isX86())
CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_X86_64_MODE_DWARF
else if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)
CompactUnwindDwarfEHFrameOnly = 0x03000000; // UNWIND_ARM64_MODE_DWARF
@@ -468,6 +467,11 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
DebugSecType, ELF::SHF_EXCLUDE);
DwarfRnglistsDWOSection =
Ctx->getELFSection(".debug_rnglists.dwo", DebugSecType, ELF::SHF_EXCLUDE);
+ DwarfMacinfoDWOSection =
+ Ctx->getELFSection(".debug_macinfo.dwo", DebugSecType, ELF::SHF_EXCLUDE);
+
+ DwarfLoclistsDWOSection =
+ Ctx->getELFSection(".debug_loclists.dwo", DebugSecType, ELF::SHF_EXCLUDE);
// DWP Sections
DwarfCUIndexSection =
@@ -485,9 +489,6 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
-
- RemarksSection =
- Ctx->getELFSection(".remarks", ELF::SHT_PROGBITS, ELF::SHF_EXCLUDE);
}
void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
@@ -624,6 +625,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata(), "debug_macinfo");
+ DwarfMacinfoDWOSection = Ctx->getCOFFSection(
+ ".debug_macinfo.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "debug_macinfo.dwo");
DwarfInfoDWOSection = Ctx->getCOFFSection(
".debug_info.dwo",
COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -720,6 +726,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
+ GLJMPSection = Ctx->getCOFFSection(".gljmp$y",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+
TLSDataSection = Ctx->getCOFFSection(
".tls$", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE,
@@ -780,6 +791,10 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
DataSection = Ctx->getXCOFFSection(
".data", XCOFF::StorageMappingClass::XMC_RW, XCOFF::XTY_SD,
XCOFF::C_HIDEXT, SectionKind::getData());
+
+ ReadOnlySection = Ctx->getXCOFFSection(
+ ".rodata", XCOFF::StorageMappingClass::XMC_RO, XCOFF::XTY_SD,
+ XCOFF::C_HIDEXT, SectionKind::getReadOnly());
}
void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
diff --git a/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp
index 83f6ab8fe332..3d1358df475f 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCObjectStreamer.cpp
@@ -29,7 +29,10 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context,
: MCStreamer(Context),
Assembler(std::make_unique<MCAssembler>(
Context, std::move(TAB), std::move(Emitter), std::move(OW))),
- EmitEHFrame(true), EmitDebugFrame(false) {}
+ EmitEHFrame(true), EmitDebugFrame(false) {
+ if (Assembler->getBackendPtr())
+ setAllowAutoPadding(Assembler->getBackend().allowAutoPadding());
+}
MCObjectStreamer::~MCObjectStreamer() {}
@@ -42,20 +45,64 @@ MCAssembler *MCObjectStreamer::getAssemblerPtr() {
return nullptr;
}
+void MCObjectStreamer::addPendingLabel(MCSymbol* S) {
+ MCSection *CurSection = getCurrentSectionOnly();
+ if (CurSection) {
+ // Register labels that have not yet been assigned to a Section.
+ if (!PendingLabels.empty()) {
+ for (MCSymbol* Sym : PendingLabels)
+ CurSection->addPendingLabel(Sym);
+ PendingLabels.clear();
+ }
+
+ // Add this label to the current Section / Subsection.
+ CurSection->addPendingLabel(S, CurSubsectionIdx);
+
+ // Add this Section to the list of PendingLabelSections.
+ auto SecIt = std::find(PendingLabelSections.begin(),
+ PendingLabelSections.end(), CurSection);
+ if (SecIt == PendingLabelSections.end())
+ PendingLabelSections.push_back(CurSection);
+ }
+ else
+ // There is no Section / Subsection for this label yet.
+ PendingLabels.push_back(S);
+}
+
void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
- if (PendingLabels.empty())
+ MCSection *CurSection = getCurrentSectionOnly();
+ if (!CurSection) {
+ assert(PendingLabels.empty());
return;
- if (!F) {
- F = new MCDataFragment();
- MCSection *CurSection = getCurrentSectionOnly();
- CurSection->getFragmentList().insert(CurInsertionPoint, F);
- F->setParent(CurSection);
}
- for (MCSymbol *Sym : PendingLabels) {
- Sym->setFragment(F);
- Sym->setOffset(FOffset);
+ // Register labels that have not yet been assigned to a Section.
+ if (!PendingLabels.empty()) {
+ for (MCSymbol* Sym : PendingLabels)
+ CurSection->addPendingLabel(Sym, CurSubsectionIdx);
+ PendingLabels.clear();
}
- PendingLabels.clear();
+
+ // Associate a fragment with this label, either the supplied fragment
+ // or an empty data fragment.
+ if (F)
+ CurSection->flushPendingLabels(F, FOffset, CurSubsectionIdx);
+ else
+ CurSection->flushPendingLabels(nullptr, 0, CurSubsectionIdx);
+}
+
+void MCObjectStreamer::flushPendingLabels() {
+ // Register labels that have not yet been assigned to a Section.
+ if (!PendingLabels.empty()) {
+ MCSection *CurSection = getCurrentSectionOnly();
+ assert(CurSection);
+ for (MCSymbol* Sym : PendingLabels)
+ CurSection->addPendingLabel(Sym, CurSubsectionIdx);
+ PendingLabels.clear();
+ }
+
+ // Assign an empty data fragment to all remaining pending labels.
+ for (MCSection* Section : PendingLabelSections)
+ Section->flushPendingLabels();
}
// When fixup's offset is a forward declared label, e.g.:
@@ -120,6 +167,7 @@ void MCObjectStreamer::reset() {
EmitEHFrame = true;
EmitDebugFrame = false;
PendingLabels.clear();
+ PendingLabelSections.clear();
MCStreamer::reset();
}
@@ -167,16 +215,6 @@ MCObjectStreamer::getOrCreateDataFragment(const MCSubtargetInfo *STI) {
return F;
}
-MCPaddingFragment *MCObjectStreamer::getOrCreatePaddingFragment() {
- MCPaddingFragment *F =
- dyn_cast_or_null<MCPaddingFragment>(getCurrentFragment());
- if (!F) {
- F = new MCPaddingFragment();
- insert(F);
- }
- return F;
-}
-
void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) {
Assembler->registerSymbol(Sym);
}
@@ -243,18 +281,32 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) {
Symbol->setFragment(F);
Symbol->setOffset(F->getContents().size());
} else {
- PendingLabels.push_back(Symbol);
+ // Assign all pending labels to offset 0 within the dummy "pending"
+ // fragment. (They will all be reassigned to a real fragment in
+ // flushPendingLabels())
+ Symbol->setOffset(0);
+ addPendingLabel(Symbol);
}
}
-void MCObjectStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc, MCFragment *F) {
+// Emit a label at a previously emitted fragment/offset position. This must be
+// within the currently-active section.
+void MCObjectStreamer::EmitLabelAtPos(MCSymbol *Symbol, SMLoc Loc,
+ MCFragment *F, uint64_t Offset) {
+ assert(F->getParent() == getCurrentSectionOnly());
+
MCStreamer::EmitLabel(Symbol, Loc);
getAssembler().registerSymbol(*Symbol);
auto *DF = dyn_cast_or_null<MCDataFragment>(F);
- if (DF)
+ Symbol->setOffset(Offset);
+ if (DF) {
Symbol->setFragment(F);
- else
- PendingLabels.push_back(Symbol);
+ } else {
+ assert(isa<MCDummyFragment>(F) &&
+ "F must either be an MCDataFragment or the pending MCDummyFragment");
+ assert(Offset == 0);
+ addPendingLabel(Symbol);
+ }
}
void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
@@ -288,7 +340,6 @@ void MCObjectStreamer::ChangeSection(MCSection *Section,
bool MCObjectStreamer::changeSectionImpl(MCSection *Section,
const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
- flushPendingLabels(nullptr);
getContext().clearDwarfLocSeen();
bool Created = getAssembler().registerSection(*Section);
@@ -299,8 +350,9 @@ bool MCObjectStreamer::changeSectionImpl(MCSection *Section,
report_fatal_error("Cannot evaluate subsection number");
if (IntSubsection < 0 || IntSubsection > 8192)
report_fatal_error("Subsection number out of range");
+ CurSubsectionIdx = unsigned(IntSubsection);
CurInsertionPoint =
- Section->getSubsectionInsertionPoint(unsigned(IntSubsection));
+ Section->getSubsectionInsertionPoint(CurSubsectionIdx);
return Created;
}
@@ -315,9 +367,9 @@ bool MCObjectStreamer::mayHaveInstructions(MCSection &Sec) const {
void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
const MCSubtargetInfo &STI) {
- getAssembler().getBackend().handleCodePaddingInstructionBegin(Inst);
+ getAssembler().getBackend().alignBranchesBegin(*this, Inst);
EmitInstructionImpl(Inst, STI);
- getAssembler().getBackend().handleCodePaddingInstructionEnd(Inst);
+ getAssembler().getBackend().alignBranchesEnd(*this, Inst);
}
void MCObjectStreamer::EmitInstructionImpl(const MCInst &Inst,
@@ -520,12 +572,6 @@ void MCObjectStreamer::EmitBytes(StringRef Data) {
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
DF->getContents().append(Data.begin(), Data.end());
-
- // EmitBytes might not cover all possible ways we emit data (or could be used
- // to emit executable code in some cases), but is the best method we have
- // right now for checking this.
- MCSection *Sec = getCurrentSectionOnly();
- Sec->setHasData(true);
}
void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment,
@@ -554,16 +600,6 @@ void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset,
insert(new MCOrgFragment(*Offset, Value, Loc));
}
-void MCObjectStreamer::EmitCodePaddingBasicBlockStart(
- const MCCodePaddingContext &Context) {
- getAssembler().getBackend().handleCodePaddingBasicBlockStart(this, Context);
-}
-
-void MCObjectStreamer::EmitCodePaddingBasicBlockEnd(
- const MCCodePaddingContext &Context) {
- getAssembler().getBackend().handleCodePaddingBasicBlockEnd(Context);
-}
-
// Associate DTPRel32 fixup with data and resize data area
void MCObjectStreamer::EmitDTPRel32Value(const MCExpr *Value) {
MCDataFragment *DF = getOrCreateDataFragment();
@@ -725,7 +761,9 @@ void MCObjectStreamer::FinishImpl() {
// Dump out the dwarf file & directory tables and line tables.
MCDwarfLineTable::Emit(this, getAssembler().getDWARFLinetableParams());
+ // Update any remaining pending labels with empty data fragments.
flushPendingLabels();
+
resolvePendingFixups();
getAssembler().Finish();
}
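The pending-label rework above parks labels per (section, subsection) until a real fragment exists; MCSection::flushPendingLabels, further down in this merge, then binds each parked label to a fragment and offset. A toy model of that bookkeeping, with simplified stand-ins for the MC types:

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct Fragment { int Id; };
struct PendingLabel { std::string Sym; unsigned Subsection; };

struct Section {
  std::vector<PendingLabel> Pending;

  void addPendingLabel(std::string Sym, unsigned Subsec) {
    Pending.push_back({std::move(Sym), Subsec});
  }

  // Bind every label parked for Subsec to fragment F at offset Off.
  void flushPendingLabels(const Fragment &F, uint64_t Off, unsigned Subsec) {
    for (auto It = Pending.begin(); It != Pending.end();) {
      if (It->Subsection == Subsec) {
        std::printf("%s -> fragment %d, offset %llu\n", It->Sym.c_str(), F.Id,
                    (unsigned long long)Off);
        It = Pending.erase(It);
      } else {
        ++It;
      }
    }
  }
};

int main() {
  Section Text;
  Text.addPendingLabel("foo", 0);
  Text.addPendingLabel("bar", 1);
  Fragment F{42};
  Text.flushPendingLabels(F, 0, 0); // binds "foo"; "bar" stays parked
  return 0;
}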
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp
index b59ac08ad6cc..94a44c1f93b1 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -91,16 +91,13 @@ struct MacroInstantiation {
SMLoc InstantiationLoc;
/// The buffer where parsing should resume upon instantiation completion.
- int ExitBuffer;
+ unsigned ExitBuffer;
/// The location where parsing should resume upon instantiation completion.
SMLoc ExitLoc;
/// The depth of TheCondStack at the start of the instantiation.
size_t CondStackDepth;
-
-public:
- MacroInstantiation(SMLoc IL, int EB, SMLoc EL, size_t CondStackDepth);
};
struct ParseStatementInfo {
@@ -916,13 +913,12 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
ParseStatementInfo Info(&AsmStrRewrites);
- if (!parseStatement(Info, nullptr))
- continue;
+ bool Parsed = parseStatement(Info, nullptr);
// If we have a Lexer Error we are on an Error Token. Load in Lexer Error
// for printing ErrMsg via Lex() only if no (presumably better) parser error
// exists.
- if (!hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
+ if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
Lex();
}
@@ -930,7 +926,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
printPendingErrors();
// Skipping to the next line if needed.
- if (!getLexer().isAtStartOfStatement())
+ if (Parsed && !getLexer().isAtStartOfStatement())
eatToEndOfStatement();
}
@@ -2521,11 +2517,6 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
return false;
}
-MacroInstantiation::MacroInstantiation(SMLoc IL, int EB, SMLoc EL,
- size_t CondStackDepth)
- : InstantiationLoc(IL), ExitBuffer(EB), ExitLoc(EL),
- CondStackDepth(CondStackDepth) {}
-
static bool isOperator(AsmToken::TokenKind kind) {
switch (kind) {
default:
@@ -2800,8 +2791,8 @@ bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
// Create the macro instantiation object and add to the current macro
// instantiation stack.
- MacroInstantiation *MI = new MacroInstantiation(
- NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size());
+ MacroInstantiation *MI = new MacroInstantiation{
+ NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
ActiveMacros.push_back(MI);
++NumOfMacroInstantiations;
@@ -3139,8 +3130,9 @@ bool AsmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
Value = APFloat::getNaN(Semantics, false, ~0);
else
return TokError("invalid floating point literal");
- } else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) ==
- APFloat::opInvalidOp)
+ } else if (errorToBool(
+ Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
+ .takeError()))
return TokError("invalid floating point literal");
if (IsNeg)
Value.changeSign();
@@ -5545,8 +5537,8 @@ void AsmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
// Create the macro instantiation object and add to the current macro
// instantiation stack.
- MacroInstantiation *MI = new MacroInstantiation(
- DirectiveLoc, CurBuffer, getTok().getLoc(), TheCondStack.size());
+ MacroInstantiation *MI = new MacroInstantiation{
+ DirectiveLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
ActiveMacros.push_back(MI);
// Jump to the macro instantiation and prime the lexer.
@@ -5813,10 +5805,6 @@ bool AsmParser::parseMSInlineAsm(
for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];
- // Immediate.
- if (Operand.isImm())
- continue;
-
// Register operand.
if (Operand.isReg() && !Operand.needAddressOf() &&
!getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) {
@@ -5836,19 +5824,31 @@ bool AsmParser::parseMSInlineAsm(
if (!OpDecl)
continue;
+ StringRef Constraint = Operand.getConstraint();
+ if (Operand.isImm()) {
+ // Offset as immediate
+ if (Operand.isOffsetOfLocal())
+ Constraint = "r";
+ else
+ Constraint = "i";
+ }
+
bool isOutput = (i == 1) && Desc.mayStore();
SMLoc Start = SMLoc::getFromPointer(SymName.data());
if (isOutput) {
++InputIdx;
OutputDecls.push_back(OpDecl);
OutputDeclsAddressOf.push_back(Operand.needAddressOf());
- OutputConstraints.push_back(("=" + Operand.getConstraint()).str());
+ OutputConstraints.push_back(("=" + Constraint).str());
AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
} else {
InputDecls.push_back(OpDecl);
InputDeclsAddressOf.push_back(Operand.needAddressOf());
- InputConstraints.push_back(Operand.getConstraint().str());
- AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
+ InputConstraints.push_back(Constraint.str());
+ if (Desc.OpInfo[i - 1].isBranchTarget())
+ AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
+ else
+ AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
}
}
@@ -5895,7 +5895,11 @@ bool AsmParser::parseMSInlineAsm(
const char *AsmStart = ASMString.begin();
const char *AsmEnd = ASMString.end();
array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
- for (const AsmRewrite &AR : AsmStrRewrites) {
+ for (auto it = AsmStrRewrites.begin(); it != AsmStrRewrites.end(); ++it) {
+ const AsmRewrite &AR = *it;
+ // Check if this has already been covered by another rewrite...
+ if (AR.Done)
+ continue;
AsmRewriteKind Kind = AR.Kind;
const char *Loc = AR.Loc.getPointer();
@@ -5926,9 +5930,32 @@ bool AsmParser::parseMSInlineAsm(
OS << (AR.IntelExp.hasBaseReg() ? " + " : "")
<< AR.IntelExp.IndexReg;
if (AR.IntelExp.Scale > 1)
- OS << " * $$" << AR.IntelExp.Scale;
- if (AR.IntelExp.Imm || !AR.IntelExp.hasRegs())
- OS << (AR.IntelExp.hasRegs() ? " + $$" : "$$") << AR.IntelExp.Imm;
+ OS << " * $$" << AR.IntelExp.Scale;
+ if (AR.IntelExp.hasOffset()) {
+ if (AR.IntelExp.hasRegs())
+ OS << " + ";
+ // Fuse this rewrite with a rewrite of the offset name, if present.
+ StringRef OffsetName = AR.IntelExp.OffsetName;
+ SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data());
+ size_t OffsetLen = OffsetName.size();
+ auto rewrite_it = std::find_if(
+ it, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) {
+ return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen &&
+ (FusingAR.Kind == AOK_Input ||
+ FusingAR.Kind == AOK_CallInput);
+ });
+ if (rewrite_it == AsmStrRewrites.end()) {
+ OS << "offset " << OffsetName;
+ } else if (rewrite_it->Kind == AOK_CallInput) {
+ OS << "${" << InputIdx++ << ":P}";
+ rewrite_it->Done = true;
+ } else {
+ OS << '$' << InputIdx++;
+ rewrite_it->Done = true;
+ }
+ }
+ if (AR.IntelExp.Imm || AR.IntelExp.emitImm())
+ OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm;
if (AR.IntelExp.NeedBracs)
OS << "]";
break;
@@ -5938,6 +5965,9 @@ bool AsmParser::parseMSInlineAsm(
case AOK_Input:
OS << '$' << InputIdx++;
break;
+ case AOK_CallInput:
+ OS << "${" << InputIdx++ << ":P}";
+ break;
case AOK_Output:
OS << '$' << OutputIdx++;
break;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index 06f8310ae061..51bb1fe92b73 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -144,7 +144,7 @@ public:
COFFAsmParser() = default;
};
-} // end annonomous namespace.
+} // end anonymous namespace.
static SectionKind computeSectionKind(unsigned Flags) {
if (Flags & COFF::IMAGE_SCN_MEM_EXECUTE)
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSection.cpp b/contrib/llvm-project/llvm/lib/MC/MCSection.cpp
index 2c892ab81608..074534bd73db 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSection.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSection.cpp
@@ -22,8 +22,7 @@ using namespace llvm;
MCSection::MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin)
: Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false),
- HasData(false), IsRegistered(false), DummyFragment(this), Variant(V),
- Kind(K) {}
+ IsRegistered(false), DummyFragment(this), Variant(V), Kind(K) {}
MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) {
if (!End)
@@ -86,6 +85,41 @@ MCSection::getSubsectionInsertionPoint(unsigned Subsection) {
return IP;
}
+void MCSection::addPendingLabel(MCSymbol* label, unsigned Subsection) {
+ PendingLabels.push_back(PendingLabel(label, Subsection));
+}
+
+void MCSection::flushPendingLabels(MCFragment *F, uint64_t FOffset,
+ unsigned Subsection) {
+ if (PendingLabels.empty())
+ return;
+
+ // Set the fragment and fragment offset for all pending symbols in the
+ // specified Subsection, and remove those symbols from the pending list.
+ for (auto It = PendingLabels.begin(); It != PendingLabels.end(); ++It) {
+ PendingLabel& Label = *It;
+ if (Label.Subsection == Subsection) {
+ Label.Sym->setFragment(F);
+ Label.Sym->setOffset(FOffset);
+ PendingLabels.erase(It--);
+ }
+ }
+}
+
+void MCSection::flushPendingLabels() {
+ // Make sure all remaining pending labels point to data fragments, by
+ // creating new empty data fragments for each Subsection with labels pending.
+ while (!PendingLabels.empty()) {
+ PendingLabel& Label = PendingLabels[0];
+ iterator CurInsertionPoint =
+ this->getSubsectionInsertionPoint(Label.Subsection);
+ MCFragment *F = new MCDataFragment();
+ getFragmentList().insert(CurInsertionPoint, F);
+ F->setParent(this);
+ flushPendingLabels(F, 0, Label.Subsection);
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCSection::dump() const {
raw_ostream &OS = errs();
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp b/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp
index d52959f15f92..8377e295532a 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSectionXCOFF.cpp
@@ -15,18 +15,6 @@ using namespace llvm;
MCSectionXCOFF::~MCSectionXCOFF() = default;
-static StringRef getMappingClassString(XCOFF::StorageMappingClass SMC) {
- switch (SMC) {
- case XCOFF::XMC_DS:
- return "DS";
- case XCOFF::XMC_RW:
- return "RW";
- case XCOFF::XMC_PR:
- return "PR";
- default:
- report_fatal_error("Unhandled storage-mapping class.");
- }
-}
void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
@@ -35,9 +23,14 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
if (getMappingClass() != XCOFF::XMC_PR)
report_fatal_error("Unhandled storage-mapping class for .text csect");
- OS << "\t.csect " << getSectionName() << "["
- << getMappingClassString(getMappingClass())
- << "]" << '\n';
+ OS << "\t.csect " << QualName->getName() << '\n';
+ return;
+ }
+
+ if (getKind().isReadOnly()) {
+ if (getMappingClass() != XCOFF::XMC_RO)
+ report_fatal_error("Unhandled storage-mapping class for .rodata csect.");
+ OS << "\t.csect " << QualName->getName() << '\n';
return;
}
@@ -45,8 +38,9 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
switch (getMappingClass()) {
case XCOFF::XMC_RW:
case XCOFF::XMC_DS:
- OS << "\t.csect " << getSectionName() << "["
- << getMappingClassString(getMappingClass()) << "]" << '\n';
+ OS << "\t.csect " << QualName->getName() << '\n';
+ break;
+ case XCOFF::XMC_TC:
break;
case XCOFF::XMC_TC0:
OS << "\t.toc\n";
diff --git a/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp
index b8278cb11079..0ab883536779 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCStreamer.cpp
@@ -977,9 +977,10 @@ void MCStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
}
void MCTargetStreamer::prettyPrintAsm(MCInstPrinter &InstPrinter,
- raw_ostream &OS, const MCInst &Inst,
- const MCSubtargetInfo &STI) {
- InstPrinter.printInst(&Inst, OS, "", STI);
+ uint64_t Address, const MCInst &Inst,
+ const MCSubtargetInfo &STI,
+ raw_ostream &OS) {
+ InstPrinter.printInst(&Inst, Address, "", STI, OS);
}
void MCStreamer::visitUsedSymbol(const MCSymbol &Sym) {
@@ -1063,7 +1064,8 @@ void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
void MCStreamer::EmitCOFFSymbolType(int Type) {
llvm_unreachable("this directive only supported on COFF targets");
}
-void MCStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+void MCStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size,
+ MCSymbol *CsectSym,
unsigned ByteAlign) {
llvm_unreachable("this directive only supported on XCOFF targets");
}
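prettyPrintAsm and printInst now carry the instruction's address, which lets a printer render a PC-relative operand as an absolute target rather than a raw displacement. The arithmetic involved, with invented operand values:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Address = 0x401000; // where the instruction lives
  uint64_t InstSize = 4;       // its encoded length
  int64_t PCRelImm = 0x20;     // displacement relative to the next instruction
  std::printf("target = 0x%llx\n",
              (unsigned long long)(Address + InstSize + PCRelImm)); // 0x401024
  return 0;
}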
diff --git a/contrib/llvm-project/llvm/lib/MC/MCSymbolELF.cpp b/contrib/llvm-project/llvm/lib/MC/MCSymbolELF.cpp
index a07c56c64f84..1830b87fd856 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCSymbolELF.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCSymbolELF.cpp
@@ -40,8 +40,6 @@ enum {
void MCSymbolELF::setBinding(unsigned Binding) const {
setIsBindingSet();
- if (getType() == ELF::STT_SECTION && Binding != ELF::STB_LOCAL)
- setType(ELF::STT_NOTYPE);
unsigned Val;
switch (Binding) {
default:
@@ -93,8 +91,6 @@ unsigned MCSymbolELF::getBinding() const {
void MCSymbolELF::setType(unsigned Type) const {
unsigned Val;
- if (Type == ELF::STT_SECTION && getBinding() != ELF::STB_LOCAL)
- return;
switch (Type) {
default:
llvm_unreachable("Unsupported Binding");
diff --git a/contrib/llvm-project/llvm/lib/MC/MCTargetOptions.cpp b/contrib/llvm-project/llvm/lib/MC/MCTargetOptions.cpp
index 96bb094134fe..5848e3ecadbe 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCTargetOptions.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCTargetOptions.cpp
@@ -15,8 +15,8 @@ MCTargetOptions::MCTargetOptions()
: MCRelaxAll(false), MCNoExecStack(false), MCFatalWarnings(false),
MCNoWarn(false), MCNoDeprecatedWarn(false), MCSaveTempLabels(false),
MCUseDwarfDirectory(false), MCIncrementalLinkerCompatible(false),
- MCPIECopyRelocations(false), ShowMCEncoding(false), ShowMCInst(false),
- AsmVerbose(false), PreserveAsmComments(true) {}
+ ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false),
+ PreserveAsmComments(true) {}
StringRef MCTargetOptions::getABIName() const {
return ABIName;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCValue.cpp b/contrib/llvm-project/llvm/lib/MC/MCValue.cpp
index 81da47b2eced..b6bcec9b2ca0 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCValue.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCValue.cpp
@@ -54,8 +54,5 @@ MCSymbolRefExpr::VariantKind MCValue::getAccessVariant() const {
if (!A)
return MCSymbolRefExpr::VK_None;
- MCSymbolRefExpr::VariantKind Kind = A->getKind();
- if (Kind == MCSymbolRefExpr::VK_WEAKREF)
- return MCSymbolRefExpr::VK_None;
- return Kind;
+ return A->getKind();
}
diff --git a/contrib/llvm-project/llvm/lib/MC/MCXCOFFStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCXCOFFStreamer.cpp
index 50937d6adc0c..6efa167ced42 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCXCOFFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCXCOFFStreamer.cpp
@@ -50,12 +50,6 @@ void MCXCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
XCOFF::C_HIDEXT);
Symbol->setCommon(Size, ByteAlignment);
- // Need to add this symbol to the current Fragment which will belong to the
- // containing CSECT.
- auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
- assert(F && "Expected a valid section with a fragment set.");
- Symbol->setFragment(F);
-
// Emit the alignment and storage for the variable to the section.
EmitValueToAlignment(ByteAlignment);
EmitZeros(Size);
@@ -94,8 +88,9 @@ MCStreamer *llvm::createXCOFFStreamer(MCContext &Context,
return S;
}
-void MCXCOFFStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol,
+void MCXCOFFStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *LabelSym,
uint64_t Size,
+ MCSymbol *CsectSym,
unsigned ByteAlignment) {
- EmitCommonSymbol(Symbol, Size, ByteAlignment);
+ EmitCommonSymbol(CsectSym, Size, ByteAlignment);
}
diff --git a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
index c1ff3cc2480c..321f93d76092 100644
--- a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
@@ -1324,6 +1324,14 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
Comdats[C->getName()].emplace_back(
WasmComdatEntry{wasm::WASM_COMDAT_FUNCTION, Index});
}
+
+ if (WS.hasExportName()) {
+ wasm::WasmExport Export;
+ Export.Name = WS.getExportName();
+ Export.Kind = wasm::WASM_EXTERNAL_FUNCTION;
+ Export.Index = Index;
+ Exports.push_back(Export);
+ }
} else {
// An import; the index was assigned above.
Index = WasmIndices.find(&WS)->second;
@@ -1452,8 +1460,10 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
Flags |= wasm::WASM_SYMBOL_EXPORTED;
}
}
- if (WS.getName() != WS.getImportName())
+ if (WS.hasImportName())
Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME;
+ if (WS.hasExportName())
+ Flags |= wasm::WASM_SYMBOL_EXPORTED;
wasm::WasmSymbolInfo Info;
Info.Name = WS.getName();
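hasExportName() now drives both an explicit export entry and the WASM_SYMBOL_EXPORTED flag. One way (among others) to produce such symbols from C or C++ is Clang's wasm attributes; a sketch, assuming compilation with Clang for a wasm32 target:

// Exported under the wasm name "add" regardless of the C symbol name.
__attribute__((export_name("add"))) int add(int a, int b) { return a + b; }

// Imported from module "env" under the wasm name "log_i32".
__attribute__((import_module("env"), import_name("log_i32")))
void log_i32(int v);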
diff --git a/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp
index 353c21068735..e584c6222a5a 100644
--- a/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -44,6 +44,7 @@ using namespace llvm;
namespace {
constexpr unsigned DefaultSectionAlign = 4;
+constexpr int16_t MaxSectionIndex = INT16_MAX;
// Packs the csect's alignment and type into a byte.
uint8_t getEncodedType(const MCSectionXCOFF *);
@@ -73,6 +74,14 @@ struct ControlSection {
: MCCsect(MCSec), SymbolTableIndex(-1), Address(-1), Size(0) {}
};
+// Type to be used for a container representing a set of csects with
+// (approximately) the same storage mapping class. For example all the csects
+// with a storage mapping class of `xmc_pr` will get placed into the same
+// container.
+using CsectGroup = std::deque<ControlSection>;
+
+using CsectGroups = std::deque<CsectGroup *>;
+
// Represents the data related to a section excluding the csects that make up
// the raw data of the section. The csects are stored separately as not all
// sections contain csects, and some sections contain csects which are better
@@ -94,49 +103,70 @@ struct Section {
// Virtual sections do not need storage allocated in the object file.
const bool IsVirtual;
+ // XCOFF has special section numbers for symbols:
+ // -2 Specifies N_DEBUG, a special symbolic debugging symbol.
+ // -1 Specifies N_ABS, an absolute symbol. The symbol has a value but is not
+ // relocatable.
+ // 0 Specifies N_UNDEF, an undefined external symbol.
+ // Therefore, we choose -3 (N_DEBUG - 1) to represent a section index that
+ // hasn't been initialized.
+ static constexpr int16_t UninitializedIndex =
+ XCOFF::ReservedSectionNum::N_DEBUG - 1;
+
+ CsectGroups Groups;
+
void reset() {
Address = 0;
Size = 0;
FileOffsetToData = 0;
FileOffsetToRelocations = 0;
RelocationCount = 0;
- Index = -1;
+ Index = UninitializedIndex;
+ // Clear any csects we have stored.
+ for (auto *Group : Groups)
+ Group->clear();
}
- Section(const char *N, XCOFF::SectionTypeFlags Flags, bool IsVirtual)
+ Section(const char *N, XCOFF::SectionTypeFlags Flags, bool IsVirtual,
+ CsectGroups Groups)
: Address(0), Size(0), FileOffsetToData(0), FileOffsetToRelocations(0),
- RelocationCount(0), Flags(Flags), Index(-1), IsVirtual(IsVirtual) {
+ RelocationCount(0), Flags(Flags), Index(UninitializedIndex),
+ IsVirtual(IsVirtual), Groups(Groups) {
strncpy(Name, N, XCOFF::NameSize);
}
};
class XCOFFObjectWriter : public MCObjectWriter {
- // Type to be used for a container representing a set of csects with
- // (approximately) the same storage mapping class. For example all the csects
- // with a storage mapping class of `xmc_pr` will get placed into the same
- // container.
- using CsectGroup = std::deque<ControlSection>;
+
+ uint32_t SymbolTableEntryCount = 0;
+ uint32_t SymbolTableOffset = 0;
+ uint16_t SectionCount = 0;
support::endian::Writer W;
std::unique_ptr<MCXCOFFObjectTargetWriter> TargetObjectWriter;
StringTableBuilder Strings;
- // The non-empty sections, in the order they will appear in the section header
- // table.
- std::vector<Section *> Sections;
-
- // The Predefined sections.
- Section Text;
- Section BSS;
-
// CsectGroups. These store the csects which make up different parts of
// the sections. Should have one for each set of csects that get mapped into
// the same section and get handled in a 'similar' way.
+ CsectGroup UndefinedCsects;
CsectGroup ProgramCodeCsects;
+ CsectGroup ReadOnlyCsects;
+ CsectGroup DataCsects;
+ CsectGroup FuncDSCsects;
+ CsectGroup TOCCsects;
CsectGroup BSSCsects;
- uint32_t SymbolTableEntryCount = 0;
- uint32_t SymbolTableOffset = 0;
+ // The Predefined sections.
+ Section Text;
+ Section Data;
+ Section BSS;
+
+ // All the XCOFF sections, in the order they will appear in the section header
+ // table.
+ std::array<Section *const, 3> Sections{{&Text, &Data, &BSS}};
+
+ CsectGroup &getCsectGroup(const MCSectionXCOFF *MCSec);
virtual void reset() override;
@@ -190,27 +220,72 @@ XCOFFObjectWriter::XCOFFObjectWriter(
std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
: W(OS, support::big), TargetObjectWriter(std::move(MOTW)),
Strings(StringTableBuilder::XCOFF),
- Text(".text", XCOFF::STYP_TEXT, /* IsVirtual */ false),
- BSS(".bss", XCOFF::STYP_BSS, /* IsVirtual */ true) {}
+ Text(".text", XCOFF::STYP_TEXT, /* IsVirtual */ false,
+ CsectGroups{&ProgramCodeCsects, &ReadOnlyCsects}),
+ Data(".data", XCOFF::STYP_DATA, /* IsVirtual */ false,
+ CsectGroups{&DataCsects, &FuncDSCsects, &TOCCsects}),
+ BSS(".bss", XCOFF::STYP_BSS, /* IsVirtual */ true,
+ CsectGroups{&BSSCsects}) {}
void XCOFFObjectWriter::reset() {
+ UndefinedCsects.clear();
+
// Reset any sections we have written to, and empty the section header table.
for (auto *Sec : Sections)
Sec->reset();
- Sections.clear();
-
- // Clear any csects we have stored.
- ProgramCodeCsects.clear();
- BSSCsects.clear();
// Reset the symbol table and string table.
SymbolTableEntryCount = 0;
SymbolTableOffset = 0;
+ SectionCount = 0;
Strings.clear();
MCObjectWriter::reset();
}
+CsectGroup &XCOFFObjectWriter::getCsectGroup(const MCSectionXCOFF *MCSec) {
+ switch (MCSec->getMappingClass()) {
+ case XCOFF::XMC_PR:
+ assert(XCOFF::XTY_SD == MCSec->getCSectType() &&
+ "Only an initialized csect can contain program code.");
+ return ProgramCodeCsects;
+ case XCOFF::XMC_RO:
+ assert(XCOFF::XTY_SD == MCSec->getCSectType() &&
+ "Only an initialized csect can contain read only data.");
+ return ReadOnlyCsects;
+ case XCOFF::XMC_RW:
+ if (XCOFF::XTY_CM == MCSec->getCSectType())
+ return BSSCsects;
+
+ if (XCOFF::XTY_SD == MCSec->getCSectType())
+ return DataCsects;
+
+ report_fatal_error("Unhandled mapping of read-write csect to section.");
+ case XCOFF::XMC_DS:
+ return FuncDSCsects;
+ case XCOFF::XMC_BS:
+ assert(XCOFF::XTY_CM == MCSec->getCSectType() &&
+ "Mapping invalid csect. CSECT with bss storage class must be "
+ "common type.");
+ return BSSCsects;
+ case XCOFF::XMC_TC0:
+ assert(XCOFF::XTY_SD == MCSec->getCSectType() &&
+ "Only an initialized csect can contain TOC-base.");
+ assert(TOCCsects.empty() &&
+ "We should have only one TOC-base, and it should be the first csect "
+ "in this CsectGroup.");
+ return TOCCsects;
+ case XCOFF::XMC_TC:
+ assert(XCOFF::XTY_SD == MCSec->getCSectType() &&
+ "Only an initialized csect can contain TC entry.");
+ assert(!TOCCsects.empty() &&
+ "We should at least have a TOC-base in this CsectGroup.");
+ return TOCCsects;
+ default:
+ report_fatal_error("Unhandled mapping of csect to section.");
+ }
+}
+
void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
if (TargetObjectWriter->is64Bit())
@@ -225,49 +300,38 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const auto *MCSec = cast<const MCSectionXCOFF>(&S);
assert(WrapperMap.find(MCSec) == WrapperMap.end() &&
"Cannot add a csect twice.");
+ assert(XCOFF::XTY_ER != MCSec->getCSectType() &&
+ "An undefined csect should not get registered.");
// If the name does not fit in the storage provided in the symbol table
// entry, add it to the string table.
if (nameShouldBeInStringTable(MCSec->getSectionName()))
Strings.add(MCSec->getSectionName());
- switch (MCSec->getMappingClass()) {
- case XCOFF::XMC_PR:
- assert(XCOFF::XTY_SD == MCSec->getCSectType() &&
- "Only an initialized csect can contain program code.");
- ProgramCodeCsects.emplace_back(MCSec);
- WrapperMap[MCSec] = &ProgramCodeCsects.back();
- break;
- case XCOFF::XMC_RW:
- if (XCOFF::XTY_CM == MCSec->getCSectType()) {
- BSSCsects.emplace_back(MCSec);
- WrapperMap[MCSec] = &BSSCsects.back();
- break;
- }
- report_fatal_error("Unhandled mapping of read-write csect to section.");
- case XCOFF::XMC_TC0:
- // TODO FIXME Handle emiting the TOC base.
- break;
- case XCOFF::XMC_BS:
- assert(XCOFF::XTY_CM == MCSec->getCSectType() &&
- "Mapping invalid csect. CSECT with bss storage class must be "
- "common type.");
- BSSCsects.emplace_back(MCSec);
- WrapperMap[MCSec] = &BSSCsects.back();
- break;
- default:
- report_fatal_error("Unhandled mapping of csect to section.");
- }
+ CsectGroup &Group = getCsectGroup(MCSec);
+ Group.emplace_back(MCSec);
+ WrapperMap[MCSec] = &Group.back();
}
for (const MCSymbol &S : Asm.symbols()) {
// Nothing to do for temporary symbols.
if (S.isTemporary())
continue;
- const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(&S);
- // Map the symbol into its containing csect.
+ const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(&S);
const MCSectionXCOFF *ContainingCsect = XSym->getContainingCsect();
+
+ // Handle undefined symbol.
+ if (ContainingCsect->getCSectType() == XCOFF::XTY_ER) {
+ UndefinedCsects.emplace_back(ContainingCsect);
+ continue;
+ }
+
+ // If the symbol is the csect itself, we don't need to put the symbol
+ // into csect's Syms.
+ if (XSym == ContainingCsect->getQualNameSymbol())
+ continue;
+
assert(WrapperMap.find(ContainingCsect) != WrapperMap.end() &&
"Expected containing csect to exist in map");
@@ -287,27 +351,37 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
void XCOFFObjectWriter::recordRelocation(MCAssembler &, const MCAsmLayout &,
const MCFragment *, const MCFixup &,
MCValue, uint64_t &) {
- report_fatal_error("XCOFF relocations not supported.");
+ // TODO: recordRelocation is not yet implemented.
}
void XCOFFObjectWriter::writeSections(const MCAssembler &Asm,
const MCAsmLayout &Layout) {
- // Write the program code control sections one at a time.
- uint32_t CurrentAddressLocation = Text.Address;
- for (const auto &Csect : ProgramCodeCsects) {
- if (uint32_t PaddingSize = Csect.Address - CurrentAddressLocation)
- W.OS.write_zeros(PaddingSize);
- Asm.writeSectionData(W.OS, Csect.MCCsect, Layout);
- CurrentAddressLocation = Csect.Address + Csect.Size;
- }
+ uint32_t CurrentAddressLocation = 0;
+ for (const auto *Section : Sections) {
+ // Nothing to write for this Section.
+ if (Section->Index == Section::UninitializedIndex || Section->IsVirtual)
+ continue;
+
+ assert(CurrentAddressLocation == Section->Address &&
+ "Sections should be written consecutively.");
+ for (const auto *Group : Section->Groups) {
+ for (const auto &Csect : *Group) {
+ if (uint32_t PaddingSize = Csect.Address - CurrentAddressLocation)
+ W.OS.write_zeros(PaddingSize);
+ if (Csect.Size)
+ Asm.writeSectionData(W.OS, Csect.MCCsect, Layout);
+ CurrentAddressLocation = Csect.Address + Csect.Size;
+ }
+ }
- if (Text.Index != -1) {
// The size of the tail padding in a section is the end virtual address of
// the current section minus the end virtual address of the last csect
// in that section.
if (uint32_t PaddingSize =
- Text.Address + Text.Size - CurrentAddressLocation)
+ Section->Address + Section->Size - CurrentAddressLocation) {
W.OS.write_zeros(PaddingSize);
+ CurrentAddressLocation += PaddingSize;
+ }
}
}
@@ -345,7 +419,7 @@ void XCOFFObjectWriter::writeSymbolName(const StringRef &SymbolName) {
W.write<int32_t>(0);
W.write<uint32_t>(Strings.getOffset(SymbolName));
} else {
- char Name[XCOFF::NameSize];
+ char Name[XCOFF::NameSize + 1];
std::strncpy(Name, SymbolName.data(), XCOFF::NameSize);
ArrayRef<char> NameRef(Name, XCOFF::NameSize);
W.write(NameRef);
@@ -431,7 +505,7 @@ void XCOFFObjectWriter::writeFileHeader() {
// Magic.
W.write<uint16_t>(0x01df);
// Number of sections.
- W.write<uint16_t>(Sections.size());
+ W.write<uint16_t>(SectionCount);
// Timestamp field. For reproducible output we write a 0, which represents no
// timestamp.
W.write<int32_t>(0);
@@ -447,6 +521,10 @@ void XCOFFObjectWriter::writeFileHeader() {
void XCOFFObjectWriter::writeSectionHeaderTable() {
for (const auto *Sec : Sections) {
+ // Nothing to write for this Section.
+ if (Sec->Index == Section::UninitializedIndex)
+ continue;
+
// Write Name.
ArrayRef<char> NameRef(Sec->Name, XCOFF::NameSize);
W.write(NameRef);
@@ -472,104 +550,112 @@ void XCOFFObjectWriter::writeSectionHeaderTable() {
}
void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
- // Print out symbol table for the program code.
- for (const auto &Csect : ProgramCodeCsects) {
- // Write out the control section first and then each symbol in it.
- writeSymbolTableEntryForControlSection(Csect, Text.Index,
- Csect.MCCsect->getStorageClass());
- for (const auto &Sym : Csect.Syms)
- writeSymbolTableEntryForCsectMemberLabel(
- Sym, Csect, Text.Index, Layout.getSymbolOffset(*Sym.MCSym));
+ for (const auto &Csect : UndefinedCsects) {
+ writeSymbolTableEntryForControlSection(
+ Csect, XCOFF::ReservedSectionNum::N_UNDEF, Csect.MCCsect->getStorageClass());
}
- // The BSS Section is special in that the csects must contain a single symbol,
- // and the contained symbol cannot be represented in the symbol table as a
- // label definition.
- for (auto &Csect : BSSCsects) {
- assert(Csect.Syms.size() == 1 &&
- "Uninitialized csect cannot contain more then 1 symbol.");
- Symbol &Sym = Csect.Syms.back();
- writeSymbolTableEntryForControlSection(Csect, BSS.Index,
- Sym.getStorageClass());
+ for (const auto *Section : Sections) {
+ // Nothing to write for this Section.
+ if (Section->Index == Section::UninitializedIndex)
+ continue;
+
+ for (const auto *Group : Section->Groups) {
+ if (Group->empty())
+ continue;
+
+ const int16_t SectionIndex = Section->Index;
+ for (const auto &Csect : *Group) {
+ // Write out the control section first and then each symbol in it.
+ writeSymbolTableEntryForControlSection(
+ Csect, SectionIndex, Csect.MCCsect->getStorageClass());
+
+ for (const auto &Sym : Csect.Syms)
+ writeSymbolTableEntryForCsectMemberLabel(
+ Sym, Csect, SectionIndex, Layout.getSymbolOffset(*(Sym.MCSym)));
+ }
+ }
}
}
void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
+ // The first symbol table entry is for the file name. We are not emitting it
+ // yet, so start at index 0.
+ uint32_t SymbolTableIndex = 0;
+
+ // Calculate indices for undefined symbols.
+ for (auto &Csect : UndefinedCsects) {
+ Csect.Size = 0;
+ Csect.Address = 0;
+ Csect.SymbolTableIndex = SymbolTableIndex;
+ // 1 main and 1 auxiliary symbol table entry for each contained symbol.
+ SymbolTableIndex += 2;
+ }
+
// The address corresponds to the address of sections and symbols in the
// object file. We place the shared address 0 immediately after the
// section header table.
uint32_t Address = 0;
// Section indices are 1-based in XCOFF.
- int16_t SectionIndex = 1;
- // The first symbol table entry is for the file name. We are not emitting it
- // yet, so start at index 0.
- uint32_t SymbolTableIndex = 0;
+ int32_t SectionIndex = 1;
- // Text section comes first.
- if (!ProgramCodeCsects.empty()) {
- Sections.push_back(&Text);
- Text.Index = SectionIndex++;
- for (auto &Csect : ProgramCodeCsects) {
- const MCSectionXCOFF *MCSec = Csect.MCCsect;
- Csect.Address = alignTo(Address, MCSec->getAlignment());
- Csect.Size = Layout.getSectionAddressSize(MCSec);
- Address = Csect.Address + Csect.Size;
- Csect.SymbolTableIndex = SymbolTableIndex;
- // 1 main and 1 auxiliary symbol table entry for the csect.
- SymbolTableIndex += 2;
- for (auto &Sym : Csect.Syms) {
- Sym.SymbolTableIndex = SymbolTableIndex;
- // 1 main and 1 auxiliary symbol table entry for each contained symbol
+ for (auto *Section : Sections) {
+ const bool IsEmpty =
+ llvm::all_of(Section->Groups,
+ [](const CsectGroup *Group) { return Group->empty(); });
+ if (IsEmpty)
+ continue;
+
+ if (SectionIndex > MaxSectionIndex)
+ report_fatal_error("Section index overflow!");
+ Section->Index = SectionIndex++;
+ SectionCount++;
+
+ bool SectionAddressSet = false;
+ for (auto *Group : Section->Groups) {
+ if (Group->empty())
+ continue;
+
+ for (auto &Csect : *Group) {
+ const MCSectionXCOFF *MCSec = Csect.MCCsect;
+ Csect.Address = alignTo(Address, MCSec->getAlignment());
+ Csect.Size = Layout.getSectionAddressSize(MCSec);
+ Address = Csect.Address + Csect.Size;
+ Csect.SymbolTableIndex = SymbolTableIndex;
+ // 1 main and 1 auxiliary symbol table entry for the csect.
SymbolTableIndex += 2;
+
+ for (auto &Sym : Csect.Syms) {
+ Sym.SymbolTableIndex = SymbolTableIndex;
+ // 1 main and 1 auxiliary symbol table entry for each contained
+ // symbol.
+ SymbolTableIndex += 2;
+ }
}
- }
- Address = alignTo(Address, DefaultSectionAlign);
- // The first csect of a section can be aligned by adjusting the virtual
- // address of its containing section instead of writing zeroes into the
- // object file.
- Text.Address = ProgramCodeCsects.front().Address;
-
- Text.Size = Address - Text.Address;
- }
-
- // Data section Second. TODO
-
- // BSS Section third.
- if (!BSSCsects.empty()) {
- Sections.push_back(&BSS);
- BSS.Index = SectionIndex++;
- for (auto &Csect : BSSCsects) {
- const MCSectionXCOFF *MCSec = Csect.MCCsect;
- Csect.Address = alignTo(Address, MCSec->getAlignment());
- Csect.Size = Layout.getSectionAddressSize(MCSec);
- Address = Csect.Address + Csect.Size;
- Csect.SymbolTableIndex = SymbolTableIndex;
- // 1 main and 1 auxiliary symbol table entry for the csect.
- SymbolTableIndex += 2;
-
- assert(Csect.Syms.size() == 1 &&
- "csect in the BSS can only contain a single symbol.");
- Csect.Syms[0].SymbolTableIndex = Csect.SymbolTableIndex;
+ if (!SectionAddressSet) {
+ Section->Address = Group->front().Address;
+ SectionAddressSet = true;
+ }
}
- // Pad out Address to the default alignment. This is to match how the system
- // assembler handles the .bss section. Its size is always a multiple of 4.
- Address = alignTo(Address, DefaultSectionAlign);
- BSS.Address = BSSCsects.front().Address;
- BSS.Size = Address - BSS.Address;
+    // Make sure the address of the next section is aligned to
+    // DefaultSectionAlign.
+ Address = alignTo(Address, DefaultSectionAlign);
+ Section->Size = Address - Section->Address;
}
SymbolTableEntryCount = SymbolTableIndex;
// Calculate the RawPointer value for each section.
uint64_t RawPointer = sizeof(XCOFF::FileHeader32) + auxiliaryHeaderSize() +
- Sections.size() * sizeof(XCOFF::SectionHeader32);
+ SectionCount * sizeof(XCOFF::SectionHeader32);
for (auto *Sec : Sections) {
- if (!Sec->IsVirtual) {
- Sec->FileOffsetToData = RawPointer;
- RawPointer += Sec->Size;
- }
+ if (Sec->Index == Section::UninitializedIndex || Sec->IsVirtual)
+ continue;
+
+ Sec->FileOffsetToData = RawPointer;
+ RawPointer += Sec->Size;
}
// TODO Add in Relocation storage to the RawPointer Calculation.
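
The index bookkeeping in assignAddressesAndIndices follows one rule throughout: every csect consumes a main plus an auxiliary symbol table entry, and so does every label it contains. A minimal standalone sketch of that accounting, using hypothetical Csect/Label types rather than the writer's real data structures:

    #include <cstdint>
    #include <vector>

    struct Label { uint32_t SymbolTableIndex = 0; };
    struct Csect {
      uint32_t SymbolTableIndex = 0;
      std::vector<Label> Labels;
    };

    // Two entries (main + auxiliary) per csect, and two more per label.
    uint32_t assignSymbolIndices(std::vector<Csect> &Csects) {
      uint32_t Index = 0;
      for (Csect &C : Csects) {
        C.SymbolTableIndex = Index;
        Index += 2;
        for (Label &L : C.Labels) {
          L.SymbolTableIndex = Index;
          Index += 2;
        }
      }
      return Index; // what the writer stores as SymbolTableEntryCount
    }
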
diff --git a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
index 088aea3e23c6..30c4f14d13ae 100644
--- a/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -281,7 +281,7 @@ void ResourceManager::releaseBuffers(uint64_t ConsumedBuffers) {
uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
uint64_t BusyResourceMask = 0;
- for (const std::pair<uint64_t, const ResourceUsage> &E : Desc.Resources) {
+ for (const std::pair<uint64_t, ResourceUsage> &E : Desc.Resources) {
unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits;
unsigned Index = getResourceStateIndex(E.first);
if (!Resources[Index]->isReady(NumUnits))
diff --git a/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
index bd28c733535c..c137f1da8a44 100644
--- a/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
@@ -302,7 +302,7 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
ID.Writes.resize(TotalDefs + NumVariadicOps);
// Iterate over the operands list, and skip non-register operands.
- // The first NumExplictDefs register operands are expected to be register
+ // The first NumExplicitDefs register operands are expected to be register
// definitions.
unsigned CurrentDef = 0;
unsigned i = 0;
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp
index adeefb45ec2d..a0cdfb89c553 100644
--- a/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/InstructionTables.cpp
@@ -24,7 +24,8 @@ Error InstructionTables::execute(InstRef &IR) {
UsedResources.clear();
// Identify the resources consumed by this instruction.
- for (const std::pair<uint64_t, ResourceUsage> Resource : Desc.Resources) {
+ for (const std::pair<const uint64_t, ResourceUsage> Resource :
+ Desc.Resources) {
// Skip zero-cycle resources (i.e., unused resources).
if (!Resource.second.size())
continue;
diff --git a/contrib/llvm-project/llvm/lib/Object/ELF.cpp b/contrib/llvm-project/llvm/lib/Object/ELF.cpp
index d491288579df..f17a6da23d7d 100644
--- a/contrib/llvm-project/llvm/lib/Object/ELF.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ELF.cpp
@@ -477,7 +477,7 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
#define PPC64_DYNAMIC_TAG(name, value)
// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
#define DYNAMIC_TAG_MARKER(name, value)
-#define DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
+#define DYNAMIC_TAG(name, value) case value: return #name;
#include "llvm/BinaryFormat/DynamicTags.def"
#undef DYNAMIC_TAG
#undef AARCH64_DYNAMIC_TAG
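
With this change every DYNAMIC_TAG entry in DynamicTags.def expands directly into a case label of the enclosing switch, e.g. DYNAMIC_TAG(NEEDED, 1) becomes case 1: return "NEEDED";. A reduced sketch of the same X-macro pattern, with a hypothetical two-entry table in place of the .def include:

    #include <string>

    // Each X-macro entry becomes one case of the switch.
    #define DYNAMIC_TAG(name, value)                                           \
      case value:                                                              \
        return #name;

    std::string tagToString(unsigned Tag) {
      switch (Tag) {
        DYNAMIC_TAG(NEEDED, 1)
        DYNAMIC_TAG(SONAME, 14)
      default:
        return "<unknown:" + std::to_string(Tag) + ">";
      }
    }
    #undef DYNAMIC_TAG
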
diff --git a/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp
index c0c873f97354..8540b7ab03cd 100644
--- a/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/MachOObjectFile.cpp
@@ -128,6 +128,10 @@ static unsigned getCPUType(const MachOObjectFile &O) {
return O.getHeader().cputype;
}
+static unsigned getCPUSubType(const MachOObjectFile &O) {
+ return O.getHeader().cpusubtype;
+}
+
static uint32_t
getPlainRelocationAddress(const MachO::any_relocation_info &RE) {
return RE.r_word0;
@@ -2565,7 +2569,7 @@ StringRef MachOObjectFile::getFileFormatName() const {
}
}
-Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
+Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType) {
switch (CPUType) {
case MachO::CPU_TYPE_I386:
return Triple::x86;
@@ -2737,7 +2741,7 @@ ArrayRef<StringRef> MachOObjectFile::getValidArchs() {
}
Triple::ArchType MachOObjectFile::getArch() const {
- return getArch(getCPUType(*this));
+ return getArch(getCPUType(*this), getCPUSubType(*this));
}
Triple MachOObjectFile::getArchTriple(const char **McpuDefault) const {
diff --git a/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp
index d1e0ce5edae1..17ac4afda2d6 100644
--- a/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -83,7 +83,8 @@ initializeRecordStreamer(const Module &M,
if (!MRI)
return;
- std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str()));
+ MCTargetOptions MCOptions;
+ std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str(), MCOptions));
if (!MAI)
return;
@@ -109,7 +110,6 @@ initializeRecordStreamer(const Module &M,
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, MCCtx, Streamer, *MAI));
- MCTargetOptions MCOptions;
std::unique_ptr<MCTargetAsmParser> TAP(
T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
if (!TAP)
diff --git a/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp
index e0e63a5a7d76..098b3d8f8dd0 100644
--- a/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ObjectFile.cpp
@@ -32,6 +32,13 @@
using namespace llvm;
using namespace object;
+raw_ostream &object::operator<<(raw_ostream &OS, const SectionedAddress &Addr) {
+ OS << "SectionedAddress{" << format_hex(Addr.Address, 10);
+ if (Addr.SectionIndex != SectionedAddress::UndefSection)
+ OS << ", " << Addr.SectionIndex;
+ return OS << "}";
+}
+
void ObjectFile::anchor() {}
ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source)
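
The new operator<< makes SectionedAddress printable in debug logs and test failure messages. A hypothetical usage sketch (field order per the struct: Address first, then SectionIndex):

    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Prints "SectionedAddress{0x00001000, 3}"; the index is omitted
    // when it equals SectionedAddress::UndefSection.
    void dumpExample() {
      object::SectionedAddress Addr{0x1000, /*SectionIndex=*/3};
      errs() << Addr << "\n";
    }
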
diff --git a/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp b/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp
index 119dc9b18f25..31478be7899e 100644
--- a/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/RelocationResolver.cpp
@@ -339,6 +339,7 @@ static bool supportsRISCV(uint64_t Type) {
switch (Type) {
case ELF::R_RISCV_NONE:
case ELF::R_RISCV_32:
+ case ELF::R_RISCV_32_PCREL:
case ELF::R_RISCV_64:
case ELF::R_RISCV_SET6:
case ELF::R_RISCV_SUB6:
@@ -363,12 +364,14 @@ static uint64_t resolveRISCV(RelocationRef R, uint64_t S, uint64_t A) {
return A;
case ELF::R_RISCV_32:
return (S + RA) & 0xFFFFFFFF;
+ case ELF::R_RISCV_32_PCREL:
+ return (S + RA - R.getOffset()) & 0xFFFFFFFF;
case ELF::R_RISCV_64:
return S + RA;
case ELF::R_RISCV_SET6:
- return (A + (S + RA)) & 0xFF;
+ return (A & 0xC0) | ((S + RA) & 0x3F);
case ELF::R_RISCV_SUB6:
- return (A - (S + RA)) & 0xFF;
+ return (A & 0xC0) | (((A & 0x3F) - (S + RA)) & 0x3F);
case ELF::R_RISCV_ADD8:
return (A + (S + RA)) & 0xFF;
case ELF::R_RISCV_SUB8:
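
The SET6/SUB6 fix confines the update to the low six bits of the target byte, leaving the top two bits (A & 0xC0) untouched; previously the whole byte was overwritten. A small sketch of the corrected semantics as standalone functions:

    #include <cstdint>

    // R_RISCV_SET6: write V into the low 6 bits, preserve the high 2.
    uint8_t set6(uint8_t A, uint64_t V) { return (A & 0xC0) | (V & 0x3F); }

    // R_RISCV_SUB6: subtract V within the low 6 bits, preserve the high 2.
    uint8_t sub6(uint8_t A, uint64_t V) {
      return (A & 0xC0) | (((A & 0x3F) - V) & 0x3F);
    }
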
diff --git a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
index 014b403556df..ab8918ce1919 100644
--- a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
@@ -343,7 +343,7 @@ Error WasmObjectFile::parseDylinkSection(ReadContext &Ctx) {
Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
llvm::DenseSet<uint64_t> Seen;
- if (Functions.size() != FunctionTypes.size()) {
+ if (FunctionTypes.size() && !SeenCodeSection) {
return make_error<GenericBinaryError>("Names must come after code section",
object_error::parse_failed);
}
@@ -389,7 +389,7 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
HasLinkingSection = true;
- if (Functions.size() != FunctionTypes.size()) {
+ if (FunctionTypes.size() && !SeenCodeSection) {
return make_error<GenericBinaryError>(
"Linking data must come after code section",
object_error::parse_failed);
@@ -940,6 +940,7 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
Error WasmObjectFile::parseFunctionSection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
FunctionTypes.reserve(Count);
+ Functions.resize(Count);
uint32_t NumTypes = Signatures.size();
while (Count--) {
uint32_t Type = readVaruint32(Ctx);
@@ -1029,9 +1030,11 @@ Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
Ex.Index = readVaruint32(Ctx);
switch (Ex.Kind) {
case wasm::WASM_EXTERNAL_FUNCTION:
- if (!isValidFunctionIndex(Ex.Index))
+ if (!isDefinedFunctionIndex(Ex.Index))
return make_error<GenericBinaryError>("Invalid function export",
object_error::parse_failed);
+ getDefinedFunction(Ex.Index).ExportName = Ex.Name;
break;
case wasm::WASM_EXTERNAL_GLOBAL:
if (!isValidGlobalIndex(Ex.Index))
@@ -1132,6 +1135,7 @@ Error WasmObjectFile::parseStartSection(ReadContext &Ctx) {
}
Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) {
+ SeenCodeSection = true;
CodeSection = Sections.size();
uint32_t FunctionCount = readVaruint32(Ctx);
if (FunctionCount != FunctionTypes.size()) {
@@ -1139,14 +1143,14 @@ Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) {
object_error::parse_failed);
}
- while (FunctionCount--) {
- wasm::WasmFunction Function;
+ for (uint32_t i = 0; i < FunctionCount; i++) {
+ wasm::WasmFunction& Function = Functions[i];
const uint8_t *FunctionStart = Ctx.Ptr;
uint32_t Size = readVaruint32(Ctx);
const uint8_t *FunctionEnd = Ctx.Ptr + Size;
Function.CodeOffset = Ctx.Ptr - FunctionStart;
- Function.Index = NumImportedFunctions + Functions.size();
+ Function.Index = NumImportedFunctions + i;
Function.CodeSectionOffset = FunctionStart - Ctx.Start;
Function.Size = FunctionEnd - FunctionStart;
@@ -1165,7 +1169,6 @@ Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) {
Function.Comdat = UINT32_MAX;
Ctx.Ptr += BodySize;
assert(Ctx.Ptr == FunctionEnd);
- Functions.push_back(Function);
}
if (Ctx.Ptr != Ctx.End)
return make_error<GenericBinaryError>("Code section ended prematurely",
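
The rework above changes parseCodeSection from appending to Functions to filling entries that parseFunctionSection already sized, so Functions.size() no longer doubles as a progress counter (hence the new SeenCodeSection flag in the section-ordering checks). A reduced model of the resize-then-fill-by-index pattern:

    #include <cstdint>
    #include <vector>

    struct Function { uint32_t Index = 0; };

    // The function section sizes the vector up front...
    void parseFunctionSection(std::vector<Function> &Functions,
                              uint32_t Count) {
      Functions.resize(Count);
    }

    // ...and the code section later fills each entry in place.
    void parseCodeSection(std::vector<Function> &Functions,
                          uint32_t NumImportedFunctions) {
      for (size_t I = 0; I < Functions.size(); ++I)
        Functions[I].Index = NumImportedFunctions + I;
    }
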
diff --git a/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp
index 98782c2701c1..f98cd69a0d37 100644
--- a/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/XCOFFObjectFile.cpp
@@ -46,6 +46,21 @@ static StringRef generateXCOFFFixedNameStringRef(const char *Name) {
: StringRef(Name, XCOFF::NameSize);
}
+template <typename T> StringRef XCOFFSectionHeader<T>::getName() const {
+ const T &DerivedXCOFFSectionHeader = static_cast<const T &>(*this);
+ return generateXCOFFFixedNameStringRef(DerivedXCOFFSectionHeader.Name);
+}
+
+template <typename T> uint16_t XCOFFSectionHeader<T>::getSectionType() const {
+ const T &DerivedXCOFFSectionHeader = static_cast<const T &>(*this);
+ return DerivedXCOFFSectionHeader.Flags & SectionFlagsTypeMask;
+}
+
+template <typename T>
+bool XCOFFSectionHeader<T>::isReservedSectionType() const {
+ return getSectionType() & SectionFlagsReservedMask;
+}
+
bool XCOFFRelocation32::isRelocationSigned() const {
return Info & XR_SIGN_INDICATOR_MASK;
}
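
Hoisting getName() and friends into the XCOFFSectionHeader CRTP base removes the duplicated 32-/64-bit implementations; the explicit instantiations added at the bottom of the file keep the template definitions in this .cpp. A minimal sketch of the same pattern, with hypothetical names and a simplified type mask:

    #include <cstdint>

    // CRTP base: the derived layout supplies the Flags field, the base
    // supplies the shared accessor, instantiated explicitly in one TU.
    template <typename T> struct SectionHeaderBase {
      uint16_t getSectionType() const {
        return static_cast<const T &>(*this).Flags & 0xFFFFu;
      }
    };

    struct SectionHeader32 : SectionHeaderBase<SectionHeader32> {
      uint32_t Flags = 0;
    };
    struct SectionHeader64 : SectionHeaderBase<SectionHeader64> {
      uint32_t Flags = 0;
    };

    template struct SectionHeaderBase<SectionHeader32>;
    template struct SectionHeaderBase<SectionHeader64>;
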
@@ -176,9 +191,8 @@ Expected<StringRef> XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const {
}
Expected<uint64_t> XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
- uint64_t Result = 0;
- llvm_unreachable("Not yet implemented!");
- return Result;
+ assert(!is64Bit() && "Symbol table support not implemented for 64-bit.");
+ return toSymbolEntry(Symb)->Value;
}
uint64_t XCOFFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
@@ -251,7 +265,21 @@ uint64_t XCOFFObjectFile::getSectionSize(DataRefImpl Sec) const {
Expected<ArrayRef<uint8_t>>
XCOFFObjectFile::getSectionContents(DataRefImpl Sec) const {
- llvm_unreachable("Not yet implemented!");
+ if (isSectionVirtual(Sec))
+ return ArrayRef<uint8_t>();
+
+ uint64_t OffsetToRaw;
+ if (is64Bit())
+ OffsetToRaw = toSection64(Sec)->FileOffsetToRawData;
+ else
+ OffsetToRaw = toSection32(Sec)->FileOffsetToRawData;
+
+  const uint8_t *ContentStart = base() + OffsetToRaw;
+ uint64_t SectionSize = getSectionSize(Sec);
+ if (checkOffset(Data, uintptr_t(ContentStart), SectionSize))
+ return make_error<BinaryError>();
+
+  return makeArrayRef(ContentStart, SectionSize);
}
uint64_t XCOFFObjectFile::getSectionAlignment(DataRefImpl Sec) const {
@@ -281,9 +309,8 @@ bool XCOFFObjectFile::isSectionBSS(DataRefImpl Sec) const {
}
bool XCOFFObjectFile::isSectionVirtual(DataRefImpl Sec) const {
- bool Result = false;
- llvm_unreachable("Not yet implemented!");
- return Result;
+ return is64Bit() ? toSection64(Sec)->FileOffsetToRawData == 0
+ : toSection32(Sec)->FileOffsetToRawData == 0;
}
relocation_iterator XCOFFObjectFile::section_rel_begin(DataRefImpl Sec) const {
@@ -369,7 +396,6 @@ Triple::ArchType XCOFFObjectFile::getArch() const {
}
SubtargetFeatures XCOFFObjectFile::getFeatures() const {
- llvm_unreachable("Not yet implemented!");
return SubtargetFeatures();
}
@@ -688,14 +714,6 @@ ObjectFile::createXCOFFObjectFile(MemoryBufferRef MemBufRef,
return XCOFFObjectFile::create(FileType, MemBufRef);
}
-StringRef XCOFFSectionHeader32::getName() const {
- return generateXCOFFFixedNameStringRef(Name);
-}
-
-StringRef XCOFFSectionHeader64::getName() const {
- return generateXCOFFFixedNameStringRef(Name);
-}
-
XCOFF::StorageClass XCOFFSymbolRef::getStorageClass() const {
return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->StorageClass;
}
@@ -762,5 +780,9 @@ bool XCOFFSymbolRef::isFunction() const {
return (OwningObjectPtr->getSectionFlags(SI.get()) & XCOFF::STYP_TEXT);
}
+// Explicitly instantiate template classes.
+template struct XCOFFSectionHeader<XCOFFSectionHeader32>;
+template struct XCOFFSectionHeader<XCOFFSectionHeader64>;
+
} // namespace object
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
index efcdc51e1670..ec3ec55011f9 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
@@ -260,9 +260,12 @@ static bool layoutCOFF(COFFParser &CP) {
CurrentSectionDataOffset += S.Header.SizeOfRawData;
if (!S.Relocations.empty()) {
S.Header.PointerToRelocations = CurrentSectionDataOffset;
- S.Header.NumberOfRelocations = S.Relocations.size();
- CurrentSectionDataOffset +=
- S.Header.NumberOfRelocations * COFF::RelocationSize;
+ if (S.Header.Characteristics & COFF::IMAGE_SCN_LNK_NRELOC_OVFL) {
+ S.Header.NumberOfRelocations = 0xffff;
+ CurrentSectionDataOffset += COFF::RelocationSize;
+ } else
+ S.Header.NumberOfRelocations = S.Relocations.size();
+ CurrentSectionDataOffset += S.Relocations.size() * COFF::RelocationSize;
}
} else {
// Leave SizeOfRawData unaltered. For .bss sections in object files, it
@@ -506,6 +509,10 @@ static bool writeCOFF(COFFParser &CP, raw_ostream &OS) {
S.SectionData.writeAsBinary(OS);
assert(S.Header.SizeOfRawData >= S.SectionData.binary_size());
OS.write_zeros(S.Header.SizeOfRawData - S.SectionData.binary_size());
+ if (S.Header.Characteristics & COFF::IMAGE_SCN_LNK_NRELOC_OVFL)
+ OS << binary_le<uint32_t>(/*VirtualAddress=*/ S.Relocations.size() + 1)
+ << binary_le<uint32_t>(/*SymbolTableIndex=*/ 0)
+ << binary_le<uint16_t>(/*Type=*/ 0);
for (const COFFYAML::Relocation &R : S.Relocations) {
uint32_t SymbolTableIndex;
if (R.SymbolTableIndex) {
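
When IMAGE_SCN_LNK_NRELOC_OVFL is set, COFF caps the 16-bit NumberOfRelocations field at 0xffff and stores the true count, including the extra record itself, in the VirtualAddress field of a leading relocation; the emitter above both reserves file space for and writes that record. A back-of-the-envelope check of the on-disk math (COFF::RelocationSize is 10 bytes):

    #include <cstdint>

    constexpr uint32_t RelocationSize = 10; // size of one COFF relocation

    // With N real relocations and the overflow flag set, N + 1 records
    // hit the disk; the first carries the true count in VirtualAddress.
    constexpr uint64_t bytesForOverflowRelocs(uint32_t N) {
      return uint64_t(N + 1) * RelocationSize;
    }
    static_assert(bytesForOverflowRelocs(70000) == 700010,
                  "70001 records on disk");
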
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index eeebb694589b..02f053bb0e0f 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -421,7 +421,7 @@ std::shared_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection(
for (const auto &LC : Lines.Blocks) {
Result->createBlock(LC.FileName);
if (Result->hasColumnInfo()) {
- for (const auto &Item : zip(LC.Lines, LC.Columns)) {
+ for (auto Item : zip(LC.Lines, LC.Columns)) {
auto &L = std::get<0>(Item);
auto &C = std::get<1>(Item);
uint32_t LE = L.LineStart + L.EndDelta;
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFEmitter.cpp
index 2ae66997cf59..b410fed16f09 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/DWARFEmitter.cpp
@@ -314,7 +314,10 @@ public:
DIEFixupVisitor(DWARFYAML::Data &DI) : DWARFYAML::Visitor(DI){};
private:
- virtual void onStartCompileUnit(DWARFYAML::Unit &CU) { Length = 7; }
+ virtual void onStartCompileUnit(DWARFYAML::Unit &CU) {
+ // Size of the unit header, excluding the length field itself.
+ Length = CU.Version >= 5 ? 8 : 7;
+ }
virtual void onEndCompileUnit(DWARFYAML::Unit &CU) {
CU.Length.setLength(Length);
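
The 7-versus-8 distinction follows from the unit header fields after the initial length: DWARF v4 has version (2 bytes) + debug_abbrev_offset (4) + address_size (1) = 7, while v5 inserts a unit_type byte, giving 2 + 1 + 1 + 4 = 8. The same arithmetic as a checked sketch:

    #include <cstdint>

    // DWARF32 unit header bytes, excluding the initial length field.
    constexpr uint64_t unitHeaderSize(uint16_t Version) {
      // v2-v4: version(2) + debug_abbrev_offset(4) + address_size(1)
      // v5:    version(2) + unit_type(1) + address_size(1)
      //        + debug_abbrev_offset(4)
      return Version >= 5 ? 8 : 7;
    }
    static_assert(unitHeaderSize(4) == 7, "");
    static_assert(unitHeaderSize(5) == 8, "");
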
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp
index e0faed256f6b..ee7d5f616a73 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/StringTableBuilder.h"
@@ -36,6 +37,16 @@ class ContiguousBlobAccumulator {
SmallVector<char, 128> Buf;
raw_svector_ostream OS;
+public:
+ ContiguousBlobAccumulator(uint64_t InitialOffset_)
+ : InitialOffset(InitialOffset_), Buf(), OS(Buf) {}
+
+ template <class Integer>
+ raw_ostream &getOSAndAlignedOffset(Integer &Offset, unsigned Align) {
+ Offset = padToAlignment(Align);
+ return OS;
+ }
+
/// \returns The new offset.
uint64_t padToAlignment(unsigned Align) {
if (Align == 0)
@@ -46,14 +57,6 @@ class ContiguousBlobAccumulator {
return AlignedOffset; // == CurrentOffset;
}
-public:
- ContiguousBlobAccumulator(uint64_t InitialOffset_)
- : InitialOffset(InitialOffset_), Buf(), OS(Buf) {}
- template <class Integer>
- raw_ostream &getOSAndAlignedOffset(Integer &Offset, unsigned Align) {
- Offset = padToAlignment(Align);
- return OS;
- }
void writeBlobToStream(raw_ostream &Out) { Out << OS.str(); }
};
@@ -86,6 +89,15 @@ public:
unsigned size() const { return Map.size(); }
};
+namespace {
+struct Fragment {
+ uint64_t Offset;
+ uint64_t Size;
+ uint32_t Type;
+ uint64_t AddrAlign;
+};
+} // namespace
+
/// "Single point of truth" for the ELF file construction.
/// TODO: This class still has a ways to go before it is truly a "single
/// point of truth".
@@ -98,6 +110,7 @@ template <class ELFT> class ELFState {
typedef typename ELFT::Rela Elf_Rela;
typedef typename ELFT::Relr Elf_Relr;
typedef typename ELFT::Dyn Elf_Dyn;
+ typedef typename ELFT::uint uintX_t;
enum class SymtabType { Static, Dynamic };
@@ -140,6 +153,11 @@ template <class ELFT> class ELFState {
ELFYAML::Section *YAMLSec);
void setProgramHeaderLayout(std::vector<Elf_Phdr> &PHeaders,
std::vector<Elf_Shdr> &SHeaders);
+
+ std::vector<Fragment>
+ getPhdrFragments(const ELFYAML::ProgramHeader &Phdr,
+ ArrayRef<typename ELFT::Shdr> SHeaders);
+
void finalizeStrings();
void writeELFHeader(ContiguousBlobAccumulator &CBA, raw_ostream &OS);
void writeSectionContent(Elf_Shdr &SHeader,
@@ -148,6 +166,9 @@ template <class ELFT> class ELFState {
void writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::RelocationSection &Section,
ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::RelrSection &Section,
+ ContiguousBlobAccumulator &CBA);
void writeSectionContent(Elf_Shdr &SHeader, const ELFYAML::Group &Group,
ContiguousBlobAccumulator &CBA);
void writeSectionContent(Elf_Shdr &SHeader,
@@ -177,6 +198,20 @@ template <class ELFT> class ELFState {
void writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::AddrsigSection &Section,
ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::NoteSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::GnuHashSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::LinkerOptionsSection &Section,
+ ContiguousBlobAccumulator &CBA);
+ void writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::DependentLibrariesSection &Section,
+ ContiguousBlobAccumulator &CBA);
+
+ void writeFill(ELFYAML::Fill &Fill, ContiguousBlobAccumulator &CBA);
ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH);
@@ -199,31 +234,25 @@ template <class T> static void zero(T &Obj) { memset(&Obj, 0, sizeof(Obj)); }
template <class ELFT>
ELFState<ELFT>::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH)
: Doc(D), ErrHandler(EH) {
+ std::vector<ELFYAML::Section *> Sections = Doc.getSections();
StringSet<> DocSections;
- for (std::unique_ptr<ELFYAML::Section> &D : Doc.Sections) {
- if (!D->Name.empty())
- DocSections.insert(D->Name);
-
- // Some sections wants to link to .symtab by default.
- // That means we want to create the symbol table for them.
- if (D->Type == llvm::ELF::SHT_REL || D->Type == llvm::ELF::SHT_RELA)
- if (!Doc.Symbols && D->Link.empty())
- Doc.Symbols.emplace();
- }
+ for (const ELFYAML::Section *Sec : Sections)
+ if (!Sec->Name.empty())
+ DocSections.insert(Sec->Name);
// Insert SHT_NULL section implicitly when it is not defined in YAML.
- if (Doc.Sections.empty() || Doc.Sections.front()->Type != ELF::SHT_NULL)
- Doc.Sections.insert(
- Doc.Sections.begin(),
+ if (Sections.empty() || Sections.front()->Type != ELF::SHT_NULL)
+ Doc.Chunks.insert(
+ Doc.Chunks.begin(),
std::make_unique<ELFYAML::Section>(
- ELFYAML::Section::SectionKind::RawContent, /*IsImplicit=*/true));
+ ELFYAML::Chunk::ChunkKind::RawContent, /*IsImplicit=*/true));
std::vector<StringRef> ImplicitSections;
if (Doc.Symbols)
ImplicitSections.push_back(".symtab");
ImplicitSections.insert(ImplicitSections.end(), {".strtab", ".shstrtab"});
- if (!Doc.DynamicSymbols.empty())
+ if (Doc.DynamicSymbols)
ImplicitSections.insert(ImplicitSections.end(), {".dynsym", ".dynstr"});
// Insert placeholders for implicit sections that are not
@@ -232,10 +261,10 @@ ELFState<ELFT>::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH)
if (DocSections.count(SecName))
continue;
- std::unique_ptr<ELFYAML::Section> Sec = std::make_unique<ELFYAML::Section>(
- ELFYAML::Section::SectionKind::RawContent, true /*IsImplicit*/);
+ std::unique_ptr<ELFYAML::Chunk> Sec = std::make_unique<ELFYAML::Section>(
+ ELFYAML::Chunk::ChunkKind::RawContent, true /*IsImplicit*/);
Sec->Name = SecName;
- Doc.Sections.push_back(std::move(Sec));
+ Doc.Chunks.push_back(std::move(Sec));
}
}
@@ -273,7 +302,7 @@ void ELFState<ELFT>::writeELFHeader(ContiguousBlobAccumulator &CBA, raw_ostream
Header.e_shoff =
Doc.Header.SHOff ? typename ELFT::uint(*Doc.Header.SHOff) : SHOff;
Header.e_shnum =
- Doc.Header.SHNum ? (uint16_t)*Doc.Header.SHNum : Doc.Sections.size();
+ Doc.Header.SHNum ? (uint16_t)*Doc.Header.SHNum : Doc.getSections().size();
Header.e_shstrndx = Doc.Header.SHStrNdx ? (uint16_t)*Doc.Header.SHStrNdx
: SN2I.get(".shstrtab");
@@ -325,6 +354,20 @@ unsigned ELFState<ELFT>::toSymbolIndex(StringRef S, StringRef LocSec,
}
template <class ELFT>
+static void overrideFields(ELFYAML::Section *From, typename ELFT::Shdr &To) {
+ if (!From)
+ return;
+ if (From->ShFlags)
+ To.sh_flags = *From->ShFlags;
+ if (From->ShName)
+ To.sh_name = *From->ShName;
+ if (From->ShOffset)
+ To.sh_offset = *From->ShOffset;
+ if (From->ShSize)
+ To.sh_size = *From->ShSize;
+}
+
+template <class ELFT>
bool ELFState<ELFT>::initImplicitHeader(ContiguousBlobAccumulator &CBA,
Elf_Shdr &Header, StringRef SecName,
ELFYAML::Section *YAMLSec) {
@@ -345,16 +388,8 @@ bool ELFState<ELFT>::initImplicitHeader(ContiguousBlobAccumulator &CBA,
else
return false;
- // Override the fields if requested.
- if (YAMLSec) {
- if (YAMLSec->ShName)
- Header.sh_name = *YAMLSec->ShName;
- if (YAMLSec->ShOffset)
- Header.sh_offset = *YAMLSec->ShOffset;
- if (YAMLSec->ShSize)
- Header.sh_size = *YAMLSec->ShSize;
- }
-
+ // Override section fields if requested.
+ overrideFields<ELFT>(YAMLSec, Header);
return true;
}
@@ -370,18 +405,25 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
ContiguousBlobAccumulator &CBA) {
// Ensure SHN_UNDEF entry is present. An all-zero section header is a
// valid SHN_UNDEF entry since SHT_NULL == 0.
- SHeaders.resize(Doc.Sections.size());
+ SHeaders.resize(Doc.getSections().size());
+
+ size_t SecNdx = -1;
+ for (const std::unique_ptr<ELFYAML::Chunk> &D : Doc.Chunks) {
+ if (auto S = dyn_cast<ELFYAML::Fill>(D.get())) {
+ writeFill(*S, CBA);
+ continue;
+ }
- for (size_t I = 0; I < Doc.Sections.size(); ++I) {
- ELFYAML::Section *Sec = Doc.Sections[I].get();
- if (I == 0 && Sec->IsImplicit)
+ ++SecNdx;
+ ELFYAML::Section *Sec = cast<ELFYAML::Section>(D.get());
+ if (SecNdx == 0 && Sec->IsImplicit)
continue;
// We have a few sections like string or symbol tables that are usually
// added implicitly to the end. However, if they are explicitly specified
// in the YAML, we need to write them here. This ensures the file offset
// remains correct.
- Elf_Shdr &SHeader = SHeaders[I];
+ Elf_Shdr &SHeader = SHeaders[SecNdx];
if (initImplicitHeader(CBA, SHeader, Sec->Name,
Sec->IsImplicit ? nullptr : Sec))
continue;
@@ -400,7 +442,7 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
if (!Sec->Link.empty())
SHeader.sh_link = toSectionIndex(Sec->Link, Sec->Name);
- if (I == 0) {
+ if (SecNdx == 0) {
if (auto RawSec = dyn_cast<ELFYAML::RawContentSection>(Sec)) {
// We do not write any content for special SHN_UNDEF section.
if (RawSec->Size)
@@ -416,6 +458,8 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
writeSectionContent(SHeader, *S, CBA);
} else if (auto S = dyn_cast<ELFYAML::RelocationSection>(Sec)) {
writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::RelrSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
} else if (auto S = dyn_cast<ELFYAML::Group>(Sec)) {
writeSectionContent(SHeader, *S, CBA);
} else if (auto S = dyn_cast<ELFYAML::MipsABIFlags>(Sec)) {
@@ -440,19 +484,20 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
writeSectionContent(SHeader, *S, CBA);
} else if (auto S = dyn_cast<ELFYAML::AddrsigSection>(Sec)) {
writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::LinkerOptionsSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::NoteSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::GnuHashSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
+ } else if (auto S = dyn_cast<ELFYAML::DependentLibrariesSection>(Sec)) {
+ writeSectionContent(SHeader, *S, CBA);
} else {
llvm_unreachable("Unknown section type");
}
- // Override the fields if requested.
- if (Sec) {
- if (Sec->ShName)
- SHeader.sh_name = *Sec->ShName;
- if (Sec->ShOffset)
- SHeader.sh_offset = *Sec->ShOffset;
- if (Sec->ShSize)
- SHeader.sh_size = *Sec->ShSize;
- }
+ // Override section fields if requested.
+ overrideFields<ELFT>(Sec, SHeader);
}
}
@@ -487,7 +532,7 @@ ELFState<ELFT>::toELFSymbols(ArrayRef<ELFYAML::Symbol> Symbols,
Ret.resize(Symbols.size() + 1);
size_t I = 0;
- for (const auto &Sym : Symbols) {
+ for (const ELFYAML::Symbol &Sym : Symbols) {
Elf_Sym &Symbol = Ret[++I];
// If NameIndex, which contains the name offset, is explicitly specified, we
@@ -522,21 +567,24 @@ void ELFState<ELFT>::initSymtabSectionHeader(Elf_Shdr &SHeader,
ArrayRef<ELFYAML::Symbol> Symbols;
if (IsStatic && Doc.Symbols)
Symbols = *Doc.Symbols;
- else if (!IsStatic)
- Symbols = Doc.DynamicSymbols;
+ else if (!IsStatic && Doc.DynamicSymbols)
+ Symbols = *Doc.DynamicSymbols;
ELFYAML::RawContentSection *RawSec =
dyn_cast_or_null<ELFYAML::RawContentSection>(YAMLSec);
- if (RawSec && !Symbols.empty() && (RawSec->Content || RawSec->Size)) {
- if (RawSec->Content)
- reportError("cannot specify both `Content` and " +
- (IsStatic ? Twine("`Symbols`") : Twine("`DynamicSymbols`")) +
- " for symbol table section '" + RawSec->Name + "'");
- if (RawSec->Size)
- reportError("cannot specify both `Size` and " +
- (IsStatic ? Twine("`Symbols`") : Twine("`DynamicSymbols`")) +
- " for symbol table section '" + RawSec->Name + "'");
- return;
+ if (RawSec && (RawSec->Content || RawSec->Size)) {
+ bool HasSymbolsDescription =
+ (IsStatic && Doc.Symbols) || (!IsStatic && Doc.DynamicSymbols);
+ if (HasSymbolsDescription) {
+ StringRef Property = (IsStatic ? "`Symbols`" : "`DynamicSymbols`");
+ if (RawSec->Content)
+ reportError("cannot specify both `Content` and " + Property +
+ " for symbol table section '" + RawSec->Name + "'");
+ if (RawSec->Size)
+ reportError("cannot specify both `Size` and " + Property +
+ " for symbol table section '" + RawSec->Name + "'");
+ return;
+ }
}
zero(SHeader);
@@ -636,22 +684,43 @@ template <class ELFT> void ELFState<ELFT>::reportError(const Twine &Msg) {
}
template <class ELFT>
+std::vector<Fragment>
+ELFState<ELFT>::getPhdrFragments(const ELFYAML::ProgramHeader &Phdr,
+ ArrayRef<typename ELFT::Shdr> SHeaders) {
+ DenseMap<StringRef, ELFYAML::Fill *> NameToFill;
+ for (const std::unique_ptr<ELFYAML::Chunk> &D : Doc.Chunks)
+ if (auto S = dyn_cast<ELFYAML::Fill>(D.get()))
+ NameToFill[S->Name] = S;
+
+ std::vector<Fragment> Ret;
+ for (const ELFYAML::SectionName &SecName : Phdr.Sections) {
+ unsigned Index;
+ if (SN2I.lookup(SecName.Section, Index)) {
+ const typename ELFT::Shdr &H = SHeaders[Index];
+ Ret.push_back({H.sh_offset, H.sh_size, H.sh_type, H.sh_addralign});
+ continue;
+ }
+
+ if (ELFYAML::Fill *Fill = NameToFill.lookup(SecName.Section)) {
+ Ret.push_back({Fill->ShOffset, Fill->Size, llvm::ELF::SHT_PROGBITS,
+ /*ShAddrAlign=*/1});
+ continue;
+ }
+
+ reportError("unknown section or fill referenced: '" + SecName.Section +
+ "' by program header");
+ }
+
+ return Ret;
+}
+
+template <class ELFT>
void ELFState<ELFT>::setProgramHeaderLayout(std::vector<Elf_Phdr> &PHeaders,
std::vector<Elf_Shdr> &SHeaders) {
uint32_t PhdrIdx = 0;
for (auto &YamlPhdr : Doc.ProgramHeaders) {
Elf_Phdr &PHeader = PHeaders[PhdrIdx++];
-
- std::vector<Elf_Shdr *> Sections;
- for (const ELFYAML::SectionName &SecName : YamlPhdr.Sections) {
- unsigned Index;
- if (!SN2I.lookup(SecName.Section, Index)) {
- reportError("unknown section referenced: '" + SecName.Section +
- "' by program header");
- continue;
- }
- Sections.push_back(&SHeaders[Index]);
- }
+ std::vector<Fragment> Fragments = getPhdrFragments(YamlPhdr, SHeaders);
if (YamlPhdr.Offset) {
PHeader.p_offset = *YamlPhdr.Offset;
@@ -662,19 +731,19 @@ void ELFState<ELFT>::setProgramHeaderLayout(std::vector<Elf_Phdr> &PHeaders,
PHeader.p_offset = 0;
// Find the minimum offset for the program header.
- for (Elf_Shdr *SHeader : Sections)
- PHeader.p_offset = std::min(PHeader.p_offset, SHeader->sh_offset);
+ for (const Fragment &F : Fragments)
+ PHeader.p_offset = std::min((uint64_t)PHeader.p_offset, F.Offset);
}
// Find the maximum offset of the end of a section in order to set p_filesz
// and p_memsz. When setting p_filesz, trailing SHT_NOBITS sections are not
// counted.
uint64_t FileOffset = PHeader.p_offset, MemOffset = PHeader.p_offset;
- for (Elf_Shdr *SHeader : Sections) {
- uint64_t End = SHeader->sh_offset + SHeader->sh_size;
+ for (const Fragment &F : Fragments) {
+ uint64_t End = F.Offset + F.Size;
MemOffset = std::max(MemOffset, End);
- if (SHeader->sh_type != llvm::ELF::SHT_NOBITS)
+ if (F.Type != llvm::ELF::SHT_NOBITS)
FileOffset = std::max(FileOffset, End);
}
@@ -691,8 +760,8 @@ void ELFState<ELFT>::setProgramHeaderLayout(std::vector<Elf_Phdr> &PHeaders,
// sections so that by default the segment has a valid and sensible
// alignment.
PHeader.p_align = 1;
- for (Elf_Shdr *SHeader : Sections)
- PHeader.p_align = std::max(PHeader.p_align, SHeader->sh_addralign);
+ for (const Fragment &F : Fragments)
+ PHeader.p_align = std::max((uint64_t)PHeader.p_align, F.AddrAlign);
}
}
}
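
The loops above reduce every covered section or fill to a Fragment and then derive the segment fields from those: p_memsz spans all fragments, while SHT_NOBITS (.bss-style) fragments are excluded from p_filesz. A compact sketch of that size computation under the same rules:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    constexpr uint32_t SHT_NOBITS = 8;

    struct Fragment { uint64_t Offset, Size; uint32_t Type; };

    // p_memsz covers every fragment; SHT_NOBITS does not extend p_filesz.
    void computeSegmentSizes(const std::vector<Fragment> &Fragments,
                             uint64_t POffset, uint64_t &FileSz,
                             uint64_t &MemSz) {
      uint64_t FileEnd = POffset, MemEnd = POffset;
      for (const Fragment &F : Fragments) {
        uint64_t End = F.Offset + F.Size;
        MemEnd = std::max(MemEnd, End);
        if (F.Type != SHT_NOBITS)
          FileEnd = std::max(FileEnd, End);
      }
      FileSz = FileEnd - POffset;
      MemSz = MemEnd - POffset;
    }
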
@@ -707,10 +776,6 @@ void ELFState<ELFT>::writeSectionContent(
if (Section.EntSize)
SHeader.sh_entsize = *Section.EntSize;
- else if (Section.Type == llvm::ELF::SHT_RELR)
- SHeader.sh_entsize = sizeof(Elf_Relr);
- else
- SHeader.sh_entsize = 0;
if (Section.Info)
SHeader.sh_info = *Section.Info;
@@ -735,8 +800,9 @@ void ELFState<ELFT>::writeSectionContent(
SHeader.sh_size = SHeader.sh_entsize * Section.Relocations.size();
// For relocation section set link to .symtab by default.
- if (Section.Link.empty())
- SHeader.sh_link = SN2I.get(".symtab");
+ unsigned Link = 0;
+ if (Section.Link.empty() && SN2I.lookup(".symtab", Link))
+ SHeader.sh_link = Link;
if (!Section.RelocatableSec.empty())
SHeader.sh_info = toSectionIndex(Section.RelocatableSec, Section.Name);
@@ -764,6 +830,33 @@ void ELFState<ELFT>::writeSectionContent(
}
template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::RelrSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ SHeader.sh_entsize =
+ Section.EntSize ? uint64_t(*Section.EntSize) : sizeof(Elf_Relr);
+
+ if (Section.Content) {
+ SHeader.sh_size = writeContent(OS, Section.Content, None);
+ return;
+ }
+
+ if (!Section.Entries)
+ return;
+
+ for (llvm::yaml::Hex64 E : *Section.Entries) {
+ if (!ELFT::Is64Bits && E > UINT32_MAX)
+ reportError(Section.Name + ": the value is too large for 32-bits: 0x" +
+ Twine::utohexstr(E));
+ support::endian::write<uintX_t>(OS, E, ELFT::TargetEndianness);
+ }
+
+ SHeader.sh_size = sizeof(uintX_t) * Section.Entries->size();
+}
+
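
SHT_RELR entries are emitted as target-sized words, with an explicit range check so 32-bit outputs reject values that need more than 32 bits. A reduced model of that writer, using a plain vector in place of the blob accumulator:

    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    // Emit each entry as a word of the target's size; on 32-bit targets
    // reject values that do not fit, as the reportError path above does.
    template <typename WordT>
    uint64_t writeRelrEntries(const std::vector<uint64_t> &Entries,
                              std::vector<WordT> &Out) {
      for (uint64_t E : Entries) {
        if (sizeof(WordT) == 4 && E > UINT32_MAX)
          throw std::runtime_error("value too large for 32 bits");
        Out.push_back(static_cast<WordT>(E));
      }
      return sizeof(WordT) * Entries.size(); // becomes sh_size
    }
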
+template <class ELFT>
void ELFState<ELFT>::writeSectionContent(
Elf_Shdr &SHeader, const ELFYAML::SymtabShndxSection &Shndx,
ContiguousBlobAccumulator &CBA) {
@@ -784,10 +877,16 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
assert(Section.Type == llvm::ELF::SHT_GROUP &&
"Section type is not SHT_GROUP");
+ unsigned Link = 0;
+ if (Section.Link.empty() && SN2I.lookup(".symtab", Link))
+ SHeader.sh_link = Link;
+
SHeader.sh_entsize = 4;
SHeader.sh_size = SHeader.sh_entsize * Section.Members.size();
- SHeader.sh_info =
- toSymbolIndex(Section.Signature, Section.Name, /*IsDynamic=*/false);
+
+ if (Section.Signature)
+ SHeader.sh_info =
+ toSymbolIndex(*Section.Signature, Section.Name, /*IsDynamic=*/false);
raw_ostream &OS =
CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
@@ -819,7 +918,6 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(
Elf_Shdr &SHeader, const ELFYAML::StackSizesSection &Section,
ContiguousBlobAccumulator &CBA) {
- using uintX_t = typename ELFT::uint;
raw_ostream &OS =
CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
@@ -835,6 +933,52 @@ void ELFState<ELFT>::writeSectionContent(
}
template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(
+ Elf_Shdr &SHeader, const ELFYAML::LinkerOptionsSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+
+ if (Section.Content) {
+ SHeader.sh_size = writeContent(OS, Section.Content, None);
+ return;
+ }
+
+ if (!Section.Options)
+ return;
+
+ for (const ELFYAML::LinkerOption &LO : *Section.Options) {
+ OS.write(LO.Key.data(), LO.Key.size());
+ OS.write('\0');
+ OS.write(LO.Value.data(), LO.Value.size());
+ OS.write('\0');
+ SHeader.sh_size += (LO.Key.size() + LO.Value.size() + 2);
+ }
+}
+
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(
+ Elf_Shdr &SHeader, const ELFYAML::DependentLibrariesSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+
+ if (Section.Content) {
+ SHeader.sh_size = writeContent(OS, Section.Content, None);
+ return;
+ }
+
+ if (!Section.Libs)
+ return;
+
+ for (StringRef Lib : *Section.Libs) {
+ OS.write(Lib.data(), Lib.size());
+ OS.write('\0');
+ SHeader.sh_size += Lib.size() + 1;
+ }
+}
+
+template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::HashSection &Section,
ContiguousBlobAccumulator &CBA) {
@@ -871,9 +1015,19 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
raw_ostream &OS =
CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ SHeader.sh_info = Section.Info;
+
+ if (Section.Content) {
+ SHeader.sh_size = writeContent(OS, Section.Content, None);
+ return;
+ }
+
+ if (!Section.Entries)
+ return;
+
uint64_t AuxCnt = 0;
- for (size_t I = 0; I < Section.Entries.size(); ++I) {
- const ELFYAML::VerdefEntry &E = Section.Entries[I];
+ for (size_t I = 0; I < Section.Entries->size(); ++I) {
+ const ELFYAML::VerdefEntry &E = (*Section.Entries)[I];
Elf_Verdef VerDef;
VerDef.vd_version = E.Version;
@@ -882,7 +1036,7 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
VerDef.vd_hash = E.Hash;
VerDef.vd_aux = sizeof(Elf_Verdef);
VerDef.vd_cnt = E.VerNames.size();
- if (I == Section.Entries.size() - 1)
+ if (I == Section.Entries->size() - 1)
VerDef.vd_next = 0;
else
VerDef.vd_next =
@@ -900,9 +1054,8 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
}
}
- SHeader.sh_size = Section.Entries.size() * sizeof(Elf_Verdef) +
+ SHeader.sh_size = Section.Entries->size() * sizeof(Elf_Verdef) +
AuxCnt * sizeof(Elf_Verdaux);
- SHeader.sh_info = Section.Info;
}
template <class ELFT>
@@ -913,15 +1066,24 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
typedef typename ELFT::Vernaux Elf_Vernaux;
auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ SHeader.sh_info = Section.Info;
+
+ if (Section.Content) {
+ SHeader.sh_size = writeContent(OS, Section.Content, None);
+ return;
+ }
+
+ if (!Section.VerneedV)
+ return;
uint64_t AuxCnt = 0;
- for (size_t I = 0; I < Section.VerneedV.size(); ++I) {
- const ELFYAML::VerneedEntry &VE = Section.VerneedV[I];
+ for (size_t I = 0; I < Section.VerneedV->size(); ++I) {
+ const ELFYAML::VerneedEntry &VE = (*Section.VerneedV)[I];
Elf_Verneed VerNeed;
VerNeed.vn_version = VE.Version;
VerNeed.vn_file = DotDynstr.getOffset(VE.File);
- if (I == Section.VerneedV.size() - 1)
+ if (I == Section.VerneedV->size() - 1)
VerNeed.vn_next = 0;
else
VerNeed.vn_next =
@@ -946,9 +1108,8 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
}
}
- SHeader.sh_size = Section.VerneedV.size() * sizeof(Elf_Verneed) +
+ SHeader.sh_size = Section.VerneedV->size() * sizeof(Elf_Verneed) +
AuxCnt * sizeof(Elf_Vernaux);
- SHeader.sh_info = Section.Info;
}
template <class ELFT>
@@ -982,8 +1143,6 @@ template <class ELFT>
void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::DynamicSection &Section,
ContiguousBlobAccumulator &CBA) {
- typedef typename ELFT::uint uintX_t;
-
assert(Section.Type == llvm::ELF::SHT_DYNAMIC &&
"Section type is not SHT_DYNAMIC");
@@ -1035,16 +1194,158 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
}
}
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::NoteSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+ uint64_t Offset = OS.tell();
+
+ if (Section.Content || Section.Size) {
+ SHeader.sh_size = writeContent(OS, Section.Content, Section.Size);
+ return;
+ }
+
+ for (const ELFYAML::NoteEntry &NE : *Section.Notes) {
+ // Write name size.
+ if (NE.Name.empty())
+ support::endian::write<uint32_t>(OS, 0, ELFT::TargetEndianness);
+ else
+ support::endian::write<uint32_t>(OS, NE.Name.size() + 1,
+ ELFT::TargetEndianness);
+
+ // Write description size.
+ if (NE.Desc.binary_size() == 0)
+ support::endian::write<uint32_t>(OS, 0, ELFT::TargetEndianness);
+ else
+ support::endian::write<uint32_t>(OS, NE.Desc.binary_size(),
+ ELFT::TargetEndianness);
+
+ // Write type.
+ support::endian::write<uint32_t>(OS, NE.Type, ELFT::TargetEndianness);
+
+ // Write name, null terminator and padding.
+ if (!NE.Name.empty()) {
+ support::endian::write<uint8_t>(OS, arrayRefFromStringRef(NE.Name),
+ ELFT::TargetEndianness);
+ support::endian::write<uint8_t>(OS, 0, ELFT::TargetEndianness);
+ CBA.padToAlignment(4);
+ }
+
+ // Write description and padding.
+ if (NE.Desc.binary_size() != 0) {
+ NE.Desc.writeAsBinary(OS);
+ CBA.padToAlignment(4);
+ }
+ }
+
+ SHeader.sh_size = OS.tell() - Offset;
+}
+
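
Each note record is three 4-byte words (namesz, descsz, type) followed by the name with its NUL terminator and then the descriptor, each padded to a 4-byte boundary. A worked size computation matching those rules (hypothetical helper, independent of the YAML types):

    #include <cstdint>
    #include <string>

    // Size of one SHT_NOTE record as emitted above.
    uint64_t noteRecordSize(const std::string &Name, uint64_t DescSize) {
      auto pad4 = [](uint64_t V) { return (V + 3) & ~uint64_t(3); };
      uint64_t Size = 12; // namesz + descsz + type words
      if (!Name.empty())
        Size += pad4(Name.size() + 1); // name, NUL terminator, padding
      Size += pad4(DescSize);          // descriptor plus padding
      return Size;
    }
    // e.g. noteRecordSize("GNU", 16) == 12 + 4 + 16 == 32
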
+template <class ELFT>
+void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
+ const ELFYAML::GnuHashSection &Section,
+ ContiguousBlobAccumulator &CBA) {
+ raw_ostream &OS =
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign);
+
+ unsigned Link = 0;
+ if (Section.Link.empty() && SN2I.lookup(".dynsym", Link))
+ SHeader.sh_link = Link;
+
+ if (Section.Content) {
+ SHeader.sh_size = writeContent(OS, Section.Content, None);
+ return;
+ }
+
+ // We write the header first, starting with the hash buckets count. Normally
+ // it is the number of entries in HashBuckets, but the "NBuckets" property can
+ // be used to override this field, which is useful for producing broken
+ // objects.
+ if (Section.Header->NBuckets)
+ support::endian::write<uint32_t>(OS, *Section.Header->NBuckets,
+ ELFT::TargetEndianness);
+ else
+ support::endian::write<uint32_t>(OS, Section.HashBuckets->size(),
+ ELFT::TargetEndianness);
+
+ // Write the index of the first symbol in the dynamic symbol table accessible
+ // via the hash table.
+ support::endian::write<uint32_t>(OS, Section.Header->SymNdx,
+ ELFT::TargetEndianness);
+
+ // Write the number of words in the Bloom filter. As above, the "MaskWords"
+ // property can be used to set this field to any value.
+ if (Section.Header->MaskWords)
+ support::endian::write<uint32_t>(OS, *Section.Header->MaskWords,
+ ELFT::TargetEndianness);
+ else
+ support::endian::write<uint32_t>(OS, Section.BloomFilter->size(),
+ ELFT::TargetEndianness);
+
+ // Write the shift constant used by the Bloom filter.
+ support::endian::write<uint32_t>(OS, Section.Header->Shift2,
+ ELFT::TargetEndianness);
+
+ // We've finished writing the header. Now write the Bloom filter.
+ for (llvm::yaml::Hex64 Val : *Section.BloomFilter)
+ support::endian::write<typename ELFT::uint>(OS, Val,
+ ELFT::TargetEndianness);
+
+ // Write an array of hash buckets.
+ for (llvm::yaml::Hex32 Val : *Section.HashBuckets)
+ support::endian::write<uint32_t>(OS, Val, ELFT::TargetEndianness);
+
+ // Write an array of hash values.
+ for (llvm::yaml::Hex32 Val : *Section.HashValues)
+ support::endian::write<uint32_t>(OS, Val, ELFT::TargetEndianness);
+
+ SHeader.sh_size = 16 /*Header size*/ +
+ Section.BloomFilter->size() * sizeof(typename ELFT::uint) +
+ Section.HashBuckets->size() * 4 +
+ Section.HashValues->size() * 4;
+}
+
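
The final sh_size expression mirrors the layout just written: a fixed 16-byte header (nbuckets, symndx, maskwords, shift2), then the Bloom filter in target-sized words, then 4-byte buckets and 4-byte hash values. The same arithmetic as a checked sketch for a 64-bit target:

    #include <cstdint>

    // 16-byte header + Bloom filter words + buckets + hash values.
    constexpr uint64_t gnuHashSize(uint64_t MaskWords, uint64_t Buckets,
                                   uint64_t Values, uint64_t WordSize = 8) {
      return 16 + MaskWords * WordSize + Buckets * 4 + Values * 4;
    }
    static_assert(gnuHashSize(1, 2, 3) == 44, "16 + 8 + 8 + 12");
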
+template <class ELFT>
+void ELFState<ELFT>::writeFill(ELFYAML::Fill &Fill,
+ ContiguousBlobAccumulator &CBA) {
+ raw_ostream &OS = CBA.getOSAndAlignedOffset(Fill.ShOffset, /*Align=*/1);
+
+ size_t PatternSize = Fill.Pattern ? Fill.Pattern->binary_size() : 0;
+ if (!PatternSize) {
+ OS.write_zeros(Fill.Size);
+ return;
+ }
+
+ // Fill the content with the specified pattern.
+ uint64_t Written = 0;
+ for (; Written + PatternSize <= Fill.Size; Written += PatternSize)
+ Fill.Pattern->writeAsBinary(OS);
+ Fill.Pattern->writeAsBinary(OS, Fill.Size - Written);
+}
+
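
writeFill repeats the pattern until Size bytes are written, truncating the final copy, and falls back to zeros when no pattern is given. A reduced string-based model of the same loop:

    #include <cstdint>
    #include <string>

    // Repeat Pattern to exactly Size bytes, truncating the last copy.
    std::string expandFill(const std::string &Pattern, uint64_t Size) {
      if (Pattern.empty())
        return std::string(Size, '\0'); // zero fill when no pattern given
      std::string Out;
      while (Out.size() + Pattern.size() <= Size)
        Out += Pattern;
      Out += Pattern.substr(0, Size - Out.size());
      return Out;
    }
    // expandFill("AB", 5) == "ABABA"
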
template <class ELFT> void ELFState<ELFT>::buildSectionIndex() {
- for (unsigned I = 0, E = Doc.Sections.size(); I != E; ++I) {
- StringRef Name = Doc.Sections[I]->Name;
- if (Name.empty())
+ size_t SecNdx = -1;
+ StringSet<> Seen;
+ for (size_t I = 0; I < Doc.Chunks.size(); ++I) {
+ const std::unique_ptr<ELFYAML::Chunk> &C = Doc.Chunks[I];
+ bool IsSection = isa<ELFYAML::Section>(C.get());
+ if (IsSection)
+ ++SecNdx;
+
+ if (C->Name.empty())
+ continue;
+
+ if (!Seen.insert(C->Name).second)
+ reportError("repeated section/fill name: '" + C->Name +
+ "' at YAML section/fill number " + Twine(I));
+ if (!IsSection || HasError)
continue;
- DotShStrtab.add(ELFYAML::dropUniqueSuffix(Name));
- if (!SN2I.addName(Name, I))
- reportError("repeated section name: '" + Name +
- "' at YAML section number " + Twine(I));
+ if (!SN2I.addName(C->Name, SecNdx))
+ llvm_unreachable("buildSectionIndex() failed");
+ DotShStrtab.add(ELFYAML::dropUniqueSuffix(C->Name));
}
DotShStrtab.finalize();
@@ -1061,7 +1362,8 @@ template <class ELFT> void ELFState<ELFT>::buildSymbolIndexes() {
if (Doc.Symbols)
Build(*Doc.Symbols, SymN2I);
- Build(Doc.DynamicSymbols, DynSymN2I);
+ if (Doc.DynamicSymbols)
+ Build(*Doc.DynamicSymbols, DynSymN2I);
}
template <class ELFT> void ELFState<ELFT>::finalizeStrings() {
@@ -1072,22 +1374,26 @@ template <class ELFT> void ELFState<ELFT>::finalizeStrings() {
DotStrtab.finalize();
// Add the dynamic symbol names to .dynstr section.
- for (const ELFYAML::Symbol &Sym : Doc.DynamicSymbols)
- DotDynstr.add(ELFYAML::dropUniqueSuffix(Sym.Name));
+ if (Doc.DynamicSymbols)
+ for (const ELFYAML::Symbol &Sym : *Doc.DynamicSymbols)
+ DotDynstr.add(ELFYAML::dropUniqueSuffix(Sym.Name));
// SHT_GNU_verdef and SHT_GNU_verneed sections might also
// add strings to .dynstr section.
- for (const std::unique_ptr<ELFYAML::Section> &Sec : Doc.Sections) {
- if (auto VerNeed = dyn_cast<ELFYAML::VerneedSection>(Sec.get())) {
- for (const ELFYAML::VerneedEntry &VE : VerNeed->VerneedV) {
- DotDynstr.add(VE.File);
- for (const ELFYAML::VernauxEntry &Aux : VE.AuxV)
- DotDynstr.add(Aux.Name);
+ for (const ELFYAML::Chunk *Sec : Doc.getSections()) {
+ if (auto VerNeed = dyn_cast<ELFYAML::VerneedSection>(Sec)) {
+ if (VerNeed->VerneedV) {
+ for (const ELFYAML::VerneedEntry &VE : *VerNeed->VerneedV) {
+ DotDynstr.add(VE.File);
+ for (const ELFYAML::VernauxEntry &Aux : VE.AuxV)
+ DotDynstr.add(Aux.Name);
+ }
}
- } else if (auto VerDef = dyn_cast<ELFYAML::VerdefSection>(Sec.get())) {
- for (const ELFYAML::VerdefEntry &E : VerDef->Entries)
- for (StringRef Name : E.VerNames)
- DotDynstr.add(Name);
+ } else if (auto VerDef = dyn_cast<ELFYAML::VerdefSection>(Sec)) {
+ if (VerDef->Entries)
+ for (const ELFYAML::VerdefEntry &E : *VerDef->Entries)
+ for (StringRef Name : E.VerNames)
+ DotDynstr.add(Name);
}
}
@@ -1105,6 +1411,9 @@ bool ELFState<ELFT>::writeELF(raw_ostream &OS, ELFYAML::Object &Doc,
State.finalizeStrings();
State.buildSectionIndex();
+ if (State.HasError)
+ return false;
+
State.buildSymbolIndexes();
std::vector<Elf_Phdr> PHeaders;
@@ -1119,7 +1428,7 @@ bool ELFState<ELFT>::writeELF(raw_ostream &OS, ELFYAML::Object &Doc,
std::vector<Elf_Shdr> SHeaders;
State.initSectionHeaders(SHeaders, CBA);
- // Now we can decide segment offsets
+ // Now we can decide segment offsets.
State.setProgramHeaderLayout(PHeaders, SHeaders);
if (State.HasError)
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
index 29585abe6e80..efa7ecb4728b 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -24,7 +24,7 @@
namespace llvm {
-ELFYAML::Section::~Section() = default;
+ELFYAML::Chunk::~Chunk() = default;
namespace yaml {
@@ -54,6 +54,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_PT>::enumeration(
ECase(PT_GNU_EH_FRAME);
ECase(PT_GNU_STACK);
ECase(PT_GNU_RELRO);
+ ECase(PT_GNU_PROPERTY);
#undef ECase
IO.enumFallback<Hex32>(Value);
}
@@ -252,6 +253,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_ELFOSABI>::enumeration(
ECase(ELFOSABI_HPUX);
ECase(ELFOSABI_NETBSD);
ECase(ELFOSABI_GNU);
+ ECase(ELFOSABI_LINUX);
ECase(ELFOSABI_HURD);
ECase(ELFOSABI_SOLARIS);
ECase(ELFOSABI_AIX);
@@ -273,6 +275,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_ELFOSABI>::enumeration(
ECase(ELFOSABI_C6000_LINUX);
ECase(ELFOSABI_STANDALONE);
#undef ECase
+ IO.enumFallback<Hex8>(Value);
}
void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
@@ -863,6 +866,24 @@ template <> struct SequenceElementTraits<StOtherPiece> {
static const bool flow = true;
};
+template <> struct ScalarTraits<ELFYAML::YAMLFlowString> {
+ static void output(const ELFYAML::YAMLFlowString &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+ }
+ static StringRef input(StringRef Scalar, void *,
+ ELFYAML::YAMLFlowString &Val) {
+ Val = Scalar;
+ return {};
+ }
+ static QuotingType mustQuote(StringRef S) {
+ return ScalarTraits<StringRef>::mustQuote(S);
+ }
+};
+template <> struct SequenceElementTraits<ELFYAML::YAMLFlowString> {
+ static const bool flow = true;
+};
+
namespace {
struct NormalizedOther {
@@ -998,10 +1019,12 @@ static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
// are producing YAML, because yaml2obj sets appropriate values for them
// automatically when they are not explicitly defined.
assert(!IO.outputting() ||
- (!Section.ShOffset.hasValue() && !Section.ShSize.hasValue()));
+ (!Section.ShOffset.hasValue() && !Section.ShSize.hasValue() &&
+ !Section.ShName.hasValue() && !Section.ShFlags.hasValue()));
IO.mapOptional("ShName", Section.ShName);
IO.mapOptional("ShOffset", Section.ShOffset);
IO.mapOptional("ShSize", Section.ShSize);
+ IO.mapOptional("ShFlags", Section.ShFlags);
}
static void sectionMapping(IO &IO, ELFYAML::DynamicSection &Section) {
@@ -1032,6 +1055,22 @@ static void sectionMapping(IO &IO, ELFYAML::HashSection &Section) {
IO.mapOptional("Size", Section.Size);
}
+static void sectionMapping(IO &IO, ELFYAML::NoteSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Content", Section.Content);
+ IO.mapOptional("Size", Section.Size);
+ IO.mapOptional("Notes", Section.Notes);
+}
+
+static void sectionMapping(IO &IO, ELFYAML::GnuHashSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Content", Section.Content);
+ IO.mapOptional("Header", Section.Header);
+ IO.mapOptional("BloomFilter", Section.BloomFilter);
+ IO.mapOptional("HashBuckets", Section.HashBuckets);
+ IO.mapOptional("HashValues", Section.HashValues);
+}
static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Size", Section.Size, Hex64(0));
@@ -1040,7 +1079,8 @@ static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) {
static void sectionMapping(IO &IO, ELFYAML::VerdefSection &Section) {
commonSectionMapping(IO, Section);
IO.mapRequired("Info", Section.Info);
- IO.mapRequired("Entries", Section.Entries);
+ IO.mapOptional("Entries", Section.Entries);
+ IO.mapOptional("Content", Section.Content);
}
static void sectionMapping(IO &IO, ELFYAML::SymverSection &Section) {
@@ -1051,7 +1091,8 @@ static void sectionMapping(IO &IO, ELFYAML::SymverSection &Section) {
static void sectionMapping(IO &IO, ELFYAML::VerneedSection &Section) {
commonSectionMapping(IO, Section);
IO.mapRequired("Info", Section.Info);
- IO.mapRequired("Dependencies", Section.VerneedV);
+ IO.mapOptional("Dependencies", Section.VerneedV);
+ IO.mapOptional("Content", Section.Content);
}
static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) {
@@ -1060,9 +1101,15 @@ static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) {
IO.mapOptional("Relocations", Section.Relocations);
}
+static void sectionMapping(IO &IO, ELFYAML::RelrSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Entries", Section.Entries);
+ IO.mapOptional("Content", Section.Content);
+}
+
static void groupSectionMapping(IO &IO, ELFYAML::Group &Group) {
commonSectionMapping(IO, Group);
- IO.mapOptional("Info", Group.Signature, StringRef());
+ IO.mapOptional("Info", Group.Signature);
IO.mapRequired("Members", Group.Members);
}
@@ -1078,6 +1125,25 @@ static void sectionMapping(IO &IO, ELFYAML::AddrsigSection &Section) {
IO.mapOptional("Symbols", Section.Symbols);
}
+static void fillMapping(IO &IO, ELFYAML::Fill &Fill) {
+ IO.mapOptional("Name", Fill.Name, StringRef());
+ IO.mapOptional("Pattern", Fill.Pattern);
+ IO.mapRequired("Size", Fill.Size);
+}
+
+static void sectionMapping(IO &IO, ELFYAML::LinkerOptionsSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Options", Section.Options);
+ IO.mapOptional("Content", Section.Content);
+}
+
+static void sectionMapping(IO &IO,
+ ELFYAML::DependentLibrariesSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Libraries", Section.Libs);
+ IO.mapOptional("Content", Section.Content);
+}
+
void MappingTraits<ELFYAML::SectionOrType>::mapping(
IO &IO, ELFYAML::SectionOrType &sectionOrType) {
IO.mapRequired("SectionOrType", sectionOrType.sectionNameOrType);
@@ -1108,15 +1174,27 @@ static void sectionMapping(IO &IO, ELFYAML::MipsABIFlags &Section) {
IO.mapOptional("Flags2", Section.Flags2, Hex32(0));
}
-void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
- IO &IO, std::unique_ptr<ELFYAML::Section> &Section) {
- ELFYAML::ELF_SHT sectionType;
- if (IO.outputting())
- sectionType = Section->Type;
- else
- IO.mapRequired("Type", sectionType);
+void MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::mapping(
+ IO &IO, std::unique_ptr<ELFYAML::Chunk> &Section) {
+ ELFYAML::ELF_SHT Type;
+ if (IO.outputting()) {
+ Type = cast<ELFYAML::Section>(Section.get())->Type;
+ } else {
+ // When the Type string does not have a "SHT_" prefix, we know it is not a
+ // description of a regular ELF output section. Currently, we have one
+ // special type named "Fill". See comments for Fill.
+ StringRef StrType;
+ IO.mapRequired("Type", StrType);
+ if (StrType == "Fill") {
+ Section.reset(new ELFYAML::Fill());
+ fillMapping(IO, *cast<ELFYAML::Fill>(Section.get()));
+ return;
+ }
+
+ IO.mapRequired("Type", Type);
+ }
- switch (sectionType) {
+ switch (Type) {
case ELF::SHT_DYNAMIC:
if (!IO.outputting())
Section.reset(new ELFYAML::DynamicSection());
@@ -1128,6 +1206,11 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
Section.reset(new ELFYAML::RelocationSection());
sectionMapping(IO, *cast<ELFYAML::RelocationSection>(Section.get()));
break;
+ case ELF::SHT_RELR:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::RelrSection());
+ sectionMapping(IO, *cast<ELFYAML::RelrSection>(Section.get()));
+ break;
case ELF::SHT_GROUP:
if (!IO.outputting())
Section.reset(new ELFYAML::Group());
@@ -1143,6 +1226,16 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
Section.reset(new ELFYAML::HashSection());
sectionMapping(IO, *cast<ELFYAML::HashSection>(Section.get()));
break;
+ case ELF::SHT_NOTE:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::NoteSection());
+ sectionMapping(IO, *cast<ELFYAML::NoteSection>(Section.get()));
+ break;
+ case ELF::SHT_GNU_HASH:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::GnuHashSection());
+ sectionMapping(IO, *cast<ELFYAML::GnuHashSection>(Section.get()));
+ break;
case ELF::SHT_MIPS_ABIFLAGS:
if (!IO.outputting())
Section.reset(new ELFYAML::MipsABIFlags());
@@ -1173,6 +1266,17 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
Section.reset(new ELFYAML::AddrsigSection());
sectionMapping(IO, *cast<ELFYAML::AddrsigSection>(Section.get()));
break;
+ case ELF::SHT_LLVM_LINKER_OPTIONS:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::LinkerOptionsSection());
+ sectionMapping(IO, *cast<ELFYAML::LinkerOptionsSection>(Section.get()));
+ break;
+ case ELF::SHT_LLVM_DEPENDENT_LIBRARIES:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::DependentLibrariesSection());
+ sectionMapping(IO,
+ *cast<ELFYAML::DependentLibrariesSection>(Section.get()));
+ break;
default:
if (!IO.outputting()) {
StringRef Name;
@@ -1192,17 +1296,18 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
}
}
-StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
- IO &io, std::unique_ptr<ELFYAML::Section> &Section) {
- if (const auto *RawSection =
- dyn_cast<ELFYAML::RawContentSection>(Section.get())) {
+StringRef MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::validate(
+ IO &io, std::unique_ptr<ELFYAML::Chunk> &C) {
+ if (const auto *RawSection = dyn_cast<ELFYAML::RawContentSection>(C.get())) {
if (RawSection->Size && RawSection->Content &&
(uint64_t)(*RawSection->Size) < RawSection->Content->binary_size())
return "Section size must be greater than or equal to the content size";
+ if (RawSection->Flags && RawSection->ShFlags)
+ return "ShFlags and Flags cannot be used together";
return {};
}
- if (const auto *SS = dyn_cast<ELFYAML::StackSizesSection>(Section.get())) {
+ if (const auto *SS = dyn_cast<ELFYAML::StackSizesSection>(C.get())) {
if (!SS->Entries && !SS->Content && !SS->Size)
return ".stack_sizes: one of Content, Entries and Size must be specified";
@@ -1222,7 +1327,7 @@ StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
return {};
}
- if (const auto *HS = dyn_cast<ELFYAML::HashSection>(Section.get())) {
+ if (const auto *HS = dyn_cast<ELFYAML::HashSection>(C.get())) {
if (!HS->Content && !HS->Bucket && !HS->Chain && !HS->Size)
return "one of \"Content\", \"Size\", \"Bucket\" or \"Chain\" must be "
"specified";
@@ -1245,7 +1350,7 @@ StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
return {};
}
- if (const auto *Sec = dyn_cast<ELFYAML::AddrsigSection>(Section.get())) {
+ if (const auto *Sec = dyn_cast<ELFYAML::AddrsigSection>(C.get())) {
if (!Sec->Symbols && !Sec->Content && !Sec->Size)
return "one of \"Content\", \"Size\" or \"Symbols\" must be specified";
@@ -1270,6 +1375,89 @@ StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
return {};
}
+ if (const auto *NS = dyn_cast<ELFYAML::NoteSection>(C.get())) {
+ if (!NS->Content && !NS->Size && !NS->Notes)
+ return "one of \"Content\", \"Size\" or \"Notes\" must be "
+ "specified";
+
+ if (!NS->Content && !NS->Size)
+ return {};
+
+ if (NS->Size && NS->Content &&
+ (uint64_t)*NS->Size < NS->Content->binary_size())
+ return "\"Size\" must be greater than or equal to the content "
+ "size";
+
+ if (NS->Notes)
+ return "\"Notes\" cannot be used with \"Content\" or \"Size\"";
+ return {};
+ }
+
+ if (const auto *Sec = dyn_cast<ELFYAML::GnuHashSection>(C.get())) {
+ if (!Sec->Content && !Sec->Header && !Sec->BloomFilter &&
+ !Sec->HashBuckets && !Sec->HashValues)
+ return "either \"Content\" or \"Header\", \"BloomFilter\", "
+ "\"HashBuckets\" and \"HashBuckets\" must be specified";
+
+ if (Sec->Header || Sec->BloomFilter || Sec->HashBuckets ||
+ Sec->HashValues) {
+ if (!Sec->Header || !Sec->BloomFilter || !Sec->HashBuckets ||
+ !Sec->HashValues)
+ return "\"Header\", \"BloomFilter\", "
+ "\"HashBuckets\" and \"HashValues\" must be used together";
+ if (Sec->Content)
+ return "\"Header\", \"BloomFilter\", "
+ "\"HashBuckets\" and \"HashValues\" can't be used together with "
+ "\"Content\"";
+ return {};
+ }
+
+ // Only Content is specified.
+ return {};
+ }
+
+ if (const auto *Sec = dyn_cast<ELFYAML::LinkerOptionsSection>(C.get())) {
+ if (Sec->Options && Sec->Content)
+ return "\"Options\" and \"Content\" can't be used together";
+ return {};
+ }
+
+ if (const auto *Sec = dyn_cast<ELFYAML::DependentLibrariesSection>(C.get())) {
+ if (Sec->Libs && Sec->Content)
+ return "SHT_LLVM_DEPENDENT_LIBRARIES: \"Libraries\" and \"Content\" "
+ "can't "
+ "be used together";
+ return {};
+ }
+
+ if (const auto *F = dyn_cast<ELFYAML::Fill>(C.get())) {
+ if (!F->Pattern)
+ return {};
+ if (F->Pattern->binary_size() != 0 && !F->Size)
+ return "\"Size\" can't be 0 when \"Pattern\" is not empty";
+ return {};
+ }
+
+ if (const auto *VD = dyn_cast<ELFYAML::VerdefSection>(C.get())) {
+ if (VD->Entries && VD->Content)
+ return "SHT_GNU_verdef: \"Entries\" and \"Content\" can't be used "
+ "together";
+ return {};
+ }
+
+ if (const auto *VD = dyn_cast<ELFYAML::VerneedSection>(C.get())) {
+ if (VD->VerneedV && VD->Content)
+ return "SHT_GNU_verneed: \"Dependencies\" and \"Content\" can't be used "
+ "together";
+ return {};
+ }
+
+ if (const auto *RS = dyn_cast<ELFYAML::RelrSection>(C.get())) {
+ if (RS->Entries && RS->Content)
+ return "\"Entries\" and \"Content\" can't be used together";
+ return {};
+ }
+
return {};
}
@@ -1305,6 +1493,15 @@ void MappingTraits<ELFYAML::StackSizeEntry>::mapping(
IO.mapRequired("Size", E.Size);
}
+void MappingTraits<ELFYAML::GnuHashHeader>::mapping(IO &IO,
+ ELFYAML::GnuHashHeader &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapOptional("NBuckets", E.NBuckets);
+ IO.mapRequired("SymNdx", E.SymNdx);
+ IO.mapOptional("MaskWords", E.MaskWords);
+ IO.mapRequired("Shift2", E.Shift2);
+}
+
void MappingTraits<ELFYAML::DynamicEntry>::mapping(IO &IO,
ELFYAML::DynamicEntry &Rel) {
assert(IO.getContext() && "The IO context is not initialized");
@@ -1313,6 +1510,14 @@ void MappingTraits<ELFYAML::DynamicEntry>::mapping(IO &IO,
IO.mapRequired("Value", Rel.Val);
}
+void MappingTraits<ELFYAML::NoteEntry>::mapping(IO &IO, ELFYAML::NoteEntry &N) {
+ assert(IO.getContext() && "The IO context is not initialized");
+
+ IO.mapOptional("Name", N.Name);
+ IO.mapOptional("Desc", N.Desc);
+ IO.mapRequired("Type", N.Type);
+}
+
void MappingTraits<ELFYAML::VerdefEntry>::mapping(IO &IO,
ELFYAML::VerdefEntry &E) {
assert(IO.getContext() && "The IO context is not initialized");
@@ -1371,7 +1576,7 @@ void MappingTraits<ELFYAML::Object>::mapping(IO &IO, ELFYAML::Object &Object) {
IO.mapTag("!ELF", true);
IO.mapRequired("FileHeader", Object.Header);
IO.mapOptional("ProgramHeaders", Object.ProgramHeaders);
- IO.mapOptional("Sections", Object.Sections);
+ IO.mapOptional("Sections", Object.Chunks);
IO.mapOptional("Symbols", Object.Symbols);
IO.mapOptional("DynamicSymbols", Object.DynamicSymbols);
IO.setContext(nullptr);
@@ -1383,6 +1588,13 @@ void MappingTraits<ELFYAML::AddrsigSymbol>::mapping(IO &IO, ELFYAML::AddrsigSymb
IO.mapOptional("Index", Sym.Index);
}
+void MappingTraits<ELFYAML::LinkerOption>::mapping(IO &IO,
+ ELFYAML::LinkerOption &Opt) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapRequired("Name", Opt.Key);
+ IO.mapRequired("Value", Opt.Value);
+}
+
LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_AFL_REG)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_ABI_FP)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_EXT)
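Taken together, the new Chunk plumbing lets one YAML document mix regular sections with the special Fill type. A minimal sketch of a driver (the document and the parseDoc helper are hypothetical; yaml::Input feeding an ELFYAML::Object is the entry point these traits serve):

#include "llvm/ObjectYAML/ELFYAML.h"
#include "llvm/Support/YAMLTraits.h"

static const char Doc[] = R"(--- !ELF
FileHeader:
  Class:   ELFCLASS64
  Data:    ELFDATA2LSB
  Type:    ET_REL
  Machine: EM_X86_64
Sections:
  - Name: .text
    Type: SHT_PROGBITS
  - Type:    Fill
    Pattern: "AABB"
    Size:    0x4
)";

static bool parseDoc() {
  llvm::yaml::Input YIn(Doc);
  llvm::ELFYAML::Object Obj;
  YIn >> Obj;          // "Fill" has no SHT_ prefix, so it becomes an ELFYAML::Fill.
  return !YIn.error(); // On success, Obj.Chunks holds one Section and one Fill.
}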
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
index b56f811ce67d..bda4aed885b4 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -494,7 +494,8 @@ void UniversalWriter::writeMachO(raw_ostream &OS) {
writeFatArchs(OS);
auto &FatFile = *ObjectFile.FatMachO;
- assert(FatFile.FatArchs.size() == FatFile.Slices.size());
+ assert(FatFile.FatArchs.size() >= FatFile.Slices.size() &&
+ "Cannot write Slices if not decribed in FatArches");
for (size_t i = 0; i < FatFile.Slices.size(); i++) {
ZeroToOffset(OS, FatFile.FatArchs[i].offset);
MachOWriter Writer(FatFile.Slices[i]);
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/MinidumpYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/MinidumpYAML.cpp
index 21b2a4d78629..77ea42c41a4c 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/MinidumpYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/MinidumpYAML.cpp
@@ -337,6 +337,7 @@ static void streamMapping(yaml::IO &IO, SystemInfoStream &Stream) {
break;
case ProcessorArchitecture::ARM:
case ProcessorArchitecture::ARM64:
+ case ProcessorArchitecture::BP_ARM64:
IO.mapOptional("CPU", Info.CPU.Arm);
break;
default:
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/YAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/YAML.cpp
index 6eba16e36c2a..5dcb113d3395 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/YAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/YAML.cpp
@@ -37,15 +37,17 @@ StringRef yaml::ScalarTraits<yaml::BinaryRef>::input(StringRef Scalar, void *,
return {};
}
-void yaml::BinaryRef::writeAsBinary(raw_ostream &OS) const {
+void yaml::BinaryRef::writeAsBinary(raw_ostream &OS, uint64_t N) const {
if (!DataIsHexString) {
- OS.write((const char *)Data.data(), Data.size());
+ OS.write((const char *)Data.data(), std::min<uint64_t>(N, Data.size()));
return;
}
- for (unsigned I = 0, N = Data.size(); I != N; I += 2) {
- uint8_t Byte = llvm::hexDigitValue(Data[I]);
+
+ for (uint64_t I = 0, E = std::min<uint64_t>(N, Data.size() / 2); I != E;
+ ++I) {
+ uint8_t Byte = llvm::hexDigitValue(Data[I * 2]);
Byte <<= 4;
- Byte |= llvm::hexDigitValue(Data[I + 1]);
+ Byte |= llvm::hexDigitValue(Data[I * 2 + 1]);
OS.write(Byte);
}
}
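The extra parameter bounds how many decoded bytes reach the stream. A sketch, assuming the matching header change gives N a default of UINT64_MAX so existing callers keep their behavior:

#include "llvm/ObjectYAML/YAML.h"
#include "llvm/Support/raw_ostream.h"

void writeTruncated(llvm::raw_ostream &OS) {
  llvm::yaml::BinaryRef Hex("AABBCCDD"); // four bytes, hex-encoded
  Hex.writeAsBinary(OS, 2);              // emits only 0xAA 0xBB
}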
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
index 1aaccb510f8c..53b7db8689c4 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
@@ -61,6 +61,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/SafepointIRVerifier.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Regex.h"
@@ -85,6 +86,7 @@
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
+#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/Transforms/IPO/SampleProfile.h"
@@ -145,6 +147,7 @@
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
+#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
@@ -169,6 +172,7 @@
#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
@@ -189,6 +193,11 @@ static cl::opt<bool>
cl::Hidden, cl::ZeroOrMore,
cl::desc("Run Partial inlinining pass"));
+static cl::opt<int> PreInlineThreshold(
+ "npm-preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
+ cl::desc("Control the amount of inlining in pre-instrumentation inliner "
+ "(default = 75)"));
+
static cl::opt<bool>
RunNewGVN("enable-npm-newgvn", cl::init(false),
cl::Hidden, cl::ZeroOrMore,
@@ -398,21 +407,25 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
// Hoisting of scalars and load expressions.
- if (EnableGVNHoist)
- FPM.addPass(GVNHoistPass());
-
- // Global value numbering based sinking.
- if (EnableGVNSink) {
- FPM.addPass(GVNSinkPass());
- FPM.addPass(SimplifyCFGPass());
+ if (Level > O1) {
+ if (EnableGVNHoist)
+ FPM.addPass(GVNHoistPass());
+
+ // Global value numbering based sinking.
+ if (EnableGVNSink) {
+ FPM.addPass(GVNSinkPass());
+ FPM.addPass(SimplifyCFGPass());
+ }
}
// Speculative execution if the target has divergent branches; otherwise nop.
- FPM.addPass(SpeculativeExecutionPass());
+ if (Level > O1) {
+ FPM.addPass(SpeculativeExecutionPass());
- // Optimize based on known information about branches, and cleanup afterward.
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
+ // Optimize based on known information about branches, and cleanup afterward.
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ }
FPM.addPass(SimplifyCFGPass());
if (Level == O3)
FPM.addPass(AggressiveInstCombinePass());
@@ -426,10 +439,12 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// For PGO use pipeline, try to optimize memory intrinsics such as memcpy
// using the size value profile. Don't perform this when optimizing for size.
if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
- !isOptimizingForSize(Level))
+ !isOptimizingForSize(Level) && Level > O1)
FPM.addPass(PGOMemOPSizeOpt());
- FPM.addPass(TailCallElimPass());
+ // TODO: Investigate the cost/benefit of tail call elimination on debugging.
+ if (Level > O1)
+ FPM.addPass(TailCallElimPass());
FPM.addPass(SimplifyCFGPass());
// Form canonically associated expression trees, and simplify the trees using
@@ -456,6 +471,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Rotate Loop - disable header duplication at -Oz
LPM1.addPass(LoopRotatePass(Level != Oz));
+ // TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(SimpleLoopUnswitchPass());
LPM2.addPass(IndVarSimplifyPass());
@@ -490,6 +506,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(
std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging));
+  // Delete small arrays after loop unrolling.
+ FPM.addPass(SROA());
+
// Eliminate redundancies.
if (Level != O1) {
// These passes add substantial compile time so skip them at O1.
@@ -520,18 +539,21 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Re-consider control flow based optimizations after redundancy elimination,
// redo DCE, etc.
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
- FPM.addPass(DSEPass());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, DebugLogging));
+ if (Level > O1) {
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ FPM.addPass(DSEPass());
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ EnableMSSALoopDependency, DebugLogging));
+ }
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
// Finally, do an expensive DCE pass to catch all the dead code exposed by
// the simplifications and basic cleanup after all the simplifications.
+ // TODO: Investigate if this is too expensive.
FPM.addPass(ADCEPass());
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
@@ -559,8 +581,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
if (!isOptimizingForSize(Level) && !IsCS) {
InlineParams IP;
- // In the old pass manager, this is a cl::opt. Should still this be one?
- IP.DefaultThreshold = 75;
+ IP.DefaultThreshold = PreInlineThreshold;
// FIXME: The hint threshold has the same value used by the regular inliner.
// This should probably be lowered after performance testing.
@@ -954,8 +975,7 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
// across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
if (EnableUnrollAndJam && PTO.LoopUnrolling) {
- OptimizePM.addPass(
- createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
+ OptimizePM.addPass(LoopUnrollAndJamPass(Level));
}
OptimizePM.addPass(LoopUnrollPass(
LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling,
@@ -1445,7 +1465,7 @@ auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
Expected<ParametersT> Result = Parser(Params);
assert((Result || Result.template errorIsA<StringError>()) &&
"Pass parameter parser can only return StringErrors.");
- return std::move(Result);
+ return Result;
}
/// Parser of parameters for LoopUnroll pass.
@@ -1887,6 +1907,12 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
return Error::success();
}
+ // This is consistent with old pass manager invoked via opt, but
+ // inconsistent with clang. Clang doesn't enable loop vectorization
+  // but does enable SLP vectorization at Oz.
+ PTO.LoopVectorization = L > O1 && L < Oz;
+ PTO.SLPVectorization = L > O1 && L < Oz;
+
if (Matches[1] == "default") {
MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
} else if (Matches[1] == "thinlto-pre-link") {
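The vectorization defaults introduced above reduce to a single predicate over the optimization level. A sketch of that intent (not code from the patch):

#include "llvm/Passes/PassBuilder.h"

// Loop and SLP vectorization default on for O2/O3/Os, off for O0/O1/Oz.
static bool vectorizesByDefault(llvm::PassBuilder::OptimizationLevel L) {
  return L > llvm::PassBuilder::O1 && L < llvm::PassBuilder::Oz;
}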
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
index 1fa274d172b1..355dd6f96812 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
+++ b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
@@ -64,6 +64,7 @@ MODULE_PASS("internalize", InternalizePass())
MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
MODULE_PASS("ipsccp", IPSCCPPass())
MODULE_PASS("lowertypetests", LowerTypeTestsPass(nullptr, nullptr))
+MODULE_PASS("mergefunc", MergeFunctionsPass())
MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
MODULE_PASS("no-op-module", NoOpModulePass())
MODULE_PASS("partial-inliner", PartialInlinerPass())
@@ -184,10 +185,12 @@ FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
FUNCTION_PASS("float2int", Float2IntPass())
FUNCTION_PASS("no-op-function", NoOpFunctionPass())
FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
+FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings())
FUNCTION_PASS("loweratomic", LowerAtomicPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass())
+FUNCTION_PASS("lower-matrix-intrinsics", LowerMatrixIntrinsicsPass())
FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("gvn", GVN())
@@ -233,12 +236,14 @@ FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass())
FUNCTION_PASS("sroa", SROA())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
+FUNCTION_PASS("unroll-and-jam", LoopUnrollAndJamPass())
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
FUNCTION_PASS("verify<loops>", LoopVerifierPass())
FUNCTION_PASS("verify<memoryssa>", MemorySSAVerifierPass())
FUNCTION_PASS("verify<regions>", RegionInfoVerifierPass())
FUNCTION_PASS("verify<safepoint-ir>", SafepointIRVerifierPass())
+FUNCTION_PASS("verify<scalar-evolution>", ScalarEvolutionVerifierPass())
FUNCTION_PASS("view-cfg", CFGViewerPass())
FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
@@ -304,7 +309,6 @@ LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass())
LOOP_PASS("strength-reduce", LoopStrengthReducePass())
LOOP_PASS("indvars", IndVarSimplifyPass())
LOOP_PASS("irce", IRCEPass())
-LOOP_PASS("unroll-and-jam", LoopUnrollAndJamPass())
LOOP_PASS("unroll-full", LoopFullUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs()))
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp b/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp
index 00e6294c57a6..228c1b3b442a 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/GCOV.cpp
@@ -439,7 +439,7 @@ LLVM_DUMP_METHOD void GCOVBlock::dump() const { print(dbgs()); }
//===----------------------------------------------------------------------===//
// Cycles detection
//
-// The algorithm in GCC is based on the algorihtm by Hawick & James:
+// The algorithm in GCC is based on the algorithm by Hawick & James:
// "Enumerating Circuits and Loops in Graphs with Self-Arcs and Multiple-Arcs"
// http://complexity.massey.ac.nz/cstn/013/cstn-013.pdf.
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
index 23d078a3ddee..b904f983dceb 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -362,7 +362,9 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
CountersDelta = swap(Header.CountersDelta);
NamesDelta = swap(Header.NamesDelta);
auto DataSize = swap(Header.DataSize);
+ auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters);
auto CountersSize = swap(Header.CountersSize);
+ auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
NamesSize = swap(Header.NamesSize);
ValueKindLast = swap(Header.ValueKindLast);
@@ -370,8 +372,10 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
auto PaddingSize = getNumPaddingBytes(NamesSize);
ptrdiff_t DataOffset = sizeof(RawInstrProf::Header);
- ptrdiff_t CountersOffset = DataOffset + DataSizeInBytes;
- ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;
+ ptrdiff_t CountersOffset =
+ DataOffset + DataSizeInBytes + PaddingBytesBeforeCounters;
+ ptrdiff_t NamesOffset = CountersOffset + (sizeof(uint64_t) * CountersSize) +
+ PaddingBytesAfterCounters;
ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
auto *Start = reinterpret_cast<const char *>(&Header);
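With made-up sizes, the new offset math works out as follows (a compile-time sketch mirroring the code above; the concrete byte counts are illustrative):

#include <cstddef>
#include <cstdint>

constexpr std::ptrdiff_t HeaderSize = 64, DataSize = 48, CountersSize = 5;
constexpr std::ptrdiff_t PadBefore = 0, PadAfter = 8;
constexpr std::ptrdiff_t CountersOffset = HeaderSize + DataSize + PadBefore; // 112
constexpr std::ptrdiff_t NamesOffset =
    CountersOffset + sizeof(uint64_t) * CountersSize + PadAfter;             // 160
static_assert(NamesOffset == 160, "counters are 8 bytes each");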
diff --git a/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.cpp b/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.cpp
index 99a82e1ee3af..4c4508879114 100644
--- a/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.cpp
@@ -174,7 +174,7 @@ static Error parseBlock(T &ParserHelper, unsigned BlockID,
// Stop when there is nothing to read anymore or when we encounter an
// END_BLOCK.
while (!Stream.AtEndOfStream()) {
- Expected<BitstreamEntry> Next = Stream.advance();
+ Next = Stream.advance();
if (!Next)
return Next.takeError();
switch (Next->Kind) {
@@ -278,19 +278,20 @@ Expected<bool> BitstreamParserHelper::isRemarkBlock() {
return isBlock(Stream, META_BLOCK_ID);
}
-static Error validateMagicNumber(StringRef Magic) {
- if (Magic != remarks::ContainerMagic)
+static Error validateMagicNumber(StringRef MagicNumber) {
+ if (MagicNumber != remarks::ContainerMagic)
return createStringError(std::make_error_code(std::errc::invalid_argument),
"Unknown magic number: expecting %s, got %.4s.",
- remarks::ContainerMagic.data(), Magic.data());
+ remarks::ContainerMagic.data(), MagicNumber.data());
return Error::success();
}
static Error advanceToMetaBlock(BitstreamParserHelper &Helper) {
- Expected<std::array<char, 4>> Magic = Helper.parseMagic();
- if (!Magic)
- return Magic.takeError();
- if (Error E = validateMagicNumber(StringRef(Magic->data(), Magic->size())))
+ Expected<std::array<char, 4>> MagicNumber = Helper.parseMagic();
+ if (!MagicNumber)
+ return MagicNumber.takeError();
+ if (Error E = validateMagicNumber(
+ StringRef(MagicNumber->data(), MagicNumber->size())))
return E;
if (Error E = Helper.parseBlockInfoBlock())
return E;
@@ -309,11 +310,12 @@ remarks::createBitstreamParserFromMeta(
StringRef Buf, Optional<ParsedStringTable> StrTab,
Optional<StringRef> ExternalFilePrependPath) {
BitstreamParserHelper Helper(Buf);
- Expected<std::array<char, 4>> Magic = Helper.parseMagic();
- if (!Magic)
- return Magic.takeError();
+ Expected<std::array<char, 4>> MagicNumber = Helper.parseMagic();
+ if (!MagicNumber)
+ return MagicNumber.takeError();
- if (Error E = validateMagicNumber(StringRef(Magic->data(), Magic->size())))
+ if (Error E = validateMagicNumber(
+ StringRef(MagicNumber->data(), MagicNumber->size())))
return std::move(E);
auto Parser =
@@ -364,15 +366,15 @@ Error BitstreamRemarkParser::parseMeta() {
}
Error BitstreamRemarkParser::processCommonMeta(
- BitstreamMetaParserHelper &MetaHelper) {
- if (Optional<uint64_t> Version = MetaHelper.ContainerVersion)
+ BitstreamMetaParserHelper &Helper) {
+ if (Optional<uint64_t> Version = Helper.ContainerVersion)
ContainerVersion = *Version;
else
return createStringError(
std::make_error_code(std::errc::illegal_byte_sequence),
"Error while parsing BLOCK_META: missing container version.");
- if (Optional<uint8_t> Type = MetaHelper.ContainerType) {
+ if (Optional<uint8_t> Type = Helper.ContainerType) {
// Always >= BitstreamRemarkContainerType::First since it's unsigned.
if (*Type > static_cast<uint8_t>(BitstreamRemarkContainerType::Last))
return createStringError(
@@ -426,8 +428,13 @@ Error BitstreamRemarkParser::processExternalFilePath(
MemoryBuffer::getFile(FullPath);
if (std::error_code EC = BufferOrErr.getError())
return createFileError(FullPath, EC);
+
TmpRemarkBuffer = std::move(*BufferOrErr);
+ // Don't try to parse the file if it's empty.
+ if (TmpRemarkBuffer->getBufferSize() == 0)
+ return make_error<EndOfFileError>();
+
// Create a separate parser used for parsing the separate file.
ParserHelper = BitstreamParserHelper(TmpRemarkBuffer->getBuffer());
// Advance and check until we can parse the meta block.
diff --git a/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.h b/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.h
index 7c9cc2f1e7db..749219fc5155 100644
--- a/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.h
+++ b/contrib/llvm-project/llvm/lib/Remarks/BitstreamRemarkParser.h
@@ -34,15 +34,16 @@ struct BitstreamRemarkParser : public RemarkParser {
std::unique_ptr<MemoryBuffer> TmpRemarkBuffer;
/// The common metadata used to decide how to parse the buffer.
/// This is filled when parsing the metadata block.
- uint64_t ContainerVersion;
- uint64_t RemarkVersion;
- BitstreamRemarkContainerType ContainerType;
+ uint64_t ContainerVersion = 0;
+ uint64_t RemarkVersion = 0;
+ BitstreamRemarkContainerType ContainerType =
+ BitstreamRemarkContainerType::Standalone;
   /// Whether the parser is ready to parse remarks.
bool ReadyToParseRemarks = false;
/// Create a parser that expects to find a string table embedded in the
/// stream.
- BitstreamRemarkParser(StringRef Buf)
+ explicit BitstreamRemarkParser(StringRef Buf)
: RemarkParser(Format::Bitstream), ParserHelper(Buf) {}
/// Create a parser that uses a pre-parsed string table.
diff --git a/contrib/llvm-project/llvm/lib/Remarks/RemarkFormat.cpp b/contrib/llvm-project/llvm/lib/Remarks/RemarkFormat.cpp
index f2d0331ec6a8..5006421a3c63 100644
--- a/contrib/llvm-project/llvm/lib/Remarks/RemarkFormat.cpp
+++ b/contrib/llvm-project/llvm/lib/Remarks/RemarkFormat.cpp
@@ -12,6 +12,7 @@
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Remarks/BitstreamRemarkContainer.h"
using namespace llvm;
using namespace llvm::remarks;
@@ -30,3 +31,17 @@ Expected<Format> llvm::remarks::parseFormat(StringRef FormatStr) {
return Result;
}
+
+Expected<Format> llvm::remarks::magicToFormat(StringRef MagicStr) {
+ auto Result =
+ StringSwitch<Format>(MagicStr)
+ .StartsWith("--- ", Format::YAML) // This is only an assumption.
+ .StartsWith(remarks::Magic, Format::YAMLStrTab)
+ .StartsWith(remarks::ContainerMagic, Format::Bitstream)
+ .Default(Format::Unknown);
+
+ if (Result == Format::Unknown)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown remark magic: '%s'", MagicStr.data());
+ return Result;
+}
diff --git a/contrib/llvm-project/llvm/lib/Remarks/RemarkLinker.cpp b/contrib/llvm-project/llvm/lib/Remarks/RemarkLinker.cpp
new file mode 100644
index 000000000000..617ce770af66
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Remarks/RemarkLinker.cpp
@@ -0,0 +1,126 @@
+//===- RemarkLinker.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an implementation of the remark linker.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkLinker.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Remarks/BitstreamRemarkContainer.h"
+#include "llvm/Remarks/RemarkParser.h"
+#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Support/Error.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+static Expected<StringRef>
+getRemarksSectionName(const object::ObjectFile &Obj) {
+ if (Obj.isMachO())
+ return StringRef("__remarks");
+ // ELF -> .remarks, but there is no ELF support at this point.
+ return createStringError(std::errc::illegal_byte_sequence,
+ "Unsupported file format.");
+}
+
+Expected<Optional<StringRef>>
+llvm::remarks::getRemarksSectionContents(const object::ObjectFile &Obj) {
+ Expected<StringRef> SectionName = getRemarksSectionName(Obj);
+ if (!SectionName)
+ return SectionName.takeError();
+
+ for (const object::SectionRef &Section : Obj.sections()) {
+ Expected<StringRef> MaybeName = Section.getName();
+ if (!MaybeName)
+ return MaybeName.takeError();
+ if (*MaybeName != *SectionName)
+ continue;
+
+ if (Expected<StringRef> Contents = Section.getContents())
+ return *Contents;
+ else
+ return Contents.takeError();
+ }
+ return Optional<StringRef>{};
+}
+
+Remark &RemarkLinker::keep(std::unique_ptr<Remark> Remark) {
+ StrTab.internalize(*Remark);
+ auto Inserted = Remarks.insert(std::move(Remark));
+ return **Inserted.first;
+}
+
+void RemarkLinker::setExternalFilePrependPath(StringRef PrependPathIn) {
+ PrependPath = PrependPathIn;
+}
+
+// Discard remarks with no source location.
+static bool shouldKeepRemark(const Remark &R) { return R.Loc.hasValue(); }
+
+Error RemarkLinker::link(StringRef Buffer, Optional<Format> RemarkFormat) {
+ if (!RemarkFormat) {
+ Expected<Format> ParserFormat = magicToFormat(Buffer);
+ if (!ParserFormat)
+ return ParserFormat.takeError();
+ RemarkFormat = *ParserFormat;
+ }
+
+ Expected<std::unique_ptr<RemarkParser>> MaybeParser =
+ createRemarkParserFromMeta(
+ *RemarkFormat, Buffer, /*StrTab=*/None,
+ PrependPath ? Optional<StringRef>(StringRef(*PrependPath))
+ : Optional<StringRef>(None));
+ if (!MaybeParser)
+ return MaybeParser.takeError();
+
+ RemarkParser &Parser = **MaybeParser;
+
+ while (true) {
+ Expected<std::unique_ptr<Remark>> Next = Parser.next();
+ if (Error E = Next.takeError()) {
+ if (E.isA<EndOfFileError>()) {
+ consumeError(std::move(E));
+ break;
+ }
+ return E;
+ }
+
+ assert(*Next != nullptr);
+
+ if (shouldKeepRemark(**Next))
+ keep(std::move(*Next));
+ }
+ return Error::success();
+}
+
+Error RemarkLinker::link(const object::ObjectFile &Obj,
+ Optional<Format> RemarkFormat) {
+ Expected<Optional<StringRef>> SectionOrErr = getRemarksSectionContents(Obj);
+ if (!SectionOrErr)
+ return SectionOrErr.takeError();
+
+ if (Optional<StringRef> Section = *SectionOrErr)
+ return link(*Section, RemarkFormat);
+ return Error::success();
+}
+
+Error RemarkLinker::serialize(raw_ostream &OS, Format RemarksFormat) const {
+ Expected<std::unique_ptr<RemarkSerializer>> MaybeSerializer =
+ createRemarkSerializer(RemarksFormat, SerializerMode::Standalone, OS,
+ std::move(const_cast<StringTable &>(StrTab)));
+ if (!MaybeSerializer)
+ return MaybeSerializer.takeError();
+
+ std::unique_ptr<remarks::RemarkSerializer> Serializer =
+ std::move(*MaybeSerializer);
+
+ for (const Remark &R : remarks())
+ Serializer->emit(R);
+ return Error::success();
+}
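A hypothetical driver for the new linker: merge two YAML remark buffers and re-serialize them as a standalone bitstream (the function name and format choices are illustrative; link and serialize are the entry points added above):

#include "llvm/Remarks/RemarkLinker.h"
#include "llvm/Support/raw_ostream.h"

llvm::Error mergeRemarks(llvm::StringRef BufA, llvm::StringRef BufB,
                         llvm::raw_ostream &OS) {
  llvm::remarks::RemarkLinker RL;
  if (llvm::Error E = RL.link(BufA, llvm::remarks::Format::YAML))
    return E;
  if (llvm::Error E = RL.link(BufB, llvm::remarks::Format::YAML))
    return E;
  return RL.serialize(OS, llvm::remarks::Format::Bitstream);
}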
diff --git a/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp b/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp
index 6f1d6d50eee2..b5cd4af0eb3d 100644
--- a/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp
@@ -213,3 +213,51 @@ AArch64::ArchKind AArch64::parseCPUArch(StringRef CPU) {
}
return ArchKind::INVALID;
}
+
+// Parse a branch protection specification, which has the form
+// standard | none | [bti,pac-ret[+b-key,+leaf]*]
+// Returns true on success, with individual elements of the specification
+// returned in `PBP`. Returns false on error, with `Err` containing
+// the erroneous part of the spec.
+bool AArch64::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
+ StringRef &Err) {
+ PBP = {"none", "a_key", false};
+ if (Spec == "none")
+ return true; // defaults are ok
+
+ if (Spec == "standard") {
+ PBP.Scope = "non-leaf";
+ PBP.BranchTargetEnforcement = true;
+ return true;
+ }
+
+ SmallVector<StringRef, 4> Opts;
+ Spec.split(Opts, "+");
+ for (int I = 0, E = Opts.size(); I != E; ++I) {
+ StringRef Opt = Opts[I].trim();
+ if (Opt == "bti") {
+ PBP.BranchTargetEnforcement = true;
+ continue;
+ }
+ if (Opt == "pac-ret") {
+ PBP.Scope = "non-leaf";
+ for (; I + 1 != E; ++I) {
+ StringRef PACOpt = Opts[I + 1].trim();
+ if (PACOpt == "leaf")
+ PBP.Scope = "all";
+ else if (PACOpt == "b-key")
+ PBP.Key = "b_key";
+ else
+ break;
+ }
+ continue;
+ }
+ if (Opt == "")
+ Err = "<empty>";
+ else
+ Err = Opt;
+ return false;
+ }
+
+ return true;
+}
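A composite spec exercises all three output fields. A sketch (the demo wrapper is hypothetical):

#include "llvm/Support/AArch64TargetParser.h"

static void demo() {
  llvm::AArch64::ParsedBranchProtection PBP;
  llvm::StringRef Err;
  if (llvm::AArch64::parseBranchProtection("pac-ret+leaf+b-key", PBP, Err)) {
    // PBP.Scope == "all", PBP.Key == "b_key",
    // PBP.BranchTargetEnforcement == false.
  }
}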
diff --git a/contrib/llvm-project/llvm/lib/Support/AMDGPUMetadata.cpp b/contrib/llvm-project/llvm/lib/Support/AMDGPUMetadata.cpp
index 5f8102299f47..4ea197a97389 100644
--- a/contrib/llvm-project/llvm/lib/Support/AMDGPUMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/AMDGPUMetadata.cpp
@@ -62,6 +62,7 @@ struct ScalarEnumerationTraits<ValueKind> {
YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
+ YIO.enumCase(EN, "HiddenHostcallBuffer", ValueKind::HiddenHostcallBuffer);
YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
YIO.enumCase(EN, "HiddenCompletionAction",
ValueKind::HiddenCompletionAction);
diff --git a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
index b79baf1834a7..050c37baefb8 100644
--- a/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
@@ -20,7 +20,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
@@ -208,6 +208,10 @@ namespace llvm {
/* A bunch of private, handy routines. */
+static inline Error createError(const Twine &Err) {
+ return make_error<StringError>(Err, inconvertibleErrorCode());
+}
+
static inline unsigned int
partCountForBits(unsigned int bits)
{
@@ -226,9 +230,8 @@ decDigitValue(unsigned int c)
If the exponent overflows, returns a large exponent with the
appropriate sign. */
-static int
-readExponent(StringRef::iterator begin, StringRef::iterator end)
-{
+static Expected<int> readExponent(StringRef::iterator begin,
+ StringRef::iterator end) {
bool isNegative;
unsigned int absExponent;
const unsigned int overlargeExponent = 24000; /* FIXME. */
@@ -242,29 +245,28 @@ readExponent(StringRef::iterator begin, StringRef::iterator end)
isNegative = (*p == '-');
if (*p == '-' || *p == '+') {
p++;
- assert(p != end && "Exponent has no digits");
+ if (p == end)
+ return createError("Exponent has no digits");
}
absExponent = decDigitValue(*p++);
- assert(absExponent < 10U && "Invalid character in exponent");
+ if (absExponent >= 10U)
+ return createError("Invalid character in exponent");
for (; p != end; ++p) {
unsigned int value;
value = decDigitValue(*p);
- assert(value < 10U && "Invalid character in exponent");
+ if (value >= 10U)
+ return createError("Invalid character in exponent");
- value += absExponent * 10;
+ absExponent = absExponent * 10U + value;
if (absExponent >= overlargeExponent) {
absExponent = overlargeExponent;
- p = end; /* outwit assert below */
break;
}
- absExponent = value;
}
- assert(p == end && "Invalid exponent in exponent");
-
if (isNegative)
return -(int) absExponent;
else
@@ -273,20 +275,21 @@ readExponent(StringRef::iterator begin, StringRef::iterator end)
/* This is ugly and needs cleaning up, but I don't immediately see
how whilst remaining safe. */
-static int
-totalExponent(StringRef::iterator p, StringRef::iterator end,
- int exponentAdjustment)
-{
+static Expected<int> totalExponent(StringRef::iterator p,
+ StringRef::iterator end,
+ int exponentAdjustment) {
int unsignedExponent;
bool negative, overflow;
int exponent = 0;
- assert(p != end && "Exponent has no digits");
+ if (p == end)
+ return createError("Exponent has no digits");
negative = *p == '-';
if (*p == '-' || *p == '+') {
p++;
- assert(p != end && "Exponent has no digits");
+ if (p == end)
+ return createError("Exponent has no digits");
}
unsignedExponent = 0;
@@ -295,7 +298,8 @@ totalExponent(StringRef::iterator p, StringRef::iterator end,
unsigned int value;
value = decDigitValue(*p);
- assert(value < 10U && "Invalid character in exponent");
+ if (value >= 10U)
+ return createError("Invalid character in exponent");
unsignedExponent = unsignedExponent * 10 + value;
if (unsignedExponent > 32767) {
@@ -322,10 +326,9 @@ totalExponent(StringRef::iterator p, StringRef::iterator end,
return exponent;
}
-static StringRef::iterator
+static Expected<StringRef::iterator>
skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
- StringRef::iterator *dot)
-{
+ StringRef::iterator *dot) {
StringRef::iterator p = begin;
*dot = end;
while (p != end && *p == '0')
@@ -334,7 +337,8 @@ skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
if (p != end && *p == '.') {
*dot = p++;
- assert(end - begin != 1 && "Significand has no digits");
+ if (end - begin == 1)
+ return createError("Significand has no digits");
while (p != end && *p == '0')
p++;
@@ -363,12 +367,14 @@ struct decimalInfo {
int normalizedExponent;
};
-static void
-interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
- decimalInfo *D)
-{
+static Error interpretDecimal(StringRef::iterator begin,
+ StringRef::iterator end, decimalInfo *D) {
StringRef::iterator dot = end;
- StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
+
+ auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
+ if (!PtrOrErr)
+ return PtrOrErr.takeError();
+ StringRef::iterator p = *PtrOrErr;
D->firstSigDigit = p;
D->exponent = 0;
@@ -376,7 +382,8 @@ interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
for (; p != end; ++p) {
if (*p == '.') {
- assert(dot == end && "String contains multiple dots");
+ if (dot != end)
+ return createError("String contains multiple dots");
dot = p++;
if (p == end)
break;
@@ -386,12 +393,18 @@ interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
}
if (p != end) {
- assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
- assert(p != begin && "Significand has no digits");
- assert((dot == end || p - begin != 1) && "Significand has no digits");
+ if (*p != 'e' && *p != 'E')
+ return createError("Invalid character in significand");
+ if (p == begin)
+ return createError("Significand has no digits");
+ if (dot != end && p - begin == 1)
+ return createError("Significand has no digits");
/* p points to the first non-digit in the string */
- D->exponent = readExponent(p + 1, end);
+ auto ExpOrErr = readExponent(p + 1, end);
+ if (!ExpOrErr)
+ return ExpOrErr.takeError();
+ D->exponent = *ExpOrErr;
/* Implied decimal point? */
if (dot == end)
@@ -417,15 +430,15 @@ interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
}
D->lastSigDigit = p;
+ return Error::success();
}
/* Return the trailing fraction of a hexadecimal number.
DIGITVALUE is the first hex digit of the fraction, P points to
the next digit. */
-static lostFraction
+static Expected<lostFraction>
trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
- unsigned int digitValue)
-{
+ unsigned int digitValue) {
unsigned int hexDigit;
/* If the first trailing digit isn't 0 or 8 we can work out the
@@ -439,7 +452,8 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
while (p != end && (*p == '0' || *p == '.'))
p++;
- assert(p != end && "Invalid trailing hexadecimal fraction!");
+ if (p == end)
+ return createError("Invalid trailing hexadecimal fraction!");
hexDigit = hexDigitValue(*p);
@@ -978,7 +992,7 @@ IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
on to the full-precision result of the multiplication. Returns the
lost fraction. */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
- const IEEEFloat *addend) {
+ IEEEFloat addend) {
unsigned int omsb; // One, not zero, based MSB.
unsigned int partsCount, newPartsCount, precision;
integerPart *lhsSignificand;
@@ -1022,7 +1036,7 @@ lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
// toward left by two bits, and adjust exponent accordingly.
exponent += 2;
- if (addend && addend->isNonZero()) {
+ if (addend.isNonZero()) {
// The intermediate result of the multiplication has "2 * precision"
     // significant bits; adjust the addend to be consistent with mul result.
//
@@ -1051,7 +1065,10 @@ lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
significand.parts = fullSignificand;
semantics = &extendedSemantics;
- IEEEFloat extendedAddend(*addend);
+ // Make a copy so we can convert it to the extended semantics.
+ // Note that we cannot convert the addend directly, as the extendedSemantics
+ // is a local variable (which we take a reference to).
+ IEEEFloat extendedAddend(addend);
status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
assert(status == opOK);
(void)status;
@@ -1106,6 +1123,10 @@ lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
return lost_fraction;
}
+lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
+ return multiplySignificand(rhs, IEEEFloat(*semantics));
+}
+
/* Multiply the significands of LHS and RHS to DST. */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
unsigned int bit, i, partsCount;
@@ -1484,22 +1505,19 @@ lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
/* Subtraction is more subtle than one might naively expect. */
if (subtract) {
IEEEFloat temp_rhs(rhs);
- bool reverse;
- if (bits == 0) {
- reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
+ if (bits == 0)
lost_fraction = lfExactlyZero;
- } else if (bits > 0) {
+ else if (bits > 0) {
lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
shiftSignificandLeft(1);
- reverse = false;
} else {
lost_fraction = shiftSignificandRight(-bits - 1);
temp_rhs.shiftSignificandLeft(1);
- reverse = true;
}
- if (reverse) {
+    // Should we reverse the subtraction?
+ if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
carry = temp_rhs.subtractSignificand
(*this, lost_fraction != lfExactlyZero);
copySignificand(temp_rhs);
@@ -1714,7 +1732,7 @@ IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
fs = multiplySpecials(rhs);
if (isFiniteNonZero()) {
- lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
+ lostFraction lost_fraction = multiplySignificand(rhs);
fs = normalize(rounding_mode, lost_fraction);
if (lost_fraction != lfExactlyZero)
fs = (opStatus) (fs | opInexact);
@@ -1815,7 +1833,7 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
addend.isFinite()) {
lostFraction lost_fraction;
- lost_fraction = multiplySignificand(multiplicand, &addend);
+ lost_fraction = multiplySignificand(multiplicand, addend);
fs = normalize(rounding_mode, lost_fraction);
if (lost_fraction != lfExactlyZero)
fs = (opStatus) (fs | opInexact);
@@ -2302,7 +2320,7 @@ IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
}
-IEEEFloat::opStatus
+Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromHexadecimalString(StringRef s,
roundingMode rounding_mode) {
lostFraction lost_fraction = lfExactlyZero;
@@ -2320,14 +2338,18 @@ IEEEFloat::convertFromHexadecimalString(StringRef s,
StringRef::iterator begin = s.begin();
StringRef::iterator end = s.end();
StringRef::iterator dot;
- StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
+ auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
+ if (!PtrOrErr)
+ return PtrOrErr.takeError();
+ StringRef::iterator p = *PtrOrErr;
StringRef::iterator firstSignificantDigit = p;
while (p != end) {
integerPart hex_value;
if (*p == '.') {
- assert(dot == end && "String contains multiple dots");
+ if (dot != end)
+ return createError("String contains multiple dots");
dot = p++;
continue;
}
@@ -2344,16 +2366,23 @@ IEEEFloat::convertFromHexadecimalString(StringRef s,
hex_value <<= bitPos % integerPartWidth;
significand[bitPos / integerPartWidth] |= hex_value;
} else if (!computedTrailingFraction) {
- lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
+ auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
+ if (!FractOrErr)
+ return FractOrErr.takeError();
+ lost_fraction = *FractOrErr;
computedTrailingFraction = true;
}
}
/* Hex floats require an exponent but not a hexadecimal point. */
- assert(p != end && "Hex strings require an exponent");
- assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
- assert(p != begin && "Significand has no digits");
- assert((dot == end || p - begin != 1) && "Significand has no digits");
+ if (p == end)
+ return createError("Hex strings require an exponent");
+ if (*p != 'p' && *p != 'P')
+ return createError("Invalid character in significand");
+ if (p == begin)
+ return createError("Significand has no digits");
+ if (dot != end && p - begin == 1)
+ return createError("Significand has no digits");
/* Ignore the exponent if we are zero. */
if (p != firstSignificantDigit) {
@@ -2376,7 +2405,10 @@ IEEEFloat::convertFromHexadecimalString(StringRef s,
expAdjustment -= partsCount * integerPartWidth;
/* Adjust for the given exponent. */
- exponent = totalExponent(p + 1, end, expAdjustment);
+ auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
+ if (!ExpOrErr)
+ return ExpOrErr.takeError();
+ exponent = *ExpOrErr;
}
return normalize(rounding_mode, lost_fraction);
@@ -2424,7 +2456,7 @@ IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
if (exp >= 0) {
/* multiplySignificand leaves the precision-th bit set to 1. */
- calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
+ calcLostFraction = decSig.multiplySignificand(pow5);
powHUerr = powStatus != opOK;
} else {
calcLostFraction = decSig.divideSignificand(pow5);
@@ -2467,14 +2499,15 @@ IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
}
}
-IEEEFloat::opStatus
+Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
decimalInfo D;
opStatus fs;
/* Scan the text. */
StringRef::iterator p = str.begin();
- interpretDecimal(p, str.end(), &D);
+ if (Error Err = interpretDecimal(p, str.end(), &D))
+ return std::move(Err);
/* Handle the quick cases. First the case of no significant digits,
i.e. zero, and then exponents that are obviously too large or too
@@ -2557,7 +2590,10 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
}
}
decValue = decDigitValue(*p++);
- assert(decValue < 10U && "Invalid character in significand");
+ if (decValue >= 10U) {
+ delete[] decSignificand;
+ return createError("Invalid character in significand");
+ }
multiplier *= 10;
val = val * 10 + decValue;
/* The maximum number that can be multiplied by ten with any
@@ -2608,9 +2644,10 @@ bool IEEEFloat::convertFromStringSpecials(StringRef str) {
return false;
}
-IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
- roundingMode rounding_mode) {
- assert(!str.empty() && "Invalid string length");
+Expected<IEEEFloat::opStatus>
+IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
+ if (str.empty())
+ return createError("Invalid string length");
// Handle special cases.
if (convertFromStringSpecials(str))
@@ -2623,11 +2660,13 @@ IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
if (*p == '-' || *p == '+') {
p++;
slen--;
- assert(slen && "String has no digits");
+ if (!slen)
+ return createError("String has no digits");
}
if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
- assert(slen - 2 && "Invalid string");
+ if (slen == 2)
+ return createError("Invalid string");
return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
rounding_mode);
}
@@ -4315,8 +4354,8 @@ APInt DoubleAPFloat::bitcastToAPInt() const {
return APInt(128, 2, Data);
}
-APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
- roundingMode RM) {
+Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
+ roundingMode RM) {
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
APFloat Tmp(semPPCDoubleDoubleLegacy);
auto Ret = Tmp.convertFromString(S, RM);
@@ -4463,7 +4502,8 @@ APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
llvm_unreachable("Unexpected semantics");
}
-APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
+Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
+ roundingMode RM) {
APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
}
@@ -4477,7 +4517,9 @@ hash_code hash_value(const APFloat &Arg) {
APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
: APFloat(Semantics) {
- convertFromString(S, rmNearestTiesToEven);
+ auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
+ assert(StatusOrErr && "Invalid floating point representation");
+ consumeError(StatusOrErr.takeError());
}
APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
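Callers now receive parse failures as llvm::Error values instead of assertion failures. A sketch of the new idiom (the parseFloat wrapper is hypothetical):

#include "llvm/ADT/APFloat.h"
#include "llvm/Support/Error.h"

llvm::Expected<llvm::APFloat> parseFloat(llvm::StringRef S) {
  llvm::APFloat F(llvm::APFloat::IEEEdouble());
  auto StatusOrErr = F.convertFromString(S, llvm::APFloat::rmNearestTiesToEven);
  if (!StatusOrErr)
    return StatusOrErr.takeError(); // e.g. "Significand has no digits"
  return F;
}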
diff --git a/contrib/llvm-project/llvm/lib/Support/APInt.cpp b/contrib/llvm-project/llvm/lib/Support/APInt.cpp
index 758fe8b4f866..9b9cd70078b3 100644
--- a/contrib/llvm-project/llvm/lib/Support/APInt.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/APInt.cpp
@@ -187,7 +187,7 @@ APInt& APInt::operator--() {
return clearUnusedBits();
}
-/// Adds the RHS APint to this APInt.
+/// Adds the RHS APInt to this APInt.
/// @returns this, after addition of RHS.
/// Addition assignment operator.
APInt& APInt::operator+=(const APInt& RHS) {
@@ -884,6 +884,31 @@ APInt APInt::trunc(unsigned width) const {
return Result;
}
+// Truncate to new width with unsigned saturation.
+APInt APInt::truncUSat(unsigned width) const {
+ assert(width < BitWidth && "Invalid APInt Truncate request");
+ assert(width && "Can't truncate to 0 bits");
+
+ // Can we just losslessly truncate it?
+ if (isIntN(width))
+ return trunc(width);
+ // If not, then just return the new limit.
+ return APInt::getMaxValue(width);
+}
+
+// Truncate to new width with signed saturation.
+APInt APInt::truncSSat(unsigned width) const {
+ assert(width < BitWidth && "Invalid APInt Truncate request");
+ assert(width && "Can't truncate to 0 bits");
+
+ // Can we just losslessly truncate it?
+ if (isSignedIntN(width))
+ return trunc(width);
+ // If not, then just return the new limits.
+ return isNegative() ? APInt::getSignedMinValue(width)
+ : APInt::getSignedMaxValue(width);
+}
+
// Sign extend to a new width.
APInt APInt::sext(unsigned Width) const {
assert(Width > BitWidth && "Invalid APInt SignExtend request");
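Worked examples for the new saturating truncations (a sketch):

llvm::APInt U(16, 300);
llvm::APInt UT = U.truncUSat(8); // 255: 300 exceeds the unsigned 8-bit max
llvm::APInt S(16, -200, /*isSigned=*/true);
llvm::APInt ST = S.truncSSat(8); // -128: clamps at the signed 8-bit minimum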
@@ -2048,6 +2073,46 @@ APInt APInt::usub_sat(const APInt &RHS) const {
return APInt(BitWidth, 0);
}
+APInt APInt::smul_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = smul_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+  // The result is negative if exactly one of the inputs is negative.
+ bool ResIsNegative = isNegative() ^ RHS.isNegative();
+
+ return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getSignedMaxValue(BitWidth);
+}
+
+APInt APInt::umul_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = umul_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return APInt::getMaxValue(BitWidth);
+}
+
+APInt APInt::sshl_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = sshl_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return isNegative() ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getSignedMaxValue(BitWidth);
+}
+
+APInt APInt::ushl_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = ushl_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return APInt::getMaxValue(BitWidth);
+}
void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
// Check our assumptions here
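And for the saturating multiply/shift family (a sketch):

llvm::APInt A(8, 100), B(8, 3);
llvm::APInt P = A.umul_sat(B);                 // 255: 300 wraps, so clamp
llvm::APInt Q = A.sshl_sat(llvm::APInt(8, 2)); // 127: 400 overflows signed int8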
@@ -2790,7 +2855,7 @@ APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
return Quo;
return Quo + 1;
}
- // Currently sdiv rounds twards zero.
+ // Currently sdiv rounds towards zero.
case APInt::Rounding::TOWARD_ZERO:
return A.sdiv(B);
}
@@ -2933,7 +2998,7 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
APInt Q = SQ * SQ;
bool InexactSQ = Q != D;
// The calculated SQ may actually be greater than the exact (non-integer)
- // value. If that's the case, decremement SQ to get a value that is lower.
+ // value. If that's the case, decrement SQ to get a value that is lower.
if (Q.sgt(D))
SQ -= 1;
@@ -2987,6 +3052,14 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
return X;
}
+Optional<unsigned>
+llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
+ assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
+ if (A == B)
+ return llvm::None;
+ return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
+}
+
/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
/// with the integer held in IntVal.
void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
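A quick worked case for the new helper (a sketch):

llvm::APInt A(8, 0b1010), B(8, 0b0010);
llvm::Optional<unsigned> Bit =
    llvm::APIntOps::GetMostSignificantDifferentBit(A, B); // 3: A^B == 0b1000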
diff --git a/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp b/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp
index df50fff720cd..8a89f4c45fb9 100644
--- a/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp
@@ -73,9 +73,9 @@ ARMAttributeParser::DisplayRoutines[] = {
uint64_t ARMAttributeParser::ParseInteger(const uint8_t *Data,
uint32_t &Offset) {
- unsigned Length;
- uint64_t Value = decodeULEB128(Data + Offset, &Length);
- Offset = Offset + Length;
+ unsigned DecodeLength;
+ uint64_t Value = decodeULEB128(Data + Offset, &DecodeLength);
+ Offset += DecodeLength;
return Value;
}
@@ -587,9 +587,9 @@ void ARMAttributeParser::nodefaults(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ParseIndexList(const uint8_t *Data, uint32_t &Offset,
SmallVectorImpl<uint8_t> &IndexList) {
for (;;) {
- unsigned Length;
- uint64_t Value = decodeULEB128(Data + Offset, &Length);
- Offset = Offset + Length;
+ unsigned DecodeLength;
+ uint64_t Value = decodeULEB128(Data + Offset, &DecodeLength);
+ Offset += DecodeLength;
if (Value == 0)
break;
IndexList.push_back(Value);
@@ -599,9 +599,9 @@ void ARMAttributeParser::ParseIndexList(const uint8_t *Data, uint32_t &Offset,
void ARMAttributeParser::ParseAttributeList(const uint8_t *Data,
uint32_t &Offset, uint32_t Length) {
while (Offset < Length) {
- unsigned Length;
- uint64_t Tag = decodeULEB128(Data + Offset, &Length);
- Offset += Length;
+ unsigned DecodeLength;
+ uint64_t Tag = decodeULEB128(Data + Offset, &DecodeLength);
+ Offset += DecodeLength;
bool Handled = false;
for (unsigned AHI = 0, AHE = array_lengthof(DisplayRoutines);
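The corrected pattern, isolated (a sketch; the decode length no longer shadows the list length that bounds the enclosing loop):

#include "llvm/Support/LEB128.h"
#include <cstdint>

static uint64_t readOne(const uint8_t *Data, uint32_t &Offset) {
  unsigned DecodeLength;
  uint64_t Value = llvm::decodeULEB128(Data + Offset, &DecodeLength);
  Offset += DecodeLength; // advance past however many bytes the ULEB128 used
  return Value;
}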
diff --git a/contrib/llvm-project/llvm/lib/Support/ARMTargetParser.cpp b/contrib/llvm-project/llvm/lib/Support/ARMTargetParser.cpp
index ce5daa7fe58c..f2c22fd93c8b 100644
--- a/contrib/llvm-project/llvm/lib/Support/ARMTargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ARMTargetParser.cpp
@@ -174,8 +174,6 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
// under FPURestriction::None, which is the only FPURestriction in
// which they would be valid (since FPURestriction::SP doesn't
// exist).
-
- {"+fpregs", "-fpregs", FPUVersion::VFPV2, FPURestriction::SP_D16},
{"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::D16},
{"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::SP_D16},
{"+vfp3", "-vfp3", FPUVersion::VFPV3, FPURestriction::None},
diff --git a/contrib/llvm-project/llvm/lib/Support/BinaryStreamReader.cpp b/contrib/llvm-project/llvm/lib/Support/BinaryStreamReader.cpp
index b17786593bde..a0434bdc6115 100644
--- a/contrib/llvm-project/llvm/lib/Support/BinaryStreamReader.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/BinaryStreamReader.cpp
@@ -139,10 +139,10 @@ Error BinaryStreamReader::readStreamRef(BinaryStreamRef &Ref, uint32_t Length) {
return Error::success();
}
-Error BinaryStreamReader::readSubstream(BinarySubstreamRef &Stream,
- uint32_t Size) {
- Stream.Offset = getOffset();
- return readStreamRef(Stream.StreamData, Size);
+Error BinaryStreamReader::readSubstream(BinarySubstreamRef &Ref,
+ uint32_t Length) {
+ Ref.Offset = getOffset();
+ return readStreamRef(Ref.StreamData, Length);
}
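A minimal sketch of the renamed readSubstream parameters (the backing buffer and length are illustrative assumptions):

    // Carve an 8-byte substream out of a 16-byte stream; Ref.Offset records
    // where the substream begins and Ref.StreamData covers Length bytes.
    uint8_t Bytes[16] = {};
    llvm::BinaryByteStream Stream(Bytes, llvm::support::little);
    llvm::BinaryStreamReader Reader(Stream);
    llvm::BinarySubstreamRef Ref;
    if (llvm::Error E = Reader.readSubstream(Ref, /*Length=*/8))
      llvm::consumeError(std::move(E)); // e.g. the stream was too short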
Error BinaryStreamReader::skip(uint32_t Amount) {
diff --git a/contrib/llvm-project/llvm/lib/Support/CRC.cpp b/contrib/llvm-project/llvm/lib/Support/CRC.cpp
index 7c008d3b599d..a3dba1a3aa10 100644
--- a/contrib/llvm-project/llvm/lib/Support/CRC.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CRC.cpp
@@ -25,7 +25,7 @@
using namespace llvm;
-#if LLVM_ENABLE_ZLIB == 0 || !HAVE_ZLIB_H
+#if !LLVM_ENABLE_ZLIB
static const uint32_t CRCTable[256] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
diff --git a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
index 620f7ffd4c9f..cb73380ba383 100644
--- a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
@@ -24,11 +24,13 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
@@ -37,9 +39,11 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <map>
+#include <string>
using namespace llvm;
using namespace cl;
@@ -53,6 +57,8 @@ namespace cl {
template class basic_parser<bool>;
template class basic_parser<boolOrDefault>;
template class basic_parser<int>;
+template class basic_parser<long>;
+template class basic_parser<long long>;
template class basic_parser<unsigned>;
template class basic_parser<unsigned long>;
template class basic_parser<unsigned long long>;
@@ -78,6 +84,8 @@ void basic_parser_impl::anchor() {}
void parser<bool>::anchor() {}
void parser<boolOrDefault>::anchor() {}
void parser<int>::anchor() {}
+void parser<long>::anchor() {}
+void parser<long long>::anchor() {}
void parser<unsigned>::anchor() {}
void parser<unsigned long>::anchor() {}
void parser<unsigned long long>::anchor() {}
@@ -88,21 +96,26 @@ void parser<char>::anchor() {}
//===----------------------------------------------------------------------===//
-static StringRef ArgPrefix = " -";
-static StringRef ArgPrefixLong = " --";
+static const size_t DefaultPad = 2;
+
+static StringRef ArgPrefix = "-";
+static StringRef ArgPrefixLong = "--";
static StringRef ArgHelpPrefix = " - ";
-static size_t argPlusPrefixesSize(StringRef ArgName) {
+static size_t argPlusPrefixesSize(StringRef ArgName, size_t Pad = DefaultPad) {
size_t Len = ArgName.size();
if (Len == 1)
- return Len + ArgPrefix.size() + ArgHelpPrefix.size();
- return Len + ArgPrefixLong.size() + ArgHelpPrefix.size();
+ return Len + Pad + ArgPrefix.size() + ArgHelpPrefix.size();
+ return Len + Pad + ArgPrefixLong.size() + ArgHelpPrefix.size();
}
-static StringRef argPrefix(StringRef ArgName) {
- if (ArgName.size() == 1)
- return ArgPrefix;
- return ArgPrefixLong;
+static SmallString<8> argPrefix(StringRef ArgName, size_t Pad = DefaultPad) {
+ SmallString<8> Prefix;
+ for (size_t I = 0; I < Pad; ++I) {
+ Prefix.push_back(' ');
+ }
+ Prefix.append(ArgName.size() > 1 ? ArgPrefixLong : ArgPrefix);
+ return Prefix;
}
// Option predicates...
@@ -119,13 +132,14 @@ namespace {
class PrintArg {
StringRef ArgName;
+ size_t Pad;
public:
- PrintArg(StringRef ArgName) : ArgName(ArgName) {}
- friend raw_ostream &operator<<(raw_ostream &OS, const PrintArg&);
+  PrintArg(StringRef ArgName, size_t Pad = DefaultPad)
+      : ArgName(ArgName), Pad(Pad) {}
+ friend raw_ostream &operator<<(raw_ostream &OS, const PrintArg &);
};
raw_ostream &operator<<(raw_ostream &OS, const PrintArg& Arg) {
- OS << argPrefix(Arg.ArgName) << Arg.ArgName;
+ OS << argPrefix(Arg.ArgName, Arg.Pad) << Arg.ArgName;
return OS;
}
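The effect of the new Pad parameter, as a sketch (PrintArg is internal to CommandLine.cpp, so this is illustrative rather than public API):

    // With the default pad of 2, help output indents each option; passing 0
    // suppresses the pad for inline mentions such as "Did you mean ...".
    llvm::errs() << PrintArg("o");         // prints "  -o"
    llvm::errs() << PrintArg("output");    // prints "  --output"
    llvm::errs() << PrintArg("output", 0); // prints "--output"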
@@ -173,7 +187,7 @@ public:
// If we're adding this to all sub-commands, add it to the ones that have
// already been registered.
if (SC == &*AllSubCommands) {
- for (const auto &Sub : RegisteredSubCommands) {
+ for (auto *Sub : RegisteredSubCommands) {
if (SC == Sub)
continue;
addLiteralOption(Opt, Sub, Name);
@@ -229,7 +243,7 @@ public:
// If we're adding this to all sub-commands, add it to the ones that have
// already been registered.
if (SC == &*AllSubCommands) {
- for (const auto &Sub : RegisteredSubCommands) {
+ for (auto *Sub : RegisteredSubCommands) {
if (SC == Sub)
continue;
addOption(O, Sub);
@@ -304,7 +318,7 @@ public:
}
bool hasOptions() const {
- for (const auto &S : RegisteredSubCommands) {
+ for (const auto *S : RegisteredSubCommands) {
if (hasOptions(*S))
return true;
}
@@ -1037,14 +1051,16 @@ static bool hasUTF8ByteOrderMark(ArrayRef<char> S) {
return (S.size() >= 3 && S[0] == '\xef' && S[1] == '\xbb' && S[2] == '\xbf');
}
-static bool ExpandResponseFile(StringRef FName, StringSaver &Saver,
- TokenizerCallback Tokenizer,
- SmallVectorImpl<const char *> &NewArgv,
- bool MarkEOLs, bool RelativeNames) {
- ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufOrErr =
- MemoryBuffer::getFile(FName);
+// FName must be an absolute path.
+static llvm::Error ExpandResponseFile(
+ StringRef FName, StringSaver &Saver, TokenizerCallback Tokenizer,
+ SmallVectorImpl<const char *> &NewArgv, bool MarkEOLs, bool RelativeNames,
+ llvm::vfs::FileSystem &FS) {
+ assert(sys::path::is_absolute(FName));
+ llvm::ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufOrErr =
+ FS.getBufferForFile(FName);
if (!MemBufOrErr)
- return false;
+ return llvm::errorCodeToError(MemBufOrErr.getError());
MemoryBuffer &MemBuf = *MemBufOrErr.get();
StringRef Str(MemBuf.getBufferStart(), MemBuf.getBufferSize());
@@ -1053,7 +1069,8 @@ static bool ExpandResponseFile(StringRef FName, StringSaver &Saver,
std::string UTF8Buf;
if (hasUTF16ByteOrderMark(BufRef)) {
if (!convertUTF16ToUTF8String(BufRef, UTF8Buf))
- return false;
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "Could not convert UTF16 to UTF8");
Str = StringRef(UTF8Buf);
}
// If we see UTF-8 BOM sequence at the beginning of a file, we shall remove
@@ -1065,41 +1082,40 @@ static bool ExpandResponseFile(StringRef FName, StringSaver &Saver,
// Tokenize the contents into NewArgv.
Tokenizer(Str, Saver, NewArgv, MarkEOLs);
+ if (!RelativeNames)
+ return Error::success();
+ llvm::StringRef BasePath = llvm::sys::path::parent_path(FName);
// If names of nested response files should be resolved relative to the
// including file, replace the included response file names with the full
// paths obtained by resolving them against the including file's directory.
- if (RelativeNames)
- for (unsigned I = 0; I < NewArgv.size(); ++I)
- if (NewArgv[I]) {
- StringRef Arg = NewArgv[I];
- if (Arg.front() == '@') {
- StringRef FileName = Arg.drop_front();
- if (llvm::sys::path::is_relative(FileName)) {
- SmallString<128> ResponseFile;
- ResponseFile.append(1, '@');
- if (llvm::sys::path::is_relative(FName)) {
- SmallString<128> curr_dir;
- llvm::sys::fs::current_path(curr_dir);
- ResponseFile.append(curr_dir.str());
- }
- llvm::sys::path::append(
- ResponseFile, llvm::sys::path::parent_path(FName), FileName);
- NewArgv[I] = Saver.save(ResponseFile.c_str()).data();
- }
- }
- }
+ for (auto &Arg : NewArgv) {
+ // Skip non-rsp file arguments.
+ if (!Arg || Arg[0] != '@')
+ continue;
- return true;
+ StringRef FileName(Arg + 1);
+ // Skip if non-relative.
+ if (!llvm::sys::path::is_relative(FileName))
+ continue;
+
+ SmallString<128> ResponseFile;
+ ResponseFile.push_back('@');
+ ResponseFile.append(BasePath);
+ llvm::sys::path::append(ResponseFile, FileName);
+ Arg = Saver.save(ResponseFile.c_str()).data();
+ }
+ return Error::success();
}
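The path rewrite in the loop above reduces to a small idiom; a sketch with assumed paths:

    // An argument "@sub/args.rsp" found inside /work/top.rsp is rewritten to
    // "@/work/sub/args.rsp" by joining it onto the including file's directory.
    llvm::SmallString<128> ResponseFile;
    ResponseFile.push_back('@');
    ResponseFile.append("/work"); // parent_path("/work/top.rsp")
    llvm::sys::path::append(ResponseFile, "sub/args.rsp");
    // ResponseFile == "@/work/sub/args.rsp"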
/// Expand response files on a command line recursively using the given
/// StringSaver and tokenization strategy.
bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
- SmallVectorImpl<const char *> &Argv,
- bool MarkEOLs, bool RelativeNames) {
+ SmallVectorImpl<const char *> &Argv, bool MarkEOLs,
+ bool RelativeNames, llvm::vfs::FileSystem &FS,
+ llvm::Optional<llvm::StringRef> CurrentDir) {
bool AllExpanded = true;
struct ResponseFileRecord {
- const char *File;
+ std::string File;
size_t End;
};
@@ -1133,8 +1149,31 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
}
const char *FName = Arg + 1;
- auto IsEquivalent = [FName](const ResponseFileRecord &RFile) {
- return sys::fs::equivalent(RFile.File, FName);
+  // Note that CurrentDir is only used for top-level rsp files; the rest will
+ // always have an absolute path deduced from the containing file.
+ SmallString<128> CurrDir;
+ if (llvm::sys::path::is_relative(FName)) {
+ if (!CurrentDir)
+ llvm::sys::fs::current_path(CurrDir);
+ else
+ CurrDir = *CurrentDir;
+ llvm::sys::path::append(CurrDir, FName);
+ FName = CurrDir.c_str();
+ }
+ auto IsEquivalent = [FName, &FS](const ResponseFileRecord &RFile) {
+ llvm::ErrorOr<llvm::vfs::Status> LHS = FS.status(FName);
+ if (!LHS) {
+ // TODO: The error should be propagated up the stack.
+ llvm::consumeError(llvm::errorCodeToError(LHS.getError()));
+ return false;
+ }
+ llvm::ErrorOr<llvm::vfs::Status> RHS = FS.status(RFile.File);
+ if (!RHS) {
+ // TODO: The error should be propagated up the stack.
+ llvm::consumeError(llvm::errorCodeToError(RHS.getError()));
+ return false;
+ }
+ return LHS->equivalent(*RHS);
};
// Check for recursive response files.
@@ -1149,10 +1188,13 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
// Replace this response file argument with the tokenization of its
// contents. Nested response files are expanded in subsequent iterations.
SmallVector<const char *, 0> ExpandedArgv;
- if (!ExpandResponseFile(FName, Saver, Tokenizer, ExpandedArgv, MarkEOLs,
- RelativeNames)) {
+ if (llvm::Error Err =
+ ExpandResponseFile(FName, Saver, Tokenizer, ExpandedArgv, MarkEOLs,
+ RelativeNames, FS)) {
// We couldn't read this file, so we leave it in the argument stream and
// move on.
+ // TODO: The error should be propagated up the stack.
+ llvm::consumeError(std::move(Err));
AllExpanded = false;
++I;
continue;
@@ -1180,9 +1222,20 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver,
SmallVectorImpl<const char *> &Argv) {
- if (!ExpandResponseFile(CfgFile, Saver, cl::tokenizeConfigFile, Argv,
- /*MarkEOLs*/ false, /*RelativeNames*/ true))
+ SmallString<128> AbsPath;
+ if (sys::path::is_relative(CfgFile)) {
+ llvm::sys::fs::current_path(AbsPath);
+ llvm::sys::path::append(AbsPath, CfgFile);
+ CfgFile = AbsPath.str();
+ }
+ if (llvm::Error Err =
+ ExpandResponseFile(CfgFile, Saver, cl::tokenizeConfigFile, Argv,
+ /*MarkEOLs*/ false, /*RelativeNames*/ true,
+ *llvm::vfs::getRealFileSystem())) {
+ // TODO: The error should be propagated up the stack.
+ llvm::consumeError(std::move(Err));
return false;
+ }
return ExpandResponseFiles(Saver, cl::tokenizeConfigFile, Argv,
/*MarkEOLs*/ false, /*RelativeNames*/ true);
}
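A usage sketch of readConfigFile after this change (the file name and setup are assumptions):

    // Reads "tool.cfg" (tokenized with cl::tokenizeConfigFile), expanding any
    // nested response files; on success Argv holds the saved option strings.
    llvm::BumpPtrAllocator Alloc;
    llvm::StringSaver Saver(Alloc);
    llvm::SmallVector<const char *, 16> Argv;
    if (!llvm::cl::readConfigFile("tool.cfg", Saver, Argv))
      llvm::errs() << "could not read tool.cfg\n";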
@@ -1447,7 +1500,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
if (NearestHandler) {
// If we know a near match, report it as well.
*Errs << ProgramName << ": Did you mean '"
- << PrintArg(NearestHandlerString) << "'?\n";
+ << PrintArg(NearestHandlerString, 0) << "'?\n";
}
ErrorParsing = true;
@@ -1601,7 +1654,7 @@ bool Option::error(const Twine &Message, StringRef ArgName, raw_ostream &Errs) {
if (ArgName.empty())
Errs << HelpStr; // Be nice for positional arguments
else
- Errs << GlobalParser->ProgramName << ": for the " << PrintArg(ArgName);
+ Errs << GlobalParser->ProgramName << ": for the " << PrintArg(ArgName, 0);
Errs << " option: " << Message << "\n";
return true;
@@ -1757,6 +1810,24 @@ bool parser<int>::parse(Option &O, StringRef ArgName, StringRef Arg,
return false;
}
+// parser<long> implementation
+//
+bool parser<long>::parse(Option &O, StringRef ArgName, StringRef Arg,
+ long &Value) {
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for long argument!");
+ return false;
+}
+
+// parser<long long> implementation
+//
+bool parser<long long>::parse(Option &O, StringRef ArgName, StringRef Arg,
+ long long &Value) {
+ if (Arg.getAsInteger(0, Value))
+    return O.error("'" + Arg + "' value invalid for long long argument!");
+ return false;
+}
+
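With the new parsers registered, options can bind directly to long and long long; a sketch (assumes the matching declarations added to llvm/Support/CommandLine.h by this change):

    // Declared at file scope in a tool; "-threshold=-5" now parses without a
    // custom parser because parser<long> is provided above.
    static llvm::cl::opt<long> Threshold(
        "threshold", llvm::cl::desc("Signed threshold value"),
        llvm::cl::init(-1));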
// parser<unsigned> implementation
//
bool parser<unsigned>::parse(Option &O, StringRef ArgName, StringRef Arg,
@@ -1966,6 +2037,8 @@ void generic_parser_base::printGenericOptionDiff(
PRINT_OPT_DIFF(bool)
PRINT_OPT_DIFF(boolOrDefault)
PRINT_OPT_DIFF(int)
+PRINT_OPT_DIFF(long)
+PRINT_OPT_DIFF(long long)
PRINT_OPT_DIFF(unsigned)
PRINT_OPT_DIFF(unsigned long)
PRINT_OPT_DIFF(unsigned long long)
@@ -2039,7 +2112,7 @@ static void sortOpts(StringMap<Option *> &OptMap,
static void
sortSubCommands(const SmallPtrSetImpl<SubCommand *> &SubMap,
SmallVectorImpl<std::pair<const char *, SubCommand *>> &Subs) {
- for (const auto &S : SubMap) {
+ for (auto *S : SubMap) {
if (S->getName().empty())
continue;
Subs.push_back(std::make_pair(S->getName().data(), S));
@@ -2363,6 +2436,28 @@ static VersionPrinterTy OverrideVersionPrinter = nullptr;
static std::vector<VersionPrinterTy> *ExtraVersionPrinters = nullptr;
+#if defined(__GNUC__)
+// GCC and GCC-compatible compilers define __OPTIMIZE__ when optimizations are
+// enabled.
+# if defined(__OPTIMIZE__)
+# define LLVM_IS_DEBUG_BUILD 0
+# else
+# define LLVM_IS_DEBUG_BUILD 1
+# endif
+#elif defined(_MSC_VER)
+// MSVC doesn't have a predefined macro indicating if optimizations are enabled.
+// Use _DEBUG instead. This macro actually corresponds to the choice between
+// debug and release CRTs, but it is a reasonable proxy.
+# if defined(_DEBUG)
+# define LLVM_IS_DEBUG_BUILD 1
+# else
+# define LLVM_IS_DEBUG_BUILD 0
+# endif
+#else
+// Otherwise, for an unknown compiler, assume this is an optimized build.
+# define LLVM_IS_DEBUG_BUILD 0
+#endif
+
namespace {
class VersionPrinter {
public:
@@ -2378,7 +2473,7 @@ public:
OS << " " << LLVM_VERSION_INFO;
#endif
OS << "\n ";
-#ifndef __OPTIMIZE__
+#if LLVM_IS_DEBUG_BUILD
OS << "DEBUG build";
#else
OS << "Optimized build";
diff --git a/contrib/llvm-project/llvm/lib/Support/Compression.cpp b/contrib/llvm-project/llvm/lib/Support/Compression.cpp
index 97d5ffaadf82..4165a2740cd0 100644
--- a/contrib/llvm-project/llvm/lib/Support/Compression.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Compression.cpp
@@ -17,13 +17,13 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H
+#if LLVM_ENABLE_ZLIB
#include <zlib.h>
#endif
using namespace llvm;
-#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ
+#if LLVM_ENABLE_ZLIB
static Error createError(StringRef Err) {
return make_error<StringError>(Err, inconvertibleErrorCode());
}
diff --git a/contrib/llvm-project/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm-project/llvm/lib/Support/CrashRecoveryContext.cpp
index 9d13fce9cc52..b9031f52375c 100644
--- a/contrib/llvm-project/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -10,9 +10,17 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Signals.h"
#include "llvm/Support/ThreadLocal.h"
#include <mutex>
#include <setjmp.h>
+#ifdef _WIN32
+#include <excpt.h> // for GetExceptionInformation
+#endif
+#if LLVM_ON_UNIX
+#include <sysexits.h> // EX_IOERR
+#endif
+
using namespace llvm;
namespace {
@@ -54,7 +62,11 @@ public:
#endif
}
- void HandleCrash() {
+  // If the function run by the CrashRecoveryContext crashes or fails, then
+ // 'RetCode' represents the returned error code, as if it was returned by a
+ // process. 'Context' represents the signal type on Unix; on Windows, it is
+ // the ExceptionContext.
+ void HandleCrash(int RetCode, uintptr_t Context) {
// Eliminate the current context entry, to avoid re-entering in case the
// cleanup code crashes.
CurrentContext->set(Next);
@@ -62,7 +74,10 @@ public:
assert(!Failed && "Crash recovery context already failed!");
Failed = true;
- // FIXME: Stash the backtrace.
+ if (CRC->DumpStackAndCleanupOnFailure)
+ sys::CleanupOnSignal(Context);
+
+ CRC->RetCode = RetCode;
// Jump back to the RunSafely we were called under.
longjmp(JumpBuffer, 1);
@@ -171,19 +186,32 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
static void installExceptionOrSignalHandlers() {}
static void uninstallExceptionOrSignalHandlers() {}
-bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
- if (!gCrashRecoveryEnabled) {
+// We need this function because the call to GetExceptionInformation() can only
+// occur inside the __except evaluation block.
+static int ExceptionFilter(bool DumpStackAndCleanup,
+ _EXCEPTION_POINTERS *Except) {
+ if (DumpStackAndCleanup)
+ sys::CleanupOnSignal((uintptr_t)Except);
+ return EXCEPTION_EXECUTE_HANDLER;
+}
+
+static bool InvokeFunctionCall(function_ref<void()> Fn,
+ bool DumpStackAndCleanup, int &RetCode) {
+ __try {
Fn();
- return true;
+ } __except (ExceptionFilter(DumpStackAndCleanup, GetExceptionInformation())) {
+ RetCode = GetExceptionCode();
+ return false;
}
+ return true;
+}
- bool Result = true;
- __try {
+bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
+ if (!gCrashRecoveryEnabled) {
Fn();
- } __except (1) { // Catch any exception.
- Result = false;
+ return true;
}
- return Result;
+ return InvokeFunctionCall(Fn, DumpStackAndCleanupOnFailure, RetCode);
}
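How the new RetCode surfaces to callers, as a sketch (assumes crash recovery has been enabled via CrashRecoveryContext::Enable(); the callback is a placeholder):

    // RunSafely returns false on a crash; RetCode then carries the exception
    // code (Windows) or the -2/EX_IOERR convention (Unix) set in HandleCrash.
    llvm::CrashRecoveryContext::Enable();
    llvm::CrashRecoveryContext CRC;
    CRC.DumpStackAndCleanupOnFailure = true;
    if (!CRC.RunSafely([] { /* work that may crash */ }))
      llvm::errs() << "recovered from crash, code " << CRC.RetCode << "\n";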
#else // !_MSC_VER
@@ -237,7 +265,9 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
// implementation if we so choose.
// Handle the crash
- const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
+ const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(
+ (int)ExceptionInfo->ExceptionRecord->ExceptionCode,
+ reinterpret_cast<uintptr_t>(ExceptionInfo));
// Note that we don't actually get here because HandleCrash calls
// longjmp, which means the HandleCrash function never returns.
@@ -280,7 +310,7 @@ static void uninstallExceptionOrSignalHandlers() {
// crash recovery context, and install signal handlers to invoke HandleCrash on
// the active object.
//
-// This implementation does not to attempt to chain signal handlers in any
+// This implementation does not attempt to chain signal handlers in any
// reliable fashion -- if we get a signal outside of a crash recovery context we
// simply disable crash recovery and raise the signal again.
@@ -319,8 +349,16 @@ static void CrashRecoverySignalHandler(int Signal) {
sigaddset(&SigMask, Signal);
sigprocmask(SIG_UNBLOCK, &SigMask, nullptr);
+ // As per convention, -2 indicates a crash or timeout as opposed to failure to
+ // execute (see llvm/include/llvm/Support/Program.h)
+ int RetCode = -2;
+
+ // Don't consider a broken pipe as a crash (see clang/lib/Driver/Driver.cpp)
+ if (Signal == SIGPIPE)
+ RetCode = EX_IOERR;
+
if (CRCI)
- const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
+ const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(RetCode, Signal);
}
static void installExceptionOrSignalHandlers() {
@@ -364,7 +402,9 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
void CrashRecoveryContext::HandleCrash() {
CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
assert(CRCI && "Crash recovery context never initialized!");
- CRCI->HandleCrash();
+ // As per convention, -2 indicates a crash or timeout as opposed to failure to
+ // execute (see llvm/include/llvm/Support/Program.h)
+ CRCI->HandleCrash(-2, 0);
}
// FIXME: Portability.
@@ -404,7 +444,10 @@ bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn,
unsigned RequestedStackSize) {
bool UseBackgroundPriority = hasThreadBackgroundPriority();
RunSafelyOnThreadInfo Info = { Fn, this, UseBackgroundPriority, false };
- llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize);
+ llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info,
+ RequestedStackSize == 0
+ ? llvm::None
+ : llvm::Optional<unsigned>(RequestedStackSize));
if (CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *)Impl)
CRC->setSwitchedThread();
return Info.Result;
diff --git a/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp b/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp
index 6598103658da..1e3ec300964c 100644
--- a/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/DebugCounter.cpp
@@ -2,7 +2,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Options.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Support/Error.cpp b/contrib/llvm-project/llvm/lib/Support/Error.cpp
index 9ea08c37478e..315a11e967d1 100644
--- a/contrib/llvm-project/llvm/lib/Support/Error.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Error.cpp
@@ -103,9 +103,10 @@ std::error_code errorToErrorCode(Error Err) {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
void Error::fatalUncheckedError() const {
dbgs() << "Program aborted due to an unhandled Error:\n";
- if (getPtr())
+ if (getPtr()) {
getPtr()->log(dbgs());
- else
+ dbgs() << "\n";
+  } else
dbgs() << "Error value was Success. (Note: Success values must still be "
"checked prior to being destroyed).\n";
abort();
diff --git a/contrib/llvm-project/llvm/lib/Support/FileCheck.cpp b/contrib/llvm-project/llvm/lib/Support/FileCheck.cpp
index 841e406a7b69..2261ecc236c2 100644
--- a/contrib/llvm-project/llvm/lib/Support/FileCheck.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/FileCheck.cpp
@@ -25,15 +25,15 @@
using namespace llvm;
-Expected<uint64_t> FileCheckNumericVariableUse::eval() const {
- Optional<uint64_t> Value = NumericVariable->getValue();
+Expected<uint64_t> NumericVariableUse::eval() const {
+ Optional<uint64_t> Value = Variable->getValue();
if (Value)
return *Value;
- return make_error<FileCheckUndefVarError>(Name);
+ return make_error<UndefVarError>(Name);
}
-Expected<uint64_t> FileCheckASTBinop::eval() const {
+Expected<uint64_t> BinaryOperation::eval() const {
Expected<uint64_t> LeftOp = LeftOperand->eval();
Expected<uint64_t> RightOp = RightOperand->eval();
@@ -51,14 +51,14 @@ Expected<uint64_t> FileCheckASTBinop::eval() const {
return EvalBinop(*LeftOp, *RightOp);
}
-Expected<std::string> FileCheckNumericSubstitution::getResult() const {
- Expected<uint64_t> EvaluatedValue = ExpressionAST->eval();
+Expected<std::string> NumericSubstitution::getResult() const {
+ Expected<uint64_t> EvaluatedValue = ExpressionASTPointer->eval();
if (!EvaluatedValue)
return EvaluatedValue.takeError();
return utostr(*EvaluatedValue);
}
-Expected<std::string> FileCheckStringSubstitution::getResult() const {
+Expected<std::string> StringSubstitution::getResult() const {
// Look up the value and escape it so that we can put it into the regex.
Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr);
if (!VarVal)
@@ -66,14 +66,12 @@ Expected<std::string> FileCheckStringSubstitution::getResult() const {
return Regex::escape(*VarVal);
}
-bool FileCheckPattern::isValidVarNameStart(char C) {
- return C == '_' || isalpha(C);
-}
+bool Pattern::isValidVarNameStart(char C) { return C == '_' || isalpha(C); }
-Expected<FileCheckPattern::VariableProperties>
-FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) {
+Expected<Pattern::VariableProperties>
+Pattern::parseVariable(StringRef &Str, const SourceMgr &SM) {
if (Str.empty())
- return FileCheckErrorDiagnostic::get(SM, Str, "empty variable name");
+ return ErrorDiagnostic::get(SM, Str, "empty variable name");
bool ParsedOneChar = false;
unsigned I = 0;
@@ -85,7 +83,7 @@ FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) {
for (unsigned E = Str.size(); I != E; ++I) {
if (!ParsedOneChar && !isValidVarNameStart(Str[I]))
- return FileCheckErrorDiagnostic::get(SM, Str, "invalid variable name");
+ return ErrorDiagnostic::get(SM, Str, "invalid variable name");
// Variable names are composed of alphanumeric characters and underscores.
if (Str[I] != '_' && !isalnum(Str[I]))
@@ -109,12 +107,11 @@ static char popFront(StringRef &S) {
return C;
}
-char FileCheckUndefVarError::ID = 0;
-char FileCheckErrorDiagnostic::ID = 0;
-char FileCheckNotFoundError::ID = 0;
+char UndefVarError::ID = 0;
+char ErrorDiagnostic::ID = 0;
+char NotFoundError::ID = 0;
-Expected<FileCheckNumericVariable *>
-FileCheckPattern::parseNumericVariableDefinition(
+Expected<NumericVariable *> Pattern::parseNumericVariableDefinition(
StringRef &Expr, FileCheckPatternContext *Context,
Optional<size_t> LineNumber, const SourceMgr &SM) {
Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
@@ -123,22 +120,22 @@ FileCheckPattern::parseNumericVariableDefinition(
StringRef Name = ParseVarResult->Name;
if (ParseVarResult->IsPseudo)
- return FileCheckErrorDiagnostic::get(
+ return ErrorDiagnostic::get(
SM, Name, "definition of pseudo numeric variable unsupported");
// Detect collisions between string and numeric variables when the latter
// is created later than the former.
if (Context->DefinedVariableTable.find(Name) !=
Context->DefinedVariableTable.end())
- return FileCheckErrorDiagnostic::get(
+ return ErrorDiagnostic::get(
SM, Name, "string variable with name '" + Name + "' already exists");
Expr = Expr.ltrim(SpaceChars);
if (!Expr.empty())
- return FileCheckErrorDiagnostic::get(
+ return ErrorDiagnostic::get(
SM, Expr, "unexpected characters after numeric variable name");
- FileCheckNumericVariable *DefinedNumericVariable;
+ NumericVariable *DefinedNumericVariable;
auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
if (VarTableIter != Context->GlobalNumericVariableTable.end())
DefinedNumericVariable = VarTableIter->second;
@@ -148,13 +145,11 @@ FileCheckPattern::parseNumericVariableDefinition(
return DefinedNumericVariable;
}
-Expected<std::unique_ptr<FileCheckNumericVariableUse>>
-FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo,
- Optional<size_t> LineNumber,
- FileCheckPatternContext *Context,
- const SourceMgr &SM) {
+Expected<std::unique_ptr<NumericVariableUse>> Pattern::parseNumericVariableUse(
+ StringRef Name, bool IsPseudo, Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context, const SourceMgr &SM) {
if (IsPseudo && !Name.equals("@LINE"))
- return FileCheckErrorDiagnostic::get(
+ return ErrorDiagnostic::get(
SM, Name, "invalid pseudo numeric variable '" + Name + "'");
// Numeric variable definitions and uses are parsed in the order in which
@@ -166,7 +161,7 @@ FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo,
// uses of undefined variables, whether string or numeric, are then diagnosed
// in printSubstitutions() after failing to match.
auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
- FileCheckNumericVariable *NumericVariable;
+ NumericVariable *NumericVariable;
if (VarTableIter != Context->GlobalNumericVariableTable.end())
NumericVariable = VarTableIter->second;
else {
@@ -176,22 +171,20 @@ FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo,
Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
- return FileCheckErrorDiagnostic::get(
+ return ErrorDiagnostic::get(
SM, Name,
"numeric variable '" + Name +
"' defined earlier in the same CHECK directive");
- return std::make_unique<FileCheckNumericVariableUse>(Name, NumericVariable);
+ return std::make_unique<NumericVariableUse>(Name, NumericVariable);
}
-Expected<std::unique_ptr<FileCheckExpressionAST>>
-FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO,
- Optional<size_t> LineNumber,
- FileCheckPatternContext *Context,
- const SourceMgr &SM) {
+Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
+ StringRef &Expr, AllowedOperand AO, Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context, const SourceMgr &SM) {
if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
// Try to parse as a numeric variable use.
- Expected<FileCheckPattern::VariableProperties> ParseVarResult =
+ Expected<Pattern::VariableProperties> ParseVarResult =
parseVariable(Expr, SM);
if (ParseVarResult)
return parseNumericVariableUse(ParseVarResult->Name,
@@ -206,10 +199,10 @@ FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO,
// Otherwise, parse it as a literal.
uint64_t LiteralValue;
if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue))
- return std::make_unique<FileCheckExpressionLiteral>(LiteralValue);
+ return std::make_unique<ExpressionLiteral>(LiteralValue);
- return FileCheckErrorDiagnostic::get(SM, Expr,
- "invalid operand format '" + Expr + "'");
+ return ErrorDiagnostic::get(SM, Expr,
+ "invalid operand format '" + Expr + "'");
}
static uint64_t add(uint64_t LeftOp, uint64_t RightOp) {
@@ -220,10 +213,10 @@ static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) {
return LeftOp - RightOp;
}
-Expected<std::unique_ptr<FileCheckExpressionAST>> FileCheckPattern::parseBinop(
- StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp,
- bool IsLegacyLineExpr, Optional<size_t> LineNumber,
- FileCheckPatternContext *Context, const SourceMgr &SM) {
+Expected<std::unique_ptr<ExpressionAST>>
+Pattern::parseBinop(StringRef &Expr, std::unique_ptr<ExpressionAST> LeftOp,
+ bool IsLegacyLineExpr, Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context, const SourceMgr &SM) {
Expr = Expr.ltrim(SpaceChars);
if (Expr.empty())
return std::move(LeftOp);
@@ -241,35 +234,32 @@ Expected<std::unique_ptr<FileCheckExpressionAST>> FileCheckPattern::parseBinop(
EvalBinop = sub;
break;
default:
- return FileCheckErrorDiagnostic::get(
+ return ErrorDiagnostic::get(
SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'");
}
// Parse right operand.
Expr = Expr.ltrim(SpaceChars);
if (Expr.empty())
- return FileCheckErrorDiagnostic::get(SM, Expr,
- "missing operand in expression");
+ return ErrorDiagnostic::get(SM, Expr, "missing operand in expression");
// The second operand in a legacy @LINE expression is always a literal.
AllowedOperand AO =
IsLegacyLineExpr ? AllowedOperand::Literal : AllowedOperand::Any;
- Expected<std::unique_ptr<FileCheckExpressionAST>> RightOpResult =
+ Expected<std::unique_ptr<ExpressionAST>> RightOpResult =
parseNumericOperand(Expr, AO, LineNumber, Context, SM);
if (!RightOpResult)
return RightOpResult;
Expr = Expr.ltrim(SpaceChars);
- return std::make_unique<FileCheckASTBinop>(EvalBinop, std::move(LeftOp),
- std::move(*RightOpResult));
+ return std::make_unique<BinaryOperation>(EvalBinop, std::move(LeftOp),
+ std::move(*RightOpResult));
}
-Expected<std::unique_ptr<FileCheckExpressionAST>>
-FileCheckPattern::parseNumericSubstitutionBlock(
- StringRef Expr,
- Optional<FileCheckNumericVariable *> &DefinedNumericVariable,
+Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericSubstitutionBlock(
+ StringRef Expr, Optional<NumericVariable *> &DefinedNumericVariable,
bool IsLegacyLineExpr, Optional<size_t> LineNumber,
FileCheckPatternContext *Context, const SourceMgr &SM) {
- std::unique_ptr<FileCheckExpressionAST> ExpressionAST = nullptr;
+ std::unique_ptr<ExpressionAST> ExpressionASTPointer = nullptr;
StringRef DefExpr = StringRef();
DefinedNumericVariable = None;
// Save variable definition expression if any.
@@ -286,26 +276,26 @@ FileCheckPattern::parseNumericSubstitutionBlock(
// pseudo variable.
AllowedOperand AO =
IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
- Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult =
+ Expected<std::unique_ptr<ExpressionAST>> ParseResult =
parseNumericOperand(Expr, AO, LineNumber, Context, SM);
while (ParseResult && !Expr.empty()) {
ParseResult = parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr,
LineNumber, Context, SM);
// Legacy @LINE expressions only allow 2 operands.
if (ParseResult && IsLegacyLineExpr && !Expr.empty())
- return FileCheckErrorDiagnostic::get(
+ return ErrorDiagnostic::get(
SM, Expr,
"unexpected characters at end of expression '" + Expr + "'");
}
if (!ParseResult)
return ParseResult;
- ExpressionAST = std::move(*ParseResult);
+ ExpressionASTPointer = std::move(*ParseResult);
}
// Parse the numeric variable definition.
if (DefEnd != StringRef::npos) {
DefExpr = DefExpr.ltrim(SpaceChars);
- Expected<FileCheckNumericVariable *> ParseResult =
+ Expected<NumericVariable *> ParseResult =
parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM);
if (!ParseResult)
@@ -313,12 +303,11 @@ FileCheckPattern::parseNumericSubstitutionBlock(
DefinedNumericVariable = *ParseResult;
}
- return std::move(ExpressionAST);
+ return std::move(ExpressionASTPointer);
}
-bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
- SourceMgr &SM,
- const FileCheckRequest &Req) {
+bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
+ SourceMgr &SM, const FileCheckRequest &Req) {
bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
IgnoreCase = Req.IgnoreCase;
@@ -447,7 +436,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
// Get the name (e.g. "foo") and verify it is well formed.
StringRef OrigMatchStr = MatchStr;
- Expected<FileCheckPattern::VariableProperties> ParseVarResult =
+ Expected<Pattern::VariableProperties> ParseVarResult =
parseVariable(MatchStr, SM);
if (!ParseVarResult) {
logAllUnhandledErrors(ParseVarResult.takeError(), errs());
@@ -487,10 +476,10 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
}
// Parse numeric substitution block.
- std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
- Optional<FileCheckNumericVariable *> DefinedNumericVariable;
+ std::unique_ptr<ExpressionAST> ExpressionASTPointer;
+ Optional<NumericVariable *> DefinedNumericVariable;
if (IsNumBlock) {
- Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult =
+ Expected<std::unique_ptr<ExpressionAST>> ParseResult =
parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable,
IsLegacyLineExpr, LineNumber, Context,
SM);
@@ -498,8 +487,8 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
logAllUnhandledErrors(ParseResult.takeError(), errs());
return true;
}
- ExpressionAST = std::move(*ParseResult);
- SubstNeeded = ExpressionAST != nullptr;
+ ExpressionASTPointer = std::move(*ParseResult);
+ SubstNeeded = ExpressionASTPointer != nullptr;
if (DefinedNumericVariable) {
IsDefinition = true;
DefName = (*DefinedNumericVariable)->getName();
@@ -516,7 +505,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
++SubstInsertIdx;
if (IsNumBlock) {
- FileCheckNumericVariableMatch NumericVariableDefinition = {
+ NumericVariableMatch NumericVariableDefinition = {
*DefinedNumericVariable, CurParen};
NumericVariableDefs[DefName] = NumericVariableDefinition;
// This store is done here rather than in match() to allow
@@ -562,10 +551,11 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
} else {
// Handle substitution of string variables ([[<var>]]) defined in
// previous CHECK patterns, and substitution of expressions.
- FileCheckSubstitution *Substitution =
+ Substitution *Substitution =
IsNumBlock
? Context->makeNumericSubstitution(
- SubstStr, std::move(ExpressionAST), SubstInsertIdx)
+ SubstStr, std::move(ExpressionASTPointer),
+ SubstInsertIdx)
: Context->makeStringSubstitution(SubstStr, SubstInsertIdx);
Substitutions.push_back(Substitution);
}
@@ -589,7 +579,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
return false;
}
-bool FileCheckPattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
+bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
Regex R(RS);
std::string Error;
if (!R.isValid(Error)) {
@@ -603,14 +593,14 @@ bool FileCheckPattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceM
return false;
}
-void FileCheckPattern::AddBackrefToRegEx(unsigned BackrefNum) {
+void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
RegExStr += Backref;
}
-Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen,
- const SourceMgr &SM) const {
+Expected<size_t> Pattern::match(StringRef Buffer, size_t &MatchLen,
+ const SourceMgr &SM) const {
// If this is the EOF pattern, match it immediately.
if (CheckTy == Check::CheckEOF) {
MatchLen = 0;
@@ -620,10 +610,10 @@ Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen,
// If this is a fixed string pattern, just match it now.
if (!FixedStr.empty()) {
MatchLen = FixedStr.size();
- size_t Pos = IgnoreCase ? Buffer.find_lower(FixedStr)
- : Buffer.find(FixedStr);
+ size_t Pos =
+ IgnoreCase ? Buffer.find_lower(FixedStr) : Buffer.find(FixedStr);
if (Pos == StringRef::npos)
- return make_error<FileCheckNotFoundError>();
+ return make_error<NotFoundError>();
return Pos;
}
@@ -663,7 +653,7 @@ Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen,
if (IgnoreCase)
Flags |= Regex::IgnoreCase;
if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo))
- return make_error<FileCheckNotFoundError>();
+ return make_error<NotFoundError>();
// Successful regex match.
assert(!MatchInfo.empty() && "Didn't get any match");
@@ -678,18 +668,18 @@ Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen,
// If this defines any numeric variables, remember their values.
for (const auto &NumericVariableDef : NumericVariableDefs) {
- const FileCheckNumericVariableMatch &NumericVariableMatch =
+ const NumericVariableMatch &NumericVariableMatch =
NumericVariableDef.getValue();
unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup;
assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error");
- FileCheckNumericVariable *DefinedNumericVariable =
+ NumericVariable *DefinedNumericVariable =
NumericVariableMatch.DefinedNumericVariable;
StringRef MatchedValue = MatchInfo[CaptureParenGroup];
uint64_t Val;
if (MatchedValue.getAsInteger(10, Val))
- return FileCheckErrorDiagnostic::get(SM, MatchedValue,
- "Unable to represent numeric value");
+ return ErrorDiagnostic::get(SM, MatchedValue,
+ "Unable to represent numeric value");
DefinedNumericVariable->setValue(Val);
}
@@ -701,7 +691,7 @@ Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen,
return FullMatch.data() - Buffer.data() + MatchStartSkip;
}
-unsigned FileCheckPattern::computeMatchDistance(StringRef Buffer) const {
+unsigned Pattern::computeMatchDistance(StringRef Buffer) const {
// Just compute the number of matching characters. For regular expressions, we
// just compare against the regex itself and hope for the best.
//
@@ -718,8 +708,8 @@ unsigned FileCheckPattern::computeMatchDistance(StringRef Buffer) const {
return BufferPrefix.edit_distance(ExampleString);
}
-void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
- SMRange MatchRange) const {
+void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
+ SMRange MatchRange) const {
// Print what we know about substitutions.
if (!Substitutions.empty()) {
for (const auto &Substitution : Substitutions) {
@@ -731,11 +721,10 @@ void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
// variables it uses.
if (!MatchedValue) {
bool UndefSeen = false;
- handleAllErrors(MatchedValue.takeError(),
- [](const FileCheckNotFoundError &E) {},
+ handleAllErrors(MatchedValue.takeError(), [](const NotFoundError &E) {},
// Handled in PrintNoMatch().
- [](const FileCheckErrorDiagnostic &E) {},
- [&](const FileCheckUndefVarError &E) {
+ [](const ErrorDiagnostic &E) {},
+ [&](const UndefVarError &E) {
if (!UndefSeen) {
OS << "uses undefined variable(s):";
UndefSeen = true;
@@ -778,9 +767,8 @@ static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy,
return Range;
}
-void FileCheckPattern::printFuzzyMatch(
- const SourceMgr &SM, StringRef Buffer,
- std::vector<FileCheckDiag> *Diags) const {
+void Pattern::printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
+ std::vector<FileCheckDiag> *Diags) const {
// Attempt to find the closest/best fuzzy match. Usually an error happens
// because some string in the output didn't exactly match. In these cases, we
// would like to show the user a best guess at what "should have" matched, to
@@ -829,36 +817,34 @@ Expected<StringRef>
FileCheckPatternContext::getPatternVarValue(StringRef VarName) {
auto VarIter = GlobalVariableTable.find(VarName);
if (VarIter == GlobalVariableTable.end())
- return make_error<FileCheckUndefVarError>(VarName);
+ return make_error<UndefVarError>(VarName);
return VarIter->second;
}
template <class... Types>
-FileCheckNumericVariable *
-FileCheckPatternContext::makeNumericVariable(Types... args) {
- NumericVariables.push_back(
- std::make_unique<FileCheckNumericVariable>(args...));
+NumericVariable *FileCheckPatternContext::makeNumericVariable(Types... args) {
+ NumericVariables.push_back(std::make_unique<NumericVariable>(args...));
return NumericVariables.back().get();
}
-FileCheckSubstitution *
+Substitution *
FileCheckPatternContext::makeStringSubstitution(StringRef VarName,
size_t InsertIdx) {
Substitutions.push_back(
- std::make_unique<FileCheckStringSubstitution>(this, VarName, InsertIdx));
+ std::make_unique<StringSubstitution>(this, VarName, InsertIdx));
return Substitutions.back().get();
}
-FileCheckSubstitution *FileCheckPatternContext::makeNumericSubstitution(
+Substitution *FileCheckPatternContext::makeNumericSubstitution(
StringRef ExpressionStr,
- std::unique_ptr<FileCheckExpressionAST> ExpressionAST, size_t InsertIdx) {
- Substitutions.push_back(std::make_unique<FileCheckNumericSubstitution>(
- this, ExpressionStr, std::move(ExpressionAST), InsertIdx));
+ std::unique_ptr<ExpressionAST> ExpressionASTPointer, size_t InsertIdx) {
+ Substitutions.push_back(std::make_unique<NumericSubstitution>(
+ this, ExpressionStr, std::move(ExpressionASTPointer), InsertIdx));
return Substitutions.back().get();
}
-size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
+size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
// Offset keeps track of the current offset within the input Str
size_t Offset = 0;
// [...] Nesting depth
@@ -1139,7 +1125,7 @@ bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer,
PatternContext->createLineVariable();
- std::vector<FileCheckPattern> ImplicitNegativeChecks;
+ std::vector<Pattern> ImplicitNegativeChecks;
for (const auto &PatternString : Req.ImplicitCheckNot) {
// Create a buffer with fake command line content in order to display the
// command line option responsible for the specific implicit CHECK-NOT.
@@ -1153,12 +1139,12 @@ bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer,
SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
ImplicitNegativeChecks.push_back(
- FileCheckPattern(Check::CheckNot, PatternContext.get()));
+ Pattern(Check::CheckNot, PatternContext.get()));
ImplicitNegativeChecks.back().parsePattern(PatternInBuffer,
"IMPLICIT-CHECK", SM, Req);
}
- std::vector<FileCheckPattern> DagNotMatches = ImplicitNegativeChecks;
+ std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
// LineNumber keeps track of the line on which CheckPrefix instances are
// found.
@@ -1216,7 +1202,7 @@ bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer,
SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
// Parse the pattern.
- FileCheckPattern P(CheckTy, PatternContext.get(), LineNumber);
+ Pattern P(CheckTy, PatternContext.get(), LineNumber);
if (P.parsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, Req))
return true;
@@ -1261,7 +1247,7 @@ bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer,
// prefix as a filler for the error message.
if (!DagNotMatches.empty()) {
CheckStrings->emplace_back(
- FileCheckPattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1),
+ Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1),
*Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data()));
std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
}
@@ -1286,7 +1272,7 @@ bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer,
}
static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
- StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat,
+ StringRef Prefix, SMLoc Loc, const Pattern &Pat,
int MatchedCount, StringRef Buffer, size_t MatchPos,
size_t MatchLen, const FileCheckRequest &Req,
std::vector<FileCheckDiag> *Diags) {
@@ -1332,10 +1318,10 @@ static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
}
static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
- StringRef Prefix, SMLoc Loc,
- const FileCheckPattern &Pat, int MatchedCount,
- StringRef Buffer, bool VerboseVerbose,
- std::vector<FileCheckDiag> *Diags, Error MatchErrors) {
+ StringRef Prefix, SMLoc Loc, const Pattern &Pat,
+ int MatchedCount, StringRef Buffer,
+ bool VerboseVerbose, std::vector<FileCheckDiag> *Diags,
+ Error MatchErrors) {
assert(MatchErrors && "Called on successful match");
bool PrintDiag = true;
if (!ExpectedMatch) {
@@ -1361,9 +1347,8 @@ static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
return;
}
- MatchErrors =
- handleErrors(std::move(MatchErrors),
- [](const FileCheckErrorDiagnostic &E) { E.log(errs()); });
+ MatchErrors = handleErrors(std::move(MatchErrors),
+ [](const ErrorDiagnostic &E) { E.log(errs()); });
// No problem matching the string per se.
if (!MatchErrors)
@@ -1427,7 +1412,7 @@ size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
FileCheckRequest &Req,
std::vector<FileCheckDiag> *Diags) const {
size_t LastPos = 0;
- std::vector<const FileCheckPattern *> NotStrings;
+ std::vector<const Pattern *> NotStrings;
// IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
// bounds; we have not processed variable definitions within the bounded block
@@ -1565,11 +1550,11 @@ bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
return false;
}
-bool FileCheckString::CheckNot(
- const SourceMgr &SM, StringRef Buffer,
- const std::vector<const FileCheckPattern *> &NotStrings,
- const FileCheckRequest &Req, std::vector<FileCheckDiag> *Diags) const {
- for (const FileCheckPattern *Pat : NotStrings) {
+bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
+ const std::vector<const Pattern *> &NotStrings,
+ const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const {
+ for (const Pattern *Pat : NotStrings) {
assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
size_t MatchLen = 0;
@@ -1591,11 +1576,10 @@ bool FileCheckString::CheckNot(
return false;
}
-size_t
-FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
- std::vector<const FileCheckPattern *> &NotStrings,
- const FileCheckRequest &Req,
- std::vector<FileCheckDiag> *Diags) const {
+size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
+ std::vector<const Pattern *> &NotStrings,
+ const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const {
if (DagNotStrings.empty())
return 0;
@@ -1615,7 +1599,7 @@ FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
// group, so we don't use a range-based for loop here.
for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
PatItr != PatEnd; ++PatItr) {
- const FileCheckPattern &Pat = *PatItr;
+ const Pattern &Pat = *PatItr;
assert((Pat.getCheckTy() == Check::CheckDAG ||
Pat.getCheckTy() == Check::CheckNot) &&
"Invalid CHECK-DAG or CHECK-NOT!");
@@ -1820,8 +1804,8 @@ Error FileCheckPatternContext::defineCmdlineVariables(
if (CmdlineDef.empty()) {
Errs = joinErrors(
std::move(Errs),
- FileCheckErrorDiagnostic::get(
- SM, CmdlineDef, "missing equal sign in global definition"));
+ ErrorDiagnostic::get(SM, CmdlineDef,
+ "missing equal sign in global definition"));
continue;
}
@@ -1830,21 +1814,21 @@ Error FileCheckPatternContext::defineCmdlineVariables(
// Now parse the definition both to check that the syntax is correct and
// to create the necessary class instance.
StringRef CmdlineDefExpr = CmdlineDef.substr(1);
- Optional<FileCheckNumericVariable *> DefinedNumericVariable;
- Expected<std::unique_ptr<FileCheckExpressionAST>> ExpressionASTResult =
- FileCheckPattern::parseNumericSubstitutionBlock(
+ Optional<NumericVariable *> DefinedNumericVariable;
+ Expected<std::unique_ptr<ExpressionAST>> ExpressionASTResult =
+ Pattern::parseNumericSubstitutionBlock(
CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM);
if (!ExpressionASTResult) {
Errs = joinErrors(std::move(Errs), ExpressionASTResult.takeError());
continue;
}
- std::unique_ptr<FileCheckExpressionAST> ExpressionAST =
+ std::unique_ptr<ExpressionAST> ExpressionASTPointer =
std::move(*ExpressionASTResult);
// Now evaluate the expression whose value this variable should be set
// to, since the expression of a command-line variable definition should
// only use variables defined earlier on the command-line. If not, this
// is an error and we report it.
- Expected<uint64_t> Value = ExpressionAST->eval();
+ Expected<uint64_t> Value = ExpressionASTPointer->eval();
if (!Value) {
Errs = joinErrors(std::move(Errs), Value.takeError());
continue;
@@ -1861,8 +1845,8 @@ Error FileCheckPatternContext::defineCmdlineVariables(
std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('=');
StringRef CmdlineName = CmdlineNameVal.first;
StringRef OrigCmdlineName = CmdlineName;
- Expected<FileCheckPattern::VariableProperties> ParseVarResult =
- FileCheckPattern::parseVariable(CmdlineName, SM);
+ Expected<Pattern::VariableProperties> ParseVarResult =
+ Pattern::parseVariable(CmdlineName, SM);
if (!ParseVarResult) {
Errs = joinErrors(std::move(Errs), ParseVarResult.takeError());
continue;
@@ -1872,7 +1856,7 @@ Error FileCheckPatternContext::defineCmdlineVariables(
// "FOO+2" in a "FOO+2=10" definition.
if (ParseVarResult->IsPseudo || !CmdlineName.empty()) {
Errs = joinErrors(std::move(Errs),
- FileCheckErrorDiagnostic::get(
+ ErrorDiagnostic::get(
SM, OrigCmdlineName,
"invalid name in string variable definition '" +
OrigCmdlineName + "'"));
@@ -1884,8 +1868,8 @@ Error FileCheckPatternContext::defineCmdlineVariables(
// is created later than the latter.
if (GlobalNumericVariableTable.find(Name) !=
GlobalNumericVariableTable.end()) {
- Errs = joinErrors(std::move(Errs), FileCheckErrorDiagnostic::get(
- SM, Name,
+ Errs = joinErrors(std::move(Errs),
+ ErrorDiagnostic::get(SM, Name,
"numeric variable with name '" +
Name + "' already exists"));
continue;
diff --git a/contrib/llvm-project/llvm/lib/Support/FileCheckImpl.h b/contrib/llvm-project/llvm/lib/Support/FileCheckImpl.h
index 06ce8301cec4..dc07d22aefd8 100644
--- a/contrib/llvm-project/llvm/lib/Support/FileCheckImpl.h
+++ b/contrib/llvm-project/llvm/lib/Support/FileCheckImpl.h
@@ -31,9 +31,9 @@ namespace llvm {
//===----------------------------------------------------------------------===//
/// Base class representing the AST of a given expression.
-class FileCheckExpressionAST {
+class ExpressionAST {
public:
- virtual ~FileCheckExpressionAST() = default;
+ virtual ~ExpressionAST() = default;
/// Evaluates and \returns the value of the expression represented by this
/// AST or an error if evaluation fails.
@@ -41,29 +41,29 @@ public:
};
/// Class representing an unsigned literal in the AST of an expression.
-class FileCheckExpressionLiteral : public FileCheckExpressionAST {
+class ExpressionLiteral : public ExpressionAST {
private:
/// Actual value of the literal.
uint64_t Value;
public:
/// Constructs a literal with the specified value.
- FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {}
+ ExpressionLiteral(uint64_t Val) : Value(Val) {}
/// \returns the literal's value.
- Expected<uint64_t> eval() const { return Value; }
+ Expected<uint64_t> eval() const override { return Value; }
};
/// Class to represent an undefined variable error, which quotes that
/// variable's name when printed.
-class FileCheckUndefVarError : public ErrorInfo<FileCheckUndefVarError> {
+class UndefVarError : public ErrorInfo<UndefVarError> {
private:
StringRef VarName;
public:
static char ID;
- FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {}
+ UndefVarError(StringRef VarName) : VarName(VarName) {}
StringRef getVarName() const { return VarName; }
@@ -79,7 +79,7 @@ public:
};
/// Class representing a numeric variable and its associated current value.
-class FileCheckNumericVariable {
+class NumericVariable {
private:
/// Name of the numeric variable.
StringRef Name;
@@ -95,8 +95,8 @@ private:
public:
/// Constructor for a variable \p Name defined at line \p DefLineNumber or
/// defined before input is parsed if \p DefLineNumber is None.
- explicit FileCheckNumericVariable(StringRef Name,
- Optional<size_t> DefLineNumber = None)
+ explicit NumericVariable(StringRef Name,
+ Optional<size_t> DefLineNumber = None)
: Name(Name), DefLineNumber(DefLineNumber) {}
/// \returns name of this numeric variable.
@@ -114,47 +114,45 @@ public:
/// \returns the line number where this variable is defined, if any, or None
/// if defined before input is parsed.
- Optional<size_t> getDefLineNumber() { return DefLineNumber; }
+ Optional<size_t> getDefLineNumber() const { return DefLineNumber; }
};
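The renamed class in miniature (a sketch; the variable name, line number, and value are assumptions):

    // A numeric variable defined on CHECK line 3; its value is unset until a
    // pattern match calls setValue(), after which getValue() yields it.
    NumericVariable N("NUM", /*DefLineNumber=*/3);
    assert(!N.getValue());
    N.setValue(42);
    assert(N.getValue() && *N.getValue() == 42);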
/// Class representing the use of a numeric variable in the AST of an
/// expression.
-class FileCheckNumericVariableUse : public FileCheckExpressionAST {
+class NumericVariableUse : public ExpressionAST {
private:
/// Name of the numeric variable.
StringRef Name;
/// Pointer to the class instance for the variable this use is about.
- FileCheckNumericVariable *NumericVariable;
+ NumericVariable *Variable;
public:
- FileCheckNumericVariableUse(StringRef Name,
- FileCheckNumericVariable *NumericVariable)
- : Name(Name), NumericVariable(NumericVariable) {}
+ NumericVariableUse(StringRef Name, NumericVariable *Variable)
+ : Name(Name), Variable(Variable) {}
/// \returns the value of the variable referenced by this instance.
- Expected<uint64_t> eval() const;
+ Expected<uint64_t> eval() const override;
};
/// Type of functions evaluating a given binary operation.
using binop_eval_t = uint64_t (*)(uint64_t, uint64_t);
/// Class representing a single binary operation in the AST of an expression.
-class FileCheckASTBinop : public FileCheckExpressionAST {
+class BinaryOperation : public ExpressionAST {
private:
/// Left operand.
- std::unique_ptr<FileCheckExpressionAST> LeftOperand;
+ std::unique_ptr<ExpressionAST> LeftOperand;
/// Right operand.
- std::unique_ptr<FileCheckExpressionAST> RightOperand;
+ std::unique_ptr<ExpressionAST> RightOperand;
/// Pointer to function that can evaluate this binary operation.
binop_eval_t EvalBinop;
public:
- FileCheckASTBinop(binop_eval_t EvalBinop,
- std::unique_ptr<FileCheckExpressionAST> LeftOp,
- std::unique_ptr<FileCheckExpressionAST> RightOp)
+ BinaryOperation(binop_eval_t EvalBinop, std::unique_ptr<ExpressionAST> LeftOp,
+ std::unique_ptr<ExpressionAST> RightOp)
: EvalBinop(EvalBinop) {
LeftOperand = std::move(LeftOp);
RightOperand = std::move(RightOp);
@@ -164,13 +162,13 @@ public:
/// using EvalBinop on the result of recursively evaluating the operands.
/// \returns the expression value or an error if an undefined numeric
/// variable is used in one of the operands.
- Expected<uint64_t> eval() const;
+ Expected<uint64_t> eval() const override;
};
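Evaluation composes recursively; a sketch building (10 + 2) with the renamed types (the non-capturing lambda stands in for the static add() helper in FileCheck.cpp):

    // A non-capturing lambda converts to binop_eval_t; eval() folds the two
    // literal operands through it and yields 12.
    binop_eval_t AddFn = [](uint64_t L, uint64_t R) { return L + R; };
    BinaryOperation Add(AddFn, std::make_unique<ExpressionLiteral>(10),
                        std::make_unique<ExpressionLiteral>(2));
    llvm::Expected<uint64_t> V = Add.eval();
    assert(V && *V == 12);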
class FileCheckPatternContext;
/// Class representing a substitution to perform in the RegExStr string.
-class FileCheckSubstitution {
+class Substitution {
protected:
/// Pointer to a class instance holding, among other things, the table with
/// the values of live string variables at the start of any given CHECK line.
@@ -188,11 +186,11 @@ protected:
size_t InsertIdx;
public:
- FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName,
- size_t InsertIdx)
+ Substitution(FileCheckPatternContext *Context, StringRef VarName,
+ size_t InsertIdx)
: Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {}
- virtual ~FileCheckSubstitution() = default;
+ virtual ~Substitution() = default;
/// \returns the string to be substituted for something else.
StringRef getFromString() const { return FromStr; }
@@ -205,29 +203,28 @@ public:
virtual Expected<std::string> getResult() const = 0;
};
-class FileCheckStringSubstitution : public FileCheckSubstitution {
+class StringSubstitution : public Substitution {
public:
- FileCheckStringSubstitution(FileCheckPatternContext *Context,
- StringRef VarName, size_t InsertIdx)
- : FileCheckSubstitution(Context, VarName, InsertIdx) {}
+ StringSubstitution(FileCheckPatternContext *Context, StringRef VarName,
+ size_t InsertIdx)
+ : Substitution(Context, VarName, InsertIdx) {}
/// \returns the text that the string variable in this substitution matched
/// when defined, or an error if the variable is undefined.
Expected<std::string> getResult() const override;
};
-class FileCheckNumericSubstitution : public FileCheckSubstitution {
+class NumericSubstitution : public Substitution {
private:
/// Pointer to the class representing the expression whose value is to be
/// substituted.
- std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
+ std::unique_ptr<ExpressionAST> ExpressionASTPointer;
public:
- FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr,
- std::unique_ptr<FileCheckExpressionAST> ExprAST,
- size_t InsertIdx)
- : FileCheckSubstitution(Context, Expr, InsertIdx) {
- ExpressionAST = std::move(ExprAST);
+ NumericSubstitution(FileCheckPatternContext *Context, StringRef Expr,
+ std::unique_ptr<ExpressionAST> ExprAST, size_t InsertIdx)
+ : Substitution(Context, Expr, InsertIdx) {
+ ExpressionASTPointer = std::move(ExprAST);
}
/// \returns a string containing the result of evaluating the expression in
@@ -241,11 +238,11 @@ public:
struct FileCheckDiag;
-/// Class holding the FileCheckPattern global state, shared by all patterns:
-/// tables holding values of variables and whether they are defined or not at
-/// any given time in the matching process.
+/// Class holding the Pattern global state, shared by all patterns: tables
+/// holding values of variables and whether they are defined or not at any
+/// given time in the matching process.
class FileCheckPatternContext {
- friend class FileCheckPattern;
+ friend class Pattern;
private:
/// When matching a given pattern, this holds the value of all the string
@@ -262,21 +259,20 @@ private:
/// When matching a given pattern, this holds the pointers to the classes
/// representing the numeric variables defined in previous patterns. When
/// matching a pattern all definitions for that pattern are recorded in the
- /// NumericVariableDefs table in the FileCheckPattern instance of that
- /// pattern.
- StringMap<FileCheckNumericVariable *> GlobalNumericVariableTable;
+ /// NumericVariableDefs table in the Pattern instance of that pattern.
+ StringMap<NumericVariable *> GlobalNumericVariableTable;
/// Pointer to the class instance representing the @LINE pseudo variable for
/// easily updating its value.
- FileCheckNumericVariable *LineVariable = nullptr;
+ NumericVariable *LineVariable = nullptr;
/// Vector holding pointers to all parsed numeric variables. Used to
/// automatically free them once they are guaranteed to no longer be used.
- std::vector<std::unique_ptr<FileCheckNumericVariable>> NumericVariables;
+ std::vector<std::unique_ptr<NumericVariable>> NumericVariables;
/// Vector holding pointers to all substitutions. Used to automatically free
/// them once they are guaranteed to no longer be used.
- std::vector<std::unique_ptr<FileCheckSubstitution>> Substitutions;
+ std::vector<std::unique_ptr<Substitution>> Substitutions;
public:
/// \returns the value of string variable \p VarName or an error if no such
@@ -303,32 +299,30 @@ public:
private:
/// Makes a new numeric variable and registers it for destruction when the
/// context is destroyed.
- template <class... Types>
- FileCheckNumericVariable *makeNumericVariable(Types... args);
+ template <class... Types> NumericVariable *makeNumericVariable(Types... args);
/// Makes a new string substitution and registers it for destruction when the
/// context is destroyed.
- FileCheckSubstitution *makeStringSubstitution(StringRef VarName,
- size_t InsertIdx);
+ Substitution *makeStringSubstitution(StringRef VarName, size_t InsertIdx);
/// Makes a new numeric substitution and registers it for destruction when
/// the context is destroyed.
- FileCheckSubstitution *
+ Substitution *
makeNumericSubstitution(StringRef ExpressionStr,
- std::unique_ptr<FileCheckExpressionAST> ExpressionAST,
+ std::unique_ptr<ExpressionAST> ExpressionAST,
size_t InsertIdx);
};
/// Class to represent an error holding a diagnostic with location information
/// used when printing it.
-class FileCheckErrorDiagnostic : public ErrorInfo<FileCheckErrorDiagnostic> {
+class ErrorDiagnostic : public ErrorInfo<ErrorDiagnostic> {
private:
SMDiagnostic Diagnostic;
public:
static char ID;
- FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {}
+ ErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {}
std::error_code convertToErrorCode() const override {
return inconvertibleErrorCode();
@@ -338,7 +332,7 @@ public:
void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); }
static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) {
- return make_error<FileCheckErrorDiagnostic>(
+ return make_error<ErrorDiagnostic>(
SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg));
}
@@ -347,7 +341,7 @@ public:
}
};
-class FileCheckNotFoundError : public ErrorInfo<FileCheckNotFoundError> {
+class NotFoundError : public ErrorInfo<NotFoundError> {
public:
static char ID;
@@ -361,7 +355,7 @@ public:
}
};
-class FileCheckPattern {
+class Pattern {
SMLoc PatternLoc;
/// A fixed string to match as the pattern or empty if this pattern requires
@@ -378,7 +372,7 @@ class FileCheckPattern {
/// RegExStr will contain "foobaz" and we'll get two entries in this vector
/// that tells us to insert the value of string variable "bar" at offset 3
/// and the value of expression "N+1" at offset 6.
- std::vector<FileCheckSubstitution *> Substitutions;
+ std::vector<Substitution *> Substitutions;
/// Maps names of string variables defined in a pattern to the number of
/// their parenthesis group in RegExStr capturing their last definition.
@@ -397,10 +391,10 @@ class FileCheckPattern {
/// It holds the pointer to the class representing the numeric variable whose
/// value is being defined and the number of the parenthesis group in
/// RegExStr to capture that value.
- struct FileCheckNumericVariableMatch {
+ struct NumericVariableMatch {
/// Pointer to class representing the numeric variable whose value is being
/// defined.
- FileCheckNumericVariable *DefinedNumericVariable;
+ NumericVariable *DefinedNumericVariable;
/// Number of the parenthesis group in RegExStr that captures the value of
/// this numeric variable definition.
@@ -408,10 +402,9 @@ class FileCheckPattern {
};
/// Holds the number of the parenthesis group in RegExStr and pointer to the
- /// corresponding FileCheckNumericVariable class instance of all numeric
- /// variable definitions. Used to set the matched value of all those
- /// variables.
- StringMap<FileCheckNumericVariableMatch> NumericVariableDefs;
+ /// corresponding NumericVariable class instance of all numeric variable
+ /// definitions. Used to set the matched value of all those variables.
+ StringMap<NumericVariableMatch> NumericVariableDefs;
/// Pointer to a class instance holding the global state shared by all
/// patterns:
@@ -432,8 +425,8 @@ class FileCheckPattern {
bool IgnoreCase = false;
public:
- FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
- Optional<size_t> Line = None)
+ Pattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
+ Optional<size_t> Line = None)
: Context(Context), CheckTy(Ty), LineNumber(Line) {}
/// \returns the location in source code.
@@ -469,14 +462,12 @@ public:
/// substitution was successful, sets \p DefinedNumericVariable to point to
/// the class representing the numeric variable defined in this numeric
/// substitution block, or None if this block does not define any variable.
- static Expected<std::unique_ptr<FileCheckExpressionAST>>
- parseNumericSubstitutionBlock(
- StringRef Expr,
- Optional<FileCheckNumericVariable *> &DefinedNumericVariable,
+ static Expected<std::unique_ptr<ExpressionAST>> parseNumericSubstitutionBlock(
+ StringRef Expr, Optional<NumericVariable *> &DefinedNumericVariable,
bool IsLegacyLineExpr, Optional<size_t> LineNumber,
FileCheckPatternContext *Context, const SourceMgr &SM);
- /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern
- /// instance accordingly.
+ /// Parses the pattern in \p PatternStr and initializes this Pattern instance
+ /// accordingly.
///
/// \p Prefix provides which prefix is being matched, \p Req describes the
/// global options that influence the parsing such as whitespace
@@ -491,8 +482,8 @@ public:
/// matched string.
///
/// The GlobalVariableTable StringMap in the FileCheckPatternContext class
- /// instance provides the current values of FileCheck string variables and
- /// is updated if this match defines new values. Likewise, the
+ /// instance provides the current values of FileCheck string variables and is
+ /// updated if this match defines new values. Likewise, the
/// GlobalNumericVariableTable StringMap in the same class provides the
/// current values of FileCheck numeric variables and is updated if this
/// match defines new numeric values.
@@ -523,17 +514,17 @@ private:
/// Finds the closing sequence of a regex variable usage or definition.
///
/// \p Str has to point in the beginning of the definition (right after the
- /// opening sequence). \p SM holds the SourceMgr used for error repporting.
+ /// opening sequence). \p SM holds the SourceMgr used for error reporting.
/// \returns the offset of the closing sequence within Str, or npos if it
/// was not found.
- size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
+ static size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
/// Parses \p Expr for the name of a numeric variable to be defined at line
/// \p LineNumber, or before input is parsed if \p LineNumber is None.
/// \returns a pointer to the class instance representing that variable,
/// creating it if needed, or an error holding a diagnostic against \p SM
/// should defining such a variable be invalid.
- static Expected<FileCheckNumericVariable *> parseNumericVariableDefinition(
+ static Expected<NumericVariable *> parseNumericVariableDefinition(
StringRef &Expr, FileCheckPatternContext *Context,
Optional<size_t> LineNumber, const SourceMgr &SM);
/// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use
@@ -542,11 +533,9 @@ private:
/// string and numeric variables. \returns the pointer to the class instance
/// representing that variable if successful, or an error holding a
/// diagnostic against \p SM otherwise.
- static Expected<std::unique_ptr<FileCheckNumericVariableUse>>
- parseNumericVariableUse(StringRef Name, bool IsPseudo,
- Optional<size_t> LineNumber,
- FileCheckPatternContext *Context,
- const SourceMgr &SM);
+ static Expected<std::unique_ptr<NumericVariableUse>> parseNumericVariableUse(
+ StringRef Name, bool IsPseudo, Optional<size_t> LineNumber,
+ FileCheckPatternContext *Context, const SourceMgr &SM);
enum class AllowedOperand { LineVar, Literal, Any };
/// Parses \p Expr for use of a numeric operand at line \p LineNumber, or
/// before input is parsed if \p LineNumber is None. Accepts both literal
@@ -555,7 +544,7 @@ private:
/// numeric variables. \returns the class representing that operand in the
/// AST of the expression or an error holding a diagnostic against \p SM
/// otherwise.
- static Expected<std::unique_ptr<FileCheckExpressionAST>>
+ static Expected<std::unique_ptr<ExpressionAST>>
parseNumericOperand(StringRef &Expr, AllowedOperand AO,
Optional<size_t> LineNumber,
FileCheckPatternContext *Context, const SourceMgr &SM);
@@ -566,8 +555,8 @@ private:
/// the class instance holding the live string and numeric variables.
/// \returns the class representing the binary operation in the AST of the
/// expression, or an error holding a diagnostic against \p SM otherwise.
- static Expected<std::unique_ptr<FileCheckExpressionAST>>
- parseBinop(StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp,
+ static Expected<std::unique_ptr<ExpressionAST>>
+ parseBinop(StringRef &Expr, std::unique_ptr<ExpressionAST> LeftOp,
bool IsLegacyLineExpr, Optional<size_t> LineNumber,
FileCheckPatternContext *Context, const SourceMgr &SM);
};
@@ -579,7 +568,7 @@ private:
/// A check that we found in the input file.
struct FileCheckString {
/// The pattern to match.
- FileCheckPattern Pat;
+ Pattern Pat;
/// Which prefix name this check matched.
StringRef Prefix;
@@ -589,9 +578,9 @@ struct FileCheckString {
/// All of the strings that are disallowed from occurring between this match
/// string and the previous one (or start of file).
- std::vector<FileCheckPattern> DagNotStrings;
+ std::vector<Pattern> DagNotStrings;
- FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L)
+ FileCheckString(const Pattern &P, StringRef S, SMLoc L)
: Pat(P), Prefix(S), Loc(L) {}
/// Matches check string and its "not strings" and/or "dag strings".
@@ -609,12 +598,12 @@ struct FileCheckString {
/// \p Buffer. Errors are reported against \p SM and diagnostics recorded in
/// \p Diags according to the verbosity level set in \p Req.
bool CheckNot(const SourceMgr &SM, StringRef Buffer,
- const std::vector<const FileCheckPattern *> &NotStrings,
+ const std::vector<const Pattern *> &NotStrings,
const FileCheckRequest &Req,
std::vector<FileCheckDiag> *Diags) const;
/// Matches "dag strings" and their mixed "not strings".
size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
- std::vector<const FileCheckPattern *> &NotStrings,
+ std::vector<const Pattern *> &NotStrings,
const FileCheckRequest &Req,
std::vector<FileCheckDiag> *Diags) const;
};
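
As a rough illustration of how the renamed expression classes compose, here is a
minimal self-contained sketch. It mirrors the interfaces in the hunk above but is
not the LLVM code itself: eval() is simplified to return uint64_t directly rather
than Expected<uint64_t>, and error handling is omitted.

    #include <cstdint>
    #include <iostream>
    #include <memory>

    struct ExpressionAST {
      virtual ~ExpressionAST() = default;
      virtual uint64_t eval() const = 0;
    };

    // Stand-in for NumericVariable: just a current value.
    struct NumericVariable {
      uint64_t Value;
    };

    struct NumericVariableUse : ExpressionAST {
      const NumericVariable *Variable;
      explicit NumericVariableUse(const NumericVariable *V) : Variable(V) {}
      uint64_t eval() const override { return Variable->Value; }
    };

    using binop_eval_t = uint64_t (*)(uint64_t, uint64_t);

    struct BinaryOperation : ExpressionAST {
      binop_eval_t EvalBinop;
      std::unique_ptr<ExpressionAST> LeftOperand, RightOperand;
      BinaryOperation(binop_eval_t F, std::unique_ptr<ExpressionAST> L,
                      std::unique_ptr<ExpressionAST> R)
          : EvalBinop(F), LeftOperand(std::move(L)), RightOperand(std::move(R)) {}
      uint64_t eval() const override {
        // Recursively evaluate both operands, then combine with EvalBinop,
        // matching the shape of BinaryOperation::eval() above.
        return EvalBinop(LeftOperand->eval(), RightOperand->eval());
      }
    };

    int main() {
      NumericVariable Line{41}; // plays the role of the @LINE pseudo variable
      NumericVariable One{1};
      BinaryOperation Sum(
          +[](uint64_t A, uint64_t B) { return A + B; },
          std::make_unique<NumericVariableUse>(&Line),
          std::make_unique<NumericVariableUse>(&One));
      std::cout << Sum.eval() << "\n"; // prints 42
    }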
diff --git a/contrib/llvm-project/llvm/lib/Support/FileOutputBuffer.cpp b/contrib/llvm-project/llvm/lib/Support/FileOutputBuffer.cpp
index 024dd3e57a40..0a5306f684d4 100644
--- a/contrib/llvm-project/llvm/lib/Support/FileOutputBuffer.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/FileOutputBuffer.cpp
@@ -189,7 +189,10 @@ FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) {
case fs::file_type::regular_file:
case fs::file_type::file_not_found:
case fs::file_type::status_error:
- return createOnDiskBuffer(Path, Size, Mode);
+ if (Flags & F_no_mmap)
+ return createInMemoryBuffer(Path, Size, Mode);
+ else
+ return createOnDiskBuffer(Path, Size, Mode);
default:
return createInMemoryBuffer(Path, Size, Mode);
}
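
A hypothetical call site for the new flag (a sketch: F_no_mmap is the flag this
hunk handles; the rest is the existing FileOutputBuffer API):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/FileOutputBuffer.h"
    #include <cstring>
    using namespace llvm;

    static Error writeBlob(StringRef Path, ArrayRef<uint8_t> Data) {
      // F_no_mmap keeps the staging buffer in memory even for regular files,
      // for callers that want to avoid mmap-backed output.
      Expected<std::unique_ptr<FileOutputBuffer>> BufOrErr =
          FileOutputBuffer::create(Path, Data.size(), FileOutputBuffer::F_no_mmap);
      if (!BufOrErr)
        return BufOrErr.takeError();
      std::memcpy((*BufOrErr)->getBufferStart(), Data.data(), Data.size());
      return (*BufOrErr)->commit(); // atomically materializes Path
    }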
diff --git a/contrib/llvm-project/llvm/lib/Support/Host.cpp b/contrib/llvm-project/llvm/lib/Support/Host.cpp
index 2a473a1994c2..ef38c1c09413 100644
--- a/contrib/llvm-project/llvm/lib/Support/Host.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Host.cpp
@@ -35,7 +35,7 @@
#ifdef _MSC_VER
#include <intrin.h>
#endif
-#if defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
+#if defined(__APPLE__) && (!defined(__x86_64__))
#include <mach/host_info.h>
#include <mach/mach.h>
#include <mach/mach_host.h>
@@ -140,6 +140,9 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
.Case("POWER8E", "pwr8")
.Case("POWER8NVL", "pwr8")
.Case("POWER9", "pwr9")
+ // FIXME: If we get a simulator or machine with the capabilities of
+ // mcpu=future, we should revisit this and add the name reported by the
+ // simulator/machine.
.Default(generic);
}
@@ -265,14 +268,12 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
unsigned Exynos = (Variant << 12) | Part;
switch (Exynos) {
default:
- // Default by falling through to Exynos M1.
+ // Default by falling through to Exynos M3.
LLVM_FALLTHROUGH;
-
- case 0x1001:
- return "exynos-m1";
-
- case 0x4001:
- return "exynos-m2";
+ case 0x1002:
+ return "exynos-m3";
+ case 0x1003:
+ return "exynos-m4";
}
}
@@ -961,9 +962,9 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break; // "btver2"
case 23:
*Type = X86::AMDFAM17H;
- if (Model >= 0x30 && Model <= 0x3f) {
+ if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
*Subtype = X86::AMDFAM17H_ZNVER2;
- break; // "znver2"; 30h-3fh: Zen2
+ break; // "znver2"; 30h-3fh, 71h: Zen2
}
if (Model <= 0x0f) {
*Subtype = X86::AMDFAM17H_ZNVER1;
@@ -1029,7 +1030,15 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
const unsigned AVXBits = (1 << 27) | (1 << 28);
bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
((EAX & 0x6) == 0x6);
+#if defined(__APPLE__)
+ // Darwin lazily saves the AVX512 context on first use: trust that the OS will
+ // save the AVX512 context if we use AVX512 instructions, even if the bit is
+ // not set right now.
+ bool HasAVX512Save = true;
+#else
+ // AVX512 requires additional context to be saved by the OS.
bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+#endif
if (HasAVX)
setFeature(X86::FEATURE_AVX);
@@ -1222,6 +1231,33 @@ StringRef sys::getHostCPUName() {
StringRef Content = P ? P->getBuffer() : "";
return detail::getHostCPUNameForS390x(Content);
}
+#elif defined(__APPLE__) && defined(__aarch64__)
+StringRef sys::getHostCPUName() {
+ return "cyclone";
+}
+#elif defined(__APPLE__) && defined(__arm__)
+StringRef sys::getHostCPUName() {
+ host_basic_info_data_t hostInfo;
+ mach_msg_type_number_t infoCount;
+
+ infoCount = HOST_BASIC_INFO_COUNT;
+ mach_port_t hostPort = mach_host_self();
+ host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
+ &infoCount);
+ mach_port_deallocate(mach_task_self(), hostPort);
+
+ if (hostInfo.cpu_type != CPU_TYPE_ARM) {
+ assert(false && "CPUType not equal to ARM should not be possible on ARM");
+ return "generic";
+ }
+ switch (hostInfo.cpu_subtype) {
+ case CPU_SUBTYPE_ARM_V7S:
+ return "swift";
+ default:;
+ }
+
+ return "generic";
+}
#else
StringRef sys::getHostCPUName() { return "generic"; }
#endif
@@ -1339,8 +1375,15 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
// switch, then we have full AVX support.
bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) &&
!getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6);
+#if defined(__APPLE__)
+ // Darwin lazily saves the AVX512 context on first use: trust that the OS will
+ // save the AVX512 context if we use AVX512 instructions, even the bit is not
+ // set right now.
+ bool HasAVX512Save = true;
+#else
// AVX512 requires additional context to be saved by the OS.
bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
+#endif
Features["avx"] = HasAVXSave;
Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave;
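
What the non-Darwin check tests, in isolation (a sketch): XCR0 bits 1-2 (0x6)
mean the OS saves SSE/AVX state, and bits 5-7 (0xe0) mean it also saves the
opmask and ZMM state that AVX512 needs.

    #include <cstdint>

    static bool osSavesAVX512State(uint64_t XCR0, bool IsDarwin) {
      if (IsDarwin)
        return true; // Darwin enables ZMM state saving lazily on first use.
      return (XCR0 & 0x6) == 0x6 && (XCR0 & 0xe0) == 0xe0;
    }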
diff --git a/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp b/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp
index 0d7d7fcc8cb6..bb9b569d2de6 100644
--- a/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/InitLLVM.cpp
@@ -21,7 +21,11 @@
using namespace llvm;
using namespace llvm::sys;
-InitLLVM::InitLLVM(int &Argc, const char **&Argv) : StackPrinter(Argc, Argv) {
+InitLLVM::InitLLVM(int &Argc, const char **&Argv,
+ bool InstallPipeSignalExitHandler)
+ : StackPrinter(Argc, Argv) {
+ if (InstallPipeSignalExitHandler)
+ sys::SetOneShotPipeSignalFunction(sys::DefaultOneShotPipeSignalHandler);
sys::PrintStackTraceOnErrorSignal(Argv[0]);
install_out_of_memory_new_handler();
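
A hypothetical opt-out call site (a sketch; the char** overload and the
parameter's default value are assumptions, since the header is not shown here):

    #include "llvm/Support/InitLLVM.h"

    int main(int argc, char **argv) {
      // Pass false when the tool installs its own SIGPIPE handling; otherwise
      // the new handler exits cleanly when stdout is closed mid-write.
      llvm::InitLLVM X(argc, argv, /*InstallPipeSignalExitHandler=*/false);
      return 0;
    }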
diff --git a/contrib/llvm-project/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp b/contrib/llvm-project/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp
index da6514f7170b..bbc06d186fba 100644
--- a/contrib/llvm-project/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp
@@ -36,17 +36,6 @@ struct FoldingSetNodeIDBuilder {
operator()(T V) {
ID.AddInteger((unsigned long long)V);
}
- void operator()(itanium_demangle::NodeOrString NS) {
- if (NS.isNode()) {
- ID.AddInteger(0);
- (*this)(NS.asNode());
- } else if (NS.isString()) {
- ID.AddInteger(1);
- (*this)(NS.asString());
- } else {
- ID.AddInteger(2);
- }
- }
void operator()(itanium_demangle::NodeArray A) {
ID.AddInteger(A.size());
for (const Node *N : A)
@@ -307,16 +296,32 @@ ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First,
return EquivalenceError::Success;
}
+static ItaniumManglingCanonicalizer::Key
+parseMaybeMangledName(CanonicalizingDemangler &Demangler, StringRef Mangling,
+ bool CreateNewNodes) {
+ Demangler.ASTAllocator.setCreateNewNodes(CreateNewNodes);
+ Demangler.reset(Mangling.begin(), Mangling.end());
+ // Attempt demangling only for names that look like C++ mangled names.
+ // Otherwise, treat them as extern "C" names. We permit the latter to
+ // be remapped by (eg)
+ // encoding 6memcpy 7memmove
+ // consistent with how they are encoded as local-names inside a C++ mangling.
+ Node *N;
+ if (Mangling.startswith("_Z") || Mangling.startswith("__Z") ||
+ Mangling.startswith("___Z") || Mangling.startswith("____Z"))
+ N = Demangler.parse();
+ else
+ N = Demangler.make<itanium_demangle::NameType>(
+ StringView(Mangling.data(), Mangling.size()));
+ return reinterpret_cast<ItaniumManglingCanonicalizer::Key>(N);
+}
+
ItaniumManglingCanonicalizer::Key
ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) {
- P->Demangler.ASTAllocator.setCreateNewNodes(true);
- P->Demangler.reset(Mangling.begin(), Mangling.end());
- return reinterpret_cast<Key>(P->Demangler.parse());
+ return parseMaybeMangledName(P->Demangler, Mangling, true);
}
ItaniumManglingCanonicalizer::Key
ItaniumManglingCanonicalizer::lookup(StringRef Mangling) {
- P->Demangler.ASTAllocator.setCreateNewNodes(false);
- P->Demangler.reset(Mangling.begin(), Mangling.end());
- return reinterpret_cast<Key>(P->Demangler.parse());
+ return parseMaybeMangledName(P->Demangler, Mangling, false);
}
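
The dispatch rule restated in isolation (a standalone sketch, not LLVM code):
only strings with an Itanium "_Z" prefix, allowing up to three extra leading
underscores, are demangled; anything else is treated as an extern "C" name.

    #include <initializer_list>
    #include <string>

    static bool looksLikeItaniumMangling(const std::string &Name) {
      for (const char *Prefix : {"_Z", "__Z", "___Z", "____Z"})
        if (Name.rfind(Prefix, 0) == 0) // rfind(p, 0): starts-with idiom
          return true;
      return false;
    }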
diff --git a/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp b/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
index a6c591fca312..8f3f4aa8caea 100644
--- a/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
@@ -21,8 +21,8 @@ static KnownBits computeForAddCarry(
assert(!(CarryZero && CarryOne) &&
"Carry can't be zero and one at the same time");
- APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + !CarryZero;
- APInt PossibleSumOne = LHS.One + RHS.One + CarryOne;
+ APInt PossibleSumZero = LHS.getMaxValue() + RHS.getMaxValue() + !CarryZero;
+ APInt PossibleSumOne = LHS.getMinValue() + RHS.getMinValue() + CarryOne;
// Compute known bits of the carry.
APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero);
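
Why the two spellings agree (a sketch with uint64_t standing in for APInt):
every bit not known to be zero may be one, so the maximum value is ~Zero, and
only the bits known to be one are guaranteed, so the minimum value is One.

    #include <cassert>
    #include <cstdint>

    struct KnownBitsSketch {
      uint64_t Zero, One; // a set bit means "known 0" / "known 1"
      uint64_t getMaxValue() const { return ~Zero; }
      uint64_t getMinValue() const { return One; }
    };

    int main() {
      KnownBitsSketch K{/*Zero=*/0xF0, /*One=*/0x01};
      assert(K.getMaxValue() == ~K.Zero); // the expression the old code spelled out
      assert(K.getMinValue() == K.One);
    }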
diff --git a/contrib/llvm-project/llvm/lib/Support/LockFileManager.cpp b/contrib/llvm-project/llvm/lib/Support/LockFileManager.cpp
index 10181192afbd..5c6508c3b007 100644
--- a/contrib/llvm-project/llvm/lib/Support/LockFileManager.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/LockFileManager.cpp
@@ -290,7 +290,8 @@ LockFileManager::~LockFileManager() {
sys::DontRemoveFileOnSignal(UniqueLockFileName);
}
-LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
+LockFileManager::WaitForUnlockResult
+LockFileManager::waitForUnlock(const unsigned MaxSeconds) {
if (getState() != LFS_Shared)
return Res_Success;
@@ -301,9 +302,6 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
Interval.tv_sec = 0;
Interval.tv_nsec = 1000000;
#endif
- // Don't wait more than 40s per iteration. Total timeout for the file
- // to appear is ~1.5 minutes.
- const unsigned MaxSeconds = 40;
do {
// Sleep for the designated interval, to allow the owning process time to
// finish up and remove the lock file.
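
A hypothetical caller of the now-parameterized wait (a sketch; the default
value for MaxSeconds lives in the header, which is not shown here):

    #include "llvm/Support/LockFileManager.h"
    using namespace llvm;

    static bool waitBriefly(LockFileManager &Lock) {
      // Cap the per-iteration wait at 10 seconds instead of the previously
      // hardcoded 40.
      return Lock.waitForUnlock(/*MaxSeconds=*/10) ==
             LockFileManager::Res_Success;
    }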
diff --git a/contrib/llvm-project/llvm/lib/Support/Options.cpp b/contrib/llvm-project/llvm/lib/Support/Options.cpp
deleted file mode 100644
index 770b7381c20e..000000000000
--- a/contrib/llvm-project/llvm/lib/Support/Options.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-//===- llvm/Support/Options.cpp - Debug options support ---------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the helper objects for defining debug options using the
-// new API built on cl::opt, but not requiring the use of static globals.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/Options.h"
-#include "llvm/Support/ManagedStatic.h"
-
-using namespace llvm;
-
-OptionRegistry::~OptionRegistry() {
- for (auto IT = Options.begin(); IT != Options.end(); ++IT)
- delete IT->second;
-}
-
-void OptionRegistry::addOption(void *Key, cl::Option *O) {
- assert(Options.find(Key) == Options.end() &&
- "Argument with this key already registerd");
- Options.insert(std::make_pair(Key, O));
-}
-
-static ManagedStatic<OptionRegistry> OR;
-
-OptionRegistry &OptionRegistry::instance() { return *OR; }
diff --git a/contrib/llvm-project/llvm/lib/Support/Parallel.cpp b/contrib/llvm-project/llvm/lib/Support/Parallel.cpp
index 355c64b7d079..523665d14b02 100644
--- a/contrib/llvm-project/llvm/lib/Support/Parallel.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Parallel.cpp
@@ -8,14 +8,17 @@
#include "llvm/Support/Parallel.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/ManagedStatic.h"
#if LLVM_ENABLE_THREADS
#include "llvm/Support/Threading.h"
#include <atomic>
+#include <future>
#include <stack>
#include <thread>
+#include <vector>
namespace llvm {
namespace parallel {
@@ -36,30 +39,53 @@ public:
/// in filo order.
class ThreadPoolExecutor : public Executor {
public:
- explicit ThreadPoolExecutor(unsigned ThreadCount = hardware_concurrency())
- : Done(ThreadCount) {
+ explicit ThreadPoolExecutor(unsigned ThreadCount = hardware_concurrency()) {
// Spawn all but one of the threads in another thread as spawning threads
// can take a while.
- std::thread([&, ThreadCount] {
- for (size_t i = 1; i < ThreadCount; ++i) {
- std::thread([=] { work(); }).detach();
+ Threads.reserve(ThreadCount);
+ Threads.resize(1);
+ std::lock_guard<std::mutex> Lock(Mutex);
+ Threads[0] = std::thread([&, ThreadCount] {
+ for (unsigned i = 1; i < ThreadCount; ++i) {
+ Threads.emplace_back([=] { work(); });
+ if (Stop)
+ break;
}
+ ThreadsCreated.set_value();
work();
- }).detach();
+ });
}
- ~ThreadPoolExecutor() override {
- std::unique_lock<std::mutex> Lock(Mutex);
- Stop = true;
- Lock.unlock();
+ void stop() {
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ if (Stop)
+ return;
+ Stop = true;
+ }
Cond.notify_all();
- // Wait for ~Latch.
+ ThreadsCreated.get_future().wait();
}
+ ~ThreadPoolExecutor() override {
+ stop();
+ std::thread::id CurrentThreadId = std::this_thread::get_id();
+ for (std::thread &T : Threads)
+ if (T.get_id() == CurrentThreadId)
+ T.detach();
+ else
+ T.join();
+ }
+
+ struct Deleter {
+ static void call(void *Ptr) { ((ThreadPoolExecutor *)Ptr)->stop(); }
+ };
+
void add(std::function<void()> F) override {
- std::unique_lock<std::mutex> Lock(Mutex);
- WorkStack.push(F);
- Lock.unlock();
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ WorkStack.push(F);
+ }
Cond.notify_one();
}
@@ -75,19 +101,39 @@ private:
Lock.unlock();
Task();
}
- Done.dec();
}
std::atomic<bool> Stop{false};
std::stack<std::function<void()>> WorkStack;
std::mutex Mutex;
std::condition_variable Cond;
- parallel::detail::Latch Done;
+ std::promise<void> ThreadsCreated;
+ std::vector<std::thread> Threads;
};
Executor *Executor::getDefaultExecutor() {
- static ThreadPoolExecutor exec;
- return &exec;
+ // The ManagedStatic enables the ThreadPoolExecutor to be stopped via
+ // llvm_shutdown() which allows a "clean" fast exit, e.g. via _exit(). This
+ // stops the thread pool and waits for any worker thread creation to complete
+ // but does not wait for the threads to finish. The wait for worker thread
+ // creation to complete is important as it prevents intermittent crashes on
+ // Windows due to a race condition between thread creation and process exit.
+ //
+ // The ThreadPoolExecutor will only be destroyed when the static unique_ptr to
+ // it is destroyed, i.e. in a normal full exit. The ThreadPoolExecutor
+ // destructor ensures it has been stopped and waits for worker threads to
+ // finish. The wait is important as it prevents intermittent crashes on
+ // Windows when the process is doing a full exit.
+ //
+ // The Windows crashes appear to only occur with the MSVC static runtimes and
+ // are more frequent with the debug static runtime.
+ //
+ // This also prevents intermittent deadlocks on exit with the MinGW runtime.
+ static ManagedStatic<ThreadPoolExecutor, object_creator<ThreadPoolExecutor>,
+ ThreadPoolExecutor::Deleter>
+ ManagedExec;
+ static std::unique_ptr<ThreadPoolExecutor> Exec(&(*ManagedExec));
+ return Exec.get();
}
} // namespace
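
A reduced sketch of the two-phase teardown described in the comment above (not
LLVM's ManagedStatic machinery itself): a stop() hook that is safe to run
before a fast _exit(), and a destructor that joins workers only on a normal
full exit.

    #include <iostream>

    struct PoolSketch {
      void stop() { std::cout << "stop: refuse new work, wait for spawning\n"; }
      ~PoolSketch() { std::cout << "dtor: join worker threads\n"; }
    };

    static PoolSketch &pool() {
      static PoolSketch P; // destroyed during normal static teardown only
      return P;
    }

    // What llvm_shutdown() effectively triggers via ThreadPoolExecutor::Deleter.
    void shutdownHook() { pool().stop(); }

    int main() {
      shutdownHook();
      // A fast exit (_exit) here would skip ~PoolSketch, but the pool is
      // already stopped, so no worker is mid-creation when the process dies.
    }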
diff --git a/contrib/llvm-project/llvm/lib/Support/Path.cpp b/contrib/llvm-project/llvm/lib/Support/Path.cpp
index 14def83802da..3c9a08cb4077 100644
--- a/contrib/llvm-project/llvm/lib/Support/Path.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Path.cpp
@@ -496,27 +496,50 @@ void replace_extension(SmallVectorImpl<char> &path, const Twine &extension,
path.append(ext.begin(), ext.end());
}
-void replace_path_prefix(SmallVectorImpl<char> &Path,
+bool replace_path_prefix(SmallVectorImpl<char> &Path,
const StringRef &OldPrefix, const StringRef &NewPrefix,
- Style style) {
+ Style style, bool strict) {
if (OldPrefix.empty() && NewPrefix.empty())
- return;
+ return false;
StringRef OrigPath(Path.begin(), Path.size());
- if (!OrigPath.startswith(OldPrefix))
- return;
+ StringRef OldPrefixDir;
+
+ if (!strict && OldPrefix.size() > OrigPath.size())
+ return false;
+
+ // Ensure OldPrefixDir does not have a trailing separator.
+ if (!OldPrefix.empty() && is_separator(OldPrefix.back()))
+ OldPrefixDir = parent_path(OldPrefix, style);
+ else
+ OldPrefixDir = OldPrefix;
+
+ if (!OrigPath.startswith(OldPrefixDir))
+ return false;
+
+ if (OrigPath.size() > OldPrefixDir.size())
+ if (!is_separator(OrigPath[OldPrefixDir.size()], style) && strict)
+ return false;
// If prefixes have the same size we can simply copy the new one over.
- if (OldPrefix.size() == NewPrefix.size()) {
+ if (OldPrefixDir.size() == NewPrefix.size() && !strict) {
llvm::copy(NewPrefix, Path.begin());
- return;
+ return true;
}
- StringRef RelPath = OrigPath.substr(OldPrefix.size());
+ StringRef RelPath = OrigPath.substr(OldPrefixDir.size());
SmallString<256> NewPath;
path::append(NewPath, style, NewPrefix);
- path::append(NewPath, style, RelPath);
+ if (!RelPath.empty()) {
+ if (!is_separator(RelPath[0], style) || !strict)
+ path::append(NewPath, style, RelPath);
+ else
+ path::append(NewPath, style, relative_path(RelPath, style));
+ }
+
Path.swap(NewPath);
+
+ return true;
}
void native(const Twine &path, SmallVectorImpl<char> &result, Style style) {
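
Illustrative semantics of the updated function (a sketch; the default values of
`style` and `strict` are assumptions, since the header is not shown here):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"
    using namespace llvm;

    static void demo() {
      SmallString<64> P("/old/dir/file.c");
      // "/old/" is normalized to the prefix directory "/old", so this matches
      // and rewrites the path; the new bool return reports whether it did.
      bool Changed = sys::path::replace_path_prefix(P, "/old/", "/new");
      // P is now "/new/dir/file.c" and Changed is true.
      (void)Changed;
    }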
diff --git a/contrib/llvm-project/llvm/lib/Support/SHA1.cpp b/contrib/llvm-project/llvm/lib/Support/SHA1.cpp
index 47a5f07fbe7b..a98ca41a3354 100644
--- a/contrib/llvm-project/llvm/lib/Support/SHA1.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/SHA1.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/SHA1.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Host.h"
using namespace llvm;
@@ -26,45 +27,45 @@ using namespace llvm;
#define SHA_BIG_ENDIAN
#endif
-static uint32_t rol(uint32_t Number, int Bits) {
+static inline uint32_t rol(uint32_t Number, int Bits) {
return (Number << Bits) | (Number >> (32 - Bits));
}
-static uint32_t blk0(uint32_t *Buf, int I) { return Buf[I]; }
+static inline uint32_t blk0(uint32_t *Buf, int I) { return Buf[I]; }
-static uint32_t blk(uint32_t *Buf, int I) {
+static inline uint32_t blk(uint32_t *Buf, int I) {
Buf[I & 15] = rol(Buf[(I + 13) & 15] ^ Buf[(I + 8) & 15] ^ Buf[(I + 2) & 15] ^
Buf[I & 15],
1);
return Buf[I & 15];
}
-static void r0(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
- int I, uint32_t *Buf) {
+static inline void r0(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
+ uint32_t &E, int I, uint32_t *Buf) {
E += ((B & (C ^ D)) ^ D) + blk0(Buf, I) + 0x5A827999 + rol(A, 5);
B = rol(B, 30);
}
-static void r1(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
- int I, uint32_t *Buf) {
+static inline void r1(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
+ uint32_t &E, int I, uint32_t *Buf) {
E += ((B & (C ^ D)) ^ D) + blk(Buf, I) + 0x5A827999 + rol(A, 5);
B = rol(B, 30);
}
-static void r2(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
- int I, uint32_t *Buf) {
+static inline void r2(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
+ uint32_t &E, int I, uint32_t *Buf) {
E += (B ^ C ^ D) + blk(Buf, I) + 0x6ED9EBA1 + rol(A, 5);
B = rol(B, 30);
}
-static void r3(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
- int I, uint32_t *Buf) {
+static inline void r3(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
+ uint32_t &E, int I, uint32_t *Buf) {
E += (((B | C) & D) | (B & C)) + blk(Buf, I) + 0x8F1BBCDC + rol(A, 5);
B = rol(B, 30);
}
-static void r4(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
- int I, uint32_t *Buf) {
+static inline void r4(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
+ uint32_t &E, int I, uint32_t *Buf) {
E += (B ^ C ^ D) + blk(Buf, I) + 0xCA62C1D6 + rol(A, 5);
B = rol(B, 30);
}
@@ -210,8 +211,31 @@ void SHA1::writebyte(uint8_t Data) {
}
void SHA1::update(ArrayRef<uint8_t> Data) {
- for (auto &C : Data)
- writebyte(C);
+ InternalState.ByteCount += Data.size();
+
+ // Finish the current block.
+ if (InternalState.BufferOffset > 0) {
+ const size_t Remainder = std::min<size_t>(
+ Data.size(), BLOCK_LENGTH - InternalState.BufferOffset);
+ for (size_t I = 0; I < Remainder; ++I)
+ addUncounted(Data[I]);
+ Data = Data.drop_front(Remainder);
+ }
+
+ // Fast buffer filling for large inputs.
+ while (Data.size() >= BLOCK_LENGTH) {
+ assert(InternalState.BufferOffset == 0);
+ assert(BLOCK_LENGTH % 4 == 0);
+ constexpr size_t BLOCK_LENGTH_32 = BLOCK_LENGTH / 4;
+ for (size_t I = 0; I < BLOCK_LENGTH_32; ++I)
+ InternalState.Buffer.L[I] = support::endian::read32be(&Data[I * 4]);
+ hashBlock();
+ Data = Data.drop_front(BLOCK_LENGTH);
+ }
+
+ // Finish the remainder.
+ for (uint8_t C : Data)
+ addUncounted(C);
}
void SHA1::pad() {
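
The shape of the new fast path in isolation (a sketch): drain any partial
block byte by byte, then consume whole 64-byte blocks with 32-bit big-endian
loads, since SHA-1 is specified on big-endian words.

    #include <cstddef>
    #include <cstdint>

    static uint32_t read32be(const uint8_t *P) {
      return (uint32_t(P[0]) << 24) | (uint32_t(P[1]) << 16) |
             (uint32_t(P[2]) << 8) | uint32_t(P[3]);
    }

    static void consumeBlocks(const uint8_t *Data, size_t Size) {
      const size_t BlockLen = 64;
      while (Size >= BlockLen) {
        uint32_t Words[16];
        for (size_t I = 0; I < 16; ++I)
          Words[I] = read32be(Data + I * 4); // one whole block per iteration
        // hashBlock(Words) would run the 80-round compression here.
        (void)Words;
        Data += BlockLen;
        Size -= BlockLen;
      }
      // Fewer than 64 trailing bytes stay buffered for the next update().
    }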
diff --git a/contrib/llvm-project/llvm/lib/Support/Signals.cpp b/contrib/llvm-project/llvm/lib/Support/Signals.cpp
index 173a07f009d2..add6fde0eb5e 100644
--- a/contrib/llvm-project/llvm/lib/Support/Signals.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Signals.cpp
@@ -15,19 +15,19 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/FormatAdapters.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Options.h"
#include <vector>
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp b/contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp
index 9bd1f18a4ee7..d1ff44cefb08 100644
--- a/contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include <string>
#include <system_error>
#include <utility>
@@ -71,9 +72,9 @@ unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
std::unique_ptr<SpecialCaseList>
SpecialCaseList::create(const std::vector<std::string> &Paths,
- std::string &Error) {
+ llvm::vfs::FileSystem &FS, std::string &Error) {
std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
- if (SCL->createInternal(Paths, Error))
+ if (SCL->createInternal(Paths, FS, Error))
return SCL;
return nullptr;
}
@@ -87,19 +88,20 @@ std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
}
std::unique_ptr<SpecialCaseList>
-SpecialCaseList::createOrDie(const std::vector<std::string> &Paths) {
+SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
+ llvm::vfs::FileSystem &FS) {
std::string Error;
- if (auto SCL = create(Paths, Error))
+ if (auto SCL = create(Paths, FS, Error))
return SCL;
report_fatal_error(Error);
}
bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
- std::string &Error) {
+ vfs::FileSystem &VFS, std::string &Error) {
StringMap<size_t> Sections;
for (const auto &Path : Paths) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
- MemoryBuffer::getFile(Path);
+ VFS.getBufferForFile(Path);
if (std::error_code EC = FileOrErr.getError()) {
Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
return false;
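
Call-site impact of threading a FileSystem through (a sketch; assumes the
existing llvm::vfs::getRealFileSystem() accessor for the common case):

    #include "llvm/Support/SpecialCaseList.h"
    #include "llvm/Support/VirtualFileSystem.h"
    #include <memory>
    #include <string>
    #include <vector>
    using namespace llvm;

    static std::unique_ptr<SpecialCaseList>
    loadLists(const std::vector<std::string> &Paths, std::string &Error) {
      // Unit tests can now pass an InMemoryFileSystem here instead of
      // writing temporary files to disk.
      return SpecialCaseList::create(Paths, *vfs::getRealFileSystem(), Error);
    }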
diff --git a/contrib/llvm-project/llvm/lib/Support/Statistic.cpp b/contrib/llvm-project/llvm/lib/Support/Statistic.cpp
index 8b4177c7fba6..25f13871e2e4 100644
--- a/contrib/llvm-project/llvm/lib/Support/Statistic.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Statistic.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
/// -stats - Command line option to cause transformations to emit stats about
/// what they did.
///
-static cl::opt<bool> Stats(
+static cl::opt<bool> EnableStats(
"stats",
cl::desc("Enable statistics output from program (available with Asserts)"),
cl::Hidden);
@@ -104,7 +104,7 @@ void TrackingStatistic::RegisterStatistic() {
// Check Initialized again after acquiring the lock.
if (Initialized.load(std::memory_order_relaxed))
return;
- if (Stats || Enabled)
+ if (EnableStats || Enabled)
SI.addStatistic(this);
// Remember we have been registered.
@@ -119,17 +119,17 @@ StatisticInfo::StatisticInfo() {
// Print information when destroyed, iff command line option is specified.
StatisticInfo::~StatisticInfo() {
- if (::Stats || PrintOnExit)
+ if (EnableStats || PrintOnExit)
llvm::PrintStatistics();
}
-void llvm::EnableStatistics(bool PrintOnExit) {
+void llvm::EnableStatistics(bool DoPrintOnExit) {
Enabled = true;
- ::PrintOnExit = PrintOnExit;
+ PrintOnExit = DoPrintOnExit;
}
bool llvm::AreStatisticsEnabled() {
- return Enabled || Stats;
+ return Enabled || EnableStats;
}
void StatisticInfo::sort() {
@@ -242,7 +242,7 @@ void llvm::PrintStatistics() {
// Check if the -stats option is set instead of checking
// !Stats.Stats.empty(). In release builds, Statistics operators
// do nothing, so stats are never Registered.
- if (Stats) {
+ if (EnableStats) {
// Get the stream to write to.
std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile();
(*OutStream) << "Statistics are disabled. "
diff --git a/contrib/llvm-project/llvm/lib/Support/StringRef.cpp b/contrib/llvm-project/llvm/lib/Support/StringRef.cpp
index 4bafc4ec7181..104482de4ad7 100644
--- a/contrib/llvm-project/llvm/lib/Support/StringRef.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/StringRef.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/edit_distance.h"
+#include "llvm/Support/Error.h"
#include <bitset>
using namespace llvm;
@@ -372,11 +373,16 @@ void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
size_t StringRef::count(StringRef Str) const {
size_t Count = 0;
size_t N = Str.size();
- if (N > Length)
+ if (!N || N > Length)
return 0;
- for (size_t i = 0, e = Length - N + 1; i != e; ++i)
- if (substr(i, N).equals(Str))
+ for (size_t i = 0, e = Length - N + 1; i < e;) {
+ if (substr(i, N).equals(Str)) {
++Count;
+ i += N;
+ }
+ else
+ ++i;
+ }
return Count;
}
@@ -582,8 +588,11 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
APFloat F(0.0);
- APFloat::opStatus Status =
- F.convertFromString(*this, APFloat::rmNearestTiesToEven);
+ auto StatusOrErr = F.convertFromString(*this, APFloat::rmNearestTiesToEven);
+ if (errorToBool(StatusOrErr.takeError()))
+ return true;
+
+ APFloat::opStatus Status = *StatusOrErr;
if (Status != APFloat::opOK) {
if (!AllowInexact || !(Status & APFloat::opInexact))
return true;
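
The two behavior changes to StringRef::count, illustrated (a sketch assuming
this patch is applied):

    #include "llvm/ADT/StringRef.h"
    #include <cassert>
    using namespace llvm;

    static void demo() {
      // Matches no longer overlap: after a hit the scan skips past it.
      assert(StringRef("aaaa").count("aa") == 2); // previously 3
      // An empty needle is rejected up front instead of matching everywhere.
      assert(StringRef("abc").count("") == 0);
    }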
diff --git a/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp b/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp
index d213b9a8c6af..84ead58b98cd 100644
--- a/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp
@@ -132,7 +132,7 @@ StringRef llvm::AMDGPU::getArchNameR600(GPUKind AK) {
}
AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) {
- for (const auto C : AMDGCNGPUs) {
+ for (const auto &C : AMDGCNGPUs) {
if (CPU == C.Name)
return C.Kind;
}
@@ -141,7 +141,7 @@ AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) {
}
AMDGPU::GPUKind llvm::AMDGPU::parseArchR600(StringRef CPU) {
- for (const auto C : R600GPUs) {
+ for (const auto &C : R600GPUs) {
if (CPU == C.Name)
return C.Kind;
}
@@ -163,12 +163,12 @@ unsigned AMDGPU::getArchAttrR600(GPUKind AK) {
void AMDGPU::fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values) {
// XXX: Should this only report unique canonical names?
- for (const auto C : AMDGCNGPUs)
+ for (const auto &C : AMDGCNGPUs)
Values.push_back(C.Name);
}
void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
- for (const auto C : R600GPUs)
+ for (const auto &C : R600GPUs)
Values.push_back(C.Name);
}
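
The general C++ point behind these one-character fixes (a standalone sketch,
not LLVM code): `const auto C` copies each element on every iteration, while
`const auto &C` binds to it in place.

    #include <string>
    #include <vector>

    static void demo(const std::vector<std::string> &Names) {
      for (const auto &Name : Names) // no per-iteration std::string copy
        (void)Name;
    }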
diff --git a/contrib/llvm-project/llvm/lib/Support/Threading.cpp b/contrib/llvm-project/llvm/lib/Support/Threading.cpp
index e5899a60f4db..48750cef5ec2 100644
--- a/contrib/llvm-project/llvm/lib/Support/Threading.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Threading.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Threading.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Host.h"
@@ -39,8 +40,8 @@ bool llvm::llvm_is_multithreaded() {
(!defined(_WIN32) && !defined(HAVE_PTHREAD_H))
// Support for non-Win32, non-pthread implementation.
void llvm::llvm_execute_on_thread(void (*Fn)(void *), void *UserData,
- unsigned RequestedStackSize) {
- (void)RequestedStackSize;
+ llvm::Optional<unsigned> StackSizeInBytes) {
+ (void)StackSizeInBytes;
Fn(UserData);
}
@@ -56,6 +57,25 @@ void llvm::set_thread_name(const Twine &Name) {}
void llvm::get_thread_name(SmallVectorImpl<char> &Name) { Name.clear(); }
+#if LLVM_ENABLE_THREADS == 0
+void llvm::llvm_execute_on_thread_async(
+ llvm::unique_function<void()> Func,
+ llvm::Optional<unsigned> StackSizeInBytes) {
+ (void)Func;
+ (void)StackSizeInBytes;
+ report_fatal_error("Spawning a detached thread doesn't make sense with no "
+ "threading support");
+}
+#else
+// Support for non-Win32, non-pthread implementation.
+void llvm::llvm_execute_on_thread_async(
+ llvm::unique_function<void()> Func,
+ llvm::Optional<unsigned> StackSizeInBytes) {
+ (void)StackSizeInBytes;
+ std::thread(std::move(Func)).detach();
+}
+#endif
+
#else
#include <thread>
@@ -84,6 +104,17 @@ unsigned llvm::hardware_concurrency() {
return 1;
}
+namespace {
+struct SyncThreadInfo {
+ void (*UserFn)(void *);
+ void *UserData;
+};
+
+using AsyncThreadInfo = llvm::unique_function<void()>;
+
+enum class JoiningPolicy { Join, Detach };
+} // namespace
+
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/Threading.inc"
@@ -92,4 +123,20 @@ unsigned llvm::hardware_concurrency() {
#include "Windows/Threading.inc"
#endif
+void llvm::llvm_execute_on_thread(void (*Fn)(void *), void *UserData,
+ llvm::Optional<unsigned> StackSizeInBytes) {
+
+ SyncThreadInfo Info = {Fn, UserData};
+ llvm_execute_on_thread_impl(threadFuncSync, &Info, StackSizeInBytes,
+ JoiningPolicy::Join);
+}
+
+void llvm::llvm_execute_on_thread_async(
+ llvm::unique_function<void()> Func,
+ llvm::Optional<unsigned> StackSizeInBytes) {
+ llvm_execute_on_thread_impl(&threadFuncAsync,
+ new AsyncThreadInfo(std::move(Func)),
+ StackSizeInBytes, JoiningPolicy::Detach);
+}
+
#endif
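
A hypothetical use of the new detached-thread helper (a sketch; passing
llvm::None for the stack size is an assumption about the intended default):

    #include "llvm/ADT/Optional.h"
    #include "llvm/Support/Threading.h"

    static void spawnBackgroundTask() {
      llvm::llvm_execute_on_thread_async(
          [] {
            // Runs on a detached thread; nothing may rely on joining it
            // before process exit.
          },
          /*StackSizeInBytes=*/llvm::None);
    }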
diff --git a/contrib/llvm-project/llvm/lib/Support/TimeProfiler.cpp b/contrib/llvm-project/llvm/lib/Support/TimeProfiler.cpp
index ca9119e30b65..a7c85509064e 100644
--- a/contrib/llvm-project/llvm/lib/Support/TimeProfiler.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/TimeProfiler.cpp
@@ -13,8 +13,8 @@
#include "llvm/Support/TimeProfiler.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
+#include "llvm/Support/Path.h"
#include <cassert>
#include <chrono>
#include <string>
@@ -33,14 +33,14 @@ typedef std::pair<std::string, CountAndDurationType>
NameAndCountAndDurationType;
struct Entry {
- TimePointType Start;
+ const TimePointType Start;
TimePointType End;
- std::string Name;
- std::string Detail;
+ const std::string Name;
+ const std::string Detail;
Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
: Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
- Detail(std::move(Dt)){};
+ Detail(std::move(Dt)) {}
// Calculate timings for FlameGraph. Cast time points to microsecond precision
// rather than casting duration. This avoids truncation issues causing inner
@@ -59,9 +59,9 @@ struct Entry {
};
struct TimeTraceProfiler {
- TimeTraceProfiler() {
- StartTime = steady_clock::now();
- }
+ TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
+ : StartTime(steady_clock::now()), ProcName(ProcName),
+ TimeTraceGranularity(TimeTraceGranularity) {}
void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
@@ -123,7 +123,9 @@ struct TimeTraceProfiler {
J.attribute("ts", StartUs);
J.attribute("dur", DurUs);
J.attribute("name", E.Name);
- J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
+ if (!E.Detail.empty()) {
+ J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
+ }
});
}
@@ -168,7 +170,7 @@ struct TimeTraceProfiler {
J.attribute("ts", 0);
J.attribute("ph", "M");
J.attribute("name", "process_name");
- J.attributeObject("args", [&] { J.attribute("name", "clang"); });
+ J.attributeObject("args", [&] { J.attribute("name", ProcName); });
});
J.arrayEnd();
@@ -179,17 +181,19 @@ struct TimeTraceProfiler {
SmallVector<Entry, 16> Stack;
SmallVector<Entry, 128> Entries;
StringMap<CountAndDurationType> CountAndTotalPerName;
- TimePointType StartTime;
+ const TimePointType StartTime;
+ const std::string ProcName;
// Minimum time granularity (in microseconds)
- unsigned TimeTraceGranularity;
+ const unsigned TimeTraceGranularity;
};
-void timeTraceProfilerInitialize(unsigned TimeTraceGranularity) {
+void timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
+ StringRef ProcName) {
assert(TimeTraceProfilerInstance == nullptr &&
"Profiler should not be initialized");
- TimeTraceProfilerInstance = new TimeTraceProfiler();
- TimeTraceProfilerInstance->TimeTraceGranularity = TimeTraceGranularity;
+ TimeTraceProfilerInstance = new TimeTraceProfiler(
+ TimeTraceGranularity, llvm::sys::path::filename(ProcName));
}
void timeTraceProfilerCleanup() {
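
A sketch of the updated initialization (granularity is in microseconds per the
member comment above; the value 500 is only an example):

    #include "llvm/Support/TimeProfiler.h"

    static void startProfiling(const char *Argv0) {
      // Events below the granularity are filtered from the trace, and the
      // trace's process_name metadata becomes filename(Argv0) instead of a
      // hardcoded "clang".
      llvm::timeTraceProfilerInitialize(/*TimeTraceGranularity=*/500, Argv0);
    }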
diff --git a/contrib/llvm-project/llvm/lib/Support/Timer.cpp b/contrib/llvm-project/llvm/lib/Support/Timer.cpp
index 10c9b8e0b329..613d2eaae6d3 100644
--- a/contrib/llvm-project/llvm/lib/Support/Timer.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Timer.cpp
@@ -91,14 +91,15 @@ static TimerGroup *getDefaultTimerGroup() { return &*DefaultTimerGroup; }
// Timer Implementation
//===----------------------------------------------------------------------===//
-void Timer::init(StringRef Name, StringRef Description) {
- init(Name, Description, *getDefaultTimerGroup());
+void Timer::init(StringRef TimerName, StringRef TimerDescription) {
+ init(TimerName, TimerDescription, *getDefaultTimerGroup());
}
-void Timer::init(StringRef Name, StringRef Description, TimerGroup &tg) {
+void Timer::init(StringRef TimerName, StringRef TimerDescription,
+ TimerGroup &tg) {
assert(!TG && "Timer already initialized");
- this->Name.assign(Name.begin(), Name.end());
- this->Description.assign(Description.begin(), Description.end());
+ Name.assign(TimerName.begin(), TimerName.end());
+ Description.assign(TimerDescription.begin(), TimerDescription.end());
Running = Triggered = false;
TG = &tg;
TG->addTimer(*this);
diff --git a/contrib/llvm-project/llvm/lib/Support/Triple.cpp b/contrib/llvm-project/llvm/lib/Support/Triple.cpp
index 18b013e1df3f..2c480c1094a5 100644
--- a/contrib/llvm-project/llvm/lib/Support/Triple.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Triple.cpp
@@ -21,55 +21,56 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case UnknownArch: return "unknown";
case aarch64: return "aarch64";
- case aarch64_be: return "aarch64_be";
case aarch64_32: return "aarch64_32";
+ case aarch64_be: return "aarch64_be";
+ case amdgcn: return "amdgcn";
+ case amdil64: return "amdil64";
+ case amdil: return "amdil";
+ case arc: return "arc";
case arm: return "arm";
case armeb: return "armeb";
- case arc: return "arc";
case avr: return "avr";
- case bpfel: return "bpfel";
case bpfeb: return "bpfeb";
+ case bpfel: return "bpfel";
case hexagon: return "hexagon";
- case mips: return "mips";
- case mipsel: return "mipsel";
+ case hsail64: return "hsail64";
+ case hsail: return "hsail";
+ case kalimba: return "kalimba";
+ case lanai: return "lanai";
+ case le32: return "le32";
+ case le64: return "le64";
case mips64: return "mips64";
case mips64el: return "mips64el";
+ case mips: return "mips";
+ case mipsel: return "mipsel";
case msp430: return "msp430";
+ case nvptx64: return "nvptx64";
+ case nvptx: return "nvptx";
case ppc64: return "powerpc64";
case ppc64le: return "powerpc64le";
case ppc: return "powerpc";
case r600: return "r600";
- case amdgcn: return "amdgcn";
+ case renderscript32: return "renderscript32";
+ case renderscript64: return "renderscript64";
case riscv32: return "riscv32";
case riscv64: return "riscv64";
+ case shave: return "shave";
case sparc: return "sparc";
- case sparcv9: return "sparcv9";
case sparcel: return "sparcel";
+ case sparcv9: return "sparcv9";
+ case spir64: return "spir64";
+ case spir: return "spir";
case systemz: return "s390x";
case tce: return "tce";
case tcele: return "tcele";
case thumb: return "thumb";
case thumbeb: return "thumbeb";
+ case ve: return "ve";
+ case wasm32: return "wasm32";
+ case wasm64: return "wasm64";
case x86: return "i386";
case x86_64: return "x86_64";
case xcore: return "xcore";
- case nvptx: return "nvptx";
- case nvptx64: return "nvptx64";
- case le32: return "le32";
- case le64: return "le64";
- case amdil: return "amdil";
- case amdil64: return "amdil64";
- case hsail: return "hsail";
- case hsail64: return "hsail64";
- case spir: return "spir";
- case spir64: return "spir64";
- case kalimba: return "kalimba";
- case lanai: return "lanai";
- case shave: return "shave";
- case wasm32: return "wasm32";
- case wasm64: return "wasm64";
- case renderscript32: return "renderscript32";
- case renderscript64: return "renderscript64";
}
llvm_unreachable("Invalid ArchType!");
@@ -144,6 +145,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
case riscv32:
case riscv64: return "riscv";
+
+ case ve: return "ve";
}
}
@@ -151,22 +154,22 @@ StringRef Triple::getVendorTypeName(VendorType Kind) {
switch (Kind) {
case UnknownVendor: return "unknown";
+ case AMD: return "amd";
case Apple: return "apple";
- case PC: return "pc";
- case SCEI: return "scei";
case BGP: return "bgp";
case BGQ: return "bgq";
+ case CSR: return "csr";
case Freescale: return "fsl";
case IBM: return "ibm";
case ImaginationTechnologies: return "img";
+ case Mesa: return "mesa";
case MipsTechnologies: return "mti";
- case NVIDIA: return "nvidia";
- case CSR: return "csr";
case Myriad: return "myriad";
- case AMD: return "amd";
- case Mesa: return "mesa";
- case SUSE: return "suse";
+ case NVIDIA: return "nvidia";
case OpenEmbedded: return "oe";
+ case PC: return "pc";
+ case SCEI: return "scei";
+ case SUSE: return "suse";
}
llvm_unreachable("Invalid VendorType!");
@@ -176,41 +179,41 @@ StringRef Triple::getOSTypeName(OSType Kind) {
switch (Kind) {
case UnknownOS: return "unknown";
+ case AIX: return "aix";
+ case AMDHSA: return "amdhsa";
+ case AMDPAL: return "amdpal";
case Ananas: return "ananas";
+ case CNK: return "cnk";
+ case CUDA: return "cuda";
case CloudABI: return "cloudabi";
+ case Contiki: return "contiki";
case Darwin: return "darwin";
case DragonFly: return "dragonfly";
+ case ELFIAMCU: return "elfiamcu";
+ case Emscripten: return "emscripten";
case FreeBSD: return "freebsd";
case Fuchsia: return "fuchsia";
+ case Haiku: return "haiku";
+ case HermitCore: return "hermit";
+ case Hurd: return "hurd";
case IOS: return "ios";
case KFreeBSD: return "kfreebsd";
case Linux: return "linux";
case Lv2: return "lv2";
case MacOSX: return "macosx";
- case NetBSD: return "netbsd";
- case OpenBSD: return "openbsd";
- case Solaris: return "solaris";
- case Win32: return "windows";
- case Haiku: return "haiku";
+ case Mesa3D: return "mesa3d";
case Minix: return "minix";
- case RTEMS: return "rtems";
- case NaCl: return "nacl";
- case CNK: return "cnk";
- case AIX: return "aix";
- case CUDA: return "cuda";
case NVCL: return "nvcl";
- case AMDHSA: return "amdhsa";
+ case NaCl: return "nacl";
+ case NetBSD: return "netbsd";
+ case OpenBSD: return "openbsd";
case PS4: return "ps4";
- case ELFIAMCU: return "elfiamcu";
+ case RTEMS: return "rtems";
+ case Solaris: return "solaris";
case TvOS: return "tvos";
- case WatchOS: return "watchos";
- case Mesa3D: return "mesa3d";
- case Contiki: return "contiki";
- case AMDPAL: return "amdpal";
- case HermitCore: return "hermit";
- case Hurd: return "hurd";
case WASI: return "wasi";
- case Emscripten: return "emscripten";
+ case WatchOS: return "watchos";
+ case Win32: return "windows";
}
llvm_unreachable("Invalid OSType");
@@ -219,27 +222,25 @@ StringRef Triple::getOSTypeName(OSType Kind) {
StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
switch (Kind) {
case UnknownEnvironment: return "unknown";
+ case Android: return "android";
+ case CODE16: return "code16";
+ case CoreCLR: return "coreclr";
+ case Cygnus: return "cygnus";
+ case EABI: return "eabi";
+ case EABIHF: return "eabihf";
case GNU: return "gnu";
- case GNUABIN32: return "gnuabin32";
case GNUABI64: return "gnuabi64";
- case GNUEABIHF: return "gnueabihf";
+ case GNUABIN32: return "gnuabin32";
case GNUEABI: return "gnueabi";
+ case GNUEABIHF: return "gnueabihf";
case GNUX32: return "gnux32";
- case CODE16: return "code16";
- case EABI: return "eabi";
- case EABIHF: return "eabihf";
- case ELFv1: return "elfv1";
- case ELFv2: return "elfv2";
- case Android: return "android";
+ case Itanium: return "itanium";
+ case MSVC: return "msvc";
+ case MacABI: return "macabi";
case Musl: return "musl";
case MuslEABI: return "musleabi";
case MuslEABIHF: return "musleabihf";
- case MSVC: return "msvc";
- case Itanium: return "itanium";
- case Cygnus: return "cygnus";
- case CoreCLR: return "coreclr";
case Simulator: return "simulator";
- case MacABI: return "macabi";
}
llvm_unreachable("Invalid EnvironmentType!");
@@ -315,6 +316,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("wasm64", wasm64)
.Case("renderscript32", renderscript32)
.Case("renderscript64", renderscript64)
+ .Case("ve", ve)
.Default(UnknownArch);
}
@@ -438,11 +440,12 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("spir64", Triple::spir64)
.StartsWith("kalimba", Triple::kalimba)
.Case("lanai", Triple::lanai)
+ .Case("renderscript32", Triple::renderscript32)
+ .Case("renderscript64", Triple::renderscript64)
.Case("shave", Triple::shave)
+ .Case("ve", Triple::ve)
.Case("wasm32", Triple::wasm32)
.Case("wasm64", Triple::wasm64)
- .Case("renderscript32", Triple::renderscript32)
- .Case("renderscript64", Triple::renderscript64)
.Default(Triple::UnknownArch);
// Some architectures require special parsing logic just to compute the
@@ -524,8 +527,6 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
return StringSwitch<Triple::EnvironmentType>(EnvironmentName)
.StartsWith("eabihf", Triple::EABIHF)
.StartsWith("eabi", Triple::EABI)
- .StartsWith("elfv1", Triple::ELFv1)
- .StartsWith("elfv2", Triple::ELFv2)
.StartsWith("gnuabin32", Triple::GNUABIN32)
.StartsWith("gnuabi64", Triple::GNUABI64)
.StartsWith("gnueabihf", Triple::GNUEABIHF)
@@ -640,10 +641,10 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
static StringRef getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
switch (Kind) {
case Triple::UnknownObjectFormat: return "";
- case Triple::COFF: return "coff";
- case Triple::ELF: return "elf";
+ case Triple::COFF: return "coff";
+ case Triple::ELF: return "elf";
case Triple::MachO: return "macho";
- case Triple::Wasm: return "wasm";
+ case Triple::Wasm: return "wasm";
case Triple::XCOFF: return "xcoff";
}
llvm_unreachable("unknown object format type");
@@ -665,28 +666,28 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
return Triple::ELF;
case Triple::aarch64_be:
- case Triple::arc:
case Triple::amdgcn:
- case Triple::amdil:
case Triple::amdil64:
+ case Triple::amdil:
+ case Triple::arc:
case Triple::armeb:
case Triple::avr:
case Triple::bpfeb:
case Triple::bpfel:
case Triple::hexagon:
- case Triple::lanai:
- case Triple::hsail:
case Triple::hsail64:
+ case Triple::hsail:
case Triple::kalimba:
+ case Triple::lanai:
case Triple::le32:
case Triple::le64:
- case Triple::mips:
case Triple::mips64:
case Triple::mips64el:
+ case Triple::mips:
case Triple::mipsel:
case Triple::msp430:
- case Triple::nvptx:
case Triple::nvptx64:
+ case Triple::nvptx:
case Triple::ppc64le:
case Triple::r600:
case Triple::renderscript32:
@@ -697,17 +698,18 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::sparc:
case Triple::sparcel:
case Triple::sparcv9:
- case Triple::spir:
case Triple::spir64:
+ case Triple::spir:
case Triple::systemz:
case Triple::tce:
case Triple::tcele:
case Triple::thumbeb:
+ case Triple::ve:
case Triple::xcore:
return Triple::ELF;
- case Triple::ppc:
case Triple::ppc64:
+ case Triple::ppc:
if (T.isOSDarwin())
return Triple::MachO;
else if (T.isOSAIX())
@@ -1238,55 +1240,56 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
return 16;
case llvm::Triple::aarch64_32:
+ case llvm::Triple::amdil:
case llvm::Triple::arc:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::hexagon:
+ case llvm::Triple::hsail:
+ case llvm::Triple::kalimba:
+ case llvm::Triple::lanai:
case llvm::Triple::le32:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::nvptx:
case llvm::Triple::ppc:
case llvm::Triple::r600:
+ case llvm::Triple::renderscript32:
case llvm::Triple::riscv32:
+ case llvm::Triple::shave:
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
+ case llvm::Triple::spir:
case llvm::Triple::tce:
case llvm::Triple::tcele:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
+ case llvm::Triple::wasm32:
case llvm::Triple::x86:
case llvm::Triple::xcore:
- case llvm::Triple::amdil:
- case llvm::Triple::hsail:
- case llvm::Triple::spir:
- case llvm::Triple::kalimba:
- case llvm::Triple::lanai:
- case llvm::Triple::shave:
- case llvm::Triple::wasm32:
- case llvm::Triple::renderscript32:
return 32;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::amdgcn:
- case llvm::Triple::bpfel:
+ case llvm::Triple::amdil64:
case llvm::Triple::bpfeb:
+ case llvm::Triple::bpfel:
+ case llvm::Triple::hsail64:
case llvm::Triple::le64:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::nvptx64:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
+ case llvm::Triple::renderscript64:
case llvm::Triple::riscv64:
case llvm::Triple::sparcv9:
- case llvm::Triple::systemz:
- case llvm::Triple::x86_64:
- case llvm::Triple::amdil64:
- case llvm::Triple::hsail64:
case llvm::Triple::spir64:
+ case llvm::Triple::systemz:
+ case llvm::Triple::ve:
case llvm::Triple::wasm64:
- case llvm::Triple::renderscript64:
+ case llvm::Triple::x86_64:
return 64;
}
llvm_unreachable("Invalid architecture value");
@@ -1310,60 +1313,61 @@ Triple Triple::get32BitArchVariant() const {
case Triple::UnknownArch:
case Triple::amdgcn:
case Triple::avr:
- case Triple::bpfel:
case Triple::bpfeb:
+ case Triple::bpfel:
case Triple::msp430:
- case Triple::systemz:
case Triple::ppc64le:
+ case Triple::systemz:
+ case Triple::ve:
T.setArch(UnknownArch);
break;
case Triple::aarch64_32:
case Triple::amdil:
- case Triple::hsail:
- case Triple::spir:
case Triple::arc:
case Triple::arm:
case Triple::armeb:
case Triple::hexagon:
+ case Triple::hsail:
case Triple::kalimba:
+ case Triple::lanai:
case Triple::le32:
case Triple::mips:
case Triple::mipsel:
case Triple::nvptx:
case Triple::ppc:
case Triple::r600:
+ case Triple::renderscript32:
case Triple::riscv32:
+ case Triple::shave:
case Triple::sparc:
case Triple::sparcel:
+ case Triple::spir:
case Triple::tce:
case Triple::tcele:
case Triple::thumb:
case Triple::thumbeb:
+ case Triple::wasm32:
case Triple::x86:
case Triple::xcore:
- case Triple::lanai:
- case Triple::shave:
- case Triple::wasm32:
- case Triple::renderscript32:
// Already 32-bit.
break;
case Triple::aarch64: T.setArch(Triple::arm); break;
case Triple::aarch64_be: T.setArch(Triple::armeb); break;
+ case Triple::amdil64: T.setArch(Triple::amdil); break;
+ case Triple::hsail64: T.setArch(Triple::hsail); break;
case Triple::le64: T.setArch(Triple::le32); break;
case Triple::mips64: T.setArch(Triple::mips); break;
case Triple::mips64el: T.setArch(Triple::mipsel); break;
case Triple::nvptx64: T.setArch(Triple::nvptx); break;
case Triple::ppc64: T.setArch(Triple::ppc); break;
- case Triple::sparcv9: T.setArch(Triple::sparc); break;
+ case Triple::renderscript64: T.setArch(Triple::renderscript32); break;
case Triple::riscv64: T.setArch(Triple::riscv32); break;
- case Triple::x86_64: T.setArch(Triple::x86); break;
- case Triple::amdil64: T.setArch(Triple::amdil); break;
- case Triple::hsail64: T.setArch(Triple::hsail); break;
+ case Triple::sparcv9: T.setArch(Triple::sparc); break;
case Triple::spir64: T.setArch(Triple::spir); break;
case Triple::wasm64: T.setArch(Triple::wasm32); break;
- case Triple::renderscript64: T.setArch(Triple::renderscript32); break;
+ case Triple::x86_64: T.setArch(Triple::x86); break;
}
return T;
}
@@ -1379,55 +1383,56 @@ Triple Triple::get64BitArchVariant() const {
case Triple::lanai:
case Triple::msp430:
case Triple::r600:
+ case Triple::shave:
+ case Triple::sparcel:
case Triple::tce:
case Triple::tcele:
case Triple::xcore:
- case Triple::sparcel:
- case Triple::shave:
T.setArch(UnknownArch);
break;
case Triple::aarch64:
case Triple::aarch64_be:
- case Triple::bpfel:
- case Triple::bpfeb:
- case Triple::le64:
- case Triple::amdil64:
case Triple::amdgcn:
+ case Triple::amdil64:
+ case Triple::bpfeb:
+ case Triple::bpfel:
case Triple::hsail64:
- case Triple::spir64:
+ case Triple::le64:
case Triple::mips64:
case Triple::mips64el:
case Triple::nvptx64:
case Triple::ppc64:
case Triple::ppc64le:
+ case Triple::renderscript64:
case Triple::riscv64:
case Triple::sparcv9:
+ case Triple::spir64:
case Triple::systemz:
- case Triple::x86_64:
+ case Triple::ve:
case Triple::wasm64:
- case Triple::renderscript64:
+ case Triple::x86_64:
// Already 64-bit.
break;
case Triple::aarch64_32: T.setArch(Triple::aarch64); break;
+ case Triple::amdil: T.setArch(Triple::amdil64); break;
case Triple::arm: T.setArch(Triple::aarch64); break;
case Triple::armeb: T.setArch(Triple::aarch64_be); break;
+ case Triple::hsail: T.setArch(Triple::hsail64); break;
case Triple::le32: T.setArch(Triple::le64); break;
case Triple::mips: T.setArch(Triple::mips64); break;
case Triple::mipsel: T.setArch(Triple::mips64el); break;
case Triple::nvptx: T.setArch(Triple::nvptx64); break;
case Triple::ppc: T.setArch(Triple::ppc64); break;
- case Triple::sparc: T.setArch(Triple::sparcv9); break;
+ case Triple::renderscript32: T.setArch(Triple::renderscript64); break;
case Triple::riscv32: T.setArch(Triple::riscv64); break;
- case Triple::x86: T.setArch(Triple::x86_64); break;
- case Triple::amdil: T.setArch(Triple::amdil64); break;
- case Triple::hsail: T.setArch(Triple::hsail64); break;
+ case Triple::sparc: T.setArch(Triple::sparcv9); break;
case Triple::spir: T.setArch(Triple::spir64); break;
case Triple::thumb: T.setArch(Triple::aarch64); break;
case Triple::thumbeb: T.setArch(Triple::aarch64_be); break;
case Triple::wasm32: T.setArch(Triple::wasm64); break;
- case Triple::renderscript32: T.setArch(Triple::renderscript64); break;
+ case Triple::x86: T.setArch(Triple::x86_64); break;
}
return T;
}
@@ -1453,6 +1458,8 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::nvptx64:
case Triple::nvptx:
case Triple::r600:
+ case Triple::renderscript32:
+ case Triple::renderscript64:
case Triple::riscv32:
case Triple::riscv64:
case Triple::shave:
@@ -1463,8 +1470,7 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::x86:
case Triple::x86_64:
case Triple::xcore:
- case Triple::renderscript32:
- case Triple::renderscript64:
+ case Triple::ve:
// ARM is intentionally unsupported here, changing the architecture would
// drop any arch suffixes.
@@ -1473,13 +1479,13 @@ Triple Triple::getBigEndianArchVariant() const {
T.setArch(UnknownArch);
break;
- case Triple::tcele: T.setArch(Triple::tce); break;
case Triple::aarch64: T.setArch(Triple::aarch64_be); break;
case Triple::bpfel: T.setArch(Triple::bpfeb); break;
case Triple::mips64el:T.setArch(Triple::mips64); break;
case Triple::mipsel: T.setArch(Triple::mips); break;
case Triple::ppc64le: T.setArch(Triple::ppc64); break;
case Triple::sparcel: T.setArch(Triple::sparc); break;
+ case Triple::tcele: T.setArch(Triple::tce); break;
default:
llvm_unreachable("getBigEndianArchVariant: unknown triple.");
}
@@ -1505,13 +1511,13 @@ Triple Triple::getLittleEndianArchVariant() const {
T.setArch(UnknownArch);
break;
- case Triple::tce: T.setArch(Triple::tcele); break;
case Triple::aarch64_be: T.setArch(Triple::aarch64); break;
case Triple::bpfeb: T.setArch(Triple::bpfel); break;
case Triple::mips64: T.setArch(Triple::mips64el); break;
case Triple::mips: T.setArch(Triple::mipsel); break;
case Triple::ppc64: T.setArch(Triple::ppc64le); break;
case Triple::sparc: T.setArch(Triple::sparcel); break;
+ case Triple::tce: T.setArch(Triple::tcele); break;
default:
llvm_unreachable("getLittleEndianArchVariant: unknown triple.");
}
@@ -1541,21 +1547,22 @@ bool Triple::isLittleEndian() const {
case Triple::nvptx:
case Triple::ppc64le:
case Triple::r600:
+ case Triple::renderscript32:
+ case Triple::renderscript64:
case Triple::riscv32:
case Triple::riscv64:
case Triple::shave:
case Triple::sparcel:
case Triple::spir64:
case Triple::spir:
+ case Triple::tcele:
case Triple::thumb:
+ case Triple::ve:
case Triple::wasm32:
case Triple::wasm64:
case Triple::x86:
case Triple::x86_64:
case Triple::xcore:
- case Triple::tcele:
- case Triple::renderscript32:
- case Triple::renderscript64:
return true;
default:
return false;
@@ -1610,10 +1617,10 @@ StringRef Triple::getARMCPUForArch(StringRef MArch) const {
case llvm::Triple::Win32:
// FIXME: this is invalid for WindowsCE
return "cortex-a9";
- case llvm::Triple::MacOSX:
case llvm::Triple::IOS:
- case llvm::Triple::WatchOS:
+ case llvm::Triple::MacOSX:
case llvm::Triple::TvOS:
+ case llvm::Triple::WatchOS:
if (MArch == "v7k")
return "cortex-a7";
break;
@@ -1633,10 +1640,10 @@ StringRef Triple::getARMCPUForArch(StringRef MArch) const {
switch (getOS()) {
case llvm::Triple::NetBSD:
switch (getEnvironment()) {
- case llvm::Triple::GNUEABIHF:
- case llvm::Triple::GNUEABI:
- case llvm::Triple::EABIHF:
case llvm::Triple::EABI:
+ case llvm::Triple::EABIHF:
+ case llvm::Triple::GNUEABI:
+ case llvm::Triple::GNUEABIHF:
return "arm926ej-s";
default:
return "strongarm";
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Memory.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Memory.inc
index 05f8e32896fa..79b1759359e1 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Memory.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Memory.inc
@@ -42,9 +42,7 @@ extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
extern "C" void __clear_cache(void *, void*);
#endif
-namespace {
-
-int getPosixProtectionFlags(unsigned Flags) {
+static int getPosixProtectionFlags(unsigned Flags) {
switch (Flags & llvm::sys::Memory::MF_RWE_MASK) {
case llvm::sys::Memory::MF_READ:
return PROT_READ;
@@ -76,8 +74,6 @@ int getPosixProtectionFlags(unsigned Flags) {
return PROT_NONE;
}
-} // anonymous namespace
-
namespace llvm {
namespace sys {
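
The Memory.inc hunk above is a mechanical linkage cleanup: the helper moves out of an anonymous namespace and becomes a static function, which preserves internal linkage while making it visible at the point of definition (the form LLVM's coding standards prefer for functions). A schematic illustration, with placeholder names:

    // Before: internal linkage came from the enclosing anonymous namespace.
    namespace {
    int helperInNamespace(unsigned Flags) { return int(Flags & 1); }
    } // anonymous namespace

    // After: the same internal linkage, stated on the function itself.
    static int helperAsStatic(unsigned Flags) { return int(Flags & 1); }

    int main() { return helperInNamespace(1) + helperAsStatic(0); }
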
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc
index 03f330622e99..2a03dc682bce 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Path.inc
@@ -201,8 +201,8 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
if (elf_aux_info(AT_EXECPATH, exe_path, sizeof(exe_path)) == 0)
return exe_path;
#else
- // elf_aux_info(AT_EXECPATH, ... is not available on older FreeBSD. Fall
- // back to finding the ELF auxiliary vectors after the processes's
+ // elf_aux_info(AT_EXECPATH, ... is not available in all supported versions,
+ // fall back to finding the ELF auxiliary vectors after the process's
// environment.
char **p = ::environ;
while (*p++ != 0)
@@ -221,7 +221,7 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
// Fall back to argv[0] if auxiliary vectors are not available.
if (getprogpath(exe_path, argv0) != NULL)
return exe_path;
-#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__minix) || \
+#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__minix) || \
defined(__DragonFly__) || defined(__FreeBSD_kernel__) || defined(_AIX)
const char *curproc = "/proc/curproc/file";
char exe_path[PATH_MAX];
@@ -238,7 +238,7 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
// If we don't have procfs mounted, fall back to argv[0]
if (getprogpath(exe_path, argv0) != NULL)
return exe_path;
-#elif defined(__linux__) || defined(__CYGWIN__)
+#elif defined(__linux__) || defined(__CYGWIN__) || defined(__gnu_hurd__)
char exe_path[MAXPATHLEN];
const char *aPath = "/proc/self/exe";
if (sys::fs::exists(aPath)) {
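
None of the fallback machinery above (elf_aux_info, walking the ELF auxiliary vectors, /proc/curproc/file, and /proc/self/exe, which the new __gnu_hurd__ branch also uses) is visible to callers; it all sits behind one entry point. A hedged usage sketch, assuming llvm/Support/FileSystem.h:

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>

    // Any symbol known to live in the main executable works as the anchor
    // address for the dladdr-style fallbacks.
    static void *mainAddr() { return (void *)(intptr_t)&mainAddr; }

    int main(int argc, char **argv) {
      std::string Exe = llvm::sys::fs::getMainExecutable(argv[0], mainAddr());
      llvm::outs() << "running from: " << Exe << "\n";
      return 0;
    }
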
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc
index 5e0cde4a81ed..f68374d29f02 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Signals.inc
@@ -82,18 +82,15 @@ using namespace llvm;
static RETSIGTYPE SignalHandler(int Sig); // defined below.
static RETSIGTYPE InfoSignalHandler(int Sig); // defined below.
-static void DefaultPipeSignalFunction() {
- exit(EX_IOERR);
-}
-
using SignalHandlerFunctionType = void (*)();
/// The function to call if ctrl-c is pressed.
static std::atomic<SignalHandlerFunctionType> InterruptFunction =
ATOMIC_VAR_INIT(nullptr);
static std::atomic<SignalHandlerFunctionType> InfoSignalFunction =
ATOMIC_VAR_INIT(nullptr);
-static std::atomic<SignalHandlerFunctionType> PipeSignalFunction =
- ATOMIC_VAR_INIT(DefaultPipeSignalFunction);
+/// The function to call on SIGPIPE (one-time use only).
+static std::atomic<SignalHandlerFunctionType> OneShotPipeSignalFunction =
+ ATOMIC_VAR_INIT(nullptr);
namespace {
/// Signal-safe removal of files.
@@ -212,7 +209,7 @@ static StringRef Argv0;
/// if there is, it's not our direct responsibility. For whatever reason, our
/// continued execution is no longer desirable.
static const int IntSigs[] = {
- SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR2
+ SIGHUP, SIGINT, SIGTERM, SIGUSR2
};
/// Signals that represent that we have a bug, and our prompt termination has
@@ -243,7 +240,7 @@ static const int InfoSigs[] = {
static const size_t NumSigs =
array_lengthof(IntSigs) + array_lengthof(KillSigs) +
- array_lengthof(InfoSigs);
+ array_lengthof(InfoSigs) + 1 /* SIGPIPE */;
static std::atomic<unsigned> NumRegisteredSignals = ATOMIC_VAR_INIT(0);
@@ -328,6 +325,8 @@ static void RegisterHandlers() { // Not signal-safe.
registerHandler(S, SignalKind::IsKill);
for (auto S : KillSigs)
registerHandler(S, SignalKind::IsKill);
+ if (OneShotPipeSignalFunction)
+ registerHandler(SIGPIPE, SignalKind::IsKill);
for (auto S : InfoSigs)
registerHandler(S, SignalKind::IsInfo);
}
@@ -346,6 +345,22 @@ static void RemoveFilesToRemove() {
FileToRemoveList::removeAllFiles(FilesToRemove);
}
+void sys::CleanupOnSignal(uintptr_t Context) {
+ int Sig = (int)Context;
+
+ if (llvm::is_contained(InfoSigs, Sig)) {
+ InfoSignalHandler(Sig);
+ return;
+ }
+
+ RemoveFilesToRemove();
+
+ if (llvm::is_contained(IntSigs, Sig) || Sig == SIGPIPE)
+ return;
+
+ llvm::sys::RunSignalHandlers();
+}
+
// The signal handler that runs.
static RETSIGTYPE SignalHandler(int Sig) {
// Restore the signal behavior to default, so that the program actually
@@ -362,16 +377,16 @@ static RETSIGTYPE SignalHandler(int Sig) {
{
RemoveFilesToRemove();
+ if (Sig == SIGPIPE)
+ if (auto OldOneShotPipeFunction =
+ OneShotPipeSignalFunction.exchange(nullptr))
+ return OldOneShotPipeFunction();
+
if (std::find(std::begin(IntSigs), std::end(IntSigs), Sig)
!= std::end(IntSigs)) {
if (auto OldInterruptFunction = InterruptFunction.exchange(nullptr))
return OldInterruptFunction();
- // Send a special return code that drivers can check for, from sysexits.h.
- if (Sig == SIGPIPE)
- if (SignalHandlerFunctionType CurrentPipeFunction = PipeSignalFunction)
- CurrentPipeFunction();
-
raise(Sig); // Execute the default handler.
return;
}
@@ -410,11 +425,16 @@ void llvm::sys::SetInfoSignalFunction(void (*Handler)()) {
RegisterHandlers();
}
-void llvm::sys::SetPipeSignalFunction(void (*Handler)()) {
- PipeSignalFunction.exchange(Handler);
+void llvm::sys::SetOneShotPipeSignalFunction(void (*Handler)()) {
+ OneShotPipeSignalFunction.exchange(Handler);
RegisterHandlers();
}
+void llvm::sys::DefaultOneShotPipeSignalHandler() {
+ // Send a special return code that drivers can check for, from sysexits.h.
+ exit(EX_IOERR);
+}
+
// The public API
bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
std::string* ErrMsg) {
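
The net effect of the Signals.inc changes: SIGPIPE is dropped from the always-registered interrupt set, a handler is installed only when a tool opts in through the new one-shot API, the handler runs at most once (the atomic exchange clears it), and the stock behavior, exiting with EX_IOERR from sysexits.h, now lives in DefaultOneShotPipeSignalHandler. A minimal sketch, assuming the matching declarations in llvm/Support/Signals.h:

    #include "llvm/Support/Signals.h"

    int main() {
      // Opt in to SIGPIPE handling; without this call the process now keeps
      // the OS default disposition for SIGPIPE.
      llvm::sys::SetOneShotPipeSignalFunction(
          llvm::sys::DefaultOneShotPipeSignalHandler);
      // ... tool work that may write to a closed pipe ...
      return 0;
    }
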
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Threading.inc b/contrib/llvm-project/llvm/lib/Support/Unix/Threading.inc
index ed9a96563055..afb887fc1096 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Threading.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Threading.inc
@@ -10,6 +10,8 @@
//
//===----------------------------------------------------------------------===//
+#include "Unix.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
@@ -40,47 +42,56 @@
#include <unistd.h> // For syscall()
#endif
-namespace {
- struct ThreadInfo {
- void(*UserFn)(void *);
- void *UserData;
- };
+static void *threadFuncSync(void *Arg) {
+ SyncThreadInfo *TI = static_cast<SyncThreadInfo *>(Arg);
+ TI->UserFn(TI->UserData);
+ return nullptr;
}
-static void *ExecuteOnThread_Dispatch(void *Arg) {
- ThreadInfo *TI = reinterpret_cast<ThreadInfo*>(Arg);
- TI->UserFn(TI->UserData);
+static void *threadFuncAsync(void *Arg) {
+ std::unique_ptr<AsyncThreadInfo> Info(static_cast<AsyncThreadInfo *>(Arg));
+ (*Info)();
return nullptr;
}
-void llvm::llvm_execute_on_thread(void(*Fn)(void*), void *UserData,
- unsigned RequestedStackSize) {
- ThreadInfo Info = { Fn, UserData };
- pthread_attr_t Attr;
- pthread_t Thread;
+static void
+llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
+ llvm::Optional<unsigned> StackSizeInBytes,
+ JoiningPolicy JP) {
+ int errnum;
// Construct the attributes object.
- if (::pthread_attr_init(&Attr) != 0)
- return;
+ pthread_attr_t Attr;
+ if ((errnum = ::pthread_attr_init(&Attr)) != 0) {
+ ReportErrnumFatal("pthread_attr_init failed", errnum);
+ }
+
+ auto AttrGuard = llvm::make_scope_exit([&] {
+ if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) {
+ ReportErrnumFatal("pthread_attr_destroy failed", errnum);
+ }
+ });
// Set the requested stack size, if given.
- if (RequestedStackSize != 0) {
- if (::pthread_attr_setstacksize(&Attr, RequestedStackSize) != 0)
- goto error;
+ if (StackSizeInBytes) {
+ if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) {
+ ReportErrnumFatal("pthread_attr_setstacksize failed", errnum);
+ }
}
// Construct and execute the thread.
- if (::pthread_create(&Thread, &Attr, ExecuteOnThread_Dispatch, &Info) != 0)
- goto error;
-
- // Wait for the thread and clean up.
- ::pthread_join(Thread, nullptr);
+ pthread_t Thread;
+ if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0)
+ ReportErrnumFatal("pthread_create failed", errnum);
-error:
- ::pthread_attr_destroy(&Attr);
+ if (JP == JoiningPolicy::Join) {
+ // Wait for the thread
+ if ((errnum = ::pthread_join(Thread, nullptr)) != 0) {
+ ReportErrnumFatal("pthread_join failed", errnum);
+ }
+ }
}
-
uint64_t llvm::get_threadid() {
#if defined(__APPLE__)
// Calling "mach_thread_self()" bumps the reference count on the thread
diff --git a/contrib/llvm-project/llvm/lib/Support/Unix/Unix.h b/contrib/llvm-project/llvm/lib/Support/Unix/Unix.h
index 86309b0567f5..1fc9a414f749 100644
--- a/contrib/llvm-project/llvm/lib/Support/Unix/Unix.h
+++ b/contrib/llvm-project/llvm/lib/Support/Unix/Unix.h
@@ -21,6 +21,7 @@
#include "llvm/Config/config.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Errno.h"
+#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <assert.h>
#include <cerrno>
@@ -69,6 +70,14 @@ static inline bool MakeErrMsg(
return true;
}
+// Include StrError(errnum) in a fatal error message.
+LLVM_ATTRIBUTE_NORETURN static inline void ReportErrnumFatal(const char *Msg,
+ int errnum) {
+ std::string ErrMsg;
+ MakeErrMsg(&ErrMsg, Msg, errnum);
+ llvm::report_fatal_error(ErrMsg);
+}
+
namespace llvm {
namespace sys {
diff --git a/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp b/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp
index c390cb1b2227..edd4234fe501 100644
--- a/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp
@@ -894,7 +894,7 @@ class InMemoryDirIterator : public llvm::vfs::detail::DirIterImpl {
if (I != E) {
SmallString<256> Path(RequestedDirName);
llvm::sys::path::append(Path, I->second->getFileName());
- sys::fs::file_type Type;
+ sys::fs::file_type Type = sys::fs::file_type::type_unknown;
switch (I->second->getKind()) {
case detail::IME_File:
case detail::IME_HardLink:
@@ -1073,6 +1073,19 @@ std::error_code RedirectingFileSystem::isLocal(const Twine &Path,
return ExternalFS->isLocal(Path, Result);
}
+std::error_code RedirectingFileSystem::makeAbsolute(SmallVectorImpl<char> &Path) const {
+ if (llvm::sys::path::is_absolute(Path, llvm::sys::path::Style::posix) ||
+ llvm::sys::path::is_absolute(Path, llvm::sys::path::Style::windows))
+ return {};
+
+ auto WorkingDir = getCurrentWorkingDirectory();
+ if (!WorkingDir)
+ return WorkingDir.getError();
+
+ llvm::sys::fs::make_absolute(WorkingDir.get(), Path);
+ return {};
+}
+
directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
std::error_code &EC) {
ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Dir);
@@ -1428,21 +1441,31 @@ class llvm::vfs::RedirectingFileSystemParser {
return nullptr;
}
- if (IsRootEntry && !sys::path::is_absolute(Name)) {
- assert(NameValueNode && "Name presence should be checked earlier");
- error(NameValueNode,
- "entry with relative path at the root level is not discoverable");
- return nullptr;
+ sys::path::Style path_style = sys::path::Style::native;
+ if (IsRootEntry) {
+ // VFS root entries may be in either Posix or Windows style. Figure out
+ // which style we have, and use it consistently.
+ if (sys::path::is_absolute(Name, sys::path::Style::posix)) {
+ path_style = sys::path::Style::posix;
+ } else if (sys::path::is_absolute(Name, sys::path::Style::windows)) {
+ path_style = sys::path::Style::windows;
+ } else {
+ assert(NameValueNode && "Name presence should be checked earlier");
+ error(NameValueNode,
+ "entry with relative path at the root level is not discoverable");
+ return nullptr;
+ }
}
// Remove trailing slash(es), being careful not to remove the root path
StringRef Trimmed(Name);
- size_t RootPathLen = sys::path::root_path(Trimmed).size();
+ size_t RootPathLen = sys::path::root_path(Trimmed, path_style).size();
while (Trimmed.size() > RootPathLen &&
- sys::path::is_separator(Trimmed.back()))
+ sys::path::is_separator(Trimmed.back(), path_style))
Trimmed = Trimmed.slice(0, Trimmed.size() - 1);
+
// Get the last component
- StringRef LastComponent = sys::path::filename(Trimmed);
+ StringRef LastComponent = sys::path::filename(Trimmed, path_style);
std::unique_ptr<RedirectingFileSystem::Entry> Result;
switch (Kind) {
@@ -1460,12 +1483,12 @@ class llvm::vfs::RedirectingFileSystemParser {
break;
}
- StringRef Parent = sys::path::parent_path(Trimmed);
+ StringRef Parent = sys::path::parent_path(Trimmed, path_style);
if (Parent.empty())
return Result;
// if 'name' contains multiple components, create implicit directory entries
- for (sys::path::reverse_iterator I = sys::path::rbegin(Parent),
+ for (sys::path::reverse_iterator I = sys::path::rbegin(Parent, path_style),
E = sys::path::rend(Parent);
I != E; ++I) {
std::vector<std::unique_ptr<RedirectingFileSystem::Entry>> Entries;
@@ -1671,9 +1694,7 @@ RedirectingFileSystem::lookupPath(sys::path::const_iterator Start,
// Forward the search to the next component in case this is an empty one.
if (!FromName.empty()) {
- if (CaseSensitive ? !Start->equals(FromName)
- : !Start->equals_lower(FromName))
- // failure to match
+ if (!pathComponentMatches(*Start, FromName))
return make_error_code(llvm::errc::no_such_file_or_directory);
++Start;
@@ -1695,6 +1716,7 @@ RedirectingFileSystem::lookupPath(sys::path::const_iterator Start,
if (Result || Result.getError() != llvm::errc::no_such_file_or_directory)
return Result;
}
+
return make_error_code(llvm::errc::no_such_file_or_directory);
}
@@ -2073,7 +2095,7 @@ std::error_code VFSFromYamlDirIterImpl::incrementContent(bool IsFirstTime) {
while (Current != End) {
SmallString<128> PathStr(Dir);
llvm::sys::path::append(PathStr, (*Current)->getName());
- sys::fs::file_type Type;
+ sys::fs::file_type Type = sys::fs::file_type::type_unknown;
switch ((*Current)->getKind()) {
case RedirectingFileSystem::EK_Directory:
Type = sys::fs::file_type::directory_file;
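
Two related VirtualFileSystem.cpp changes above are worth noting together: root entries in a VFS overlay may now be written in either POSIX or Windows path style (the parser detects which and applies it consistently), and the new RedirectingFileSystem::makeAbsolute correspondingly treats a path that is absolute in either style as already absolute. The style probe is just sys::path::is_absolute with an explicit Style; a small sketch, assuming llvm/Support/Path.h:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::SmallString<128> P("C:\\vfs\\root");
      bool Win =
          llvm::sys::path::is_absolute(P, llvm::sys::path::Style::windows);
      bool Posix =
          llvm::sys::path::is_absolute(P, llvm::sys::path::Style::posix);
      // makeAbsolute() leaves the path untouched if either probe succeeds.
      llvm::outs() << "windows-absolute: " << (Win ? "yes" : "no")
                   << ", posix-absolute: " << (Posix ? "yes" : "no") << "\n";
      return 0;
    }
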
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Memory.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Memory.inc
index a67f9c7d0f35..c5566f9910a5 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Memory.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Memory.inc
@@ -19,9 +19,7 @@
// The Windows.h header must be the last one included.
#include "WindowsSupport.h"
-namespace {
-
-DWORD getWindowsProtectionFlags(unsigned Flags) {
+static DWORD getWindowsProtectionFlags(unsigned Flags) {
switch (Flags & llvm::sys::Memory::MF_RWE_MASK) {
// Contrary to what you might expect, the Windows page protection flags
// are not a bitwise combination of RWX values
@@ -50,7 +48,7 @@ DWORD getWindowsProtectionFlags(unsigned Flags) {
// While we'd be happy to allocate single pages, the Windows allocation
// granularity may be larger than a single page (in practice, it is 64K)
// so mapping less than that will create an unreachable fragment of memory.
-size_t getAllocationGranularity() {
+static size_t getAllocationGranularity() {
SYSTEM_INFO Info;
::GetSystemInfo(&Info);
if (Info.dwPageSize > Info.dwAllocationGranularity)
@@ -91,8 +89,6 @@ static size_t enableProcessLargePages() {
return 0;
}
-} // namespace
-
namespace llvm {
namespace sys {
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc
index 4b91f9f7fc66..3526e3dee6fa 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Process.inc
@@ -439,13 +439,6 @@ const char *Process::ResetColor() {
return 0;
}
-// Include GetLastError() in a fatal error message.
-static void ReportLastErrorFatal(const char *Msg) {
- std::string ErrMsg;
- MakeErrMsg(&ErrMsg, Msg);
- report_fatal_error(ErrMsg);
-}
-
unsigned Process::GetRandomNumber() {
HCRYPTPROV HCPC;
if (!::CryptAcquireContextW(&HCPC, NULL, NULL, PROV_RSA_FULL,
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Program.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Program.inc
index a23ed95fc390..a1482bf17c60 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Program.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Program.inc
@@ -197,7 +197,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
// An environment block consists of a null-terminated block of
// null-terminated strings. Convert the array of environment variables to
// an environment block by concatenating them.
- for (const auto E : *Env) {
+ for (StringRef E : *Env) {
SmallVector<wchar_t, MAX_PATH> EnvString;
if (std::error_code ec = windows::UTF8ToUTF16(E, EnvString)) {
SetLastError(ec.value());
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc
index d962daf79348..8b525f1bd4ac 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Signals.inc
@@ -521,10 +521,13 @@ void sys::PrintStackTraceOnErrorSignal(StringRef Argv0,
extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord);
#endif
-void llvm::sys::PrintStackTrace(raw_ostream &OS) {
- STACKFRAME64 StackFrame = {};
- CONTEXT Context = {};
- ::RtlCaptureContext(&Context);
+static void LocalPrintStackTrace(raw_ostream &OS, PCONTEXT C) {
+ STACKFRAME64 StackFrame{};
+ CONTEXT Context{};
+ if (!C) {
+ ::RtlCaptureContext(&Context);
+ C = &Context;
+ }
#if defined(_M_X64)
StackFrame.AddrPC.Offset = Context.Rip;
StackFrame.AddrStack.Offset = Context.Rsp;
@@ -546,9 +549,12 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) {
StackFrame.AddrStack.Mode = AddrModeFlat;
StackFrame.AddrFrame.Mode = AddrModeFlat;
PrintStackTraceForThread(OS, GetCurrentProcess(), GetCurrentThread(),
- StackFrame, &Context);
+ StackFrame, C);
}
+void llvm::sys::PrintStackTrace(raw_ostream &OS) {
+ LocalPrintStackTrace(OS, nullptr);
+}
void llvm::sys::SetInterruptFunction(void (*IF)()) {
RegisterHandler();
@@ -560,7 +566,11 @@ void llvm::sys::SetInfoSignalFunction(void (*Handler)()) {
// Unimplemented.
}
-void llvm::sys::SetPipeSignalFunction(void (*Handler)()) {
+void llvm::sys::SetOneShotPipeSignalFunction(void (*Handler)()) {
+ // Unimplemented.
+}
+
+void llvm::sys::DefaultOneShotPipeSignalHandler() {
// Unimplemented.
}
@@ -788,6 +798,10 @@ WriteWindowsDumpFile(PMINIDUMP_EXCEPTION_INFORMATION ExceptionInfo) {
return std::error_code();
}
+void sys::CleanupOnSignal(uintptr_t Context) {
+ LLVMUnhandledExceptionFilter((LPEXCEPTION_POINTERS)Context);
+}
+
static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
Cleanup();
@@ -806,42 +820,9 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
<< "\n";
}
- // Initialize the STACKFRAME structure.
- STACKFRAME64 StackFrame = {};
-
-#if defined(_M_X64)
- StackFrame.AddrPC.Offset = ep->ContextRecord->Rip;
- StackFrame.AddrPC.Mode = AddrModeFlat;
- StackFrame.AddrStack.Offset = ep->ContextRecord->Rsp;
- StackFrame.AddrStack.Mode = AddrModeFlat;
- StackFrame.AddrFrame.Offset = ep->ContextRecord->Rbp;
- StackFrame.AddrFrame.Mode = AddrModeFlat;
-#elif defined(_M_IX86)
- StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
- StackFrame.AddrPC.Mode = AddrModeFlat;
- StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
- StackFrame.AddrStack.Mode = AddrModeFlat;
- StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp;
- StackFrame.AddrFrame.Mode = AddrModeFlat;
-#elif defined(_M_ARM64) || defined(_M_ARM)
- StackFrame.AddrPC.Offset = ep->ContextRecord->Pc;
- StackFrame.AddrPC.Mode = AddrModeFlat;
- StackFrame.AddrStack.Offset = ep->ContextRecord->Sp;
- StackFrame.AddrStack.Mode = AddrModeFlat;
-#if defined(_M_ARM64)
- StackFrame.AddrFrame.Offset = ep->ContextRecord->Fp;
-#else
- StackFrame.AddrFrame.Offset = ep->ContextRecord->R11;
-#endif
- StackFrame.AddrFrame.Mode = AddrModeFlat;
-#endif
-
- HANDLE hProcess = GetCurrentProcess();
- HANDLE hThread = GetCurrentThread();
- PrintStackTraceForThread(llvm::errs(), hProcess, hThread, StackFrame,
- ep->ContextRecord);
+ LocalPrintStackTrace(llvm::errs(), ep ? ep->ContextRecord : nullptr);
- _exit(ep->ExceptionRecord->ExceptionCode);
+ return EXCEPTION_EXECUTE_HANDLER;
}
static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
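
Both hosts also gain sys::CleanupOnSignal, one entry point into the crash-cleanup path for generic code; the Context argument is deliberately opaque (a signal number on Unix, an LPEXCEPTION_POINTERS on Windows). A hypothetical caller, assuming only the declaration in llvm/Support/Signals.h; the hook name is illustrative:

    #include "llvm/Support/Signals.h"
    #include <cstdint>

    // A crash-recovery framework would invoke this from its own handler,
    // forwarding whatever context the platform delivered.
    void myCrashCleanup(uintptr_t PlatformContext) {
      llvm::sys::CleanupOnSignal(PlatformContext);
    }
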
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Threading.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Threading.inc
index 96649472cc90..9456efa686ff 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Threading.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Threading.inc
@@ -21,36 +21,36 @@
#undef MemoryFence
#endif
-namespace {
- struct ThreadInfo {
- void(*func)(void*);
- void *param;
- };
+static unsigned __stdcall threadFuncSync(void *Arg) {
+ SyncThreadInfo *TI = static_cast<SyncThreadInfo *>(Arg);
+ TI->UserFn(TI->UserData);
+ return 0;
}
-static unsigned __stdcall ThreadCallback(void *param) {
- struct ThreadInfo *info = reinterpret_cast<struct ThreadInfo *>(param);
- info->func(info->param);
-
+static unsigned __stdcall threadFuncAsync(void *Arg) {
+ std::unique_ptr<AsyncThreadInfo> Info(static_cast<AsyncThreadInfo *>(Arg));
+ (*Info)();
return 0;
}
-void llvm::llvm_execute_on_thread(void(*Fn)(void*), void *UserData,
- unsigned RequestedStackSize) {
- struct ThreadInfo param = { Fn, UserData };
-
- HANDLE hThread = (HANDLE)::_beginthreadex(NULL,
- RequestedStackSize, ThreadCallback,
- &param, 0, NULL);
-
- if (hThread) {
- // We actually don't care whether the wait succeeds or fails, in
- // the same way we don't care whether the pthread_join call succeeds
- // or fails. There's not much we could do if this were to fail. But
- // on success, this call will wait until the thread finishes executing
- // before returning.
- (void)::WaitForSingleObject(hThread, INFINITE);
- ::CloseHandle(hThread);
+static void
+llvm_execute_on_thread_impl(unsigned (__stdcall *ThreadFunc)(void *), void *Arg,
+ llvm::Optional<unsigned> StackSizeInBytes,
+ JoiningPolicy JP) {
+ HANDLE hThread = (HANDLE)::_beginthreadex(
+ NULL, StackSizeInBytes.getValueOr(0), ThreadFunc, Arg, 0, NULL);
+
+ if (!hThread) {
+ ReportLastErrorFatal("_beginthreadex failed");
+ }
+
+ if (JP == JoiningPolicy::Join) {
+ if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
+ ReportLastErrorFatal("WaitForSingleObject failed");
+ }
+ }
+ if (::CloseHandle(hThread) == FALSE) {
+ ReportLastErrorFatal("CloseHandle failed");
}
}
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/WindowsSupport.h b/contrib/llvm-project/llvm/lib/Support/Windows/WindowsSupport.h
index 2e2e97430b76..bb7e79b86018 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/WindowsSupport.h
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/WindowsSupport.h
@@ -41,6 +41,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/VersionTuple.h"
#include <cassert>
#include <string>
@@ -66,6 +67,13 @@ llvm::VersionTuple GetWindowsOSVersion();
bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix);
+// Include GetLastError() in a fatal error message.
+LLVM_ATTRIBUTE_NORETURN inline void ReportLastErrorFatal(const char *Msg) {
+ std::string ErrMsg;
+ MakeErrMsg(&ErrMsg, Msg);
+ llvm::report_fatal_error(ErrMsg);
+}
+
template <typename HandleTraits>
class ScopedHandle {
typedef typename HandleTraits::handle_type handle_type;
diff --git a/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp
index 9b2fe9c4418a..d17e7b227f4a 100644
--- a/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/YAMLParser.cpp
@@ -178,10 +178,10 @@ namespace {
/// others) before the SimpleKey's Tok.
struct SimpleKey {
TokenQueueT::iterator Tok;
- unsigned Column;
- unsigned Line;
- unsigned FlowLevel;
- bool IsRequired;
+ unsigned Column = 0;
+ unsigned Line = 0;
+ unsigned FlowLevel = 0;
+ bool IsRequired = false;
bool operator ==(const SimpleKey &Other) {
return Tok == Other.Tok;
@@ -789,6 +789,7 @@ Token &Scanner::peekNext() {
if (TokenQueue.empty() || NeedMore) {
if (!fetchMoreTokens()) {
TokenQueue.clear();
+ SimpleKeys.clear();
TokenQueue.push_back(Token());
return TokenQueue.front();
}
@@ -932,12 +933,16 @@ void Scanner::scan_ns_uri_char() {
}
bool Scanner::consume(uint32_t Expected) {
- if (Expected >= 0x80)
- report_fatal_error("Not dealing with this yet");
+ if (Expected >= 0x80) {
+ setError("Cannot consume non-ascii characters");
+ return false;
+ }
if (Current == End)
return false;
- if (uint8_t(*Current) >= 0x80)
- report_fatal_error("Not dealing with this yet");
+ if (uint8_t(*Current) >= 0x80) {
+ setError("Cannot consume non-ascii characters");
+ return false;
+ }
if (uint8_t(*Current) == Expected) {
++Current;
++Column;
@@ -1227,7 +1232,10 @@ bool Scanner::scanValue() {
if (i == SK.Tok)
break;
}
- assert(i != e && "SimpleKey not in token queue!");
+ if (i == e) {
+ Failed = true;
+ return false;
+ }
i = TokenQueue.insert(i, T);
// We may also need to add a Block-Mapping-Start token.
@@ -1772,10 +1780,11 @@ Stream::~Stream() = default;
bool Stream::failed() { return scanner->failed(); }
void Stream::printError(Node *N, const Twine &Msg) {
- scanner->printError( N->getSourceRange().Start
+ SMRange Range = N ? N->getSourceRange() : SMRange();
+ scanner->printError( Range.Start
, SourceMgr::DK_Error
, Msg
- , N->getSourceRange());
+ , Range);
}
document_iterator Stream::begin() {
@@ -1934,15 +1943,18 @@ StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
UnquotedValue = UnquotedValue.substr(1);
break;
default:
- if (UnquotedValue.size() == 1)
- // TODO: Report error.
- break;
+ if (UnquotedValue.size() == 1) {
+ Token T;
+ T.Range = StringRef(UnquotedValue.begin(), 1);
+ setError("Unrecognized escape code", T);
+ return "";
+ }
UnquotedValue = UnquotedValue.substr(1);
switch (UnquotedValue[0]) {
default: {
Token T;
T.Range = StringRef(UnquotedValue.begin(), 1);
- setError("Unrecognized escape code!", T);
+ setError("Unrecognized escape code", T);
return "";
}
case '\r':
@@ -2078,7 +2090,14 @@ Node *KeyValueNode::getKey() {
Node *KeyValueNode::getValue() {
if (Value)
return Value;
- getKey()->skip();
+
+ if (Node* Key = getKey())
+ Key->skip();
+ else {
+ setError("Null key in Key Value.", peekNext());
+ return Value = new (getAllocator()) NullNode(Doc);
+ }
+
if (failed())
return Value = new (getAllocator()) NullNode(Doc);
@@ -2269,8 +2288,8 @@ Document::Document(Stream &S) : stream(S), Root(nullptr) {
bool Document::skip() {
if (stream.scanner->failed())
return false;
- if (!Root)
- getRoot();
+ if (!Root && !getRoot())
+ return false;
Root->skip();
Token &T = peekNext();
if (T.Kind == Token::TK_StreamEnd)
@@ -2394,6 +2413,15 @@ parse_property:
// TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
// !!null null.
return new (NodeAllocator) NullNode(stream.CurrentDoc);
+ case Token::TK_FlowMappingEnd:
+ case Token::TK_FlowSequenceEnd:
+ case Token::TK_FlowEntry: {
+ if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
+ return new (NodeAllocator) NullNode(stream.CurrentDoc);
+
+ setError("Unexpected token", T);
+ return nullptr;
+ }
case Token::TK_Error:
return nullptr;
}
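
The YAMLParser.cpp changes above convert several report_fatal_error calls and an assert into recoverable scanner errors (and clear SimpleKeys alongside TokenQueue so stale keys cannot dangle), so malformed input fails the stream instead of aborting the process. A small sketch, assuming llvm/Support/YAMLParser.h; per the new TK_FlowSequenceEnd handling, a stray flow terminator at the document root is one such recoverable error:

    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/YAMLParser.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::SourceMgr SM;
      llvm::yaml::Stream S("]", SM); // stray flow-sequence terminator
      llvm::yaml::document_iterator DI = S.begin();
      if (DI != S.end())
        (void)DI->getRoot(); // records an error and returns null
      llvm::outs() << "parse failed: " << (S.failed() ? "yes" : "no") << "\n";
      return 0;
    }
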
diff --git a/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp
index eba22fd14725..5f0cedc71829 100644
--- a/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/YAMLTraits.cpp
@@ -87,7 +87,6 @@ bool Input::setCurrentDocument() {
if (DocIterator != Strm->end()) {
Node *N = DocIterator->getRoot();
if (!N) {
- assert(Strm->failed() && "Root is NULL iff parsing failed");
EC = make_error_code(errc::invalid_argument);
return false;
}
@@ -394,7 +393,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
auto mapHNode = std::make_unique<MapHNode>(N);
for (KeyValueNode &KVN : *Map) {
Node *KeyNode = KVN.getKey();
- ScalarNode *Key = dyn_cast<ScalarNode>(KeyNode);
+ ScalarNode *Key = dyn_cast_or_null<ScalarNode>(KeyNode);
Node *Value = KVN.getValue();
if (!Key || !Value) {
if (!Key)
diff --git a/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
index b9989371f5ea..4bb315f824af 100644
--- a/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/raw_ostream.cpp
@@ -82,7 +82,7 @@ raw_ostream::~raw_ostream() {
assert(OutBufCur == OutBufStart &&
"raw_ostream destructor called with non-empty buffer!");
- if (BufferMode == InternalBuffer)
+ if (BufferMode == BufferKind::InternalBuffer)
delete [] OutBufStart;
}
@@ -102,14 +102,14 @@ void raw_ostream::SetBuffered() {
void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
BufferKind Mode) {
- assert(((Mode == Unbuffered && !BufferStart && Size == 0) ||
- (Mode != Unbuffered && BufferStart && Size != 0)) &&
+ assert(((Mode == BufferKind::Unbuffered && !BufferStart && Size == 0) ||
+ (Mode != BufferKind::Unbuffered && BufferStart && Size != 0)) &&
"stream must be unbuffered or have at least one byte");
// Make sure the current buffer is free of content (we can't flush here; the
// child buffer management logic will be in write_impl).
assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!");
- if (BufferMode == InternalBuffer)
+ if (BufferMode == BufferKind::InternalBuffer)
delete [] OutBufStart;
OutBufStart = BufferStart;
OutBufEnd = OutBufStart+Size;
@@ -223,7 +223,7 @@ raw_ostream &raw_ostream::write(unsigned char C) {
// Group exceptional cases into a single branch.
if (LLVM_UNLIKELY(OutBufCur >= OutBufEnd)) {
if (LLVM_UNLIKELY(!OutBufStart)) {
- if (BufferMode == Unbuffered) {
+ if (BufferMode == BufferKind::Unbuffered) {
write_impl(reinterpret_cast<char*>(&C), 1);
return *this;
}
@@ -243,7 +243,7 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
// Group exceptional cases into a single branch.
if (LLVM_UNLIKELY(size_t(OutBufEnd - OutBufCur) < Size)) {
if (LLVM_UNLIKELY(!OutBufStart)) {
- if (BufferMode == Unbuffered) {
+ if (BufferMode == BufferKind::Unbuffered) {
write_impl(Ptr, Size);
return *this;
}
diff --git a/contrib/llvm-project/llvm/lib/TableGen/Main.cpp b/contrib/llvm-project/llvm/lib/TableGen/Main.cpp
index 48ded6c45a46..427bd6778577 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/Main.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/Main.cpp
@@ -73,7 +73,7 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) {
EC.message() + "\n");
DepOut.os() << OutputFilename << ":";
for (const auto &Dep : Parser.getDependencies()) {
- DepOut.os() << ' ' << Dep.first;
+ DepOut.os() << ' ' << Dep;
}
DepOut.os() << "\n";
DepOut.keep();
diff --git a/contrib/llvm-project/llvm/lib/TableGen/Record.cpp b/contrib/llvm-project/llvm/lib/TableGen/Record.cpp
index 835ef8c7141b..9db842dc678e 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/Record.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/Record.cpp
@@ -788,6 +788,21 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
return IntInit::get(LHSs->getValue().empty());
break;
+
+ case GETOP:
+ if (DagInit *Dag = dyn_cast<DagInit>(LHS)) {
+ DefInit *DI = DefInit::get(Dag->getOperatorAsDef({}));
+ if (!DI->getType()->typeIsA(getType())) {
+ PrintFatalError(CurRec->getLoc(),
+ Twine("Expected type '") +
+ getType()->getAsString() + "', got '" +
+ DI->getType()->getAsString() + "' in: " +
+ getAsString() + "\n");
+ } else {
+ return DI;
+ }
+ }
+ break;
}
return const_cast<UnOpInit *>(this);
}
@@ -809,6 +824,7 @@ std::string UnOpInit::getAsString() const {
case TAIL: Result = "!tail"; break;
case SIZE: Result = "!size"; break;
case EMPTY: Result = "!empty"; break;
+ case GETOP: Result = "!getop"; break;
}
return Result + "(" + LHS->getAsString() + ")";
}
@@ -887,13 +903,18 @@ Init *BinOpInit::Fold(Record *CurRec) const {
if (LHSs && RHSs) {
DefInit *LOp = dyn_cast<DefInit>(LHSs->getOperator());
DefInit *ROp = dyn_cast<DefInit>(RHSs->getOperator());
- if (!LOp || !ROp)
+ if ((!LOp && !isa<UnsetInit>(LHSs->getOperator())) ||
+ (!ROp && !isa<UnsetInit>(RHSs->getOperator())))
break;
- if (LOp->getDef() != ROp->getDef()) {
+ if (LOp && ROp && LOp->getDef() != ROp->getDef()) {
PrintFatalError(Twine("Concatenated Dag operators do not match: '") +
LHSs->getAsString() + "' vs. '" + RHSs->getAsString() +
"'");
}
+ Init *Op = LOp ? LOp : ROp;
+ if (!Op)
+ Op = UnsetInit::get();
+
SmallVector<Init*, 8> Args;
SmallVector<StringInit*, 8> ArgNames;
for (unsigned i = 0, e = LHSs->getNumArgs(); i != e; ++i) {
@@ -904,7 +925,7 @@ Init *BinOpInit::Fold(Record *CurRec) const {
Args.push_back(RHSs->getArg(i));
ArgNames.push_back(RHSs->getArgName(i));
}
- return DagInit::get(LHSs->getOperator(), nullptr, Args, ArgNames);
+ return DagInit::get(Op, nullptr, Args, ArgNames);
}
break;
}
@@ -975,6 +996,20 @@ Init *BinOpInit::Fold(Record *CurRec) const {
break;
}
+ case SETOP: {
+ DagInit *Dag = dyn_cast<DagInit>(LHS);
+ DefInit *Op = dyn_cast<DefInit>(RHS);
+ if (Dag && Op) {
+ SmallVector<Init*, 8> Args;
+ SmallVector<StringInit*, 8> ArgNames;
+ for (unsigned i = 0, e = Dag->getNumArgs(); i != e; ++i) {
+ Args.push_back(Dag->getArg(i));
+ ArgNames.push_back(Dag->getArgName(i));
+ }
+ return DagInit::get(Op, nullptr, Args, ArgNames);
+ }
+ break;
+ }
case ADD:
case MUL:
case AND:
@@ -1037,6 +1072,7 @@ std::string BinOpInit::getAsString() const {
case LISTCONCAT: Result = "!listconcat"; break;
case LISTSPLAT: Result = "!listsplat"; break;
case STRCONCAT: Result = "!strconcat"; break;
+ case SETOP: Result = "!setop"; break;
}
return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")";
}
@@ -2277,6 +2313,21 @@ Record *Record::getValueAsDef(StringRef FieldName) const {
FieldName + "' does not have a def initializer!");
}
+Record *Record::getValueAsOptionalDef(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (!R || !R->getValue())
+ PrintFatalError(getLoc(), "Record `" + getName() +
+ "' does not have a field named `" + FieldName + "'!\n");
+
+ if (DefInit *DI = dyn_cast<DefInit>(R->getValue()))
+ return DI->getDef();
+ if (isa<UnsetInit>(R->getValue()))
+ return nullptr;
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
+ FieldName + "' does not have either a def initializer or '?'!");
+}
+
+
bool Record::getValueAsBit(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (!R || !R->getValue())
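
Record.cpp picks up TableGen's new dag-operator intrinsics (!getop reads a dag's operator, with !getop&lt;Class&gt; as shorthand for the cast, and !setop rebuilds a dag around a new operator), relaxes dag concatenation to tolerate '?' operators, and adds Record::getValueAsOptionalDef, which returns null for a field left as '?' where getValueAsDef would raise a fatal error. A hedged backend-side sketch, assuming the matching declaration in llvm/TableGen/Record.h; the field name is purely illustrative:

    #include "llvm/TableGen/Record.h"

    // Returns the def bound to a hypothetical "SchedModel" field, or null
    // if the record left the field unset with '?'.
    llvm::Record *lookupSchedModel(const llvm::Record &R) {
      return R.getValueAsOptionalDef("SchedModel");
    }
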
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
index da2286e41fe5..1a3f5a7392d5 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
@@ -350,6 +350,10 @@ tgtok::TokKind TGLexer::LexIdentifier() {
.Case("field", tgtok::Field)
.Case("let", tgtok::Let)
.Case("in", tgtok::In)
+ .Case("defvar", tgtok::Defvar)
+ .Case("if", tgtok::If)
+ .Case("then", tgtok::Then)
+ .Case("else", tgtok::ElseKW)
.Default(tgtok::Id);
if (Kind == tgtok::Id)
@@ -379,15 +383,7 @@ bool TGLexer::LexInclude() {
return true;
}
- DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile);
- if (Found != Dependencies.end()) {
- PrintError(getLoc(),
- "File '" + IncludedFile + "' has already been included.");
- SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note,
- "previously included here");
- return true;
- }
- Dependencies.insert(std::make_pair(IncludedFile, getLoc()));
+ Dependencies.insert(IncludedFile);
// Save the line number and lex buffer of the includer.
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
CurPtr = CurBuf.begin();
@@ -567,6 +563,8 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("listconcat", tgtok::XListConcat)
.Case("listsplat", tgtok::XListSplat)
.Case("strconcat", tgtok::XStrConcat)
+ .Case("setop", tgtok::XSetOp)
+ .Case("getop", tgtok::XGetOp)
.Default(tgtok::Error);
return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h
index 3085ab2c0478..6d10af348674 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h
@@ -19,8 +19,8 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
-#include <map>
#include <memory>
+#include <set>
#include <string>
namespace llvm {
@@ -44,14 +44,15 @@ namespace tgtok {
equal, question, // = ?
paste, // #
- // Keywords.
+ // Keywords. ('ElseKW' is named to distinguish it from the existing 'Else'
+ // that means the preprocessor #else.)
Bit, Bits, Class, Code, Dag, Def, Foreach, Defm, Field, In, Int, Let, List,
- MultiClass, String, Defset,
+ MultiClass, String, Defset, Defvar, If, Then, ElseKW,
// !keywords.
XConcat, XADD, XMUL, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XListSplat,
XStrConcat, XCast, XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty,
- XIf, XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt,
+ XIf, XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetOp, XGetOp,
// Integer value.
IntVal,
@@ -73,24 +74,25 @@ namespace tgtok {
class TGLexer {
SourceMgr &SrcMgr;
- const char *CurPtr;
+ const char *CurPtr = nullptr;
StringRef CurBuf;
// Information about the current token.
- const char *TokStart;
- tgtok::TokKind CurCode;
+ const char *TokStart = nullptr;
+ tgtok::TokKind CurCode = tgtok::TokKind::Eof;
std::string CurStrVal; // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT
- int64_t CurIntVal; // This is valid for INTVAL.
+ int64_t CurIntVal = 0; // This is valid for INTVAL.
/// CurBuffer - This is the current buffer index we're lexing from as managed
/// by the SourceMgr object.
- unsigned CurBuffer;
+ unsigned CurBuffer = 0;
public:
- typedef std::map<std::string, SMLoc> DependenciesMapTy;
+ typedef std::set<std::string> DependenciesSetTy;
+
private:
/// Dependencies - This is the list of all included files.
- DependenciesMapTy Dependencies;
+ DependenciesSetTy Dependencies;
public:
TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
@@ -99,7 +101,7 @@ public:
return CurCode = LexToken(CurPtr == CurBuf.begin());
}
- const DependenciesMapTy &getDependencies() const {
+ const DependenciesSetTy &getDependencies() const {
return Dependencies;
}
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp
index c373e2899a5d..01cc1af34ab6 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp
@@ -391,9 +391,11 @@ bool TGParser::resolve(const ForeachLoop &Loop, SubstStack &Substs,
bool Error = false;
for (auto Elt : *LI) {
- Substs.emplace_back(Loop.IterVar->getNameInit(), Elt);
+ if (Loop.IterVar)
+ Substs.emplace_back(Loop.IterVar->getNameInit(), Elt);
Error = resolve(Loop.Entries, Substs, Final, Dest);
- Substs.pop_back();
+ if (Loop.IterVar)
+ Substs.pop_back();
if (Error)
break;
}
@@ -482,7 +484,7 @@ bool TGParser::addDefOne(std::unique_ptr<Record> Rec) {
static bool isObjectStart(tgtok::TokKind K) {
return K == tgtok::Class || K == tgtok::Def || K == tgtok::Defm ||
K == tgtok::Let || K == tgtok::MultiClass || K == tgtok::Foreach ||
- K == tgtok::Defset;
+ K == tgtok::Defset || K == tgtok::Defvar || K == tgtok::If;
}
/// ParseObjectName - If a valid object name is specified, return it. If no
@@ -869,11 +871,17 @@ Init *TGParser::ParseIDValue(Record *CurRec, StringInit *Name, SMLoc NameLoc,
}
}
+ if (CurLocalScope)
+ if (Init *I = CurLocalScope->getVar(Name->getValue()))
+ return I;
+
// If this is in a foreach loop, make sure it's not a loop iterator
for (const auto &L : Loops) {
- VarInit *IterVar = dyn_cast<VarInit>(L->IterVar);
- if (IterVar && IterVar->getNameInit() == Name)
- return IterVar;
+ if (L->IterVar) {
+ VarInit *IterVar = dyn_cast<VarInit>(L->IterVar);
+ if (IterVar && IterVar->getNameInit() == Name)
+ return IterVar;
+ }
}
if (Mode == ParseNameMode)
@@ -905,7 +913,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XTail:
case tgtok::XSize:
case tgtok::XEmpty:
- case tgtok::XCast: { // Value ::= !unop '(' Value ')'
+ case tgtok::XCast:
+ case tgtok::XGetOp: { // Value ::= !unop '(' Value ')'
UnOpInit::UnaryOp Code;
RecTy *Type = nullptr;
@@ -941,6 +950,28 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
Code = UnOpInit::EMPTY;
Type = IntRecTy::get();
break;
+ case tgtok::XGetOp:
+ Lex.Lex(); // eat the operation
+ if (Lex.getCode() == tgtok::less) {
+ // Parse an optional type suffix, so that you can say
+ // !getop<BaseClass>(someDag) as a shorthand for
+ // !cast<BaseClass>(!getop(someDag)).
+ Type = ParseOperatorType();
+
+ if (!Type) {
+ TokError("did not get type for unary operator");
+ return nullptr;
+ }
+
+ if (!isa<RecordRecTy>(Type)) {
+ TokError("type for !getop must be a record type");
+ // but keep parsing, to consume the operand
+ }
+ } else {
+ Type = RecordRecTy::get({});
+ }
+ Code = UnOpInit::GETOP;
+ break;
}
if (Lex.getCode() != tgtok::l_paren) {
TokError("expected '(' after unary operator");
@@ -1055,7 +1086,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XGt:
case tgtok::XListConcat:
case tgtok::XListSplat:
- case tgtok::XStrConcat: { // Value ::= !binop '(' Value ',' Value ')'
+ case tgtok::XStrConcat:
+ case tgtok::XSetOp: { // Value ::= !binop '(' Value ',' Value ')'
tgtok::TokKind OpTok = Lex.getCode();
SMLoc OpLoc = Lex.getLoc();
Lex.Lex(); // eat the operation
@@ -1080,6 +1112,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XListConcat: Code = BinOpInit::LISTCONCAT; break;
case tgtok::XListSplat: Code = BinOpInit::LISTSPLAT; break;
case tgtok::XStrConcat: Code = BinOpInit::STRCONCAT; break;
+ case tgtok::XSetOp: Code = BinOpInit::SETOP; break;
}
RecTy *Type = nullptr;
@@ -1088,6 +1121,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
default:
llvm_unreachable("Unhandled code!");
case tgtok::XConcat:
+ case tgtok::XSetOp:
Type = DagRecTy::get();
ArgType = DagRecTy::get();
break;
@@ -1146,7 +1180,6 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
InitList.push_back(ParseValue(CurRec, ArgType));
if (!InitList.back()) return nullptr;
- // All BinOps require their arguments to be of compatible types.
RecTy *ListType = cast<TypedInit>(InitList.back())->getType();
if (!ArgType) {
ArgType = ListType;
@@ -1212,6 +1245,18 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
ArgType = Resolved;
}
+ // Deal with BinOps whose arguments have different types, by
+ // rewriting ArgType in between them.
+ switch (Code) {
+ case BinOpInit::SETOP:
+ // After parsing the first dag argument, switch to expecting
+ // a record, with no restriction on its superclasses.
+ ArgType = RecordRecTy::get({});
+ break;
+ default:
+ break;
+ }
+
if (Lex.getCode() != tgtok::comma)
break;
Lex.Lex(); // eat the ','
@@ -2024,7 +2069,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
}
case tgtok::l_paren: { // Value ::= '(' IDValue DagArgList ')'
Lex.Lex(); // eat the '('
- if (Lex.getCode() != tgtok::Id && Lex.getCode() != tgtok::XCast) {
+ if (Lex.getCode() != tgtok::Id && Lex.getCode() != tgtok::XCast &&
+ Lex.getCode() != tgtok::question && Lex.getCode() != tgtok::XGetOp) {
TokError("expected identifier in dag init");
return nullptr;
}
@@ -2062,7 +2108,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XTail:
case tgtok::XSize:
case tgtok::XEmpty:
- case tgtok::XCast: // Value ::= !unop '(' Value ')'
+ case tgtok::XCast:
+ case tgtok::XGetOp: // Value ::= !unop '(' Value ')'
case tgtok::XIsA:
case tgtok::XConcat:
case tgtok::XDag:
@@ -2081,7 +2128,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XGt:
case tgtok::XListConcat:
case tgtok::XListSplat:
- case tgtok::XStrConcat: // Value ::= !binop '(' Value ',' Value ')'
+ case tgtok::XStrConcat:
+ case tgtok::XSetOp: // Value ::= !binop '(' Value ',' Value ')'
case tgtok::XIf:
case tgtok::XCond:
case tgtok::XFoldl:
@@ -2555,7 +2603,11 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
///
/// BodyItem ::= Declaration ';'
/// BodyItem ::= LET ID OptionalBitList '=' Value ';'
+/// BodyItem ::= Defvar
bool TGParser::ParseBodyItem(Record *CurRec) {
+ if (Lex.getCode() == tgtok::Defvar)
+ return ParseDefvar();
+
if (Lex.getCode() != tgtok::Let) {
if (!ParseDeclaration(CurRec, false))
return true;
@@ -2618,10 +2670,15 @@ bool TGParser::ParseBody(Record *CurRec) {
// Eat the '{'.
Lex.Lex();
+ // An object body introduces a new scope for local variables.
+ TGLocalVarScope *BodyScope = PushLocalScope();
+
while (Lex.getCode() != tgtok::r_brace)
if (ParseBodyItem(CurRec))
return true;
+ PopLocalScope(BodyScope);
+
// Eat the '}'.
Lex.Lex();
return false;
@@ -2760,6 +2817,45 @@ bool TGParser::ParseDefset() {
return false;
}
+/// ParseDefvar - Parse a defvar statement.
+///
+/// Defvar ::= DEFVAR Id '=' Value ';'
+///
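+/// For example (a sketch; names are illustrative):
+///
+///   defvar Width = 32;              // at file scope: becomes a global
+///   class C {
+///     defvar Next = !add(Width, 1); // in a body: a local variable
+///     int w = Next;
+///   }
+///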
+bool TGParser::ParseDefvar() {
+ assert(Lex.getCode() == tgtok::Defvar);
+ Lex.Lex(); // Eat the 'defvar' token
+
+ if (Lex.getCode() != tgtok::Id)
+ return TokError("expected identifier");
+ StringInit *DeclName = StringInit::get(Lex.getCurStrVal());
+ if (CurLocalScope) {
+ if (CurLocalScope->varAlreadyDefined(DeclName->getValue()))
+ return TokError("local variable of this name already exists");
+ } else {
+ if (Records.getGlobal(DeclName->getValue()))
+ return TokError("def or global variable of this name already exists");
+ }
+
+ if (Lex.Lex() != tgtok::equal) // Eat the identifier
+ return TokError("expected '='");
+ Lex.Lex(); // Eat the '='
+
+ Init *Value = ParseValue(nullptr);
+ if (!Value)
+ return true;
+
+ if (Lex.getCode() != tgtok::semi)
+ return TokError("expected ';'");
+ Lex.Lex(); // Eat the ';'
+
+ if (CurLocalScope)
+ CurLocalScope->addVar(DeclName->getValue(), Value);
+ else
+ Records.addExtraGlobal(DeclName->getValue(), Value);
+
+ return false;
+}
+
/// ParseForeach - Parse a for statement. Return the record corresponding
/// to it. This returns true on error.
///
@@ -2785,6 +2881,9 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) {
// Create a loop object and remember it.
Loops.push_back(std::make_unique<ForeachLoop>(Loc, IterName, ListValue));
+ // A foreach loop introduces a new scope for local variables.
+ TGLocalVarScope *ForeachScope = PushLocalScope();
+
if (Lex.getCode() != tgtok::l_brace) {
// FOREACH Declaration IN Object
if (ParseObject(CurMultiClass))
@@ -2805,6 +2904,8 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) {
Lex.Lex(); // Eat the }
}
+ PopLocalScope(ForeachScope);
+
// Resolve the loop or store it for later resolution.
std::unique_ptr<ForeachLoop> Loop = std::move(Loops.back());
Loops.pop_back();
@@ -2812,6 +2913,115 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) {
return addEntry(std::move(Loop));
}
+/// ParseIf - Parse an if statement.
+///
+/// If ::= IF Value THEN IfBody
+/// If ::= IF Value THEN IfBody ELSE IfBody
+///
+bool TGParser::ParseIf(MultiClass *CurMultiClass) {
+ SMLoc Loc = Lex.getLoc();
+ assert(Lex.getCode() == tgtok::If && "Unknown tok");
+ Lex.Lex(); // Eat the 'if' token.
+
+  // Parse the condition of the if statement.
+ Init *Condition = ParseValue(nullptr);
+ if (!Condition)
+ return true;
+
+ if (Lex.getCode() != tgtok::Then)
+    return TokError("expected 'then' in if statement");
+ Lex.Lex(); // Eat the 'then'
+
+ // We have to be able to save if statements to execute later, and they have
+ // to live on the same stack as foreach loops. The simplest implementation
+ // technique is to convert each 'then' or 'else' clause *into* a foreach
+ // loop, over a list of length 0 or 1 depending on the condition, and with no
+ // iteration variable being assigned.
+
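+  // A sketch of the desugaring (clause contents are illustrative):
+  //   if cond then { A... } else { B... }
+  // behaves as if written
+  //   foreach <unnamed> = !if(cond, [1], []) in { A... }
+  //   foreach <unnamed> = !if(cond, [], [1]) in { B... }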
+ ListInit *EmptyList = ListInit::get({}, BitRecTy::get());
+ ListInit *SingletonList = ListInit::get({BitInit::get(1)}, BitRecTy::get());
+ RecTy *BitListTy = ListRecTy::get(BitRecTy::get());
+
+ // The foreach containing the then-clause selects SingletonList if
+ // the condition is true.
+ Init *ThenClauseList =
+ TernOpInit::get(TernOpInit::IF, Condition, SingletonList, EmptyList,
+ BitListTy)
+ ->Fold(nullptr);
+ Loops.push_back(std::make_unique<ForeachLoop>(Loc, nullptr, ThenClauseList));
+
+ if (ParseIfBody(CurMultiClass, "then"))
+ return true;
+
+ std::unique_ptr<ForeachLoop> Loop = std::move(Loops.back());
+ Loops.pop_back();
+
+ if (addEntry(std::move(Loop)))
+ return true;
+
+ // Now look for an optional else clause. The if-else syntax has the usual
+ // dangling-else ambiguity, and by greedily matching an else here if we can,
+ // we implement the usual resolution of pairing with the innermost unmatched
+ // if.
+ if (Lex.getCode() == tgtok::ElseKW) {
+ Lex.Lex(); // Eat the 'else'
+
+ // The foreach containing the else-clause uses the same pair of lists as
+ // above, but this time, selects SingletonList if the condition is *false*.
+ Init *ElseClauseList =
+ TernOpInit::get(TernOpInit::IF, Condition, EmptyList, SingletonList,
+ BitListTy)
+ ->Fold(nullptr);
+ Loops.push_back(
+ std::make_unique<ForeachLoop>(Loc, nullptr, ElseClauseList));
+
+ if (ParseIfBody(CurMultiClass, "else"))
+ return true;
+
+ Loop = std::move(Loops.back());
+ Loops.pop_back();
+
+ if (addEntry(std::move(Loop)))
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseIfBody - Parse the then-clause or else-clause of an if statement.
+///
+/// IfBody ::= Object
+/// IfBody ::= '{' ObjectList '}'
+///
+bool TGParser::ParseIfBody(MultiClass *CurMultiClass, StringRef Kind) {
+ TGLocalVarScope *BodyScope = PushLocalScope();
+
+ if (Lex.getCode() != tgtok::l_brace) {
+ // A single object.
+ if (ParseObject(CurMultiClass))
+ return true;
+ } else {
+ SMLoc BraceLoc = Lex.getLoc();
+ // A braced block.
+ Lex.Lex(); // eat the '{'.
+
+ // Parse the object list.
+ if (ParseObjectList(CurMultiClass))
+ return true;
+
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of '" + Kind + "' clause");
+ return Error(BraceLoc, "to match this '{'");
+ }
+
+ Lex.Lex(); // Eat the }
+ }
+
+ PopLocalScope(BodyScope);
+ return false;
+}
+
/// ParseClass - Parse a tblgen class definition.
///
/// ClassInst ::= CLASS ID TemplateArgList? ObjectBody
@@ -2917,6 +3127,8 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) {
return TokError("expected 'in' at end of top-level 'let'");
Lex.Lex();
+ TGLocalVarScope *LetScope = PushLocalScope();
+
// If this is a scalar let, just handle it now
if (Lex.getCode() != tgtok::l_brace) {
// LET LetList IN Object
@@ -2938,6 +3150,8 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) {
Lex.Lex();
}
+ PopLocalScope(LetScope);
+
// Outside this let scope, this let block is not active.
LetStack.pop_back();
return false;
@@ -3011,21 +3225,28 @@ bool TGParser::ParseMultiClass() {
if (Lex.Lex() == tgtok::r_brace) // eat the '{'.
return TokError("multiclass must contain at least one def");
+ // A multiclass body introduces a new scope for local variables.
+ TGLocalVarScope *MulticlassScope = PushLocalScope();
+
while (Lex.getCode() != tgtok::r_brace) {
switch (Lex.getCode()) {
default:
- return TokError("expected 'let', 'def', 'defm' or 'foreach' in "
- "multiclass body");
+ return TokError("expected 'let', 'def', 'defm', 'defvar', 'foreach' "
+ "or 'if' in multiclass body");
case tgtok::Let:
case tgtok::Def:
case tgtok::Defm:
+ case tgtok::Defvar:
case tgtok::Foreach:
+ case tgtok::If:
if (ParseObject(CurMultiClass))
return true;
break;
}
}
Lex.Lex(); // eat the '}'.
+
+ PopLocalScope(MulticlassScope);
}
CurMultiClass = nullptr;
@@ -3167,19 +3388,24 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
/// Object ::= DefMInst
/// Object ::= LETCommand '{' ObjectList '}'
/// Object ::= LETCommand Object
+/// Object ::= Defset
+/// Object ::= Defvar
bool TGParser::ParseObject(MultiClass *MC) {
switch (Lex.getCode()) {
default:
- return TokError("Expected class, def, defm, defset, multiclass, let or "
- "foreach");
+ return TokError("Expected class, def, defm, defset, multiclass, let, "
+ "foreach or if");
case tgtok::Let: return ParseTopLevelLet(MC);
case tgtok::Def: return ParseDef(MC);
case tgtok::Foreach: return ParseForeach(MC);
+ case tgtok::If: return ParseIf(MC);
case tgtok::Defm: return ParseDefm(MC);
case tgtok::Defset:
if (MC)
return TokError("defset is not allowed inside multiclass");
return ParseDefset();
+ case tgtok::Defvar:
+ return ParseDefvar();
case tgtok::Class:
if (MC)
return TokError("class is not allowed inside multiclass");
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGParser.h b/contrib/llvm-project/llvm/lib/TableGen/TGParser.h
index af2b639f8d59..c66c79771298 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGParser.h
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGParser.h
@@ -56,6 +56,10 @@ namespace llvm {
/// ForeachLoop - Record the iteration state associated with a for loop.
/// This is used to instantiate items in the loop body.
+ ///
+ /// IterVar is allowed to be null, in which case no iteration variable is
+ /// defined in the loop at all. (This happens when a ForeachLoop is
+ /// constructed by desugaring an if statement.)
struct ForeachLoop {
SMLoc Loc;
VarInit *IterVar;
@@ -70,10 +74,50 @@ namespace llvm {
struct DefsetRecord {
SMLoc Loc;
- RecTy *EltTy;
+ RecTy *EltTy = nullptr;
SmallVector<Init *, 16> Elements;
};
+class TGLocalVarScope {
+ // A scope to hold local variable definitions from defvar.
+ std::map<std::string, Init *, std::less<>> vars;
+ std::unique_ptr<TGLocalVarScope> parent;
+
+public:
+ TGLocalVarScope() = default;
+ TGLocalVarScope(std::unique_ptr<TGLocalVarScope> parent)
+ : parent(std::move(parent)) {}
+
+ std::unique_ptr<TGLocalVarScope> extractParent() {
+ // This is expected to be called just before we are destructed, so
+ // it doesn't much matter what state we leave 'parent' in.
+ return std::move(parent);
+ }
+
+ Init *getVar(StringRef Name) const {
+ auto It = vars.find(Name);
+ if (It != vars.end())
+ return It->second;
+ if (parent)
+ return parent->getVar(Name);
+ return nullptr;
+ }
+
+ bool varAlreadyDefined(StringRef Name) const {
+ // When we check whether a variable is already defined, for the purpose of
+ // reporting an error on redefinition, we don't look up to the parent
+ // scope, because it's all right to shadow an outer definition with an
+ // inner one.
+ return vars.find(Name) != vars.end();
+ }
+
+ void addVar(StringRef Name, Init *I) {
+ bool Ins = vars.insert(std::make_pair(Name, I)).second;
+ (void)Ins;
+ assert(Ins && "Local variable already exists");
+ }
+};
+
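+// For example (a sketch of the scoping rules):
+//   defvar V = 1;       // outer scope
+//   foreach i = [0] in {
+//     defvar V = 2;     // OK: shadows the outer V
+//     defvar V = 3;     // error: V is already defined in this scope
+//   }
+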
struct MultiClass {
Record Rec; // Placeholder for template args and Name.
std::vector<RecordsEntry> Entries;
@@ -99,6 +143,10 @@ class TGParser {
/// current value.
MultiClass *CurMultiClass;
+ /// CurLocalScope - Innermost of the current nested scopes for 'defvar' local
+ /// variables.
+ std::unique_ptr<TGLocalVarScope> CurLocalScope;
+
// Record tracker
RecordKeeper &Records;
@@ -114,9 +162,9 @@ class TGParser {
};
public:
- TGParser(SourceMgr &SrcMgr, ArrayRef<std::string> Macros,
+ TGParser(SourceMgr &SM, ArrayRef<std::string> Macros,
RecordKeeper &records)
- : Lex(SrcMgr, Macros), CurMultiClass(nullptr), Records(records) {}
+ : Lex(SM, Macros), CurMultiClass(nullptr), Records(records) {}
/// ParseFile - Main entrypoint for parsing a tblgen file. These parser
/// routines return true on error, or false on success.
@@ -129,11 +177,24 @@ public:
bool TokError(const Twine &Msg) const {
return Error(Lex.getLoc(), Msg);
}
- const TGLexer::DependenciesMapTy &getDependencies() const {
+ const TGLexer::DependenciesSetTy &getDependencies() const {
return Lex.getDependencies();
}
-private: // Semantic analysis methods.
+ TGLocalVarScope *PushLocalScope() {
+ CurLocalScope = std::make_unique<TGLocalVarScope>(std::move(CurLocalScope));
+ // Returns a pointer to the new scope, so that the caller can pass it back
+ // to PopLocalScope which will check by assertion that the pushes and pops
+ // match up properly.
+ return CurLocalScope.get();
+ }
+ void PopLocalScope(TGLocalVarScope *ExpectedStackTop) {
+ assert(ExpectedStackTop == CurLocalScope.get() &&
+ "Mismatched pushes and pops of local variable scopes");
+ CurLocalScope = CurLocalScope->extractParent();
+ }
+
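+  // Typical usage in the parser (sketch):
+  //   TGLocalVarScope *Scope = PushLocalScope();
+  //   ... parse a body that may contain 'defvar' ...
+  //   PopLocalScope(Scope);
+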
+private: // Semantic analysis methods.
bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV);
bool SetValue(Record *TheRec, SMLoc Loc, Init *ValName,
ArrayRef<unsigned> BitList, Init *V,
@@ -161,7 +222,10 @@ private: // Parser methods.
bool ParseDefm(MultiClass *CurMultiClass);
bool ParseDef(MultiClass *CurMultiClass);
bool ParseDefset();
+ bool ParseDefvar();
bool ParseForeach(MultiClass *CurMultiClass);
+ bool ParseIf(MultiClass *CurMultiClass);
+ bool ParseIfBody(MultiClass *CurMultiClass, StringRef Kind);
bool ParseTopLevelLet(MultiClass *CurMultiClass);
void ParseLetList(SmallVectorImpl<LetRecord> &Result);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
index 5b4c9e2149da..0106355b1a44 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
@@ -448,9 +448,9 @@ include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
include "AArch64SchedFalkor.td"
include "AArch64SchedKryo.td"
-include "AArch64SchedExynosM1.td"
include "AArch64SchedExynosM3.td"
include "AArch64SchedExynosM4.td"
+include "AArch64SchedExynosM5.td"
include "AArch64SchedThunderX.td"
include "AArch64SchedThunderX2T99.td"
@@ -565,8 +565,8 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
// Note that cyclone does not fuse AES instructions, but newer apple chips do
// perform the fusion and cyclone is used by default when targeting apple OSes.
-def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
- "Cyclone", [
+def ProcAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
+ "Apple A7 (the CPU formerly known as Cyclone)", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
@@ -582,32 +582,82 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeatureZCZeroingFPWorkaround
]>;
-def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
- "Samsung Exynos-M1 processors",
- [FeatureSlowPaired128,
- FeatureCRC,
- FeatureCrypto,
- FeatureExynosCheapAsMoveHandling,
- FeatureForce32BitJumpTables,
- FeatureFuseAES,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureSlowMisaligned128Store,
- FeatureUseRSqrt,
- FeatureZCZeroingFP]>;
+def ProcAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
+ "Apple A10", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureCrypto,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureFuseCryptoEOR,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeatureZCRegMove,
+ FeatureZCZeroing,
+ FeatureCRC,
+ FeatureRDM,
+ FeaturePAN,
+ FeatureLOR,
+ FeatureVH,
+ ]>;
-def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
- "Samsung Exynos-M2 processors",
- [FeatureSlowPaired128,
- FeatureCRC,
+def ProcAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
+ "Apple A11", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureCrypto,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureFuseCryptoEOR,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeatureZCRegMove,
+ FeatureZCZeroing,
+ FeatureFullFP16,
+ HasV8_2aOps
+ ]>;
+
+def ProcAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
+ "Apple A12", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureCrypto,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureFuseCryptoEOR,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeatureZCRegMove,
+ FeatureZCZeroing,
+ FeatureFullFP16,
+ HasV8_3aOps
+ ]>;
+
+def ProcAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
+ "Apple A13", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
FeatureCrypto,
- FeatureExynosCheapAsMoveHandling,
- FeatureForce32BitJumpTables,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFPARMv8,
FeatureFuseAES,
+ FeatureFuseCryptoEOR,
+ FeatureNEON,
FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureSlowMisaligned128Store,
- FeatureZCZeroingFP]>;
+ FeatureZCRegMove,
+ FeatureZCZeroing,
+ FeatureFullFP16,
+ FeatureFP16FML,
+ FeatureSHA3,
+ HasV8_4aOps
+ ]>;
def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
@@ -815,12 +865,9 @@ def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>;
def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>;
def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
-def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
-def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
-def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
-def : ProcessorModel<"exynos-m5", ExynosM4Model, [ProcExynosM4]>;
+def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>;
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
@@ -834,8 +881,24 @@ def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
// FIXME: HiSilicon TSV110 is currently modeled as a Cortex-A57.
def : ProcessorModel<"tsv110", CortexA57Model, [ProcTSV110]>;
+// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
+def : ProcessorModel<"cyclone", CycloneModel, [ProcAppleA7]>;
+
+// iPhone and iPad CPUs
+def : ProcessorModel<"apple-a7", CycloneModel, [ProcAppleA7]>;
+def : ProcessorModel<"apple-a8", CycloneModel, [ProcAppleA7]>;
+def : ProcessorModel<"apple-a9", CycloneModel, [ProcAppleA7]>;
+def : ProcessorModel<"apple-a10", CycloneModel, [ProcAppleA10]>;
+def : ProcessorModel<"apple-a11", CycloneModel, [ProcAppleA11]>;
+def : ProcessorModel<"apple-a12", CycloneModel, [ProcAppleA12]>;
+def : ProcessorModel<"apple-a13", CycloneModel, [ProcAppleA13]>;
+
+// watch CPUs.
+def : ProcessorModel<"apple-s4", CycloneModel, [ProcAppleA12]>;
+def : ProcessorModel<"apple-s5", CycloneModel, [ProcAppleA12]>;
+
// Alias for the latest Apple processor model supported by LLVM.
-def : ProcessorModel<"apple-latest", CycloneModel, [ProcCyclone]>;
+def : ProcessorModel<"apple-latest", CycloneModel, [ProcAppleA13]>;
//===----------------------------------------------------------------------===//
// Assembly parser
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 7ea7915c2ca6..00e321f9b850 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -84,6 +84,7 @@ public:
return MCInstLowering.lowerOperand(MO, MCOp);
}
+ void EmitStartOfAsmFile(Module &M) override;
void EmitJumpTableInfo() override;
void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB, unsigned JTI);
@@ -181,8 +182,79 @@ private:
} // end anonymous namespace
+void AArch64AsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (!TM.getTargetTriple().isOSBinFormatELF())
+ return;
+
+ // Assemble feature flags that may require creation of a note section.
+ unsigned Flags = ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI |
+ ELF::GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
+
+ if (any_of(M, [](const Function &F) {
+ return !F.isDeclaration() &&
+ !F.hasFnAttribute("branch-target-enforcement");
+ })) {
+ Flags &= ~ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
+ }
+
+ if ((Flags & ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI) == 0 &&
+ any_of(M, [](const Function &F) {
+ return F.hasFnAttribute("branch-target-enforcement");
+ })) {
+ errs() << "warning: some functions compiled with BTI and some compiled "
+ "without BTI\n"
+ << "warning: not setting BTI in feature flags\n";
+ }
+
+ if (any_of(M, [](const Function &F) {
+ if (F.isDeclaration())
+ return false;
+ Attribute A = F.getFnAttribute("sign-return-address");
+ return !A.isStringAttribute() || A.getValueAsString() == "none";
+ })) {
+ Flags &= ~ELF::GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
+ }
+
+ if (Flags == 0)
+ return;
+
+ // Emit a .note.gnu.property section with the flags.
+ MCSection *Cur = OutStreamer->getCurrentSectionOnly();
+ MCSection *Nt = MMI->getContext().getELFSection(
+ ".note.gnu.property", ELF::SHT_NOTE, ELF::SHF_ALLOC);
+ OutStreamer->SwitchSection(Nt);
+
+ // Emit the note header.
+ EmitAlignment(Align(8));
+ OutStreamer->EmitIntValue(4, 4); // data size for "GNU\0"
+ OutStreamer->EmitIntValue(4 * 4, 4); // Elf_Prop size
+ OutStreamer->EmitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4);
+ OutStreamer->EmitBytes(StringRef("GNU", 4)); // note name
+
+ // Emit the PAC/BTI properties.
+ OutStreamer->EmitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4);
+ OutStreamer->EmitIntValue(4, 4); // data size
+ OutStreamer->EmitIntValue(Flags, 4); // data
+ OutStreamer->EmitIntValue(0, 4); // pad
+
+ OutStreamer->endSection(Nt);
+ OutStreamer->SwitchSection(Cur);
+}
+
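+// The emitted note is laid out roughly as follows (an assembly sketch):
+//   .section .note.gnu.property,"a",@note
+//   .p2align 3
+//   .word 4                        // n_namesz: "GNU\0"
+//   .word 16                       // n_descsz: one Elf_Prop
+//   .word NT_GNU_PROPERTY_TYPE_0   // n_type
+//   .asciz "GNU"
+//   .word GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4, <flags>, 0
+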
void AArch64AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
{
+ const Function &F = MF->getFunction();
+ if (F.hasFnAttribute("patchable-function-entry")) {
+ unsigned Num;
+ if (F.getFnAttribute("patchable-function-entry")
+ .getValueAsString()
+ .getAsInteger(10, Num))
+ return;
+ for (; Num; --Num)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
+ return;
+ }
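+  // E.g. with "patchable-function-entry"="2", the block above emits two
+  // NOPs (HINT #0) and returns instead of falling through to the XRay sled.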
+
EmitSled(MI, SledKind::FUNCTION_ENTER);
}
@@ -458,8 +530,8 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
- emitStackMaps(SM);
}
+ emitStackMaps(SM);
}
void AArch64AsmPrinter::EmitLOHs() {
@@ -794,7 +866,11 @@ void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
- SM.recordStackMap(MI);
+ auto &Ctx = OutStreamer.getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer.EmitLabel(MILabel);
+
+ SM.recordStackMap(*MILabel, MI);
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
// Scan ahead to trim the shadow.
@@ -820,7 +896,10 @@ void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
// [<def>], <id>, <numBytes>, <target>, <numArgs>
void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
- SM.recordPatchPoint(MI);
+ auto &Ctx = OutStreamer.getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer.EmitLabel(MILabel);
+ SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers Opers(&MI);
@@ -1219,7 +1298,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// Force static initialization.
-extern "C" void LLVMInitializeAArch64AsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64AsmPrinter() {
RegisterAsmPrinter<AArch64AsmPrinter> X(getTheAArch64leTarget());
RegisterAsmPrinter<AArch64AsmPrinter> Y(getTheAArch64beTarget());
RegisterAsmPrinter<AArch64AsmPrinter> Z(getTheARM64Target());
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index ed93d02aa615..76ff238234d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -160,7 +160,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
MIRBuilder.buildConstant(OffsetReg, Offset);
Register AddrReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
+ MIRBuilder.buildPtrAdd(AddrReg, SPReg, OffsetReg);
MPO = MachinePointerInfo::getStack(MF, Offset);
return AddrReg;
@@ -815,7 +815,7 @@ bool AArch64CallLowering::lowerTailCall(
// Tell the call which registers are clobbered.
auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(MF, &Mask);
MIB.addRegMask(Mask);
@@ -972,7 +972,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Tell the call which registers are clobbered.
auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(MF, &Mask);
MIB.addRegMask(Mask);
@@ -1000,10 +1000,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
0));
// Finally we can copy the returned value back into its virtual-register. In
- // symmetry with the arugments, the physical register must be an
+ // symmetry with the arguments, the physical register must be an
// implicit-define of the call instruction.
if (!Info.OrigRet.Ty->isVoidTy()) {
- CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
+ CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
if (!handleAssignments(MIRBuilder, InArgs, Handler))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.h
index 5a55d090d7c8..59939e0684ed 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -31,6 +31,9 @@ bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
bool CC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index bccbbd4591ed..a0b2d7712b66 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -75,10 +75,10 @@ def CC_AArch64_AAPCS : CallingConv<[
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
- nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+ nxv2f32, nxv4f32, nxv2f64],
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
- nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+ nxv2f32, nxv4f32, nxv2f64],
CCPassIndirect<i64>>,
CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
@@ -155,7 +155,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
- nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+ nxv2f32, nxv4f32, nxv2f64],
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
@@ -170,6 +170,13 @@ def CC_AArch64_Win64_VarArg : CallingConv<[
CCDelegateTo<CC_AArch64_AAPCS>
]>;
+// Windows Control Flow Guard checks take a single argument (the target function
+// address) and have no return value.
+let Entry = 1 in
+def CC_AArch64_Win64_CFGuard_Check : CallingConv<[
+ CCIfType<[i64], CCAssignToReg<[X15]>>
+]>;
+
// Darwin uses a calling convention which differs in only two ways
// from the standard one at this level:
@@ -384,6 +391,12 @@ def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
+// The Control Flow Guard check call uses a custom calling convention that also
+// preserves X0-X8 and Q0-Q7.
+def CSR_Win_AArch64_CFGuard_Check : CalleeSavedRegs<(add CSR_Win_AArch64_AAPCS,
+ (sequence "X%u", 0, 8),
+ (sequence "Q%u", 0, 7))>;
+
// AArch64 PCS for vector functions (VPCS)
// must (additionally) preserve full Q8-Q23 registers
def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
@@ -392,10 +405,10 @@ def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
// Functions taking SVE arguments or returning an SVE type
// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
-def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
- X25, X26, X27, X28, LR, FP,
- (sequence "Z%u", 8, 23),
- (sequence "P%u", 4, 15))>;
+def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23),
+ (sequence "P%u", 4, 15),
+ X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, LR, FP)>;
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
@@ -473,5 +486,7 @@ def CSR_AArch64_RT_MostRegs_SCS
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
def CSR_AArch64_AAVPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
+def CSR_AArch64_SVE_AAPCS_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_SVE_AAPCS, X18)>;
def CSR_AArch64_AAPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
index 48dab79b32d3..259238705965 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -74,10 +75,16 @@ void AArch64CompressJumpTables::scanFunction() {
BlockInfo.clear();
BlockInfo.resize(MF->getNumBlockIDs());
- int Offset = 0;
+ unsigned Offset = 0;
for (MachineBasicBlock &MBB : *MF) {
- BlockInfo[MBB.getNumber()] = Offset;
- Offset += computeBlockSize(MBB);
+ const Align Alignment = MBB.getAlignment();
+ unsigned AlignedOffset;
+ if (Alignment == Align::None())
+ AlignedOffset = Offset;
+ else
+ AlignedOffset = alignTo(Offset, Alignment);
+ BlockInfo[MBB.getNumber()] = AlignedOffset;
+ Offset = AlignedOffset + computeBlockSize(MBB);
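+    // E.g. an Offset of 10 entering a block with 4-byte alignment records
+    // the block at AlignedOffset 12 before adding its size.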
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index a6efb115ed44..51b2ce029701 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -74,6 +74,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 43ae9f8ec47f..054ef8f482ca 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -351,8 +352,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
}
// Check for flag reads and clobbers.
- MIOperands::PhysRegInfo PRI =
- MIOperands(*I).analyzePhysReg(AArch64::NZCV, TRI);
+ PhysRegInfo PRI = AnalyzePhysRegInBundle(*I, AArch64::NZCV, TRI);
if (PRI.Read) {
// The ccmp doesn't produce exactly the same flags as the original
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 082e17e44d04..3b8f8a19fe49 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -110,6 +110,8 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
unsigned BitSize) {
MachineInstr &MI = *MBBI;
Register DstReg = MI.getOperand(0).getReg();
+ uint64_t RenamableState =
+ MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
uint64_t Imm = MI.getOperand(1).getImm();
if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
@@ -144,7 +146,8 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
bool DstIsDead = MI.getOperand(0).isDead();
MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
.addReg(DstReg, RegState::Define |
- getDeadRegState(DstIsDead && LastItem))
+ getDeadRegState(DstIsDead && LastItem) |
+ RenamableState)
.addImm(I->Op1)
.addImm(I->Op2));
} break;
@@ -155,7 +158,8 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
.addReg(DstReg,
RegState::Define |
- getDeadRegState(DstIsDead && LastItem))
+ getDeadRegState(DstIsDead && LastItem) |
+ RenamableState)
.addReg(DstReg)
.addImm(I->Op1)
.addImm(I->Op2));
@@ -692,10 +696,12 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return true;
}
case AArch64::TAGPstack: {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDG))
+ int64_t Offset = MI.getOperand(2).getImm();
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
- .add(MI.getOperand(2))
+ .addImm(std::abs(Offset))
.add(MI.getOperand(4));
MI.eraseFromParent();
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index b54fc2e51bac..c1fc183b04f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 277a3052f1e5..7e9c68f2bb30 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -348,6 +348,8 @@ CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
return CC_AArch64_WebKit_JS;
if (CC == CallingConv::GHC)
return CC_AArch64_GHC;
+ if (CC == CallingConv::CFGuard_Check)
+ return CC_AArch64_Win64_CFGuard_Check;
return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}
@@ -3251,6 +3253,13 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (Callee && !computeCallAddress(Callee, Addr))
return false;
+ // The weak function target may be zero; in that case we must use indirect
+  // addressing via a stub on Windows as it may be out of range for a
+ // PC-relative jump.
+ if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
+ Addr.getGlobalValue()->hasExternalWeakLinkage())
+ return false;
+
// Handle the arguments now that we've gotten them.
unsigned NumBytes;
if (!processCallArgs(CLI, OutVTs, NumBytes))
@@ -3836,11 +3845,6 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
- // FIXME: in principle it could. Mostly just a case of zero extending outgoing
- // pointers.
- if (Subtarget->isTargetILP32())
- return false;
-
if (F.isVarArg())
return false;
@@ -3920,6 +3924,11 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
return false;
}
+ // "Callee" (i.e. value producer) zero extends pointers at function
+ // boundary.
+ if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
+ SrcReg = emitAnd_ri(MVT::i64, SrcReg, false, 0xffffffff);
+
// Make the copy.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
@@ -5009,6 +5018,9 @@ std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
+ if (Subtarget->isTargetILP32())
+ return false;
+
unsigned N = getRegForValue(I->getOperand(0));
if (!N)
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 68e1e6a30224..ea3e800a1ad2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -206,6 +206,11 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
return DefaultSafeSPDisplacement;
}
+TargetStackID::Value
+AArch64FrameLowering::getStackIDForScalableVectors() const {
+ return TargetStackID::SVEVector;
+}
+
/// Returns the size of the entire SVE stackframe (calleesaves + spills).
static StackOffset getSVEStackSize(const MachineFunction &MF) {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -222,7 +227,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- unsigned NumBytes = AFI->getLocalStackSize();
+ uint64_t NumBytes = AFI->getLocalStackSize();
return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128 ||
getSVEStackSize(MF));
@@ -239,7 +244,7 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
if (MF.hasEHFunclets())
return true;
// Retain behavior of always omitting the FP for leaf functions when possible.
- if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
+ if (MF.getTarget().Options.DisableFramePointerElim(MF))
return true;
if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
@@ -424,7 +429,7 @@ bool AArch64FrameLowering::canUseAsPrologue(
}
static bool windowsRequiresStackProbe(MachineFunction &MF,
- unsigned StackSizeInBytes) {
+ uint64_t StackSizeInBytes) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
if (!Subtarget.isTargetWindows())
return false;
@@ -441,15 +446,12 @@ static bool windowsRequiresStackProbe(MachineFunction &MF,
}
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
- MachineFunction &MF, unsigned StackBumpBytes) const {
+ MachineFunction &MF, uint64_t StackBumpBytes) const {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- if (MF.getFunction().hasOptSize())
- return false;
-
if (AFI->getLocalStackSize() == 0)
return false;
@@ -723,7 +725,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
- unsigned LocalStackSize,
+ uint64_t LocalStackSize,
bool NeedsWinCFI,
bool *HasWinCFI) {
if (AArch64InstrInfo::isSEHInstruction(MI))
@@ -834,6 +836,24 @@ static bool isTargetDarwin(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
}
+static bool isTargetWindows(const MachineFunction &MF) {
+ return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
+}
+
+// Convenience function to determine whether I is an SVE callee save.
+static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case AArch64::STR_ZXI:
+ case AArch64::STR_PXI:
+ case AArch64::LDR_ZXI:
+ case AArch64::LDR_PXI:
+ return I->getFlag(MachineInstr::FrameSetup) ||
+ I->getFlag(MachineInstr::FrameDestroy);
+ }
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -844,8 +864,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) &&
- !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool needsFrameMoves =
+ MF.needsFrameMoves() && !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool HasFP = hasFP(MF);
bool NeedsWinCFI = needsWinCFI(MF);
bool HasWinCFI = false;
@@ -897,8 +917,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// pointer from the funclet. We only save the callee saved registers in the
// funclet, which are really the callee saved registers of the parent
// function, including the funclet.
- int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
- : (int)MFI.getStackSize();
+ int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
+ : MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
assert(!SVEStackSize &&
@@ -916,15 +936,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
{-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
false, NeedsWinCFI, &HasWinCFI);
- if (!NeedsWinCFI) {
+ if (!NeedsWinCFI && needsFrameMoves) {
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
- // Encode the stack size of the leaf function.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ // Encode the stack size of the leaf function.
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
@@ -965,7 +985,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// and pre-inc if we decided to combine the callee-save and local stack
// pointer bump above.
MachineBasicBlock::iterator End = MBB.end();
- while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
+ while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
+ !IsSVECalleeSave(MBBI)) {
if (CombineSPBump)
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
NeedsWinCFI, &HasWinCFI);
@@ -999,7 +1020,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP) {
// Only set up FP if we actually need to.
- int FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
+ int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
@@ -1014,7 +1035,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
- uint32_t NumWords = NumBytes >> 4;
+ uint64_t NumWords = NumBytes >> 4;
if (NeedsWinCFI) {
HasWinCFI = true;
// alloc_l can hold at most 256MB, so assume that NumBytes doesn't
@@ -1107,7 +1128,35 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
NumBytes = 0;
}
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -SVEStackSize, TII,
+ StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
+ MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
+
+ // Process the SVE callee-saves to determine what space needs to be
+ // allocated.
+ if (AFI->getSVECalleeSavedStackSize()) {
+ // Find callee save instructions in frame.
+ CalleeSavesBegin = MBBI;
+ assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
+ while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
+ ++MBBI;
+ CalleeSavesEnd = MBBI;
+
+ int64_t OffsetToFirstCalleeSaveFromSP =
+ MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
+ StackOffset OffsetToCalleeSavesFromSP =
+ StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
+ AllocateBefore -= OffsetToCalleeSavesFromSP;
+ AllocateAfter = SVEStackSize - AllocateBefore;
+ }
+
+ // Allocate space for the callee saves (if any).
+ emitFrameOffset(MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP,
+ -AllocateBefore, TII,
+ MachineInstr::FrameSetup);
+
+ // Finally allocate remaining SVE stack space.
+ emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
+ -AllocateAfter, TII,
MachineInstr::FrameSetup);
// Allocate space for the rest of the frame.
@@ -1343,8 +1392,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
IsFunclet = isFuncletReturnInstr(*MBBI);
}
- int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
- : MFI.getStackSize();
+ int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
+ : MFI.getStackSize();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// All calls are tail calls in GHC calling conv, and functions have no
@@ -1444,7 +1493,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator Begin = MBB.begin();
while (LastPopI != Begin) {
--LastPopI;
- if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
+ if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
+ IsSVECalleeSave(LastPopI)) {
++LastPopI;
break;
} else if (CombineSPBump)
@@ -1476,11 +1526,53 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
+ // Process the SVE callee-saves to determine what space needs to be
+ // deallocated.
+ StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
+ MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
+ if (AFI->getSVECalleeSavedStackSize()) {
+ RestoreBegin = std::prev(RestoreEnd);;
+ while (IsSVECalleeSave(RestoreBegin) &&
+ RestoreBegin != MBB.begin())
+ --RestoreBegin;
+ ++RestoreBegin;
+
+ assert(IsSVECalleeSave(RestoreBegin) &&
+ IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
+
+ int64_t OffsetToFirstCalleeSaveFromSP =
+ MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
+ StackOffset OffsetToCalleeSavesFromSP =
+ StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
+ DeallocateBefore = OffsetToCalleeSavesFromSP;
+ DeallocateAfter = SVEStackSize - DeallocateBefore;
+ }
+
// Deallocate the SVE area.
- if (SVEStackSize)
- if (!AFI->isStackRealigned())
- emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, SVEStackSize,
- TII, MachineInstr::FrameDestroy);
+ if (SVEStackSize) {
+ if (AFI->isStackRealigned()) {
+ if (AFI->getSVECalleeSavedStackSize())
+ // Set SP to start of SVE area, from which the callee-save reloads
+        // can be done. The code below will deallocate the stack space
+        // by moving FP -> SP.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
+ -SVEStackSize, TII, MachineInstr::FrameDestroy);
+ } else {
+ if (AFI->getSVECalleeSavedStackSize()) {
+ // Deallocate the non-SVE locals first before we can deallocate (and
+ // restore callee saves) from the SVE area.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy);
+ NumBytes = 0;
+ }
+
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ DeallocateBefore, TII, MachineInstr::FrameDestroy);
+
+ emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+ DeallocateAfter, TII, MachineInstr::FrameDestroy);
+ }
+ }
if (!hasFP(MF)) {
bool RedZone = canUseRedZone(MF);
@@ -1490,7 +1582,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
return;
bool NoCalleeSaveRestore = PrologueSaveSize == 0;
- int StackRestoreBytes = RedZone ? 0 : NumBytes;
+ int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
if (NoCalleeSaveRestore)
StackRestoreBytes += AfterCSRPopSize;
@@ -1582,19 +1674,20 @@ int AArch64FrameLowering::getNonLocalFrameIndexReference(
return getSEHFrameIndexOffset(MF, FI);
}
-static StackOffset getFPOffset(const MachineFunction &MF, int ObjectOffset) {
+static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset) {
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
- unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize();
+ unsigned FPAdjust = isTargetDarwin(MF)
+ ? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo());
return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
}
-static StackOffset getStackOffset(const MachineFunction &MF, int ObjectOffset) {
+static StackOffset getStackOffset(const MachineFunction &MF, int64_t ObjectOffset) {
const auto &MFI = MF.getFrameInfo();
- return {ObjectOffset + (int)MFI.getStackSize(), MVT::i8};
+ return {ObjectOffset + (int64_t)MFI.getStackSize(), MVT::i8};
}
int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
@@ -1611,7 +1704,7 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP,
bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
- int ObjectOffset = MFI.getObjectOffset(FI);
+ int64_t ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
bool isSVE = MFI.getStackID(FI) == TargetStackID::SVEVector;
return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
@@ -1619,7 +1712,7 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
}
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
- const MachineFunction &MF, int ObjectOffset, bool isFixed, bool isSVE,
+ const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
@@ -1627,10 +1720,10 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- int FPOffset = getFPOffset(MF, ObjectOffset).getBytes();
- int Offset = getStackOffset(MF, ObjectOffset).getBytes();
+ int64_t FPOffset = getFPOffset(MF, ObjectOffset).getBytes();
+ int64_t Offset = getStackOffset(MF, ObjectOffset).getBytes();
bool isCSR =
- !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
+ !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
const StackOffset &SVEStackSize = getSVEStackSize(MF);
@@ -1781,6 +1874,8 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
// TODO: LR can be paired with any register. We don't support this yet in
// the MCLayer. We need to add support for the save_lrpair unwind code.
+ if (Reg2 == AArch64::FP)
+ return true;
if (!NeedsWinCFI)
return false;
if (Reg2 == Reg1 + 1)
@@ -1793,9 +1888,9 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
/// LR and FP need to be allocated together when the frame needs to save
/// the frame-record. This means any other register pairing with LR is invalid.
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
- bool NeedsWinCFI, bool NeedsFrameRecord) {
- if (NeedsWinCFI)
- return invalidateWindowsRegisterPairing(Reg1, Reg2, true);
+ bool UsesWinAAPCS, bool NeedsWinCFI, bool NeedsFrameRecord) {
+ if (UsesWinAAPCS)
+ return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI);
// If we need to store the frame record, don't pair any register
// with LR other than FP.
@@ -1812,11 +1907,27 @@ struct RegPairInfo {
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
- enum RegType { GPR, FPR64, FPR128 } Type;
+ enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;
RegPairInfo() = default;
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
+
+ unsigned getScale() const {
+ switch (Type) {
+ case PPR:
+ return 2;
+ case GPR:
+ case FPR64:
+ return 8;
+ case ZPR:
+ case FPR128:
+ return 16;
+ }
+ llvm_unreachable("Unsupported type");
+ }
+
+ bool isScalable() const { return Type == PPR || Type == ZPR; }
};
} // end anonymous namespace
@@ -1829,6 +1940,7 @@ static void computeCalleeSaveRegisterPairs(
if (CSI.empty())
return;
+ bool IsWindows = isTargetWindows(MF);
bool NeedsWinCFI = needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1841,7 +1953,8 @@ static void computeCalleeSaveRegisterPairs(
CC == CallingConv::PreserveMost ||
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
- int Offset = AFI->getCalleeSavedStackSize();
+ int ByteOffset = AFI->getCalleeSavedStackSize();
+ int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
// On Linux, we will have either one or zero non-paired register. On Windows
// with CFI, we can have multiple unpaired registers in order to utilize the
// available unwind codes. This flag ensures that the alignment fixup is done
@@ -1857,6 +1970,10 @@ static void computeCalleeSaveRegisterPairs(
RPI.Type = RegPairInfo::FPR64;
else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::FPR128;
+ else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::ZPR;
+ else if (AArch64::PPRRegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::PPR;
else
llvm_unreachable("Unsupported register class.");
@@ -1866,7 +1983,7 @@ static void computeCalleeSaveRegisterPairs(
switch (RPI.Type) {
case RegPairInfo::GPR:
if (AArch64::GPR64RegClass.contains(NextReg) &&
- !invalidateRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
+ !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, NeedsWinCFI,
NeedsFrameRecord))
RPI.Reg2 = NextReg;
break;
@@ -1879,6 +1996,9 @@ static void computeCalleeSaveRegisterPairs(
if (AArch64::FPR128RegClass.contains(NextReg))
RPI.Reg2 = NextReg;
break;
+ case RegPairInfo::PPR:
+ case RegPairInfo::ZPR:
+ break;
}
}
@@ -1905,6 +2025,11 @@ static void computeCalleeSaveRegisterPairs(
RPI.Reg1 == AArch64::LR) &&
"FrameRecord must be allocated together with LR");
+ // Windows AAPCS has FP and LR reversed.
+ assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
+ RPI.Reg2 == AArch64::LR) &&
+ "FrameRecord must be allocated together with LR");
+
// MachO's compact unwind format relies on all registers being stored in
// adjacent register pairs.
assert((!produceCompactUnwindFrame(MF) ||
@@ -1916,23 +2041,33 @@ static void computeCalleeSaveRegisterPairs(
RPI.FrameIdx = CSI[i].getFrameIdx();
- int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
- Offset -= RPI.isPaired() ? 2 * Scale : Scale;
+ int Scale = RPI.getScale();
+ if (RPI.isScalable())
+ ScalableByteOffset -= Scale;
+ else
+ ByteOffset -= RPI.isPaired() ? 2 * Scale : Scale;
+
+ assert(!(RPI.isScalable() && RPI.isPaired()) &&
+ "Paired spill/fill instructions don't exist for SVE vectors");
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
- RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
+ !RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
+ !RPI.isPaired()) {
FixupDone = true;
- Offset -= 8;
- assert(Offset % 16 == 0);
+ ByteOffset -= 8;
+ assert(ByteOffset % 16 == 0);
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
MFI.setObjectAlignment(RPI.FrameIdx, 16);
}
+ int Offset = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
assert(Offset % Scale == 0);
RPI.Offset = Offset / Scale;
- assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
+
+ assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
+ (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");
RegPairs.push_back(RPI);
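The hunk above keeps two running counters: ByteOffset for ordinary callee-saves and ScalableByteOffset for SVE registers, whose slots are multiples of vscale and therefore cannot be mixed into the fixed-size area. A simplified standalone model of that accounting, with made-up starting values and no LLVM APIs:

#include <cassert>
#include <cstdio>
#include <vector>

// Simplified model of the two running offsets; all values are made up.
struct Save { unsigned Scale; bool Scalable; bool Paired; };

int main() {
  int ByteOffset = 48;         // stands in for AFI->getCalleeSavedStackSize()
  int ScalableByteOffset = 32; // stands in for AFI->getSVECalleeSavedStackSize()
  std::vector<Save> Saves = {{16, true, false},  // one ZPR
                             {2, true, false},   // one PPR
                             {8, false, true}};  // one GPR pair
  for (const Save &S : Saves) {
    assert(!(S.Scalable && S.Paired) && "no paired SVE spill/fill");
    if (S.Scalable)
      ScalableByteOffset -= (int)S.Scale; // counted in vscale-scaled bytes
    else
      ByteOffset -= S.Paired ? 2 * (int)S.Scale : (int)S.Scale;
    int Offset = S.Scalable ? ScalableByteOffset : ByteOffset;
    assert(Offset % (int)S.Scale == 0);
    std::printf("scaled immediate = %d\n", Offset / (int)S.Scale);
  }
}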
@@ -2024,6 +2159,16 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
Size = 16;
Align = 16;
break;
+ case RegPairInfo::ZPR:
+ StrOpc = AArch64::STR_ZXI;
+ Size = 16;
+ Align = 16;
+ break;
+ case RegPairInfo::PPR:
+ StrOpc = AArch64::STR_PXI;
+ Size = 2;
+ Align = 2;
+ break;
}
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
@@ -2064,6 +2209,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+ // Update the StackIDs of the SVE stack slots.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
+ MFI.setStackID(RPI.FrameIdx, TargetStackID::SVEVector);
+
}
return true;
}
@@ -2115,6 +2265,16 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
Size = 16;
Align = 16;
break;
+ case RegPairInfo::ZPR:
+ LdrOpc = AArch64::LDR_ZXI;
+ Size = 16;
+ Align = 16;
+ break;
+ case RegPairInfo::PPR:
+ LdrOpc = AArch64::LDR_PXI;
+ Size = 2;
+ Align = 2;
+ break;
}
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
@@ -2149,12 +2309,20 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
};
- if (ReverseCSRRestoreSeq)
- for (const RegPairInfo &RPI : reverse(RegPairs))
+
+ // SVE objects are always restored in reverse order.
+ for (const RegPairInfo &RPI : reverse(RegPairs))
+ if (RPI.isScalable())
EmitMI(RPI);
- else
+
+ if (ReverseCSRRestoreSeq) {
+ for (const RegPairInfo &RPI : reverse(RegPairs))
+ if (!RPI.isScalable())
+ EmitMI(RPI);
+ } else
for (const RegPairInfo &RPI : RegPairs)
- EmitMI(RPI);
+ if (!RPI.isScalable())
+ EmitMI(RPI);
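The restore loop is now split so that SVE registers are always reloaded first, in reverse order of the spill sequence, and only then the remaining pairs in whichever order ReverseCSRRestoreSeq requests. A toy model of the partitioned iteration, illustrative only:

#include <cstdio>
#include <vector>

// Toy model: 'S' entries are scalable (SVE) saves, 'g' entries are not.
int main() {
  std::vector<char> RegPairs = {'g', 'g', 'S', 'S'};
  bool ReverseCSRRestoreSeq = false;
  // SVE objects are always restored first, in reverse order.
  for (auto I = RegPairs.rbegin(); I != RegPairs.rend(); ++I)
    if (*I == 'S')
      std::printf("restore %c\n", *I);
  if (ReverseCSRRestoreSeq) {
    for (auto I = RegPairs.rbegin(); I != RegPairs.rend(); ++I)
      if (*I != 'S')
        std::printf("restore %c\n", *I);
  } else {
    for (char C : RegPairs)
      if (C != 'S')
        std::printf("restore %c\n", C);
  }
}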
if (NeedShadowCallStackProlog) {
// Shadow call stack epilog: ldr x30, [x18, #-8]!
@@ -2201,7 +2369,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(Reg);
bool RegUsed = SavedRegs.test(Reg);
- unsigned PairedReg = CSRegs[i ^ 1];
+ unsigned PairedReg = AArch64::NoRegister;
+ if (AArch64::GPR64RegClass.contains(Reg) ||
+ AArch64::FPR64RegClass.contains(Reg) ||
+ AArch64::FPR128RegClass.contains(Reg))
+ PairedReg = CSRegs[i ^ 1];
+
if (!RegUsed) {
if (AArch64::GPR64RegClass.contains(Reg) &&
!RegInfo->isReservedReg(MF, Reg)) {
@@ -2225,16 +2398,23 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
+ unsigned SVECSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- for (unsigned Reg : SavedRegs.set_bits())
- CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
+ for (unsigned Reg : SavedRegs.set_bits()) {
+ auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
+ if (AArch64::PPRRegClass.contains(Reg) ||
+ AArch64::ZPRRegClass.contains(Reg))
+ SVECSStackSize += RegSize;
+ else
+ CSStackSize += RegSize;
+ }
// Save number of saved regs, so we can easily update CSStackSize later.
unsigned NumSavedRegs = SavedRegs.count();
// The frame record needs to be created by saving the appropriate registers
- unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
+ uint64_t EstimatedStackSize = MFI.estimateStackSize(MF);
if (hasFP(MF) ||
windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
SavedRegs.set(AArch64::FP);
@@ -2248,10 +2428,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned MaxAlign = getStackAlignment();
int64_t SVEStackSize =
- alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign);
- assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+ alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
@@ -2299,15 +2477,20 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// Adding the size of additional 64-bit GPR saves.
CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
- unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
+ uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16);
LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
<< EstimatedStackSize + AlignedCSStackSize
<< " bytes.\n");
+ assert((!MFI.isCalleeSavedInfoValid() ||
+ AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
+ "Should not invalidate callee saved info");
+
// Round up to register pair alignment to avoid additional SP adjustment
// instructions.
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
+ AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
}
bool AArch64FrameLowering::enableStackSlotScavenging(
@@ -2316,9 +2499,40 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
return AFI->hasCalleeSaveStackFreeSpace();
}
-int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
- unsigned &MaxAlign) const {
- // Process all fixed stack objects.
+/// Returns true if there are any SVE callee-saves.
+static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
+ int &Min, int &Max) {
+ Min = std::numeric_limits<int>::max();
+ Max = std::numeric_limits<int>::min();
+
+ if (!MFI.isCalleeSavedInfoValid())
+ return false;
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ for (auto &CS : CSI) {
+ if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
+ AArch64::PPRRegClass.contains(CS.getReg())) {
+ assert((Max == std::numeric_limits<int>::min() ||
+ Max + 1 == CS.getFrameIdx()) &&
+ "SVE CalleeSaves are not consecutive");
+
+ Min = std::min(Min, CS.getFrameIdx());
+ Max = std::max(Max, CS.getFrameIdx());
+ }
+ }
+ return Min != std::numeric_limits<int>::max();
+}
+
+// Process all the SVE stack objects and determine an offset for each
+// object. If AssignOffsets is true, the computed offsets are written back
+// to the objects. Fills in the first and last callee-saved frame indices
+// into Min/MaxCSFrameIndex, respectively.
+// Returns the total size of the SVE stack.
+static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
+ int &MinCSFrameIndex,
+ int &MaxCSFrameIndex,
+ bool AssignOffsets) {
+ // First process all fixed stack objects.
int64_t Offset = 0;
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
if (MFI.getStackID(I) == TargetStackID::SVEVector) {
@@ -2327,12 +2541,69 @@ int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
Offset = FixedOffset;
}
- // Note: We don't take allocatable stack objects into
- // account yet, because allocation for those is not yet
- // implemented.
+ auto Assign = [&MFI](int FI, int64_t Offset) {
+ LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
+ MFI.setObjectOffset(FI, Offset);
+ };
+
+ // Then process all callee saved slots.
+ if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
+ // Make sure to align the last callee save slot.
+ MFI.setObjectAlignment(MaxCSFrameIndex, 16U);
+
+ // Assign offsets to the callee save slots.
+ for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
+ Offset += MFI.getObjectSize(I);
+ Offset = alignTo(Offset, MFI.getObjectAlignment(I));
+ if (AssignOffsets)
+ Assign(I, -Offset);
+ }
+ }
+
+ // Create a buffer of SVE objects to allocate and sort it.
+ SmallVector<int, 8> ObjectsToAllocate;
+ for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
+ unsigned StackID = MFI.getStackID(I);
+ if (StackID != TargetStackID::SVEVector)
+ continue;
+ if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
+ continue;
+ if (MFI.isDeadObjectIndex(I))
+ continue;
+
+ ObjectsToAllocate.push_back(I);
+ }
+
+ // Allocate all SVE locals and spills
+ for (unsigned FI : ObjectsToAllocate) {
+ unsigned Align = MFI.getObjectAlignment(FI);
+ // FIXME: Given that the length of SVE vectors is not necessarily a power of
+ // two, we'd need to align every object dynamically at runtime if the
+ // alignment is larger than 16. This is not yet supported.
+ if (Align > 16)
+ report_fatal_error(
+ "Alignment of scalable vectors > 16 bytes is not yet supported");
+
+ Offset = alignTo(Offset + MFI.getObjectSize(FI), Align);
+ if (AssignOffsets)
+ Assign(FI, -Offset);
+ }
+
return Offset;
}
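determineSVEStackObjectOffsets hands out negative, alignment-padded offsets that grow away from the base of the SVE area: first the fixed objects, then the callee-save slots, then the remaining locals and spills. A self-contained sketch of that allocation walk, with made-up object sizes and a hypothetical alignUp helper:

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int64_t alignUp(int64_t V, int64_t A) { return (V + A - 1) / A * A; }

int main() {
  // (size, alignment) of SVE objects in allocation order; values are made up.
  std::vector<std::pair<int64_t, int64_t>> Objects = {{16, 16}, {2, 2}, {32, 16}};
  int64_t Offset = 0;
  for (auto [Size, Align] : Objects) {
    Offset = alignUp(Offset + Size, Align);
    std::printf("alloc at SP[%lld]\n", (long long)-Offset); // Assign(FI, -Offset)
  }
  // The final running offset, aligned to 16, becomes the SVE stack size.
  std::printf("SVE stack size = %lld\n", (long long)alignUp(Offset, 16));
}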
+int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
+ MachineFrameInfo &MFI) const {
+ int MinCSFrameIndex, MaxCSFrameIndex;
+ return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
+}
+
+int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
+ MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
+ return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
+ true);
+}
+
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -2340,12 +2611,13 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
- unsigned MaxAlign = getStackAlignment();
- int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign);
+ int MinCSFrameIndex, MaxCSFrameIndex;
+ int64_t SVEStackSize =
+ assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign));
- assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+ AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
+ AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index ac150e86c9eb..b5719feb6b15 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -44,7 +44,7 @@ public:
unsigned &FrameReg, bool PreferFP,
bool ForSimm) const;
StackOffset resolveFrameOffsetReference(const MachineFunction &MF,
- int ObjectOffset, bool isFixed,
+ int64_t ObjectOffset, bool isFixed,
bool isSVE, unsigned &FrameReg,
bool PreferFP, bool ForSimm) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
@@ -72,6 +72,7 @@ public:
}
bool enableStackSlotScavenging(const MachineFunction &MF) const override;
+ TargetStackID::Value getStackIDForScalableVectors() const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const override;
@@ -100,8 +101,12 @@ public:
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
- unsigned StackBumpBytes) const;
- int64_t determineSVEStackSize(MachineFrameInfo &MF, unsigned &MaxAlign) const;
+ uint64_t StackBumpBytes) const;
+
+ int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
+ int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
+ int &MinCSFrameIndex,
+ int &MaxCSFrameIndex) const;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 1f08505f37e7..a51aa85a931c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -39,20 +40,16 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
/// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget;
- bool ForCodeSize;
-
public:
explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
- ForCodeSize(false) {}
+ : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
StringRef getPassName() const override {
return "AArch64 Instruction Selection";
}
bool runOnMachineFunction(MachineFunction &MF) override {
- ForCodeSize = MF.getFunction().hasOptSize();
Subtarget = &MF.getSubtarget<AArch64Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -140,6 +137,59 @@ public:
return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
}
+ bool SelectDupZeroOrUndef(SDValue N) {
+ switch(N->getOpcode()) {
+ case ISD::UNDEF:
+ return true;
+ case AArch64ISD::DUP:
+ case ISD::SPLAT_VECTOR: {
+ auto Opnd0 = N->getOperand(0);
+ if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
+ if (CN->isNullValue())
+ return true;
+ if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
+ if (CN->isZero())
+ return true;
+ break;
+ }
+ default:
+ break;
+ }
+
+ return false;
+ }
+
+ template<MVT::SimpleValueType VT>
+ bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
+ return SelectSVEAddSubImm(N, VT, Imm, Shift);
+ }
+
+ template<MVT::SimpleValueType VT>
+ bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
+ return SelectSVELogicalImm(N, VT, Imm);
+ }
+
+ // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
+ template<signed Min, signed Max, signed Scale, bool Shift>
+ bool SelectCntImm(SDValue N, SDValue &Imm) {
+ if (!isa<ConstantSDNode>(N))
+ return false;
+
+ int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
+ if (Shift)
+ MulImm = 1LL << MulImm;
+
+ if ((MulImm % std::abs(Scale)) != 0)
+ return false;
+
+ MulImm /= Scale;
+ if ((MulImm >= Min) && (MulImm <= Max)) {
+ Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
+ return true;
+ }
+
+ return false;
+ }
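SelectCntImm accepts a constant only when it is an exact multiple of Scale and the quotient lies in [Min, Max], optionally treating the operand as a log2 shift amount first. A standalone model of that legality check; the helper name is illustrative:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Standalone model of SelectCntImm's legality check (not the LLVM API).
bool selectCntImm(int64_t MulImm, int Min, int Max, int Scale, bool Shift,
                  int64_t &Imm) {
  if (Shift)
    MulImm = 1LL << MulImm; // the operand held log2 of the multiplier
  if (MulImm % std::abs(Scale) != 0)
    return false;
  MulImm /= Scale;
  if (MulImm < Min || MulImm > Max)
    return false;
  Imm = MulImm;
  return true;
}

int main() {
  int64_t Imm;
  // A CNTW-style pattern: Scale 4, multiplier range [1, 16].
  if (selectCntImm(/*MulImm=*/8, /*Min=*/1, /*Max=*/16, /*Scale=*/4,
                   /*Shift=*/false, Imm))
    std::printf("imm = %lld\n", (long long)Imm); // prints 2
}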
/// Form sequences of consecutive 64/128-bit registers for use in NEON
/// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
@@ -177,6 +227,7 @@ public:
bool tryBitfieldInsertOp(SDNode *N);
bool tryBitfieldInsertInZeroOp(SDNode *N);
bool tryShiftAmountMod(SDNode *N);
+ bool tryHighFPExt(SDNode *N);
bool tryReadRegister(SDNode *N);
bool tryWriteRegister(SDNode *N);
@@ -217,6 +268,13 @@ private:
bool SelectCMP_SWAP(SDNode *N);
+ bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
+
+ bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
+
+ bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
+
+ bool SelectSVEArithImm(SDValue N, SDValue &Imm);
};
} // end anonymous namespace
@@ -250,7 +308,6 @@ bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
switch(ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_i:
case InlineAsm::Constraint_m:
case InlineAsm::Constraint_Q:
// We need to make sure that this one operand does not end up in XZR, thus
@@ -378,7 +435,7 @@ static bool isWorthFoldingSHL(SDValue V) {
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
// Trivial if we are optimizing for code size or if there is only
// one use of the value.
- if (ForCodeSize || V.hasOneUse())
+ if (CurDAG->shouldOptForSize() || V.hasOneUse())
return true;
// If a subtarget has a fastpath LSL we can fold a logical shift into
// the addressing mode and save a cycle.
@@ -1772,6 +1829,35 @@ bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
return true;
}
+/// Try to form fcvtl2 instructions from a floating-point extend of a high-half
+/// extract of a subvector.
+bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
+ assert(N->getOpcode() == ISD::FP_EXTEND);
+
+ // There are two forms of fcvtl2: extend to double or extend to float.
+ SDValue Extract = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT NarrowVT = Extract.getValueType();
+ if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
+ (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
+ return false;
+
+ // Optionally look past a bitcast.
+ Extract = peekThroughBitcasts(Extract);
+ if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
+
+ // Match an extract from the start of the high half.
+ // Example: v8i16 -> v4i16 means the extract must begin at index 4.
+ unsigned ExtractIndex = Extract.getConstantOperandVal(1);
+ if (ExtractIndex != Extract.getValueType().getVectorNumElements())
+ return false;
+
+ auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
+ CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
+ return true;
+}
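The decisive check in tryHighFPExt is that the constant extract index equals the element count of the narrow result type, i.e. the extract begins exactly at the high half. A simplified standalone version of the predicate; isHighHalfExtract is a hypothetical name, and the source-width check is added here for clarity:

#include <cstdio>

// Illustration: extracting the high half of an 8-element vector as a
// 4-element vector means the constant extract index must be 4.
bool isHighHalfExtract(unsigned SrcNumElts, unsigned DstNumElts,
                       unsigned ExtractIndex) {
  return SrcNumElts == 2 * DstNumElts && ExtractIndex == DstNumElts;
}

int main() {
  std::printf("%d\n", isHighHalfExtract(8, 4, 4)); // 1: eligible for fcvtl2
  std::printf("%d\n", isHighHalfExtract(8, 4, 0)); // 0: low half, plain fcvtl
}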
+
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
SDValue &Opd0, unsigned &Immr, unsigned &Imms,
unsigned NumberOfIgnoredLowBits = 0,
@@ -2793,6 +2879,102 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
return true;
}
+bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) {
+ if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
+ const int64_t ImmVal = CNode->getZExtValue();
+ SDLoc DL(N);
+
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ if ((ImmVal & 0xFF) == ImmVal) {
+ Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
+ return true;
+ }
+ break;
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ if ((ImmVal & 0xFF) == ImmVal) {
+ Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
+ return true;
+ } else if ((ImmVal & 0xFF00) == ImmVal) {
+ Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32);
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ return false;
+}
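SVE ADD/SUB immediates are an unsigned 8-bit value with an optional left shift by 8; the routine above prefers the unshifted form, and only i16/i32/i64 elements may use the shifted one. A small model of that encoding choice; the helper is hypothetical, not the LLVM API:

#include <cstdint>
#include <cstdio>

// Model of the imm/shift selection above (illustrative helper only).
bool encodeSVEAddSubImm(uint64_t ImmVal, bool ByteElement, unsigned &Imm,
                        unsigned &Shift) {
  if ((ImmVal & 0xFF) == ImmVal) { // fits in the low byte, no shift
    Imm = (unsigned)ImmVal;
    Shift = 0;
    return true;
  }
  if (!ByteElement && (ImmVal & 0xFF00) == ImmVal) { // byte shifted left by 8
    Imm = (unsigned)(ImmVal >> 8);
    Shift = 8;
    return true;
  }
  return false;
}

int main() {
  unsigned Imm, Shift;
  if (encodeSVEAddSubImm(0x1200, /*ByteElement=*/false, Imm, Shift))
    std::printf("imm=%u shift=%u\n", Imm, Shift); // imm=18 shift=8
}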
+
+bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
+ if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
+ int64_t ImmVal = CNode->getSExtValue();
+ SDLoc DL(N);
+ if (ImmVal >= -127 && ImmVal < 127) {
+ Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, SDValue &Imm) {
+ if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
+ uint64_t ImmVal = CNode->getSExtValue();
+ SDLoc DL(N);
+ ImmVal = ImmVal & 0xFF;
+ if (ImmVal < 256) {
+ Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) {
+ if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
+ uint64_t ImmVal = CNode->getZExtValue();
+ SDLoc DL(N);
+
+ // Replicate the immediate across 64 bits according to the element size.
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ ImmVal &= 0xFF;
+ ImmVal |= ImmVal << 8;
+ ImmVal |= ImmVal << 16;
+ ImmVal |= ImmVal << 32;
+ break;
+ case MVT::i16:
+ ImmVal &= 0xFFFF;
+ ImmVal |= ImmVal << 16;
+ ImmVal |= ImmVal << 32;
+ break;
+ case MVT::i32:
+ ImmVal &= 0xFFFFFFFF;
+ ImmVal |= ImmVal << 32;
+ break;
+ case MVT::i64:
+ break;
+ default:
+ llvm_unreachable("Unexpected type");
+ }
+
+ uint64_t encoding;
+ if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
+ Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
+ return true;
+ }
+ }
+ return false;
+}
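Before asking the shared logical-immediate encoder, the element-sized constant is replicated across all 64 bits, since SVE bitmask immediates are validated against the full register width. The replication can be modelled standalone like this (a sketch only; replicateTo64 is an illustrative name):

#include <cstdint>
#include <cstdio>

// Replicate an element-sized immediate to 64 bits (mirrors the switch above).
uint64_t replicateTo64(uint64_t ImmVal, unsigned EltBits) {
  if (EltBits == 8)  { ImmVal &= 0xFFull;       ImmVal |= ImmVal << 8;  }
  if (EltBits <= 16) { ImmVal &= 0xFFFFull;     ImmVal |= ImmVal << 16; }
  if (EltBits <= 32) { ImmVal &= 0xFFFFFFFFull; ImmVal |= ImmVal << 32; }
  return ImmVal; // EltBits == 64 passes through unchanged
}

int main() {
  // 0xAB splatted over i8 elements becomes the 64-bit pattern below, which
  // is then checked as a bitmask immediate (processLogicalImmediate above).
  std::printf("%016llx\n", (unsigned long long)replicateTo64(0xAB, 8));
}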
+
bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
// tagp(FrameIndex, IRGstack, tag_offset):
// since the offset between FrameIndex and IRGstack is a compile-time
@@ -2908,6 +3090,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
break;
+ case ISD::FP_EXTEND:
+ if (tryHighFPExt(Node))
+ return;
+ break;
+
case ISD::OR:
if (tryBitfieldInsertOp(Node))
return;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2746117e8ee5..d45a80057564 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10,9 +10,9 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64ExpandImm.h"
#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
+#include "AArch64ExpandImm.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64RegisterInfo.h"
@@ -58,6 +58,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/PatternMatch.h"
@@ -178,11 +179,25 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
- addRegisterClass(MVT::nxv1f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
- addRegisterClass(MVT::nxv1f64, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
+
+ for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
+ setOperationAction(ISD::SADDSAT, VT, Legal);
+ setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::SSUBSAT, VT, Legal);
+ setOperationAction(ISD::USUBSAT, VT, Legal);
+ setOperationAction(ISD::SMAX, VT, Legal);
+ setOperationAction(ISD::UMAX, VT, Legal);
+ setOperationAction(ISD::SMIN, VT, Legal);
+ setOperationAction(ISD::UMIN, VT, Legal);
+ }
+
+ for (auto VT :
+ { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
+ MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
}
// Compute derived properties from the register classes
@@ -422,14 +437,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
- setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
- setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
- AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
- AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
@@ -510,6 +521,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ // 128-bit loads and stores can be done without expanding
+ setOperationAction(ISD::LOAD, MVT::i128, Custom);
+ setOperationAction(ISD::STORE, MVT::i128, Custom);
+
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
if (Subtarget->hasPerfMon())
@@ -525,6 +540,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
+ if (Subtarget->getTargetTriple().isOSMSVCRT()) {
+ // MSVCRT doesn't have powi; fall back to pow
+ setLibcallName(RTLIB::POWI_F32, nullptr);
+ setLibcallName(RTLIB::POWI_F64, nullptr);
+ }
+
// Make floating-point constants legal for the large code model, so they don't
// become loads from the constant pool.
if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
@@ -601,7 +622,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::BITCAST);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::STORE);
if (Subtarget->supportsAddressTopByteIgnored())
@@ -734,14 +755,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- // Vector reductions
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ // Vector reductions
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+
+ // Saturates
+ setOperationAction(ISD::SADDSAT, VT, Legal);
+ setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::SSUBSAT, VT, Legal);
+ setOperationAction(ISD::USUBSAT, VT, Legal);
}
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
@@ -802,10 +829,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
if (Subtarget->hasSVE()) {
+ // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
+ // splat of 0 or undef) once vector selects are supported in SVE codegen.
+ // See D68877 for more details.
for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
- if (isTypeLegal(VT) && VT.getVectorElementType() != MVT::i1)
+ if (isTypeLegal(VT))
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
}
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
}
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
@@ -1257,6 +1289,19 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
+ case AArch64ISD::SMAXV_PRED: return "AArch64ISD::SMAXV_PRED";
+ case AArch64ISD::UMAXV_PRED: return "AArch64ISD::UMAXV_PRED";
+ case AArch64ISD::SMINV_PRED: return "AArch64ISD::SMINV_PRED";
+ case AArch64ISD::UMINV_PRED: return "AArch64ISD::UMINV_PRED";
+ case AArch64ISD::ORV_PRED: return "AArch64ISD::ORV_PRED";
+ case AArch64ISD::EORV_PRED: return "AArch64ISD::EORV_PRED";
+ case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED";
+ case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N";
+ case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N";
+ case AArch64ISD::LASTA: return "AArch64ISD::LASTA";
+ case AArch64ISD::LASTB: return "AArch64ISD::LASTB";
+ case AArch64ISD::REV: return "AArch64ISD::REV";
+ case AArch64ISD::TBL: return "AArch64ISD::TBL";
case AArch64ISD::NOT: return "AArch64ISD::NOT";
case AArch64ISD::BIT: return "AArch64ISD::BIT";
case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
@@ -1311,6 +1356,32 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO";
case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
+ case AArch64ISD::INSR: return "AArch64ISD::INSR";
+ case AArch64ISD::PTEST: return "AArch64ISD::PTEST";
+ case AArch64ISD::PTRUE: return "AArch64ISD::PTRUE";
+ case AArch64ISD::GLD1: return "AArch64ISD::GLD1";
+ case AArch64ISD::GLD1_SCALED: return "AArch64ISD::GLD1_SCALED";
+ case AArch64ISD::GLD1_SXTW: return "AArch64ISD::GLD1_SXTW";
+ case AArch64ISD::GLD1_UXTW: return "AArch64ISD::GLD1_UXTW";
+ case AArch64ISD::GLD1_SXTW_SCALED: return "AArch64ISD::GLD1_SXTW_SCALED";
+ case AArch64ISD::GLD1_UXTW_SCALED: return "AArch64ISD::GLD1_UXTW_SCALED";
+ case AArch64ISD::GLD1_IMM: return "AArch64ISD::GLD1_IMM";
+ case AArch64ISD::GLD1S: return "AArch64ISD::GLD1S";
+ case AArch64ISD::GLD1S_SCALED: return "AArch64ISD::GLD1S_SCALED";
+ case AArch64ISD::GLD1S_SXTW: return "AArch64ISD::GLD1S_SXTW";
+ case AArch64ISD::GLD1S_UXTW: return "AArch64ISD::GLD1S_UXTW";
+ case AArch64ISD::GLD1S_SXTW_SCALED: return "AArch64ISD::GLD1S_SXTW_SCALED";
+ case AArch64ISD::GLD1S_UXTW_SCALED: return "AArch64ISD::GLD1S_UXTW_SCALED";
+ case AArch64ISD::GLD1S_IMM: return "AArch64ISD::GLD1S_IMM";
+ case AArch64ISD::SST1: return "AArch64ISD::SST1";
+ case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";
+ case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";
+ case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";
+ case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
+ case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
+ case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";
+ case AArch64ISD::LDP: return "AArch64ISD::LDP";
+ case AArch64ISD::STP: return "AArch64ISD::STP";
}
return nullptr;
}
@@ -1568,7 +1639,8 @@ static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
// All of the compare-mask comparisons are ordered, but we can switch
// between the two by a double inversion. E.g. ULE == !OGT.
Invert = true;
- changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
+ changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
+ CondCode, CondCode2);
break;
}
}
@@ -1815,7 +1887,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
bool isInteger = LHS.getValueType().isInteger();
if (Negate)
- CC = getSetCCInverse(CC, isInteger);
+ CC = getSetCCInverse(CC, LHS.getValueType());
SDLoc DL(Val);
// Determine OutCC and handle FP special case.
if (isInteger) {
@@ -2287,7 +2359,7 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
// If the constants line up, perform the transform!
@@ -2861,6 +2933,55 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_uunpklo:
return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
+ case Intrinsic::aarch64_sve_clasta_n:
+ return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::aarch64_sve_clastb_n:
+ return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::aarch64_sve_lasta:
+ return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_lastb:
+ return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_rev:
+ return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_tbl:
+ return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_trn1:
+ return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_trn2:
+ return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_uzp1:
+ return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_uzp2:
+ return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_zip1:
+ return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_zip2:
+ return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_ptrue:
+ return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::aarch64_sve_insr: {
+ SDValue Scalar = Op.getOperand(2);
+ EVT ScalarTy = Scalar.getValueType();
+ if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
+ Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
+
+ return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
+ Op.getOperand(1), Scalar);
+ }
case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
@@ -2886,6 +3007,10 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
}
+bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+ return ExtVal.getValueType().isScalableVector();
+}
+
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
EVT VT, EVT MemVT,
@@ -2920,7 +3045,7 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
// Custom lowering for any store, vector or scalar, default or with a
// truncate operation. Currently we only custom lower the truncate operation
-// from vector v4i16 to v4i8.
+// from vector v4i16 to v4i8 or volatile stores of i128.
SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc Dl(Op);
@@ -2932,18 +3057,32 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
EVT VT = Value.getValueType();
EVT MemVT = StoreNode->getMemoryVT();
- assert (VT.isVector() && "Can only custom lower vector store types");
-
- unsigned AS = StoreNode->getAddressSpace();
- unsigned Align = StoreNode->getAlignment();
- if (Align < MemVT.getStoreSize() &&
- !allowsMisalignedMemoryAccesses(
- MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
- return scalarizeVectorStore(StoreNode, DAG);
- }
-
- if (StoreNode->isTruncatingStore()) {
- return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
+ if (VT.isVector()) {
+ unsigned AS = StoreNode->getAddressSpace();
+ unsigned Align = StoreNode->getAlignment();
+ if (Align < MemVT.getStoreSize() &&
+ !allowsMisalignedMemoryAccesses(MemVT, AS, Align,
+ StoreNode->getMemOperand()->getFlags(),
+ nullptr)) {
+ return scalarizeVectorStore(StoreNode, DAG);
+ }
+
+ if (StoreNode->isTruncatingStore()) {
+ return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
+ }
+ } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
+ assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
+ SDValue Lo =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
+ DAG.getConstant(0, Dl, MVT::i64));
+ SDValue Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
+ DAG.getConstant(1, Dl, MVT::i64));
+ SDValue Result = DAG.getMemIntrinsicNode(
+ AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
+ {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
+ StoreNode->getMemoryVT(), StoreNode->getMemOperand());
+ return Result;
}
return SDValue();
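A volatile i128 store is lowered to a single STP of the two 64-bit halves, so the access stays one instruction instead of being expanded by the legalizer. The data split itself is just low/high extraction, as in this plain C++ sketch; the single-instruction property is of course lost outside the backend:

#include <cstdint>
#include <cstdio>

// Model of splitting an i128 value into the two x-registers of an stp.
struct U128 { uint64_t Lo, Hi; };

void storePair(volatile uint64_t *P, U128 V) {
  // The real lowering emits a single AArch64ISD::STP memory node covering
  // both halves, i.e. one "stp xLo, xHi, [ptr]" instruction.
  P[0] = V.Lo;
  P[1] = V.Hi;
}

int main() {
  uint64_t Buf[2] = {0, 0};
  storePair(Buf, {0x0123456789abcdefULL, 0xfeedfacecafef00dULL});
  std::printf("%llx %llx\n", (unsigned long long)Buf[0],
              (unsigned long long)Buf[1]);
}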
@@ -3092,6 +3231,9 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
+ case CallingConv::AArch64_SVE_VectorCall:
+ // Calling SVE functions is currently not yet supported.
+ report_fatal_error("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
case CallingConv::GHC:
@@ -3111,8 +3253,10 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
: CC_AArch64_DarwinPCS_VarArg;
case CallingConv::Win64:
return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
- case CallingConv::AArch64_VectorCall:
- return CC_AArch64_AAPCS;
+ case CallingConv::CFGuard_Check:
+ return CC_AArch64_Win64_CFGuard_Check;
+ case CallingConv::AArch64_VectorCall:
+ return CC_AArch64_AAPCS;
}
}
@@ -3848,11 +3992,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
- ++i, ++realArgIdx) {
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = OutVals[realArgIdx];
- ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -3867,7 +4010,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
- if (Outs[realArgIdx].ArgVT == MVT::i1) {
+ if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to 8-bits by the caller.
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
@@ -3896,7 +4039,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
if (VA.isRegLoc()) {
- if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
+ if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
Outs[0].VT == MVT::i64) {
assert(VA.getLocVT() == MVT::i64 &&
"unexpected calling convention register assignment");
@@ -4014,14 +4157,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// node so that legalize doesn't hack it.
if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
auto GV = G->getGlobal();
- if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
- AArch64II::MO_GOT) {
- Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
+ unsigned OpFlags =
+ Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
+ if (OpFlags & AArch64II::MO_GOT) {
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
- } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) {
- assert(Subtarget->isTargetWindows() &&
- "Windows is the only supported COFF target");
- Callee = getGOT(G, DAG, AArch64II::MO_DLLIMPORT);
} else {
const GlobalValue *GV = G->getGlobal();
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
@@ -4456,6 +4596,97 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
}
+/// Convert a thread-local variable reference into a sequence of instructions to
+/// compute the variable's address for the local exec TLS model of ELF targets.
+/// The sequence depends on the maximum TLS area size.
+SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
+ SDValue ThreadBase,
+ const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue TPOff, Addr;
+
+ switch (DAG.getTarget().Options.TLSSize) {
+ default:
+ llvm_unreachable("Unexpected TLS size");
+
+ case 12: {
+ // mrs x0, TPIDR_EL0
+ // add x0, x0, :tprel_lo12:a
+ SDValue Var = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
+ return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+ Var,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
+ 0);
+ }
+
+ case 24: {
+ // mrs x0, TPIDR_EL0
+ // add x0, x0, :tprel_hi12:a
+ // add x0, x0, :tprel_lo12_nc:a
+ SDValue HiVar = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
+ SDValue LoVar = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0,
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+ HiVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
+ 0);
+ return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
+ LoVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
+ 0);
+ }
+
+ case 32: {
+ // mrs x1, TPIDR_EL0
+ // movz x0, #:tprel_g1:a
+ // movk x0, #:tprel_g0_nc:a
+ // add x0, x1, x0
+ SDValue HiVar = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0,
+ AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(16, DL, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
+ 0);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
+ }
+
+ case 48: {
+ // mrs x1, TPIDR_EL0
+ // movz x0, #:tprel_g2:a
+ // movk x0, #:tprel_g1_nc:a
+ // movk x0, #:tprel_g0_nc:a
+ // add x0, x1, x0
+ SDValue HiVar = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
+ SDValue MiVar = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0,
+ AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
+ SDValue LoVar = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0,
+ AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(32, DL, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
+ DAG.getTargetConstant(16, DL, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
+ 0);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
+ }
+ }
+}
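Each -mtls-size choice buys more offset bits at the cost of more instructions after the mrs of TPIDR_EL0: one add for 12 bits, two adds for 24, movz+movk+add for 32, and movz+movk+movk+add for 48. A trivial model of that trade-off, with illustrative numbers only:

#include <cstdio>

// Rough model: instruction count after "mrs TPIDR_EL0" per TLS area size.
int insnsAfterMrs(int TLSSizeBits) {
  switch (TLSSizeBits) {
  case 12: return 1; // add :tprel_lo12:
  case 24: return 2; // add :tprel_hi12: + add :tprel_lo12_nc:
  case 32: return 3; // movz :tprel_g1: + movk :tprel_g0_nc: + add
  case 48: return 4; // movz :tprel_g2: + 2x movk + add
  default: return -1; // unexpected, mirrors the llvm_unreachable above
  }
}

int main() {
  for (int Bits : {12, 24, 32, 48})
    std::printf("tls-size=%d -> %d instructions\n", Bits, insnsAfterMrs(Bits));
}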
+
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
@@ -4493,15 +4724,7 @@ SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
- if (getTargetMachine().getCodeModel() == CodeModel::Large)
- report_fatal_error("ELF TLS only supported in small memory model");
- // Different choices can be made for the maximum size of the TLS area for a
- // module. For the small address model, the default TLS size is 16MiB and the
- // maximum TLS size is 4GiB.
- // FIXME: add -mtls-size command line option and make it control the 16MiB
- // vs. 4GiB code sequence generation.
- // FIXME: add tiny codemodel support. We currently generate the same code as
- // small, which may be larger than needed.
+
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
@@ -4511,6 +4734,17 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
Model = TLSModel::GeneralDynamic;
}
+ if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+ Model != TLSModel::LocalExec)
+ report_fatal_error("ELF TLS only supported in small memory model or "
+ "in local exec TLS model");
+ // Different choices can be made for the maximum size of the TLS area for a
+ // module. For the small address model, the default TLS size is 16MiB and the
+ // maximum TLS size is 4GiB.
+ // FIXME: add tiny and large code model support for TLS access models other
+ // than local exec. We currently generate the same code as small for tiny,
+ // which may be larger than needed.
+
SDValue TPOff;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
@@ -4519,23 +4753,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
if (Model == TLSModel::LocalExec) {
- SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
- SDValue LoVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0,
- AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- SDValue TPWithOff_lo =
- SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
- HiVar,
- DAG.getTargetConstant(0, DL, MVT::i32)),
- 0);
- SDValue TPWithOff =
- SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
- LoVar,
- DAG.getTargetConstant(0, DL, MVT::i32)),
- 0);
- return TPWithOff;
+ return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
@@ -4961,8 +5179,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (LHS.getValueType().isInteger()) {
SDValue CCVal;
- SDValue Cmp =
- getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
+ SDValue Cmp = getAArch64Cmp(
+ LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
@@ -4981,7 +5199,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
if (CC2 == AArch64CC::AL) {
- changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
+ changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
+ CC2);
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
// Note that we inverted the condition above, so we reverse the order of
@@ -5042,18 +5261,18 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (TVal.getOpcode() == ISD::XOR) {
// If TVal is a NOT we want to swap TVal and FVal so that we can match
// with a CSINV rather than a CSEL.
if (isAllOnesConstant(TVal.getOperand(1))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (TVal.getOpcode() == ISD::SUB) {
// If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
@@ -5061,7 +5280,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
if (isNullConstant(TVal.getOperand(0))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (CTVal && CFVal) {
const int64_t TrueVal = CTVal->getSExtValue();
@@ -5104,7 +5323,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
if (Swap) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
if (Opcode != AArch64ISD::CSEL) {
@@ -5531,7 +5750,7 @@ SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register AArch64TargetLowering::
-getRegisterByName(const char* RegName, EVT VT, const MachineFunction &MF) const {
+getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
Register Reg = MatchRegisterName(RegName);
if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
@@ -6946,19 +7165,55 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Otherwise, duplicate from the lane of the input vector.
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
- // SelectionDAGBuilder may have "helpfully" already extracted or conatenated
- // to make a vector of the same size as this SHUFFLE. We can ignore the
- // extract entirely, and canonicalise the concat using WidenVector.
- if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
+ // Try to eliminate a bitcasted extract subvector before a DUPLANE.
+ auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
+ // Match: dup (bitcast (extract_subv X, C)), LaneC
+ if (BitCast.getOpcode() != ISD::BITCAST ||
+ BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
+
+ // The extract index must align with an element of the destination type.
+ // That may not happen if the bitcast is from a narrow to a wide type.
+ SDValue Extract = BitCast.getOperand(0);
+ unsigned ExtIdx = Extract.getConstantOperandVal(1);
+ unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
+ unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
+ unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
+ if (ExtIdxInBits % CastedEltBitWidth != 0)
+ return false;
+
+ // Update the lane value by offsetting with the scaled extract index.
+ LaneC += ExtIdxInBits / CastedEltBitWidth;
+
+ // Determine the casted vector type of the wide vector input.
+ // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
+ // Examples:
+ // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
+ // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
+ unsigned SrcVecNumElts =
+ Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
+ CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
+ SrcVecNumElts);
+ return true;
+ };
+ MVT CastVT;
+ if (getScaledOffsetDup(V1, Lane, CastVT)) {
+ V1 = DAG.getBitcast(CastVT, V1.getOperand(0).getOperand(0));
+ } else if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ // The lane is incremented by the index of the extract.
+ // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
+ Lane += V1.getConstantOperandVal(1);
V1 = V1.getOperand(0);
} else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
+ // The lane is decremented if we are splatting from the 2nd operand.
+ // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
Lane -= Idx * VT.getVectorNumElements() / 2;
V1 = WidenVector(V1.getOperand(Idx), DAG);
- } else if (VT.getSizeInBits() == 64)
+ } else if (VT.getSizeInBits() == 64) {
+ // Widen the operand to 128-bit register with undef.
V1 = WidenVector(V1, DAG);
-
+ }
return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
}
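The getScaledOffsetDup rewrite folds a bitcasted extract_subvector into the DUPLANE by rescaling the extract index into units of the casted element size. A standalone model of the lane arithmetic, using the v16i8 example from the comment above; names are illustrative:

#include <cstdio>

// Model of the lane rewrite in getScaledOffsetDup:
// dup (bitcast (extract_subv X, ExtIdx)), Lane --> dup (bitcast X), Lane'
bool rescaleLane(unsigned ExtIdx, unsigned SrcEltBits, unsigned CastedEltBits,
                 int &Lane) {
  unsigned ExtIdxInBits = ExtIdx * SrcEltBits;
  if (ExtIdxInBits % CastedEltBits != 0)
    return false; // extract start does not align with a casted element
  Lane += ExtIdxInBits / CastedEltBits;
  return true;
}

int main() {
  // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
  int Lane = 1;
  if (rescaleLane(/*ExtIdx=*/8, /*SrcEltBits=*/8, /*CastedEltBits=*/16, Lane))
    std::printf("new lane = %d\n", Lane); // prints 5
}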
@@ -7077,26 +7332,31 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
switch (ElemVT.getSimpleVT().SimpleTy) {
case MVT::i8:
case MVT::i16:
+ case MVT::i32:
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
- break;
+ return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
case MVT::i64:
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
- break;
- case MVT::i32:
- // Fine as is
- break;
- // TODO: we can support splats of i1s and float types, but haven't added
- // patterns yet.
- case MVT::i1:
+ return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
+ case MVT::i1: {
+ // The general case of i1. There isn't any natural way to do this,
+ // so we use some trickery with whilelo.
+ // TODO: Add special cases for splat of constant true/false.
+ SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
+ SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
+ DAG.getValueType(MVT::i1));
+ SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
+ MVT::i64);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
+ DAG.getConstant(0, dl, MVT::i64), SplatVal);
+ }
+ // TODO: we can support float types, but haven't added patterns yet.
case MVT::f16:
case MVT::f32:
case MVT::f64:
default:
- llvm_unreachable("Unsupported SPLAT_VECTOR input operand type");
- break;
+ report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
}
-
- return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
}
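The whilelo trick works because sign-extending the i1 yields either 0 or all-ones: whilelo(0, x) activates lane i exactly when i is unsigned-less-than x, so x == 0 gives an all-false predicate and x == ~0 an all-true one. A scalar model of the predicate it produces; VL and the function name are illustrative:

#include <cstdint>
#include <cstdio>

// Scalar model of the whilelo-based i1 splat used above.
void splatI1(bool B, bool Pred[], unsigned VL) {
  uint64_t X = B ? ~0ULL : 0ULL; // the sign-extended i1 operand
  for (uint64_t I = 0; I < VL; ++I)
    Pred[I] = (I < X); // whilelo: lane active while index < operand
}

int main() {
  bool P[4];
  splatI1(true, P, 4);
  std::printf("%d %d %d %d\n", P[0], P[1], P[2], P[3]); // 1 1 1 1
}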
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
@@ -8443,6 +8703,26 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align = Align(16);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
+ case Intrinsic::aarch64_sve_ldnt1: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
+ return true;
+ }
+ case Intrinsic::aarch64_sve_stnt1: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
+ return true;
+ }
default:
break;
}
@@ -8515,11 +8795,12 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
return true;
const TargetOptions &Options = getTargetMachine().Options;
- const DataLayout &DL = I->getModule()->getDataLayout();
- EVT VT = getValueType(DL, User->getOperand(0)->getType());
+ const Function *F = I->getFunction();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ Type *Ty = User->getOperand(0)->getType();
- return !(isFMAFasterThanFMulAndFAdd(VT) &&
- isOperationLegalOrCustom(ISD::FMA, VT) &&
+ return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+ isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath));
}
@@ -9176,7 +9457,8 @@ int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL,
return -1;
}
-bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
+ const MachineFunction &MF, EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
@@ -9193,6 +9475,17 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
+bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
+ Type *Ty) const {
+ switch (Ty->getScalarType()->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return true;
+ default:
+ return false;
+ }
+}
+
const MCPhysReg *
AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
// LR is a callee-save register, but we must treat it as clobbered by any call
@@ -9363,6 +9656,19 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
+static bool IsSVECntIntrinsic(SDValue S) {
+ switch(getIntrinsicID(S.getNode())) {
+ default:
+ break;
+ case Intrinsic::aarch64_sve_cntb:
+ case Intrinsic::aarch64_sve_cnth:
+ case Intrinsic::aarch64_sve_cntw:
+ case Intrinsic::aarch64_sve_cntd:
+ return true;
+ }
+ return false;
+}
+
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -9373,9 +9679,18 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (!isa<ConstantSDNode>(N->getOperand(1)))
return SDValue();
+ SDValue N0 = N->getOperand(0);
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
const APInt &ConstValue = C->getAPIntValue();
+ // Allow the scaling to be folded into the `cnt` instruction by preventing
+ // the scaling from being obscured here. This makes it easier to pattern match.
+ if (IsSVECntIntrinsic(N0) ||
+ (N0->getOpcode() == ISD::TRUNCATE &&
+ (IsSVECntIntrinsic(N0->getOperand(0)))))
+ if (ConstValue.sge(1) && ConstValue.sle(16))
+ return SDValue();
+
// Multiplication of a power of two plus/minus one can be done more
// cheaply as a shift+add/sub. For now, this is true unilaterally. If
// future CPUs have a cheaper MADD instruction, this may need to be
@@ -9386,7 +9701,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// e.g. 6=3*2=(2+1)*2.
// TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
// which equals (1+2)*16-(1+2).
- SDValue N0 = N->getOperand(0);
// TrailingZeroes is used to test if the mul can be lowered to
// shift+add+shift.
unsigned TrailingZeroes = ConstValue.countTrailingZeros();
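The early-out added above keeps a multiply of an SVE element count intact whenever the constant lies in [1, 16], because CNTB/CNTH/CNTW/CNTD encode exactly that multiplier range (later selectable as, e.g., cntd x0, all, mul #4). A standalone restatement of the guard:

#include <cstdint>

// Mirrors the ConstValue.sge(1) && ConstValue.sle(16) check above.
bool multiplierFoldsIntoSVECnt(int64_t C) {
  return C >= 1 && C <= 16;
}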
@@ -9821,6 +10135,67 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
}
+static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
+ if (!MemVT.getVectorElementType().isSimple())
+ return false;
+
+ uint64_t MaskForTy = 0ull;
+ switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ MaskForTy = 0xffull;
+ break;
+ case MVT::i16:
+ MaskForTy = 0xffffull;
+ break;
+ case MVT::i32:
+ MaskForTy = 0xffffffffull;
+ break;
+  default:
+    return false;
+ }
+
+ if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
+ if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
+ return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
+
+ return false;
+}
+
+static SDValue performSVEAndCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Src = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+
+ if (!Src.hasOneUse())
+ return SDValue();
+
+ // GLD1* instructions perform an implicit zero-extend, which makes them
+ // perfect candidates for combining.
+ switch (Src->getOpcode()) {
+ case AArch64ISD::GLD1:
+ case AArch64ISD::GLD1_SCALED:
+ case AArch64ISD::GLD1_SXTW:
+ case AArch64ISD::GLD1_SXTW_SCALED:
+ case AArch64ISD::GLD1_UXTW:
+ case AArch64ISD::GLD1_UXTW_SCALED:
+ case AArch64ISD::GLD1_IMM:
+ break;
+ default:
+ return SDValue();
+ }
+
+ EVT MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
+
+ if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
+ return Src;
+
+ return SDValue();
+}
+
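performSVEAndCombine above removes an AND whose splatted mask keeps exactly the bits a GLD1* element load already zero-extends. A scalar model of the mask table in isConstantSplatVectorMaskForType (hypothetical helpers, not LLVM API):

#include <cstdint>
#include <optional>

// Mask that a zero-extending load of the given element width leaves set.
std::optional<uint64_t> impliedZextMask(unsigned EltBits) {
  switch (EltBits) {
  case 8:  return 0xffull;
  case 16: return 0xffffull;
  case 32: return 0xffffffffull;
  default: return std::nullopt; // no redundant mask for other widths
  }
}

// The AND is redundant when its splatted mask equals the implied one.
bool andIsRedundant(uint64_t Mask, unsigned EltBits) {
  auto M = impliedZextMask(EltBits);
  return M && *M == Mask;
}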
static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
@@ -9829,6 +10204,9 @@ static SDValue performANDCombine(SDNode *N,
if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
+ if (VT.isScalableVector())
+ return performSVEAndCombine(N, DCI);
+
BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
if (!BVN)
@@ -9889,74 +10267,6 @@ static SDValue performSRLCombine(SDNode *N,
return SDValue();
}
-static SDValue performBitcastCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- // Wait 'til after everything is legalized to try this. That way we have
- // legal vector types and such.
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
- // Remove extraneous bitcasts around an extract_subvector.
- // For example,
- // (v4i16 (bitconvert
- // (extract_subvector (v2i64 (bitconvert (v8i16 ...)), (i64 1)))))
- // becomes
- // (extract_subvector ((v8i16 ...), (i64 4)))
-
- // Only interested in 64-bit vectors as the ultimate result.
- EVT VT = N->getValueType(0);
- if (!VT.isVector())
- return SDValue();
- if (VT.getSimpleVT().getSizeInBits() != 64)
- return SDValue();
- // Is the operand an extract_subvector starting at the beginning or halfway
- // point of the vector? A low half may also come through as an
- // EXTRACT_SUBREG, so look for that, too.
- SDValue Op0 = N->getOperand(0);
- if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR &&
- !(Op0->isMachineOpcode() &&
- Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG))
- return SDValue();
- uint64_t idx = cast<ConstantSDNode>(Op0->getOperand(1))->getZExtValue();
- if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0)
- return SDValue();
- } else if (Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG) {
- if (idx != AArch64::dsub)
- return SDValue();
- // The dsub reference is equivalent to a lane zero subvector reference.
- idx = 0;
- }
- // Look through the bitcast of the input to the extract.
- if (Op0->getOperand(0)->getOpcode() != ISD::BITCAST)
- return SDValue();
- SDValue Source = Op0->getOperand(0)->getOperand(0);
- // If the source type has twice the number of elements as our destination
- // type, we know this is an extract of the high or low half of the vector.
- EVT SVT = Source->getValueType(0);
- if (!SVT.isVector() ||
- SVT.getVectorNumElements() != VT.getVectorNumElements() * 2)
- return SDValue();
-
- LLVM_DEBUG(
- dbgs() << "aarch64-lower: bitcast extract_subvector simplification\n");
-
- // Create the simplified form to just extract the low or high half of the
- // vector directly rather than bothering with the bitcasts.
- SDLoc dl(N);
- unsigned NumElements = VT.getVectorNumElements();
- if (idx) {
- SDValue HalfIdx = DAG.getConstant(NumElements, dl, MVT::i64);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
- } else {
- SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, dl, MVT::i32);
- return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
- Source, SubReg),
- 0);
- }
-}
-
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -10263,10 +10573,10 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
MVT::i32);
Cmp = *InfoAndKind.Info.AArch64.Cmp;
} else
- Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0,
- *InfoAndKind.Info.Generic.Opnd1,
- ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true),
- CCVal, DAG, dl);
+ Cmp = getAArch64Cmp(
+ *InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
+ ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
+ dl);
EVT VT = Op->getValueType(0);
LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
@@ -10456,6 +10766,154 @@ static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
DAG.getConstant(0, dl, MVT::i64));
}
+static SDValue LowerSVEIntReduction(SDNode *N, unsigned Opc,
+ SelectionDAG &DAG) {
+ SDLoc dl(N);
+ LLVMContext &Ctx = *DAG.getContext();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ EVT VT = N->getValueType(0);
+ SDValue Pred = N->getOperand(1);
+ SDValue Data = N->getOperand(2);
+ EVT DataVT = Data.getValueType();
+
+ if (DataVT.getVectorElementType().isScalarInteger() &&
+ (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)) {
+ if (!TLI.isTypeLegal(DataVT))
+ return SDValue();
+
+ EVT OutputVT = EVT::getVectorVT(Ctx, VT,
+ AArch64::NeonBitsPerVector / VT.getSizeInBits());
+ SDValue Reduce = DAG.getNode(Opc, dl, OutputVT, Pred, Data);
+ SDValue Zero = DAG.getConstant(0, dl, MVT::i64);
+ SDValue Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Reduce, Zero);
+
+ return Result;
+ }
+
+ return SDValue();
+}
+
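The reductions above are materialized in a fixed-width NEON-sized container, and the scalar result is read back from lane 0 (the Zero constant). A compile-time sketch of the lane-count computation, assuming AArch64::NeonBitsPerVector is 128:

constexpr unsigned NeonBitsPerVector = 128; // assumed value of the constant
constexpr unsigned reductionLanes(unsigned EltBits) {
  return NeonBitsPerVector / EltBits;
}
static_assert(reductionLanes(32) == 4, "an i32 reduction uses a v4i32 container");
static_assert(reductionLanes(64) == 2, "an i64 reduction uses a v2i64 container");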
+static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
+ SDLoc dl(N);
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT VT = N->getValueType(0);
+
+ assert(VT.isScalableVector() && "Expected a scalable vector.");
+
+ // Current lowering only supports the SVE-ACLE types.
+ if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
+ return SDValue();
+
+ unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
+ unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
+ EVT ByteVT = EVT::getVectorVT(Ctx, MVT::i8, { ByteSize, true });
+
+  // Convert everything to the domain of EXT (i.e. bytes).
+ SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
+ SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
+ DAG.getConstant(ElemSize, dl, MVT::i32));
+
+ SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
+}
+
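Since EXT shifts by bytes, the lane index given to the intrinsic is rescaled by the element size before the node is built, which is what Op2 computes above. The same arithmetic in isolation:

// Byte offset fed to EXT for a lane index on an element of EltBits bits.
unsigned extByteOffset(unsigned LaneIdx, unsigned EltBits) {
  return LaneIdx * (EltBits / 8); // e.g. lane 2 of an i32 vector -> byte 8
}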
+static SDValue tryConvertSVEWideCompare(SDNode *N, unsigned ReplacementIID,
+ bool Invert,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ SDValue Comparator = N->getOperand(3);
+ if (Comparator.getOpcode() == AArch64ISD::DUP ||
+ Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
+ unsigned IID = getIntrinsicID(N);
+ EVT VT = N->getValueType(0);
+ EVT CmpVT = N->getOperand(2).getValueType();
+ SDValue Pred = N->getOperand(1);
+ SDValue Imm;
+ SDLoc DL(N);
+
+ switch (IID) {
+ default:
+ llvm_unreachable("Called with wrong intrinsic!");
+ break;
+
+ // Signed comparisons
+ case Intrinsic::aarch64_sve_cmpeq_wide:
+ case Intrinsic::aarch64_sve_cmpne_wide:
+ case Intrinsic::aarch64_sve_cmpge_wide:
+ case Intrinsic::aarch64_sve_cmpgt_wide:
+ case Intrinsic::aarch64_sve_cmplt_wide:
+ case Intrinsic::aarch64_sve_cmple_wide: {
+ if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
+ int64_t ImmVal = CN->getSExtValue();
+ if (ImmVal >= -16 && ImmVal <= 15)
+ Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
+ else
+ return SDValue();
+ }
+ break;
+ }
+ // Unsigned comparisons
+ case Intrinsic::aarch64_sve_cmphs_wide:
+ case Intrinsic::aarch64_sve_cmphi_wide:
+ case Intrinsic::aarch64_sve_cmplo_wide:
+ case Intrinsic::aarch64_sve_cmpls_wide: {
+ if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
+ uint64_t ImmVal = CN->getZExtValue();
+ if (ImmVal <= 127)
+ Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
+ else
+ return SDValue();
+ }
+ break;
+ }
+ }
+
+ SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
+ SDValue ID = DAG.getTargetConstant(ReplacementIID, DL, MVT::i64);
+ SDValue Op0, Op1;
+ if (Invert) {
+ Op0 = Splat;
+ Op1 = N->getOperand(2);
+ } else {
+ Op0 = N->getOperand(2);
+ Op1 = Splat;
+ }
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ ID, Pred, Op0, Op1);
+ }
+
+ return SDValue();
+}
+
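A splatted comparator is only folded into the immediate compare form when it fits the encoding: [-16, 15] for the signed variants and [0, 127] for the unsigned ones; the Invert flag swaps the operands so that, for example, cmplt is expressed through cmpgt. The range guards restated standalone:

#include <cstdint>

bool fitsSignedSVECmpImm(int64_t V) { return V >= -16 && V <= 15; }
bool fitsUnsignedSVECmpImm(uint64_t V) { return V <= 127; }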
+static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
+ AArch64CC::CondCode Cond) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ SDLoc DL(Op);
+ assert(Op.getValueType().isScalableVector() &&
+ TLI.isTypeLegal(Op.getValueType()) &&
+ "Expected legal scalable vector type!");
+
+  // Ensure the target-specific opcodes use a legal type.
+ EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue TVal = DAG.getConstant(1, DL, OutVT);
+ SDValue FVal = DAG.getConstant(0, DL, OutVT);
+
+ // Set condition code (CC) flags.
+ SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
+
+ // Convert CC to integer based on requested condition.
+ // NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
+ SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
+ SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
+ return DAG.getZExtOrTrunc(Res, DL, VT);
+}
+
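Note that the CSEL above lists FVal before TVal: because the condition code is inverted (see the NOTE), the select still produces 1 exactly when the predicate test holds. A scalar model of the double negation:

// CSEL(FVal = 0, TVal = 1, inverted CC): yields 1 iff the test holds.
int ptestScalarModel(bool TestHolds) {
  bool InvertedCC = !TestHolds; // getInvertedCondCode(Cond)
  return InvertedCC ? 0 : 1;
}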
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -10510,6 +10968,61 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_crc32h:
case Intrinsic::aarch64_crc32ch:
return tryCombineCRC32(0xffff, N, DAG);
+ case Intrinsic::aarch64_sve_smaxv:
+ return LowerSVEIntReduction(N, AArch64ISD::SMAXV_PRED, DAG);
+ case Intrinsic::aarch64_sve_umaxv:
+ return LowerSVEIntReduction(N, AArch64ISD::UMAXV_PRED, DAG);
+ case Intrinsic::aarch64_sve_sminv:
+ return LowerSVEIntReduction(N, AArch64ISD::SMINV_PRED, DAG);
+ case Intrinsic::aarch64_sve_uminv:
+ return LowerSVEIntReduction(N, AArch64ISD::UMINV_PRED, DAG);
+ case Intrinsic::aarch64_sve_orv:
+ return LowerSVEIntReduction(N, AArch64ISD::ORV_PRED, DAG);
+ case Intrinsic::aarch64_sve_eorv:
+ return LowerSVEIntReduction(N, AArch64ISD::EORV_PRED, DAG);
+ case Intrinsic::aarch64_sve_andv:
+ return LowerSVEIntReduction(N, AArch64ISD::ANDV_PRED, DAG);
+ case Intrinsic::aarch64_sve_ext:
+ return LowerSVEIntrinsicEXT(N, DAG);
+ case Intrinsic::aarch64_sve_cmpeq_wide:
+ return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpeq,
+ false, DCI, DAG);
+ case Intrinsic::aarch64_sve_cmpne_wide:
+ return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpne,
+ false, DCI, DAG);
+ case Intrinsic::aarch64_sve_cmpge_wide:
+ return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpge,
+ false, DCI, DAG);
+ case Intrinsic::aarch64_sve_cmpgt_wide:
+ return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpgt,
+ false, DCI, DAG);
+ case Intrinsic::aarch64_sve_cmplt_wide:
+ return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpgt,
+ true, DCI, DAG);
+ case Intrinsic::aarch64_sve_cmple_wide:
+ return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpge,
+ true, DCI, DAG);
+ case Intrinsic::aarch64_sve_cmphs_wide:
+ return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphs,
+ false, DCI, DAG);
+ case Intrinsic::aarch64_sve_cmphi_wide:
+ return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphi,
+ false, DCI, DAG);
+ case Intrinsic::aarch64_sve_cmplo_wide:
+ return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphi, true,
+ DCI, DAG);
+ case Intrinsic::aarch64_sve_cmpls_wide:
+ return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphs, true,
+ DCI, DAG);
+ case Intrinsic::aarch64_sve_ptest_any:
+ return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ AArch64CC::ANY_ACTIVE);
+ case Intrinsic::aarch64_sve_ptest_first:
+ return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ AArch64CC::FIRST_ACTIVE);
+ case Intrinsic::aarch64_sve_ptest_last:
+ return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ AArch64CC::LAST_ACTIVE);
}
return SDValue();
}
@@ -10652,6 +11165,48 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
return NewST1;
}
+static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT PtrTy = N->getOperand(3).getValueType();
+
+ EVT LoadVT = VT;
+ if (VT.isFloatingPoint())
+ LoadVT = VT.changeTypeToInteger();
+
+ auto *MINode = cast<MemIntrinsicSDNode>(N);
+ SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
+ SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
+ MINode->getOperand(3), DAG.getUNDEF(PtrTy),
+ MINode->getOperand(2), PassThru,
+ MINode->getMemoryVT(), MINode->getMemOperand(),
+ ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
+
+ if (VT.isFloatingPoint()) {
+ SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
+ return DAG.getMergeValues(Ops, DL);
+ }
+
+ return L;
+}
+
+static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+
+ SDValue Data = N->getOperand(2);
+ EVT DataVT = Data.getValueType();
+ EVT PtrTy = N->getOperand(4).getValueType();
+
+ if (DataVT.isFloatingPoint())
+ Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
+
+ auto *MINode = cast<MemIntrinsicSDNode>(N);
+ return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
+ DAG.getUNDEF(PtrTy), MINode->getOperand(3),
+ MINode->getMemoryVT(), MINode->getMemOperand(),
+ ISD::UNINDEXED, false, false);
+}
+
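Both combines map the non-temporal intrinsics onto the generic masked load/store nodes with an undef offset, round-tripping floating-point data through the same-width integer type because the SVE patterns are written over integer vectors. A scalar model of that bitcast pair (illustrative only):

#include <cstdint>
#include <cstring>

uint32_t bitsOf(float F) { // models the BITCAST to integer before the store
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return Bits;
}

float floatOf(uint32_t Bits) { // models the BITCAST back after the load
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}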
/// Replace a vector store of a splatted zero with scalar stores of WZR/XZR. The
/// load/store optimizer pass will then merge them into store pairs. This should
/// be better than a movi to create the vector zero followed by a vector store
@@ -11703,6 +12258,215 @@ static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(MinOffset, DL, MVT::i64));
}
+// Returns an SVE type that ContentTy can be trivially sign or zero extended
+// into.
+static MVT getSVEContainerType(EVT ContentTy) {
+ assert(ContentTy.isSimple() && "No SVE containers for extended types");
+
+ switch (ContentTy.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("No known SVE container for this MVT type");
+ case MVT::nxv2i8:
+ case MVT::nxv2i16:
+ case MVT::nxv2i32:
+ case MVT::nxv2i64:
+ case MVT::nxv2f32:
+ case MVT::nxv2f64:
+ return MVT::nxv2i64;
+ case MVT::nxv4i8:
+ case MVT::nxv4i16:
+ case MVT::nxv4i32:
+ case MVT::nxv4f32:
+ return MVT::nxv4i32;
+ }
+}
+
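The mapping above is purely by lane count: any two-lane type widens into the .d container (nxv2i64) and any four-lane type into the .s container (nxv4i32), independent of the element width. Restated as standalone code (simplified enumerators, not LLVM types):

enum class SVEContainer { NXV2I64, NXV4I32 };

// Assumes LaneCount is 2 or 4, as the switch above guarantees.
SVEContainer containerForLanes(unsigned LaneCount) {
  return LaneCount == 2 ? SVEContainer::NXV2I64 : SVEContainer::NXV4I32;
}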
+static SDValue performST1ScatterCombine(SDNode *N, SelectionDAG &DAG,
+ unsigned Opcode,
+ bool OnlyPackedOffsets = true) {
+ const SDValue Src = N->getOperand(2);
+ const EVT SrcVT = Src->getValueType(0);
+ assert(SrcVT.isScalableVector() &&
+ "Scatter stores are only possible for SVE vectors");
+
+ SDLoc DL(N);
+ MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
+
+ // Make sure that source data will fit into an SVE register
+ if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
+ return SDValue();
+
+ // For FPs, ACLE only supports _packed_ single and double precision types.
+ if (SrcElVT.isFloatingPoint())
+ if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
+ return SDValue();
+
+ // Depending on the addressing mode, this is either a pointer or a vector of
+ // pointers (that fits into one register)
+ const SDValue Base = N->getOperand(4);
+ // Depending on the addressing mode, this is either a single offset or a
+ // vector of offsets (that fits into one register)
+ SDValue Offset = N->getOperand(5);
+
+ auto &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(Base.getValueType()))
+ return SDValue();
+
+ // Some scatter store variants allow unpacked offsets, but only as nxv2i32
+  // vectors. These are implicitly sign- (sxtw) or zero- (uxtw) extended to
+ // nxv2i64. Legalize accordingly.
+ if (!OnlyPackedOffsets &&
+ Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
+ Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
+
+ if (!TLI.isTypeLegal(Offset.getValueType()))
+ return SDValue();
+
+ // Source value type that is representable in hardware
+ EVT HwSrcVt = getSVEContainerType(SrcVT);
+
+ // Keep the original type of the input data to store - this is needed to
+ // differentiate between ST1B, ST1H, ST1W and ST1D. For FP values we want the
+ // integer equivalent, so just use HwSrcVt.
+ SDValue InputVT = DAG.getValueType(SrcVT);
+ if (SrcVT.isFloatingPoint())
+ InputVT = DAG.getValueType(HwSrcVt);
+
+ SDVTList VTs = DAG.getVTList(MVT::Other);
+ SDValue SrcNew;
+
+ if (Src.getValueType().isFloatingPoint())
+ SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
+ else
+ SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
+
+ SDValue Ops[] = {N->getOperand(0), // Chain
+ SrcNew,
+ N->getOperand(3), // Pg
+ Base,
+ Offset,
+ InputVT};
+
+ return DAG.getNode(Opcode, DL, VTs, Ops);
+}
+
+static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG,
+ unsigned Opcode,
+ bool OnlyPackedOffsets = true) {
+ EVT RetVT = N->getValueType(0);
+ assert(RetVT.isScalableVector() &&
+ "Gather loads are only possible for SVE vectors");
+ SDLoc DL(N);
+
+ if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
+ return SDValue();
+
+ // Depending on the addressing mode, this is either a pointer or a vector of
+ // pointers (that fits into one register)
+ const SDValue Base = N->getOperand(3);
+ // Depending on the addressing mode, this is either a single offset or a
+ // vector of offsets (that fits into one register)
+ SDValue Offset = N->getOperand(4);
+
+ auto &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(Base.getValueType()))
+ return SDValue();
+
+ // Some gather load variants allow unpacked offsets, but only as nxv2i32
+  // vectors. These are implicitly sign- (sxtw) or zero- (uxtw) extended to
+ // nxv2i64. Legalize accordingly.
+ if (!OnlyPackedOffsets &&
+ Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
+ Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
+
+ // Return value type that is representable in hardware
+ EVT HwRetVt = getSVEContainerType(RetVT);
+
+ // Keep the original output value type around - this will better inform
+ // optimisations (e.g. instruction folding when load is followed by
+ // zext/sext). This will only be used for ints, so the value for FPs
+ // doesn't matter.
+ SDValue OutVT = DAG.getValueType(RetVT);
+ if (RetVT.isFloatingPoint())
+ OutVT = DAG.getValueType(HwRetVt);
+
+ SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
+ SDValue Ops[] = {N->getOperand(0), // Chain
+ N->getOperand(2), // Pg
+ Base, Offset, OutVT};
+
+ SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
+ SDValue LoadChain = SDValue(Load.getNode(), 1);
+
+ if (RetVT.isInteger() && (RetVT != HwRetVt))
+ Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
+
+ // If the original return value was FP, bitcast accordingly. Doing it here
+ // means that we can avoid adding TableGen patterns for FPs.
+ if (RetVT.isFloatingPoint())
+ Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
+
+ return DAG.getMergeValues({Load, LoadChain}, DL);
+}
+
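One detail shared by the scatter and gather combines above: unpacked nxv2i32 offsets are widened with ANY_EXTEND before the legality check, since the instruction's sxtw/uxtw addressing mode already defines the semantics of the top bits. Modelled here as a plain widening (in the DAG the high bits are simply undefined):

#include <cstdint>

int64_t widenUnpackedOffset(int32_t Unpacked) {
  return static_cast<int64_t>(Unpacked); // stand-in for ISD::ANY_EXTEND
}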
+static SDValue
+performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Src = N->getOperand(0);
+ unsigned Opc = Src->getOpcode();
+
+ // Gather load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
+ // for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
+ unsigned NewOpc;
+ switch (Opc) {
+ case AArch64ISD::GLD1:
+ NewOpc = AArch64ISD::GLD1S;
+ break;
+ case AArch64ISD::GLD1_SCALED:
+ NewOpc = AArch64ISD::GLD1S_SCALED;
+ break;
+ case AArch64ISD::GLD1_SXTW:
+ NewOpc = AArch64ISD::GLD1S_SXTW;
+ break;
+ case AArch64ISD::GLD1_SXTW_SCALED:
+ NewOpc = AArch64ISD::GLD1S_SXTW_SCALED;
+ break;
+ case AArch64ISD::GLD1_UXTW:
+ NewOpc = AArch64ISD::GLD1S_UXTW;
+ break;
+ case AArch64ISD::GLD1_UXTW_SCALED:
+ NewOpc = AArch64ISD::GLD1S_UXTW_SCALED;
+ break;
+ case AArch64ISD::GLD1_IMM:
+ NewOpc = AArch64ISD::GLD1S_IMM;
+ break;
+ default:
+ return SDValue();
+ }
+
+ EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ EVT GLD1SrcMemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
+
+ if ((SignExtSrcVT != GLD1SrcMemVT) || !Src.hasOneUse())
+ return SDValue();
+
+ EVT DstVT = N->getValueType(0);
+ SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
+ SDValue Ops[] = {Src->getOperand(0), Src->getOperand(1), Src->getOperand(2),
+ Src->getOperand(3), Src->getOperand(4)};
+
+ SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
+ DCI.CombineTo(N, ExtLoad);
+ DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked
+ return SDValue(N, 0);
+}
+
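The rewrite above is only legal when the width being sign-extended matches the width the gather actually loaded from memory and the load has no other users; otherwise the zero-extending GLD1 form is kept. The guard restated:

// Fold SIGN_EXTEND_INREG into the gather (GLD1* -> GLD1S*) only when legal.
bool canFoldSextIntoGather(unsigned SextBits, unsigned LoadedBits,
                           bool LoadHasOneUse) {
  return SextBits == LoadedBits && LoadHasOneUse;
}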
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -11737,8 +12501,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
return performExtendCombine(N, DCI, DAG);
- case ISD::BITCAST:
- return performBitcastCombine(N, DCI, DAG);
+ case ISD::SIGN_EXTEND_INREG:
+ return performSignExtendInRegCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::SELECT:
@@ -11789,6 +12553,46 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane:
return performNEONPostLDSTCombine(N, DCI, DAG);
+ case Intrinsic::aarch64_sve_ldnt1:
+ return performLDNT1Combine(N, DAG);
+ case Intrinsic::aarch64_sve_stnt1:
+ return performSTNT1Combine(N, DAG);
+ case Intrinsic::aarch64_sve_ld1_gather:
+ return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1);
+ case Intrinsic::aarch64_sve_ld1_gather_index:
+ return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SCALED);
+ case Intrinsic::aarch64_sve_ld1_gather_sxtw:
+ return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_ld1_gather_uxtw:
+ return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
+ return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
+ return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_ld1_gather_imm:
+ return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);
+ case Intrinsic::aarch64_sve_st1_scatter:
+ return performST1ScatterCombine(N, DAG, AArch64ISD::SST1);
+ case Intrinsic::aarch64_sve_st1_scatter_index:
+ return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SCALED);
+ case Intrinsic::aarch64_sve_st1_scatter_sxtw:
+ return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_st1_scatter_uxtw:
+ return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
+ return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW_SCALED,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
+ return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_st1_scatter_imm:
+ return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_IMM);
default:
break;
}
@@ -12084,6 +12888,69 @@ void AArch64TargetLowering::ReplaceNodeResults(
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
+ case ISD::LOAD: {
+ assert(SDValue(N, 0).getValueType() == MVT::i128 &&
+ "unexpected load's value type");
+ LoadSDNode *LoadNode = cast<LoadSDNode>(N);
+ if (!LoadNode->isVolatile() || LoadNode->getMemoryVT() != MVT::i128) {
+ // Non-volatile loads are optimized later in AArch64's load/store
+ // optimizer.
+ return;
+ }
+
+ SDValue Result = DAG.getMemIntrinsicNode(
+ AArch64ISD::LDP, SDLoc(N),
+ DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
+ {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
+ LoadNode->getMemOperand());
+
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
+ Result.getValue(0), Result.getValue(1));
+ Results.append({Pair, Result.getValue(2) /* Chain */});
+ return;
+ }
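The new ISD::LOAD case means a volatile 128-bit load is emitted as a single LDP of two X registers instead of being split by generic legalization. A source-level illustration (assumes a compiler and 64-bit target with __int128 support):

// With the lowering above, this read may select to one ldp of two X registers.
__int128 readVolatilePair(volatile __int128 *P) {
  return *P;
}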
+ case ISD::INTRINSIC_WO_CHAIN: {
+ EVT VT = N->getValueType(0);
+ assert((VT == MVT::i8 || VT == MVT::i16) &&
+ "custom lowering for unexpected type");
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
+ Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
+ switch (IntID) {
+ default:
+ return;
+ case Intrinsic::aarch64_sve_clasta_n: {
+ SDLoc DL(N);
+ auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
+ auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
+ N->getOperand(1), Op2, N->getOperand(3));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
+ return;
+ }
+ case Intrinsic::aarch64_sve_clastb_n: {
+ SDLoc DL(N);
+ auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
+ auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
+ N->getOperand(1), Op2, N->getOperand(3));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
+ return;
+ }
+ case Intrinsic::aarch64_sve_lasta: {
+ SDLoc DL(N);
+ auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
+ N->getOperand(1), N->getOperand(2));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
+ return;
+ }
+ case Intrinsic::aarch64_sve_lastb: {
+ SDLoc DL(N);
+ auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
+ N->getOperand(1), N->getOperand(2));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
+ return;
+ }
+ }
+ }
}
}
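All four intrinsic cases above share one shape: the operation is performed at i32 (any scalar input is widened via ANY_EXTEND) and the declared i8/i16 result is recovered with a TRUNCATE. A scalar equivalent of the widen/narrow round trip, using addition as a stand-in for the underlying operation:

#include <cstdint>

uint8_t widenOpNarrow(uint8_t A, uint8_t B) {
  uint32_t Wide = uint32_t(A) + uint32_t(B); // the op runs at i32 width
  return static_cast<uint8_t>(Wide);         // ISD::TRUNCATE equivalent
}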
@@ -12351,7 +13218,7 @@ bool AArch64TargetLowering::
bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
SDNode *N) const {
if (DAG.getMachineFunction().getFunction().hasMinSize() &&
- !Subtarget->isTargetWindows())
+ !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
return false;
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 00fa96bc4e6d..672dfc4fcbc0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -155,6 +155,14 @@ enum NodeType : unsigned {
SMAXV,
UMAXV,
+ SMAXV_PRED,
+ UMAXV_PRED,
+ SMINV_PRED,
+ UMINV_PRED,
+ ORV_PRED,
+ EORV_PRED,
+ ANDV_PRED,
+
// Vector bitwise negation
NOT,
@@ -196,6 +204,43 @@ enum NodeType : unsigned {
UUNPKHI,
UUNPKLO,
+ CLASTA_N,
+ CLASTB_N,
+ LASTA,
+ LASTB,
+ REV,
+ TBL,
+
+ INSR,
+ PTEST,
+ PTRUE,
+
+ // Unsigned gather loads.
+ GLD1,
+ GLD1_SCALED,
+ GLD1_UXTW,
+ GLD1_SXTW,
+ GLD1_UXTW_SCALED,
+ GLD1_SXTW_SCALED,
+ GLD1_IMM,
+
+ // Signed gather loads
+ GLD1S,
+ GLD1S_SCALED,
+ GLD1S_UXTW,
+ GLD1S_SXTW,
+ GLD1S_UXTW_SCALED,
+ GLD1S_SXTW_SCALED,
+ GLD1S_IMM,
+ // Scatter store
+ SST1,
+ SST1_SCALED,
+ SST1_UXTW,
+ SST1_SXTW,
+ SST1_UXTW_SCALED,
+ SST1_SXTW_SCALED,
+ SST1_IMM,
+
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,
@@ -224,8 +269,10 @@ enum NodeType : unsigned {
STG,
STZG,
ST2G,
- STZ2G
+ STZ2G,
+ LDP,
+ STP
};
} // end namespace AArch64ISD
@@ -396,7 +443,9 @@ public:
/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
- bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
@@ -648,6 +697,8 @@ private:
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
+ const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
SelectionDAG &DAG) const;
SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -713,7 +764,7 @@ private:
unsigned combineRepeatedFPDivisors() const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
- Register getRegisterByName(const char* RegName, EVT VT,
+ Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
/// Examine constraint string and operand type and determine a weight value.
@@ -741,6 +792,7 @@ private:
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
+ bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 459b53923625..27e1d8ee6b98 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -15,9 +15,9 @@
//===----------------------------------
let AddedComplexity = 15, Size = 0 in
def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering),
- [(atomic_fence imm:$ordering, 0)]>, Sched<[]>;
-def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
-def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
+ [(atomic_fence timm:$ordering, 0)]>, Sched<[]>;
+def : Pat<(atomic_fence (i64 4), (timm)), (DMB (i32 0x9))>;
+def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>;
//===----------------------------------
// Atomic loads
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f555e4123307..c3efe03a0987 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -305,7 +305,7 @@ def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> {
}
def SImm8Operand : SImmOperand<8>;
-def simm8 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -128 && Imm < 127; }]> {
+def simm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -128 && Imm < 127; }]> {
let ParserMatchClass = SImm8Operand;
let DecoderMethod = "DecodeSImm<8>";
}
@@ -358,6 +358,16 @@ def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
def am_indexedu6s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedU6S128", []>;
def am_indexeds9s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedS9S128", []>;
+def UImmS2XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() / 2, SDLoc(N), MVT::i64);
+}]>;
+def UImmS4XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() / 4, SDLoc(N), MVT::i64);
+}]>;
+def UImmS8XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i64);
+}]>;
+
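The three SDNodeXForms above convert a byte offset into the scaled immediate the instruction encodes by dividing by the access size; the matching uimm5sN predicates have already established divisibility. The same computation standalone:

#include <cstdint>

// Scale is 2, 4 or 8; callers have checked ByteOffset % Scale == 0.
uint64_t scaledImm(uint64_t ByteOffset, unsigned Scale) {
  return ByteOffset / Scale;
}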
// uimm5sN predicate - True if the immediate is a multiple of N in the range
// [0 * N, 32 * N).
def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>;
@@ -365,17 +375,41 @@ def UImm5s4Operand : UImmScaledMemoryIndexed<5, 4>;
def UImm5s8Operand : UImmScaledMemoryIndexed<5, 8>;
def uimm5s2 : Operand<i64>, ImmLeaf<i64,
- [{ return Imm >= 0 && Imm < (32*2) && ((Imm % 2) == 0); }]> {
+ [{ return Imm >= 0 && Imm < (32*2) && ((Imm % 2) == 0); }],
+ UImmS2XForm> {
let ParserMatchClass = UImm5s2Operand;
let PrintMethod = "printImmScale<2>";
}
def uimm5s4 : Operand<i64>, ImmLeaf<i64,
- [{ return Imm >= 0 && Imm < (32*4) && ((Imm % 4) == 0); }]> {
+ [{ return Imm >= 0 && Imm < (32*4) && ((Imm % 4) == 0); }],
+ UImmS4XForm> {
let ParserMatchClass = UImm5s4Operand;
let PrintMethod = "printImmScale<4>";
}
def uimm5s8 : Operand<i64>, ImmLeaf<i64,
- [{ return Imm >= 0 && Imm < (32*8) && ((Imm % 8) == 0); }]> {
+ [{ return Imm >= 0 && Imm < (32*8) && ((Imm % 8) == 0); }],
+ UImmS8XForm> {
+ let ParserMatchClass = UImm5s8Operand;
+ let PrintMethod = "printImmScale<8>";
+}
+
+// tuimm5sN predicate - similar to uimm5sN, but uses TImmLeaf (TargetConstant)
+// instead of ImmLeaf (Constant).
+def tuimm5s2 : Operand<i64>, TImmLeaf<i64,
+ [{ return Imm >= 0 && Imm < (32*2) && ((Imm % 2) == 0); }],
+ UImmS2XForm> {
+ let ParserMatchClass = UImm5s2Operand;
+ let PrintMethod = "printImmScale<2>";
+}
+def tuimm5s4 : Operand<i64>, TImmLeaf<i64,
+ [{ return Imm >= 0 && Imm < (32*4) && ((Imm % 4) == 0); }],
+ UImmS4XForm> {
+ let ParserMatchClass = UImm5s4Operand;
+ let PrintMethod = "printImmScale<4>";
+}
+def tuimm5s8 : Operand<i64>, TImmLeaf<i64,
+ [{ return Imm >= 0 && Imm < (32*8) && ((Imm % 8) == 0); }],
+ UImmS8XForm> {
let ParserMatchClass = UImm5s8Operand;
let PrintMethod = "printImmScale<8>";
}
@@ -590,6 +624,30 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm1_32Operand;
}
+// Same as vecshiftR#N, but uses TargetConstant (TImmLeaf) instead of Constant
+// (ImmLeaf).
+def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
+}]> {
+ let EncoderMethod = "getVecShiftR8OpValue";
+ let DecoderMethod = "DecodeVecShiftR8Imm";
+ let ParserMatchClass = Imm1_8Operand;
+}
+def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
+}]> {
+ let EncoderMethod = "getVecShiftR16OpValue";
+ let DecoderMethod = "DecodeVecShiftR16Imm";
+ let ParserMatchClass = Imm1_16Operand;
+}
+def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
+}]> {
+ let EncoderMethod = "getVecShiftR32OpValue";
+ let DecoderMethod = "DecodeVecShiftR32Imm";
+ let ParserMatchClass = Imm1_32Operand;
+}
+
def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm0_7Operand : AsmImmRange<0, 7>;
def Imm0_15Operand : AsmImmRange<0, 15>;
@@ -713,6 +771,13 @@ def imm0_127 : Operand<i32>, ImmLeaf<i32, [{
let PrintMethod = "printImm";
}
+def imm0_127_64b : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 128;
+}]> {
+ let ParserMatchClass = Imm0_127Operand;
+ let PrintMethod = "printImm";
+}
+
// NOTE: These imm0_N operands have to be of type i64 because i64 is the size
// for all shift-amounts.
@@ -730,6 +795,14 @@ def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_31Operand;
}
+// timm0_31 predicate - same as imm0_31, but uses TargetConstant (TImmLeaf)
+// instead of Constant (ImmLeaf).
+def timm0_31 : Operand<i64>, TImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 32;
+}]> {
+ let ParserMatchClass = Imm0_31Operand;
+}
+
// True if the 32-bit immediate is in the range [0,31]
def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{
return ((uint64_t)Imm) < 32;
@@ -758,6 +831,13 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_7Operand;
}
+// imm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
+def imm32_0_7 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 8;
+}]> {
+ let ParserMatchClass = Imm0_7Operand;
+}
+
// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 16;
@@ -1403,6 +1483,7 @@ class RCPCLoad<bits<2> sz, string asm, RegisterClass RC>
class AuthBase<bits<1> M, dag oops, dag iops, string asm, string operands,
list<dag> pattern>
: I<oops, iops, asm, operands, "", pattern>, Sched<[]> {
+ let isAuthenticated = 1;
let Inst{31-25} = 0b1101011;
let Inst{20-11} = 0b1111100001;
let Inst{10} = M;
@@ -1427,6 +1508,7 @@ class AuthOneOperand<bits<3> opc, bits<1> M, string asm>
let Inst{9-5} = Rn;
}
+let Uses = [LR,SP] in
class AuthReturn<bits<3> op, bits<1> M, string asm>
: AuthBase<M, (outs), (ins), asm, "", []> {
let Inst{24} = 0;
@@ -1441,6 +1523,7 @@ class BaseAuthLoad<bit M, bit W, dag oops, dag iops, string asm,
bits<10> offset;
bits<5> Rn;
bits<5> Rt;
+ let isAuthenticated = 1;
let Inst{31-24} = 0b11111000;
let Inst{23} = M;
let Inst{22} = offset{9};
@@ -1463,6 +1546,9 @@ multiclass AuthLoad<bit M, string asm, Operand opr> {
def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "indexed") GPR64:$Rt, GPR64sp:$Rn, 0)>;
+
+ def : InstAlias<asm # "\t$Rt, [$wback]!",
+ (!cast<Instruction>(NAME # "writeback") GPR64sp:$wback, GPR64:$Rt, 0), 0>;
}
//---
@@ -3047,6 +3133,22 @@ def ro_Windexed32 : ComplexPattern<i64, 4, "SelectAddrModeWRO<32>", []>;
def ro_Windexed64 : ComplexPattern<i64, 4, "SelectAddrModeWRO<64>", []>;
def ro_Windexed128 : ComplexPattern<i64, 4, "SelectAddrModeWRO<128>", []>;
+def gi_ro_Windexed8 :
+ GIComplexOperandMatcher<s64, "selectAddrModeWRO<8>">,
+ GIComplexPatternEquiv<ro_Windexed8>;
+def gi_ro_Windexed16 :
+ GIComplexOperandMatcher<s64, "selectAddrModeWRO<16>">,
+ GIComplexPatternEquiv<ro_Windexed16>;
+def gi_ro_Windexed32 :
+ GIComplexOperandMatcher<s64, "selectAddrModeWRO<32>">,
+ GIComplexPatternEquiv<ro_Windexed32>;
+def gi_ro_Windexed64 :
+ GIComplexOperandMatcher<s64, "selectAddrModeWRO<64>">,
+ GIComplexPatternEquiv<ro_Windexed64>;
+def gi_ro_Windexed128 :
+ GIComplexOperandMatcher<s64, "selectAddrModeWRO<128>">,
+ GIComplexPatternEquiv<ro_Windexed128>;
+
class MemExtendOperand<string Reg, int Width> : AsmOperandClass {
let Name = "Mem" # Reg # "Extend" # Width;
let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">";
@@ -5066,6 +5168,24 @@ multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
}
+multiclass SIMDThreeSameVectorExtraPatterns<string inst, SDPatternOperator OpNode> {
+ def : Pat<(v8i8 (OpNode V64:$LHS, V64:$RHS)),
+ (!cast<Instruction>(inst#"v8i8") V64:$LHS, V64:$RHS)>;
+ def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
+ (!cast<Instruction>(inst#"v4i16") V64:$LHS, V64:$RHS)>;
+ def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
+ (!cast<Instruction>(inst#"v2i32") V64:$LHS, V64:$RHS)>;
+
+ def : Pat<(v16i8 (OpNode V128:$LHS, V128:$RHS)),
+ (!cast<Instruction>(inst#"v16i8") V128:$LHS, V128:$RHS)>;
+ def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
+ (!cast<Instruction>(inst#"v8i16") V128:$LHS, V128:$RHS)>;
+ def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
+ (!cast<Instruction>(inst#"v4i32") V128:$LHS, V128:$RHS)>;
+ def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
+ (!cast<Instruction>(inst#"v2i64") V128:$LHS, V128:$RHS)>;
+}
+
// As above, but D sized elements unsupported.
multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -10034,15 +10154,20 @@ class ComplexRotationOperand<int Angle, int Remainder, string Type>
let DiagnosticType = "InvalidComplexRotation" # Type;
let Name = "ComplexRotation" # Type;
}
-def complexrotateop : Operand<i32> {
+def complexrotateop : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
+ SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getSExtValue() / 90), SDLoc(N), MVT::i32);
+}]>> {
let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
let PrintMethod = "printComplexRotationOp<90, 0>";
}
-def complexrotateopodd : Operand<i32> {
+def complexrotateopodd : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
+ SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(((N->getSExtValue() - 90) / 180), SDLoc(N), MVT::i32);
+}]>> {
let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
let PrintMethod = "printComplexRotationOp<180, 90>";
}
-
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
RegisterOperand regtype, Operand rottype,
@@ -10373,9 +10498,9 @@ class CryptoRRTied<bits<1>op0, bits<2>op1, string asm, string asmops>
let Inst{11-10} = op1;
}
class CryptoRRTied_2D<bits<1>op0, bits<2>op1, string asm>
- : CryptoRRTied<op0, op1, asm, "{\t$Vd.2d, $Vn.2d}">;
+ : CryptoRRTied<op0, op1, asm, "{\t$Vd.2d, $Vn.2d|.2d\t$Vd, $Vn}">;
class CryptoRRTied_4S<bits<1>op0, bits<2>op1, string asm>
- : CryptoRRTied<op0, op1, asm, "{\t$Vd.4s, $Vn.4s}">;
+ : CryptoRRTied<op0, op1, asm, "{\t$Vd.4s, $Vn.4s|.4s\t$Vd, $Vn}">;
class CryptoRRR<bits<1> op0, bits<2>op1, dag oops, dag iops, string asm,
string asmops, string cst>
@@ -10390,19 +10515,19 @@ class CryptoRRR<bits<1> op0, bits<2>op1, dag oops, dag iops, string asm,
}
class CryptoRRR_2D<bits<1> op0, bits<2>op1, string asm>
: CryptoRRR<op0, op1, (outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm,
- "{\t$Vd.2d, $Vn.2d, $Vm.2d}", "">;
+ "{\t$Vd.2d, $Vn.2d, $Vm.2d|.2d\t$Vd, $Vn, $Vm}", "">;
class CryptoRRRTied_2D<bits<1> op0, bits<2>op1, string asm>
: CryptoRRR<op0, op1, (outs V128:$Vdst), (ins V128:$Vd, V128:$Vn, V128:$Vm), asm,
- "{\t$Vd.2d, $Vn.2d, $Vm.2d}", "$Vd = $Vdst">;
+ "{\t$Vd.2d, $Vn.2d, $Vm.2d|.2d\t$Vd, $Vn, $Vm}", "$Vd = $Vdst">;
class CryptoRRR_4S<bits<1> op0, bits<2>op1, string asm>
: CryptoRRR<op0, op1, (outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm,
- "{\t$Vd.4s, $Vn.4s, $Vm.4s}", "">;
+ "{\t$Vd.4s, $Vn.4s, $Vm.4s|.4s\t$Vd, $Vn, $Vm}", "">;
class CryptoRRRTied_4S<bits<1> op0, bits<2>op1, string asm>
: CryptoRRR<op0, op1, (outs V128:$Vdst), (ins V128:$Vd, V128:$Vn, V128:$Vm), asm,
- "{\t$Vd.4s, $Vn.4s, $Vm.4s}", "$Vd = $Vdst">;
+ "{\t$Vd.4s, $Vn.4s, $Vm.4s|.4s\t$Vd, $Vn, $Vm}", "$Vd = $Vdst">;
class CryptoRRRTied<bits<1> op0, bits<2>op1, string asm>
: CryptoRRR<op0, op1, (outs FPR128:$Vdst), (ins FPR128:$Vd, FPR128:$Vn, V128:$Vm),
- asm, "{\t$Vd, $Vn, $Vm.2d}", "$Vd = $Vdst">;
+ asm, "{\t$Vd, $Vn, $Vm.2d|.2d\t$Vd, $Vn, $Vm}", "$Vd = $Vdst">;
class CryptoRRRR<bits<2>op0, string asm, string asmops>
: BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, V128:$Va), asm,
@@ -10416,15 +10541,18 @@ class CryptoRRRR<bits<2>op0, string asm, string asmops>
let Inst{14-10} = Va;
}
class CryptoRRRR_16B<bits<2>op0, string asm>
- : CryptoRRRR<op0, asm, "{\t$Vd.16b, $Vn.16b, $Vm.16b, $Va.16b}"> {
+ : CryptoRRRR<op0, asm, "{\t$Vd.16b, $Vn.16b, $Vm.16b, $Va.16b" #
+ "|.16b\t$Vd, $Vn, $Vm, $Va}"> {
}
class CryptoRRRR_4S<bits<2>op0, string asm>
- : CryptoRRRR<op0, asm, "{\t$Vd.4s, $Vn.4s, $Vm.4s, $Va.4s}"> {
+ : CryptoRRRR<op0, asm, "{\t$Vd.4s, $Vn.4s, $Vm.4s, $Va.4s" #
+ "|.4s\t$Vd, $Vn, $Vm, $Va}"> {
}
class CryptoRRRi6<string asm>
: BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, uimm6:$imm), asm,
- "{\t$Vd.2d, $Vn.2d, $Vm.2d, $imm}", "", []> {
+ "{\t$Vd.2d, $Vn.2d, $Vm.2d, $imm" #
+ "|.2d\t$Vd, $Vn, $Vm, $imm}", "", []> {
bits<6> imm;
bits<5> Vm;
let Inst{24-21} = 0b0100;
@@ -10437,7 +10565,8 @@ class CryptoRRRi6<string asm>
class CryptoRRRi2Tied<bits<1>op0, bits<2>op1, string asm>
: BaseCryptoV82<(outs V128:$Vdst),
(ins V128:$Vd, V128:$Vn, V128:$Vm, VectorIndexS:$imm),
- asm, "{\t$Vd.4s, $Vn.4s, $Vm.s$imm}", "$Vd = $Vdst", []> {
+ asm, "{\t$Vd.4s, $Vn.4s, $Vm.s$imm" #
+ "|.4s\t$Vd, $Vn, $Vm$imm}", "$Vd = $Vdst", []> {
bits<2> imm;
bits<5> Vm;
let Inst{24-21} = 0b0010;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5c35e5bcdd30..54f3f7c10132 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -1981,6 +1982,9 @@ bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
int64_t &Offset,
const TargetRegisterInfo *TRI) const {
+ if (!LdSt.mayLoadOrStore())
+ return false;
+
unsigned Width;
return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
}
@@ -2025,9 +2029,8 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
Offset = LdSt.getOperand(3).getImm() * Scale;
}
- assert((BaseOp->isReg() || BaseOp->isFI()) &&
- "getMemOperandWithOffset only supports base "
- "operands of type register or frame index.");
+ if (!BaseOp->isReg() && !BaseOp->isFI())
+ return false;
return true;
}
@@ -2185,12 +2188,19 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
MaxOffset = 4095;
break;
case AArch64::ADDG:
- case AArch64::TAGPstack:
Scale = 16;
Width = 0;
MinOffset = 0;
MaxOffset = 63;
break;
+ case AArch64::TAGPstack:
+ Scale = 16;
+ Width = 0;
+ // TAGP with a negative offset turns into SUBP, which has a maximum offset
+ // of 63 (not 64!).
+ MinOffset = -63;
+ MaxOffset = 63;
+ break;
case AArch64::LDG:
case AArch64::STGOffset:
case AArch64::STZGOffset:
@@ -2227,54 +2237,82 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
return true;
}
-static unsigned getOffsetStride(unsigned Opc) {
+// Scaling factor (access size in bytes) for a scaled or unscaled load or store.
+int AArch64InstrInfo::getMemScale(unsigned Opc) {
switch (Opc) {
default:
- return 0;
- case AArch64::LDURQi:
- case AArch64::STURQi:
- return 16;
- case AArch64::LDURXi:
- case AArch64::LDURDi:
- case AArch64::STURXi:
- case AArch64::STURDi:
- return 8;
- case AArch64::LDURWi:
+ llvm_unreachable("Opcode has unknown scale!");
+ case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
+ case AArch64::LDRSBWui:
+ case AArch64::LDURSBWi:
+ case AArch64::STRBBui:
+ case AArch64::STURBBi:
+ return 1;
+ case AArch64::LDRHHui:
+ case AArch64::LDURHHi:
+ case AArch64::LDRSHWui:
+ case AArch64::LDURSHWi:
+ case AArch64::STRHHui:
+ case AArch64::STURHHi:
+ return 2;
+ case AArch64::LDRSui:
case AArch64::LDURSi:
+ case AArch64::LDRSWui:
case AArch64::LDURSWi:
- case AArch64::STURWi:
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
+ case AArch64::STRSui:
case AArch64::STURSi:
+ case AArch64::STRWui:
+ case AArch64::STURWi:
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPWi:
+ case AArch64::STPSi:
+ case AArch64::STPWi:
return 4;
+ case AArch64::LDRDui:
+ case AArch64::LDURDi:
+ case AArch64::LDRXui:
+ case AArch64::LDURXi:
+ case AArch64::STRDui:
+ case AArch64::STURDi:
+ case AArch64::STRXui:
+ case AArch64::STURXi:
+ case AArch64::LDPDi:
+ case AArch64::LDPXi:
+ case AArch64::STPDi:
+ case AArch64::STPXi:
+ return 8;
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
+ case AArch64::STRQui:
+ case AArch64::STURQi:
+ case AArch64::LDPQi:
+ case AArch64::STPQi:
+ case AArch64::STGOffset:
+ case AArch64::STZGOffset:
+ case AArch64::ST2GOffset:
+ case AArch64::STZ2GOffset:
+ case AArch64::STGPi:
+ return 16;
}
}
// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
- unsigned OffsetStride = getOffsetStride(Opc);
- if (OffsetStride == 0)
- return false;
+ int Scale = AArch64InstrInfo::getMemScale(Opc);
+
// If the byte-offset isn't a multiple of the stride, we can't scale this
// offset.
- if (Offset % OffsetStride != 0)
+ if (Offset % Scale != 0)
return false;
// Convert the byte-offset used by unscaled into an "element" offset used
// by the scaled pair load/store instructions.
- Offset /= OffsetStride;
- return true;
-}
-
-// Unscale the scaled offsets. Returns false if the scaled offset can't be
-// unscaled.
-static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
- unsigned OffsetStride = getOffsetStride(Opc);
- if (OffsetStride == 0)
- return false;
-
- // Convert the "element" offset used by scaled pair load/store instructions
- // into the byte-offset used by unscaled.
- Offset *= OffsetStride;
+ Offset /= Scale;
return true;
}
@@ -2305,15 +2343,17 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
- // Get the byte-offset from the object offset.
- if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
+ // Convert to scaled object offsets.
+ int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
+ if (ObjectOffset1 % Scale1 != 0)
return false;
+ ObjectOffset1 /= Scale1;
+ int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
+ if (ObjectOffset2 % Scale2 != 0)
+ return false;
+ ObjectOffset2 /= Scale2;
ObjectOffset1 += Offset1;
ObjectOffset2 += Offset2;
- // Get the "element" index in the object.
- if (!scaleOffset(Opcode1, ObjectOffset1) ||
- !scaleOffset(Opcode2, ObjectOffset2))
- return false;
return ObjectOffset1 + 1 == ObjectOffset2;
}
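After the rework above, each frame object's offset is scaled by its own access size (via getMemScale) and the two accesses cluster only when they form consecutive elements. A standalone model of the test (names illustrative):

#include <cstdint>

bool adjacentScaledOffsets(int64_t Obj1, int Scale1, int64_t Elt1,
                           int64_t Obj2, int Scale2, int64_t Elt2) {
  if (Obj1 % Scale1 != 0 || Obj2 % Scale2 != 0)
    return false; // object offset not a multiple of the access size
  return (Obj1 / Scale1 + Elt1) + 1 == (Obj2 / Scale2 + Elt2);
}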
@@ -2373,7 +2413,7 @@ bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
// The caller should already have ordered First/SecondLdSt by offset.
// Note: except for non-equal frame index bases
if (BaseOp1.isFI()) {
- assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
+ assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
"Caller should have ordered offsets.");
const MachineFrameInfo &MFI =
@@ -2382,8 +2422,7 @@ bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
BaseOp2.getIndex(), Offset2, SecondOpc);
}
- assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
- "Caller should have ordered offsets.");
+ assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
return Offset1 + 1 == Offset2;
}
@@ -2409,8 +2448,8 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc,
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc,
unsigned Opcode,
ArrayRef<unsigned> Indices) const {
assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
@@ -2461,8 +2500,8 @@ void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
if (AArch64::GPR32spRegClass.contains(DestReg) &&
(AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2471,10 +2510,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// If either operand is WSP, expand to ADD #0.
if (Subtarget.hasZeroCycleRegMove()) {
// Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
- unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
- &AArch64::GPR64spRegClass);
- unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
- &AArch64::GPR64spRegClass);
+ MCRegister DestRegX = TRI->getMatchingSuperReg(
+ DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
+ MCRegister SrcRegX = TRI->getMatchingSuperReg(
+ SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
// This instruction is reading and writing X registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegX, but a proper
@@ -2497,10 +2536,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else {
if (Subtarget.hasZeroCycleRegMove()) {
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
- unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
- &AArch64::GPR64spRegClass);
- unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
- &AArch64::GPR64spRegClass);
+ MCRegister DestRegX = TRI->getMatchingSuperReg(
+ DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
+ MCRegister SrcRegX = TRI->getMatchingSuperReg(
+ SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
// This instruction is reading and writing X registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegX, but a proper
@@ -2897,7 +2936,18 @@ void AArch64InstrInfo::storeRegToStackSlot(
}
break;
}
+ unsigned StackID = TargetStackID::Default;
+ if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_PXI;
+ StackID = TargetStackID::SVEVector;
+ } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZXI;
+ StackID = TargetStackID::SVEVector;
+ }
assert(Opc && "Unknown register class");
+ MFI.setStackID(FI, StackID);
const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(SrcReg, getKillRegState(isKill))
@@ -3028,7 +3078,19 @@ void AArch64InstrInfo::loadRegFromStackSlot(
}
break;
}
+
+ unsigned StackID = TargetStackID::Default;
+ if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_PXI;
+ StackID = TargetStackID::SVEVector;
+ } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZXI;
+ StackID = TargetStackID::SVEVector;
+ }
assert(Opc && "Unknown register class");
+ MFI.setStackID(FI, StackID);
const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(DestReg, getDefRegState(true))
@@ -3085,7 +3147,7 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
do {
- unsigned ThisVal = std::min<unsigned>(Offset, MaxEncodableValue);
+ uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
unsigned LocalShiftSize = 0;
if (ThisVal > MaxEncoding) {
ThisVal = ThisVal >> ShiftSize;
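The type change above is a genuine fix: with std::min<unsigned>, a 64-bit Offset was truncated to 32 bits before the comparison, so large stack adjustments could clamp to the wrong value. The corrected idiom in isolation:

#include <algorithm>
#include <cstdint>

uint64_t clampAdjustment(uint64_t Offset, uint64_t MaxEncodableValue) {
  return std::min<uint64_t>(Offset, MaxEncodableValue); // no 32-bit truncation
}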
@@ -3548,6 +3610,18 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
case AArch64::SUBXri:
case AArch64::SUBSXri:
+ case AArch64::ADDv8i8:
+ case AArch64::ADDv16i8:
+ case AArch64::ADDv4i16:
+ case AArch64::ADDv8i16:
+ case AArch64::ADDv2i32:
+ case AArch64::ADDv4i32:
+ case AArch64::SUBv8i8:
+ case AArch64::SUBv16i8:
+ case AArch64::SUBv4i16:
+ case AArch64::SUBv8i16:
+ case AArch64::SUBv2i32:
+ case AArch64::SUBv4i32:
return true;
default:
break;
@@ -3690,6 +3764,13 @@ static bool getMaddPatterns(MachineInstr &Root,
}
};
+ auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
+ if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
+ Patterns.push_back(Pattern);
+ Found = true;
+ }
+ };
+
typedef MachineCombinerPattern MCP;
switch (Opc) {
@@ -3725,6 +3806,70 @@ static bool getMaddPatterns(MachineInstr &Root,
case AArch64::SUBXri:
setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
break;
+ case AArch64::ADDv8i8:
+ setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
+ setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
+ break;
+ case AArch64::ADDv16i8:
+ setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
+ setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
+ break;
+ case AArch64::ADDv4i16:
+ setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
+ setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
+ setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
+ setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
+ break;
+ case AArch64::ADDv8i16:
+ setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
+ setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
+ setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
+ setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
+ break;
+ case AArch64::ADDv2i32:
+ setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
+ setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
+ setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
+ setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
+ break;
+ case AArch64::ADDv4i32:
+ setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
+ setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
+ setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
+ setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
+ break;
+ case AArch64::SUBv8i8:
+ setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
+ setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
+ break;
+ case AArch64::SUBv16i8:
+ setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
+ setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
+ break;
+ case AArch64::SUBv4i16:
+ setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
+ setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
+ setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
+ setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
+ break;
+ case AArch64::SUBv8i16:
+ setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
+ setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
+ setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
+ setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
+ break;
+ case AArch64::SUBv2i32:
+ setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
+ setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
+ setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
+ setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
+ break;
+ case AArch64::SUBv4i32:
+ setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
+ setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
+ setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
+ setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
+ break;
}
return Found;
}
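As a rough illustration only of what these new table entries enable: the MachineCombiner rewrites an ADD fed by a single-use MUL into one multiply-accumulate. The sketch below is not LLVM code; the names fuseMulAdd, Opc, and Inst and the flat operand model are all assumptions made for brevity.

    #include <cassert>

    // Simplified instruction model: an ADD whose Src0 or Src1 is produced by
    // a MUL collapses into MLA(acc, a, b) = acc + a * b, mirroring the
    // MULADDv*_OP1 / MULADDv*_OP2 cases above.
    enum Opc { MUL, ADD, MLA };
    struct Inst { Opc Op; int Dst, Src0, Src1, Src2; };

    static bool fuseMulAdd(const Inst &Mul, const Inst &Add, Inst &Out) {
      if (Mul.Op != MUL || Add.Op != ADD)
        return false;
      int Acc;
      if (Add.Src0 == Mul.Dst)      // _OP1: add (mul a, b), acc
        Acc = Add.Src1;
      else if (Add.Src1 == Mul.Dst) // _OP2: add acc, (mul a, b)
        Acc = Add.Src0;
      else
        return false;
      Out = {MLA, Add.Dst, Acc, Mul.Src0, Mul.Src1};
      return true;
    }

    int main() {
      Inst Mul{MUL, /*Dst=*/2, /*a=*/0, /*b=*/1, 0};
      Inst Add{ADD, /*Dst=*/3, /*Src0=*/2, /*acc=*/4, 0};
      Inst Fused;
      assert(fuseMulAdd(Mul, Add, Fused) && Fused.Op == MLA);
    }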
@@ -3937,6 +4082,46 @@ bool AArch64InstrInfo::isThroughputPattern(
case MachineCombinerPattern::FMLSv2f64_OP2:
case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
case MachineCombinerPattern::FMLSv4f32_OP2:
+ case MachineCombinerPattern::MULADDv8i8_OP1:
+ case MachineCombinerPattern::MULADDv8i8_OP2:
+ case MachineCombinerPattern::MULADDv16i8_OP1:
+ case MachineCombinerPattern::MULADDv16i8_OP2:
+ case MachineCombinerPattern::MULADDv4i16_OP1:
+ case MachineCombinerPattern::MULADDv4i16_OP2:
+ case MachineCombinerPattern::MULADDv8i16_OP1:
+ case MachineCombinerPattern::MULADDv8i16_OP2:
+ case MachineCombinerPattern::MULADDv2i32_OP1:
+ case MachineCombinerPattern::MULADDv2i32_OP2:
+ case MachineCombinerPattern::MULADDv4i32_OP1:
+ case MachineCombinerPattern::MULADDv4i32_OP2:
+ case MachineCombinerPattern::MULSUBv8i8_OP1:
+ case MachineCombinerPattern::MULSUBv8i8_OP2:
+ case MachineCombinerPattern::MULSUBv16i8_OP1:
+ case MachineCombinerPattern::MULSUBv16i8_OP2:
+ case MachineCombinerPattern::MULSUBv4i16_OP1:
+ case MachineCombinerPattern::MULSUBv4i16_OP2:
+ case MachineCombinerPattern::MULSUBv8i16_OP1:
+ case MachineCombinerPattern::MULSUBv8i16_OP2:
+ case MachineCombinerPattern::MULSUBv2i32_OP1:
+ case MachineCombinerPattern::MULSUBv2i32_OP2:
+ case MachineCombinerPattern::MULSUBv4i32_OP1:
+ case MachineCombinerPattern::MULSUBv4i32_OP2:
+ case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
+ case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
+ case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
+ case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
+ case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
+ case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
+ case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
+ case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
+ case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
+ case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
+ case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
+ case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
+ case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
+ case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
+ case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
+ case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
return true;
} // end switch (Pattern)
return false;
@@ -4040,6 +4225,80 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
return MUL;
}
+/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
+/// instructions.
+///
+/// \see genFusedMultiply
+static MachineInstr *genFusedMultiplyAcc(
+ MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
+ MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
+ unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
+ return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
+ FMAInstKind::Accumulator);
+}
+
+/// genNeg - Helper to generate an intermediate negation of the second operand
+/// of Root
+static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ unsigned MnegOpc, const TargetRegisterClass *RC) {
+ Register NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB);
+
+ assert(InstrIdxForVirtReg.empty());
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+
+ return NewVR;
+}
+
+/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
+/// instructions with an additional negation of the accumulator
+static MachineInstr *genFusedMultiplyAccNeg(
+ MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
+ MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
+ unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
+ assert(IdxMulOpd == 1);
+
+ Register NewVR =
+ genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
+ return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+}
+
+/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
+/// instructions using the indexed (by-element) multiply form.
+///
+/// \see genFusedMultiply
+static MachineInstr *genFusedMultiplyIdx(
+ MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
+ MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
+ unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
+ return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
+ FMAInstKind::Indexed);
+}
+
+/// genFusedMultiplyIdxNeg - Helper to generate indexed fused multiply
+/// accumulate instructions with an additional negation of the accumulator
+static MachineInstr *genFusedMultiplyIdxNeg(
+ MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
+ MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
+ unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
+ assert(IdxMulOpd == 1);
+
+ Register NewVR =
+ genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
+
+ return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
+ FMAInstKind::Indexed, &NewVR);
+}
+
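A quick arithmetic check, in plain C++ and illustrative only, of why the _OP1 subtract cases need genNeg: MLS computes acc - a*b, which fits sub(acc, mul) but not sub(mul, acc); the latter becomes negate-then-MLA, since -acc + a*b == a*b - acc.

    #include <cassert>

    int main() {
      int a = 7, b = 3, acc = 5;
      // MULSUB*_OP2: sub acc, (mul a, b) maps directly onto MLS.
      int mls = acc - a * b;
      assert(mls == 5 - 21);
      // MULSUB*_OP1: sub (mul a, b), acc has no direct instruction; negate
      // the accumulator first and then use MLA, as the AccNeg/IdxNeg helpers
      // above do.
      int neg = -acc;
      int mla = neg + a * b;
      assert(mla == a * b - acc);
    }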
/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
@@ -4279,6 +4538,231 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
+
+ case MachineCombinerPattern::MULADDv8i8_OP1:
+ Opc = AArch64::MLAv8i8;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv8i8_OP2:
+ Opc = AArch64::MLAv8i8;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv16i8_OP1:
+ Opc = AArch64::MLAv16i8;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv16i8_OP2:
+ Opc = AArch64::MLAv16i8;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv4i16_OP1:
+ Opc = AArch64::MLAv4i16;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv4i16_OP2:
+ Opc = AArch64::MLAv4i16;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv8i16_OP1:
+ Opc = AArch64::MLAv8i16;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv8i16_OP2:
+ Opc = AArch64::MLAv8i16;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv2i32_OP1:
+ Opc = AArch64::MLAv2i32;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv2i32_OP2:
+ Opc = AArch64::MLAv2i32;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv4i32_OP1:
+ Opc = AArch64::MLAv4i32;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv4i32_OP2:
+ Opc = AArch64::MLAv4i32;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+
+ case MachineCombinerPattern::MULSUBv8i8_OP1:
+ Opc = AArch64::MLAv8i8;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
+ RC);
+ break;
+ case MachineCombinerPattern::MULSUBv8i8_OP2:
+ Opc = AArch64::MLSv8i8;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULSUBv16i8_OP1:
+ Opc = AArch64::MLAv16i8;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
+ RC);
+ break;
+ case MachineCombinerPattern::MULSUBv16i8_OP2:
+ Opc = AArch64::MLSv16i8;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULSUBv4i16_OP1:
+ Opc = AArch64::MLAv4i16;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
+ RC);
+ break;
+ case MachineCombinerPattern::MULSUBv4i16_OP2:
+ Opc = AArch64::MLSv4i16;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULSUBv8i16_OP1:
+ Opc = AArch64::MLAv8i16;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
+ RC);
+ break;
+ case MachineCombinerPattern::MULSUBv8i16_OP2:
+ Opc = AArch64::MLSv8i16;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULSUBv2i32_OP1:
+ Opc = AArch64::MLAv2i32;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
+ RC);
+ break;
+ case MachineCombinerPattern::MULSUBv2i32_OP2:
+ Opc = AArch64::MLSv2i32;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULSUBv4i32_OP1:
+ Opc = AArch64::MLAv4i32;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
+ RC);
+ break;
+ case MachineCombinerPattern::MULSUBv4i32_OP2:
+ Opc = AArch64::MLSv4i32;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+
+ case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
+ Opc = AArch64::MLAv4i16_indexed;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
+ Opc = AArch64::MLAv4i16_indexed;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
+ Opc = AArch64::MLAv8i16_indexed;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
+ Opc = AArch64::MLAv8i16_indexed;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
+ Opc = AArch64::MLAv2i32_indexed;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
+ Opc = AArch64::MLAv2i32_indexed;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
+ Opc = AArch64::MLAv4i32_indexed;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
+ Opc = AArch64::MLAv4i32_indexed;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+
+ case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
+ Opc = AArch64::MLAv4i16_indexed;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
+ RC);
+ break;
+ case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
+ Opc = AArch64::MLSv4i16_indexed;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
+ Opc = AArch64::MLAv8i16_indexed;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
+ RC);
+ break;
+ case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
+ Opc = AArch64::MLSv8i16_indexed;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
+ Opc = AArch64::MLAv2i32_indexed;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
+ RC);
+ break;
+ case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
+ Opc = AArch64::MLSv2i32_indexed;
+ RC = &AArch64::FPR64RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
+ Opc = AArch64::MLAv4i32_indexed;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
+ InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
+ RC);
+ break;
+ case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
+ Opc = AArch64::MLSv4i32_indexed;
+ RC = &AArch64::FPR128RegClass;
+ MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+
// Floating Point Support
case MachineCombinerPattern::FMULADDH_OP1:
Opc = AArch64::FMADDHrrr;
@@ -5037,8 +5521,99 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
return 0u;
}
-outliner::OutlinedFunction
-AArch64InstrInfo::getOutliningCandidateInfo(
+static bool
+outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
+ const outliner::Candidate &b) {
+ const Function &Fa = a.getMF()->getFunction();
+ const Function &Fb = b.getMF()->getFunction();
+
+ // If neither function has the "sign-return-address" attribute, their
+ // signing behaviour is equal.
+ if (!Fa.hasFnAttribute("sign-return-address") &&
+ !Fb.hasFnAttribute("sign-return-address")) {
+ return true;
+ }
+
+ // If both functions have the "sign-return-address" attribute, their signing
+ // behaviour is equal if the values of the attribute are equal.
+ if (Fa.hasFnAttribute("sign-return-address") &&
+ Fb.hasFnAttribute("sign-return-address")) {
+ StringRef ScopeA =
+ Fa.getFnAttribute("sign-return-address").getValueAsString();
+ StringRef ScopeB =
+ Fb.getFnAttribute("sign-return-address").getValueAsString();
+ return ScopeA.equals(ScopeB);
+ }
+
+ // If function B doesn't have the "sign-return-address" attribute but A does,
+ // the functions' signing behaviour is equal if A's value for
+ // "sign-return-address" is "none" and vice versa.
+ if (Fa.hasFnAttribute("sign-return-address")) {
+ StringRef ScopeA =
+ Fa.getFnAttribute("sign-return-address").getValueAsString();
+ return ScopeA.equals("none");
+ }
+
+ if (Fb.hasFnAttribute("sign-return-address")) {
+ StringRef ScopeB =
+ Fb.getFnAttribute("sign-return-address").getValueAsString();
+ return ScopeB.equals("none");
+ }
+
+ llvm_unreachable("Unkown combination of sign-return-address attributes");
+}
+
+static bool
+outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
+ const outliner::Candidate &b) {
+ const Function &Fa = a.getMF()->getFunction();
+ const Function &Fb = b.getMF()->getFunction();
+
+ // If neither function has the "sign-return-address-key" attribute, their
+ // keys are equal.
+ if (!Fa.hasFnAttribute("sign-return-address-key") &&
+ !Fb.hasFnAttribute("sign-return-address-key")) {
+ return true;
+ }
+
+ // If both functions have the "sign-return-address-key" attribute, their
+ // keys are equal if the values of "sign-return-address-key" are equal.
+ if (Fa.hasFnAttribute("sign-return-address-key") &&
+ Fb.hasFnAttribute("sign-return-address-key")) {
+ StringRef KeyA =
+ Fa.getFnAttribute("sign-return-address-key").getValueAsString();
+ StringRef KeyB =
+ Fb.getFnAttribute("sign-return-address-key").getValueAsString();
+ return KeyA.equals(KeyB);
+ }
+
+ // If B doesn't have the "sign-return-address-key" attribute, both keys are
+ // equal if function A uses the default key (a_key), and vice versa.
+ if (Fa.hasFnAttribute("sign-return-address-key")) {
+ StringRef KeyA =
+ Fa.getFnAttribute("sign-return-address-key").getValueAsString();
+ return KeyA.equals_lower("a_key");
+ }
+
+ if (Fb.hasFnAttribute("sign-return-address-key")) {
+ StringRef KeyB =
+ Fb.getFnAttribute("sign-return-address-key").getValueAsString();
+ return KeyB.equals_lower("a_key");
+ }
+
+ llvm_unreachable("Unkown combination of sign-return-address-key attributes");
+}
+
+static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
+ const outliner::Candidate &b) {
+ const AArch64Subtarget &SubtargetA =
+ a.getMF()->getSubtarget<AArch64Subtarget>();
+ const AArch64Subtarget &SubtargetB =
+ b.getMF()->getSubtarget<AArch64Subtarget>();
+ return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
+}
+
+outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
unsigned SequenceSize =
@@ -5046,12 +5621,115 @@ AArch64InstrInfo::getOutliningCandidateInfo(
[this](unsigned Sum, const MachineInstr &MI) {
return Sum + getInstSizeInBytes(MI);
});
+ unsigned NumBytesToCreateFrame = 0;
+
+ // We only allow outlining for functions having exactly matching return
+ // address signing attributes, i.e., all share the same value for the
+ // attribute "sign-return-address" and all share the same type of key they
+ // are signed with.
+ // Additionally we require all functions to simultaneously either support
+ // v8.3a features or not. Otherwise an outlined function could get signed
+ // using dedicated v8.3 instructions, and a call from a function that doesn't
+ // support v8.3 instructions would therefore be invalid.
+ if (std::adjacent_find(
+ RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
+ [](const outliner::Candidate &a, const outliner::Candidate &b) {
+ // Return true if a and b are non-equal w.r.t. return address
+ // signing or support of v8.3a features
+ if (outliningCandidatesSigningScopeConsensus(a, b) &&
+ outliningCandidatesSigningKeyConsensus(a, b) &&
+ outliningCandidatesV8_3OpsConsensus(a, b)) {
+ return false;
+ }
+ return true;
+ }) != RepeatedSequenceLocs.end()) {
+ return outliner::OutlinedFunction();
+ }
+
+ // Since at this point all candidates agree on their return address signing,
+ // picking just one is fine. If the candidate functions potentially sign their
+ // return addresses, the outlined function should do the same. Note that in
+ // the case of "sign-return-address"="non-leaf" this is an assumption: it is
+ // not certain that the outlined function will have to sign its return
+ // address, but that decision is made later, when the decision to outline
+ // has already been made.
+ // The same holds for the number of additional instructions we need: On
+ // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
+ // necessary. However, at this point we don't know if the outlined function
+ // will have a RET instruction so we assume the worst.
+ const Function &FCF = FirstCand.getMF()->getFunction();
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+ if (FCF.hasFnAttribute("sign-return-address")) {
+ // One PAC and one AUT instruction.
+ NumBytesToCreateFrame += 8;
+
+ // We have to check if sp-modifying instructions would get outlined. If so,
+ // we only allow outlining if sp is unchanged overall: matching sub and add
+ // instructions are okay to outline; all other sp modifications are not.
+ auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
+ int SPValue = 0;
+ MachineBasicBlock::iterator MBBI = C.front();
+ for (;;) {
+ if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
+ switch (MBBI->getOpcode()) {
+ case AArch64::ADDXri:
+ case AArch64::ADDWri:
+ assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
+ assert(MBBI->getOperand(2).isImm() &&
+ "Expected operand to be immediate");
+ assert(MBBI->getOperand(1).isReg() &&
+ "Expected operand to be a register");
+ // Check if the add just increments sp. If so, we search for
+ // matching sub instructions that decrement sp. If not, the
+ // modification is illegal
+ if (MBBI->getOperand(1).getReg() == AArch64::SP)
+ SPValue += MBBI->getOperand(2).getImm();
+ else
+ return true;
+ break;
+ case AArch64::SUBXri:
+ case AArch64::SUBWri:
+ assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
+ assert(MBBI->getOperand(2).isImm() &&
+ "Expected operand to be immediate");
+ assert(MBBI->getOperand(1).isReg() &&
+ "Expected operand to be a register");
+ // Check if the sub just decrements sp. If so, we search for
+ // matching add instructions that increment sp. If not, the
+ // modification is illegal
+ if (MBBI->getOperand(1).getReg() == AArch64::SP)
+ SPValue -= MBBI->getOperand(2).getImm();
+ else
+ return true;
+ break;
+ default:
+ return true;
+ }
+ }
+ if (MBBI == C.back())
+ break;
+ ++MBBI;
+ }
+ if (SPValue)
+ return true;
+ return false;
+ };
+ // Remove candidates with illegal stack modifying instructions
+ RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
+ RepeatedSequenceLocs.end(),
+ hasIllegalSPModification),
+ RepeatedSequenceLocs.end());
+
+ // If the sequence doesn't have enough candidates left, then we're done.
+ if (RepeatedSequenceLocs.size() < 2)
+ return outliner::OutlinedFunction();
+ }
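A simplified standalone mirror of the scan above, for reference: each SP-touching op is reduced to an assumed (AdjustsSP, Imm) pair rather than the real MachineInstr API, and the names SPOp and hasIllegalSPModification are made up here. Only sp = sp ± imm adjustments are tolerated, and the candidate is rejected unless they cancel out.

    #include <vector>

    // Each op either adjusts SP by Imm (AdjustsSP) or modifies SP some other
    // way; ops that leave SP alone are not recorded.
    struct SPOp { bool AdjustsSP; long Imm; };

    static bool hasIllegalSPModification(const std::vector<SPOp> &Seq) {
      long SPValue = 0;
      for (const SPOp &Op : Seq) {
        if (!Op.AdjustsSP)
          return true;       // arbitrary SP write: never outline
        SPValue += Op.Imm;   // add/sub sp, sp, #imm: track the running delta
      }
      return SPValue != 0;   // net change must be zero across the candidate
    }

    int main() {
      // sub sp,#16 ... add sp,#16 balances out; a lone sub does not.
      bool Balanced = hasIllegalSPModification({{true, -16}, {true, 16}});
      bool Unbalanced = hasIllegalSPModification({{true, -16}});
      return (!Balanced && Unbalanced) ? 0 : 1;
    }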
// Properties about candidate MBBs that hold for all of them.
unsigned FlagsSetInAll = 0xF;
// Compute liveness information for each candidate, and set FlagsSetInAll.
- const TargetRegisterInfo &TRI = getRegisterInfo();
std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
[&FlagsSetInAll](outliner::Candidate &C) {
FlagsSetInAll &= C.Flags;
@@ -5107,7 +5785,7 @@ AArch64InstrInfo::getOutliningCandidateInfo(
};
unsigned FrameID = MachineOutlinerDefault;
- unsigned NumBytesToCreateFrame = 4;
+ NumBytesToCreateFrame += 4;
bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
@@ -5190,11 +5868,21 @@ AArch64InstrInfo::getOutliningCandidateInfo(
unsigned NumBytesNoStackCalls = 0;
std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
+ // Check if we have to save LR.
for (outliner::Candidate &C : RepeatedSequenceLocs) {
C.initLRU(TRI);
+ // If we have a noreturn caller, then we're going to be conservative and
+ // say that we have to save LR. If we don't have a ret at the end of the
+ // block, then we can't reason about liveness accurately.
+ //
+ // FIXME: We can probably do better than always disabling this in
+ // noreturn functions by fixing up the liveness info.
+ bool IsNoReturn =
+ C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
+
// Is LR available? If so, we don't need a save.
- if (C.LRU.available(AArch64::LR)) {
+ if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
NumBytesNoStackCalls += 4;
C.setCallInfo(MachineOutlinerNoLRSave, 4);
CandidatesWithoutStackFixups.push_back(C);
@@ -5376,6 +6064,19 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
MachineFunction *MF = MBB->getParent();
AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
+ // Don't outline anything used for return address signing. The outlined
+ // function will get signed later if needed.
+ switch (MI.getOpcode()) {
+ case AArch64::PACIASP:
+ case AArch64::PACIBSP:
+ case AArch64::AUTIASP:
+ case AArch64::AUTIBSP:
+ case AArch64::RETAA:
+ case AArch64::RETAB:
+ case AArch64::EMITBKEY:
+ return outliner::InstrType::Illegal;
+ }
+
// Don't outline LOHs.
if (FuncInfo->getLOHRelated().count(&MI))
return outliner::InstrType::Illegal;
@@ -5528,6 +6229,59 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
}
}
+static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
+ bool ShouldSignReturnAddr,
+ bool ShouldSignReturnAddrWithAKey) {
+ if (ShouldSignReturnAddr) {
+ MachineBasicBlock::iterator MBBPAC = MBB.begin();
+ MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL;
+
+ if (MBBAUT != MBB.end())
+ DL = MBBAUT->getDebugLoc();
+
+ // At the very beginning of the basic block we insert the following
+ // depending on the key type
+ //
+ // a_key: b_key:
+ // PACIASP EMITBKEY
+ // CFI_INSTRUCTION PACIBSP
+ // CFI_INSTRUCTION
+ if (ShouldSignReturnAddrWithAKey) {
+ BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP))
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ // If v8.3a features are available we can replace a RET instruction with
+ // RETAA or RETAB and omit the AUT instructions.
+ if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() &&
+ MBBAUT->getOpcode() == AArch64::RET) {
+ BuildMI(MBB, MBBAUT, DL,
+ TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
+ : AArch64::RETAB))
+ .copyImplicitOps(*MBBAUT);
+ MBB.erase(MBBAUT);
+ } else {
+ BuildMI(MBB, MBBAUT, DL,
+ TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
+ : AArch64::AUTIBSP))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
+ }
+}
+
void AArch64InstrInfo::buildOutlinedFrame(
MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
@@ -5543,16 +6297,19 @@ void AArch64InstrInfo::buildOutlinedFrame(
TailOpcode = AArch64::TCRETURNriALL;
}
MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
- .add(Call->getOperand(0))
- .addImm(0);
+ .add(Call->getOperand(0))
+ .addImm(0);
MBB.insert(MBB.end(), TC);
Call->eraseFromParent();
}
+ bool IsLeafFunction = true;
+
// Is there a call in the outlined range?
- auto IsNonTailCall = [](MachineInstr &MI) {
+ auto IsNonTailCall = [](const MachineInstr &MI) {
return MI.isCall() && !MI.isReturn();
};
+
if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
// Fix up the instructions in the range, since we're going to modify the
// stack.
@@ -5560,6 +6317,8 @@ void AArch64InstrInfo::buildOutlinedFrame(
"Can only fix up stack references once");
fixupPostOutline(MBB);
+ IsLeafFunction = false;
+
// LR has to be a live in so that we can save it.
MBB.addLiveIn(AArch64::LR);
@@ -5606,16 +6365,47 @@ void AArch64InstrInfo::buildOutlinedFrame(
Et = MBB.insert(Et, LDRXpost);
}
+ // All candidates that reach this point agree on their return address
+ // signing, so it is enough to consider the signing behaviour of just one
+ // of them.
+ const Function &CF = OF.Candidates.front().getMF()->getFunction();
+ bool ShouldSignReturnAddr = false;
+ if (CF.hasFnAttribute("sign-return-address")) {
+ StringRef Scope =
+ CF.getFnAttribute("sign-return-address").getValueAsString();
+ if (Scope.equals("all"))
+ ShouldSignReturnAddr = true;
+ else if (Scope.equals("non-leaf") && !IsLeafFunction)
+ ShouldSignReturnAddr = true;
+ }
+
+ // a_key is the default
+ bool ShouldSignReturnAddrWithAKey = true;
+ if (CF.hasFnAttribute("sign-return-address-key")) {
+ const StringRef Key =
+ CF.getFnAttribute("sign-return-address-key").getValueAsString();
+ // Key can either be a_key or b_key
+ assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) &&
+ "Return address signing key must be either a_key or b_key");
+ ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key");
+ }
+
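Condensing the two attribute lookups above into one place, a minimal model of the scope/key decision. This is an assumed simplification: decideSigning and SignDecision are invented names, plain strings stand in for LLVM attributes, and the real key comparison is case-insensitive (equals_lower) where this sketch compares exactly.

    #include <cassert>
    #include <string>

    struct SignDecision { bool Sign; bool UseAKey; };

    // Scope/Key are null when the corresponding attribute is absent.
    static SignDecision decideSigning(const std::string *Scope,
                                      const std::string *Key, bool IsLeaf) {
      SignDecision D{false, /*UseAKey=*/true};  // a_key is the default
      if (Scope)
        D.Sign = (*Scope == "all") || (*Scope == "non-leaf" && !IsLeaf);
      if (Key)
        D.UseAKey = (*Key == "a_key");
      return D;
    }

    int main() {
      std::string NonLeaf = "non-leaf", BKey = "b_key";
      SignDecision D = decideSigning(&NonLeaf, &BKey, /*IsLeaf=*/false);
      assert(D.Sign && !D.UseAKey);
    }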
// If this is a tail call outlined function, then there's already a return.
if (OF.FrameConstructionID == MachineOutlinerTailCall ||
- OF.FrameConstructionID == MachineOutlinerThunk)
+ OF.FrameConstructionID == MachineOutlinerThunk) {
+ signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
+ ShouldSignReturnAddrWithAKey);
return;
+ }
// It's not a tail call, so we have to insert the return ourselves.
MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
.addReg(AArch64::LR, RegState::Undef);
MBB.insert(MBB.end(), ret);
+ signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
+ ShouldSignReturnAddrWithAKey);
+
// Did we have to modify the stack by saving the link register?
if (OF.FrameConstructionID != MachineOutlinerDefault)
return;
@@ -5702,29 +6492,126 @@ bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
return MF.getFunction().hasMinSize();
}
-bool AArch64InstrInfo::isCopyInstrImpl(
- const MachineInstr &MI, const MachineOperand *&Source,
- const MachineOperand *&Destination) const {
+Optional<DestSourcePair>
+AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
// AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
// and zero immediate operands used as an alias for mov instruction.
if (MI.getOpcode() == AArch64::ORRWrs &&
MI.getOperand(1).getReg() == AArch64::WZR &&
MI.getOperand(3).getImm() == 0x0) {
- Destination = &MI.getOperand(0);
- Source = &MI.getOperand(2);
- return true;
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
}
if (MI.getOpcode() == AArch64::ORRXrs &&
MI.getOperand(1).getReg() == AArch64::XZR &&
MI.getOperand(3).getImm() == 0x0) {
- Destination = &MI.getOperand(0);
- Source = &MI.getOperand(2);
- return true;
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
}
- return false;
+ return None;
+}
+
+Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
+ Register Reg) const {
+ int Sign = 1;
+ int64_t Offset = 0;
+
+ // TODO: Handle cases where Reg is a super- or sub-register of the
+ // destination register.
+ if (Reg != MI.getOperand(0).getReg())
+ return None;
+
+ switch (MI.getOpcode()) {
+ default:
+ return None;
+ case AArch64::SUBWri:
+ case AArch64::SUBXri:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri:
+ Sign *= -1;
+ LLVM_FALLTHROUGH;
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXri:
+ case AArch64::ADDWri:
+ case AArch64::ADDXri: {
+ // TODO: Third operand can be a global address (usually some string).
+ if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
+ !MI.getOperand(2).isImm())
+ return None;
+ Offset = MI.getOperand(2).getImm() * Sign;
+ int Shift = MI.getOperand(3).getImm();
+ assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
+ Offset = Offset << Shift;
+ }
+ }
+ return RegImmPair{MI.getOperand(1).getReg(), Offset};
+}
+
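A standalone sketch of the immediate decoding above, under the shift constraint the assert states (0 or 12): SUB forms flip the sign, and the shifted immediate gives the net offset. The names decodeAddSubImm and AddSubOpc are assumptions for this sketch, not LLVM API.

    #include <cassert>
    #include <cstdint>

    enum AddSubOpc { ADDXri, SUBXri };

    // Mirrors isAddImmediate: the offset is Imm << Shift, negated for SUBs.
    static int64_t decodeAddSubImm(AddSubOpc Opc, int64_t Imm, int Shift) {
      assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
      int64_t Magnitude = Imm << Shift;  // Imm is non-negative here
      return (Opc == SUBXri) ? -Magnitude : Magnitude;
    }

    int main() {
      assert(decodeAddSubImm(ADDXri, 1, 12) == 4096); // add x., x., #1, lsl #12
      assert(decodeAddSubImm(SUBXri, 16, 0) == -16);  // sub x., x., #16
    }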
+/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
+/// the destination register, then, if possible, describe the value in terms
+/// of the source register.
+static Optional<ParamLoadedValue>
+describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ auto DestSrc = TII->isCopyInstr(MI);
+ if (!DestSrc)
+ return None;
+
+ Register DestReg = DestSrc->Destination->getReg();
+ Register SrcReg = DestSrc->Source->getReg();
+
+ auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
+
+ // If the described register is the destination, just return the source.
+ if (DestReg == DescribedReg)
+ return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
+
+ // ORRWrs zero-extends to 64 bits, so we need to consider such cases.
+ if (MI.getOpcode() == AArch64::ORRWrs &&
+ TRI->isSuperRegister(DestReg, DescribedReg))
+ return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
+
+ // We may need to describe the lower part of an ORRXrs move.
+ if (MI.getOpcode() == AArch64::ORRXrs &&
+ TRI->isSubRegister(DestReg, DescribedReg)) {
+ Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
+ return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
+ }
+
+ assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
+ "Unhandled ORR[XW]rs copy case");
+
+ return None;
+}
+
+Optional<ParamLoadedValue>
+AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const {
+ const MachineFunction *MF = MI.getMF();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ switch (MI.getOpcode()) {
+ case AArch64::MOVZWi:
+ case AArch64::MOVZXi: {
+ // MOVZWi may be used for producing zero-extended 32-bit immediates in
+ // 64-bit parameters, so we need to consider super-registers.
+ if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
+ return None;
+
+ if (!MI.getOperand(1).isImm())
+ return None;
+ int64_t Immediate = MI.getOperand(1).getImm();
+ int Shift = MI.getOperand(2).getImm();
+ return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
+ nullptr);
+ }
+ case AArch64::ORRWrs:
+ case AArch64::ORRXrs:
+ return describeORRLoadedValue(MI, Reg, this, TRI);
+ }
+
+ return TargetInstrInfo::describeLoadedValue(MI, Reg);
}
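A worked example of the MOVZ case, with assumed values: MOVZ materializes a 16-bit immediate at an optionally shifted position, so the loaded value described above is simply Immediate << Shift.

    #include <cassert>
    #include <cstdint>

    int main() {
      // movz x0, #0x2a, lsl #16 loads 0x2a0000; describeLoadedValue reports
      // the same Immediate << Shift for a query on x0 (or on w0, its
      // sub-register, via the super-register check).
      int64_t Immediate = 0x2a;
      int Shift = 16;
      assert((Immediate << Shift) == 0x2a0000);
    }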
#define GET_INSTRINFO_HELPERS
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 1688045e4fb8..66e517e54903 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -89,6 +89,12 @@ public:
/// if there is a corresponding unscaled variant available.
static Optional<unsigned> getUnscaledLdSt(unsigned Opc);
+ /// Scaling factor for (scaled or unscaled) load or store.
+ static int getMemScale(unsigned Opc);
+ static int getMemScale(const MachineInstr &MI) {
+ return getMemScale(MI.getOpcode());
+ }
+
/// Returns the index for the immediate for a given instruction.
static unsigned getLoadStoreImmIdx(unsigned Opc);
@@ -131,15 +137,15 @@ public:
unsigned NumLoads) const override;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
- bool KillSrc, unsigned Opcode,
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc, unsigned Opcode,
llvm::ArrayRef<unsigned> Indices) const;
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc, unsigned Opcode, unsigned ZeroReg,
llvm::ArrayRef<unsigned> Indices) const;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -265,15 +271,21 @@ public:
/// on Windows.
static bool isSEHInstruction(const MachineInstr &MI);
+ Optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
+ Register Reg) const override;
+
+ Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const override;
+
#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"
protected:
- /// If the specific machine instruction is a instruction that moves/copies
- /// value from one register to another register return true along with
- /// @Source machine operand and @Destination machine operand.
- bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
- const MachineOperand *&Destination) const override;
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another, return the destination and source
+ /// registers as machine operands.
+ Optional<DestSourcePair>
+ isCopyInstrImpl(const MachineInstr &MI) const override;
private:
/// Sets the offsets on outlined instructions in \p MBB which use SP
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1981bd5d3bf0..d590d4d913ff 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -214,6 +214,7 @@ def SDT_AArch64FCmp : SDTypeProfile<0, 2,
SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
+def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
@@ -242,6 +243,9 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
+def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+
// Generates the general dynamic sequences, i.e.
// adrp x0, :tlsdesc:var
// ldr x1, [x0, #:tlsdesc_lo12:var]
@@ -259,6 +263,110 @@ def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
SDTCisSameAs<1, 4>]>;
+def SDT_AArch64TBL : SDTypeProfile<1, 2, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
+]>;
+
+// non-extending masked load fragment.
+def nonext_masked_load :
+ PatFrag<(ops node:$ptr, node:$pred, node:$def),
+ (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed() &&
+ !cast<MaskedLoadSDNode>(N)->isNonTemporal();
+}]>;
+// sign extending masked load fragments.
+def asext_masked_load :
+ PatFrag<(ops node:$ptr, node:$pred, node:$def),
+ (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
+ return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
+ cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD) &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed();
+}]>;
+def asext_masked_load_i8 :
+ PatFrag<(ops node:$ptr, node:$pred, node:$def),
+ (asext_masked_load node:$ptr, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def asext_masked_load_i16 :
+ PatFrag<(ops node:$ptr, node:$pred, node:$def),
+ (asext_masked_load node:$ptr, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def asext_masked_load_i32 :
+ PatFrag<(ops node:$ptr, node:$pred, node:$def),
+ (asext_masked_load node:$ptr, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+// zero extending masked load fragments.
+def zext_masked_load :
+ PatFrag<(ops node:$ptr, node:$pred, node:$def),
+ (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed();
+}]>;
+def zext_masked_load_i8 :
+ PatFrag<(ops node:$ptr, node:$pred, node:$def),
+ (zext_masked_load node:$ptr, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def zext_masked_load_i16 :
+ PatFrag<(ops node:$ptr, node:$pred, node:$def),
+ (zext_masked_load node:$ptr, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def zext_masked_load_i32 :
+ PatFrag<(ops node:$ptr, node:$pred, node:$def),
+ (zext_masked_load node:$ptr, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def non_temporal_load :
+ PatFrag<(ops node:$ptr, node:$pred, node:$def),
+ (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed() &&
+ cast<MaskedLoadSDNode>(N)->isNonTemporal();
+}]>;
+
+// non-truncating masked store fragment.
+def nontrunc_masked_store :
+ PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+ return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed() &&
+ !cast<MaskedStoreSDNode>(N)->isNonTemporal();
+}]>;
+// truncating masked store fragments.
+def trunc_masked_store :
+ PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed();
+}]>;
+def trunc_masked_store_i8 :
+ PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def trunc_masked_store_i16 :
+ PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def trunc_masked_store_i32 :
+ PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def non_temporal_store :
+ PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+ return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed() &&
+ cast<MaskedStoreSDNode>(N)->isNonTemporal();
+}]>;
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
@@ -319,6 +427,8 @@ def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
+def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;
+
def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
@@ -432,6 +542,11 @@ def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
+def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -441,10 +556,10 @@ def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
- def ForCodeSize : Predicate<"MF->getFunction().hasOptSize()">;
- def NotForCodeSize : Predicate<"!MF->getFunction().hasOptSize()">;
+ def ForCodeSize : Predicate<"shouldOptForSize(MF)">;
+ def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">;
// Avoid generating STRQro if it is slow, unless we're optimizing for code size.
- def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().hasOptSize()">;
+ def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
@@ -675,34 +790,81 @@ defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla",
null_frag>;
+let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
+ def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
+ (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
+ def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
+ (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
+ def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
+ (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
+ def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
+ (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
+}
+let Predicates = [HasComplxNum, HasNEON] in {
+ def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
+ (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
+ def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
+ (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
+ foreach Ty = [v4f32, v2f64] in {
+ def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
+ (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
+ def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
+ (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
+ }
+}
+
// v8.3a Pointer Authentication
// These instructions inhabit part of the hint space and so can be used for
-// armv8 targets
+// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
+// important for compatibility with other assemblers (e.g. GAS) when building
+// software that must run on CPUs both with and without PA.
let Uses = [LR], Defs = [LR] in {
- def PACIAZ : SystemNoOperands<0b000, "paciaz">;
- def PACIBZ : SystemNoOperands<0b010, "pacibz">;
- def AUTIAZ : SystemNoOperands<0b100, "autiaz">;
- def AUTIBZ : SystemNoOperands<0b110, "autibz">;
+ def PACIAZ : SystemNoOperands<0b000, "hint #24">;
+ def PACIBZ : SystemNoOperands<0b010, "hint #26">;
+ let isAuthenticated = 1 in {
+ def AUTIAZ : SystemNoOperands<0b100, "hint #28">;
+ def AUTIBZ : SystemNoOperands<0b110, "hint #30">;
+ }
}
let Uses = [LR, SP], Defs = [LR] in {
- def PACIASP : SystemNoOperands<0b001, "paciasp">;
- def PACIBSP : SystemNoOperands<0b011, "pacibsp">;
- def AUTIASP : SystemNoOperands<0b101, "autiasp">;
- def AUTIBSP : SystemNoOperands<0b111, "autibsp">;
+ def PACIASP : SystemNoOperands<0b001, "hint #25">;
+ def PACIBSP : SystemNoOperands<0b011, "hint #27">;
+ let isAuthenticated = 1 in {
+ def AUTIASP : SystemNoOperands<0b101, "hint #29">;
+ def AUTIBSP : SystemNoOperands<0b111, "hint #31">;
+ }
}
let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
- def PACIA1716 : SystemNoOperands<0b000, "pacia1716">;
- def PACIB1716 : SystemNoOperands<0b010, "pacib1716">;
- def AUTIA1716 : SystemNoOperands<0b100, "autia1716">;
- def AUTIB1716 : SystemNoOperands<0b110, "autib1716">;
+ def PACIA1716 : SystemNoOperands<0b000, "hint #8">;
+ def PACIB1716 : SystemNoOperands<0b010, "hint #10">;
+ let isAuthenticated = 1 in {
+ def AUTIA1716 : SystemNoOperands<0b100, "hint #12">;
+ def AUTIB1716 : SystemNoOperands<0b110, "hint #14">;
+ }
}
let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
- def XPACLRI : SystemNoOperands<0b111, "xpaclri">;
+ def XPACLRI : SystemNoOperands<0b111, "hint #7">;
}
-// These pointer authentication isntructions require armv8.3a
+// These pointer authentication instructions require armv8.3a
let Predicates = [HasPA] in {
+
+ // When compiling with PA, there is a better mnemonic for these instructions.
+ def : InstAlias<"paciaz", (PACIAZ), 1>;
+ def : InstAlias<"pacibz", (PACIBZ), 1>;
+ def : InstAlias<"autiaz", (AUTIAZ), 1>;
+ def : InstAlias<"autibz", (AUTIBZ), 1>;
+ def : InstAlias<"paciasp", (PACIASP), 1>;
+ def : InstAlias<"pacibsp", (PACIBSP), 1>;
+ def : InstAlias<"autiasp", (AUTIASP), 1>;
+ def : InstAlias<"autibsp", (AUTIBSP), 1>;
+ def : InstAlias<"pacia1716", (PACIA1716), 1>;
+ def : InstAlias<"pacib1716", (PACIB1716), 1>;
+ def : InstAlias<"autia1716", (AUTIA1716), 1>;
+ def : InstAlias<"autib1716", (AUTIB1716), 1>;
+ def : InstAlias<"xpaclri", (XPACLRI), 1>;
+
multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> {
def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>;
def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>;
@@ -1478,6 +1640,8 @@ def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
(i64 1))),
(CLSXr GPR64:$Rn)>;
+def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
+def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different
@@ -1859,6 +2023,9 @@ defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
+def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
+ (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;
+
//---
// (register offset)
//---
@@ -2552,6 +2719,9 @@ defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
+def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
+ (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;
+
//---
// (Register offset)
@@ -3506,14 +3676,8 @@ def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn)
(i64 4)))),
(FCVTLv8i16 V128:$Rn)>;
def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
-def : Pat<(v2f64 (fpextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
- (i64 2))))),
- (FCVTLv4i32 V128:$Rn)>;
def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
-def : Pat<(v4f32 (fpextend (v4f16 (extract_subvector (v8f16 V128:$Rn),
- (i64 4))))),
- (FCVTLv8i16 V128:$Rn)>;
defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
@@ -3714,10 +3878,11 @@ defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>;
-defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
- TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
-defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
- TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >;
+
+// MLA and MLS are generated by the MachineCombiner.
+defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
+defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
+
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
@@ -3760,6 +3925,12 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
int_aarch64_neon_sqsub>;
+// Extra saturation patterns, beyond the intrinsic matches above.
+defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
+defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
+defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
+defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
+
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
@@ -4356,6 +4527,25 @@ defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
+// Patterns for smull2/umull2.
+multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm))),
+ (INST8B V128:$Rn, V128:$Rm)>;
+ def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm))),
+ (INST4H V128:$Rn, V128:$Rm)>;
+ def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm))),
+ (INST2S V128:$Rn, V128:$Rm)>;
+}
+
+defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
+ SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
+defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
+ UMULLv8i16_v4i32, UMULLv4i32_v2i64>;
+
// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
@@ -5422,10 +5612,11 @@ def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
-defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla",
- TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>;
-defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls",
- TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>;
+
+// Generated by the MachineCombiner
+defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
+defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;
+
defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 961f38cad1e4..b9ac2657e1c5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -205,6 +206,14 @@ private:
ComplexRendererFns
selectAddrModeShiftedExtendXReg(MachineOperand &Root,
unsigned SizeInBytes) const;
+
+ /// Returns a \p ComplexRendererFns which contains a base, an offset, and
+ /// whether a shift + extend should be folded into an addressing mode.
+ /// Returns None when this is not profitable or possible.
+ ComplexRendererFns
+ selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
+ MachineOperand &Offset, unsigned SizeInBytes,
+ bool WantsExt) const;
ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const;
@@ -213,6 +222,13 @@ private:
return selectAddrModeXRO(Root, Width / 8);
}
+ ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
+ unsigned SizeInBytes) const;
+ template <int Width>
+ ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
+ return selectAddrModeWRO(Root, Width / 8);
+ }
+
ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
@@ -227,6 +243,15 @@ private:
return selectShiftedRegister(Root);
}
+ /// Given an extend instruction, determine the correct shift-extend type for
+ /// that instruction.
+ ///
+ /// If the instruction is going to be used in a load or store, pass
+ /// \p IsLoadStore = true.
+ AArch64_AM::ShiftExtendType
+ getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
+ bool IsLoadStore = false) const;
+
/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
/// subregister copy if necessary. Return either ExtReg, or the result of the
@@ -235,9 +260,12 @@ private:
MachineIRBuilder &MIB) const;
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
- void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
- void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const;
- void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const;
+ void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx = -1) const;
+ void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
+ int OpIdx = -1) const;
+ void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
+ int OpIdx = -1) const;
// Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
void materializeLargeCMVal(MachineInstr &I, const Value *V,
@@ -462,7 +490,7 @@ static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
}
} else if (OpSize == 64) {
switch (GenericOpc) {
- case TargetOpcode::G_GEP:
+ case TargetOpcode::G_PTR_ADD:
return AArch64::ADDXrr;
case TargetOpcode::G_SHL:
return AArch64::LSLVXr;
@@ -1006,6 +1034,66 @@ bool AArch64InstructionSelector::selectCompareBranch(
return true;
}
+/// Returns the element immediate value of a vector shift operand if found.
+/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
+static Optional<int64_t> getVectorShiftImm(Register Reg,
+ MachineRegisterInfo &MRI) {
+ assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
+ MachineInstr *OpMI = MRI.getVRegDef(Reg);
+ assert(OpMI && "Expected to find a vreg def for vector shift operand");
+ if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
+ return None;
+
+ // Check all operands are identical immediates.
+ int64_t ImmVal = 0;
+ for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
+ auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
+ if (!VRegAndVal)
+ return None;
+
+ if (Idx == 1)
+ ImmVal = VRegAndVal->Value;
+ if (ImmVal != VRegAndVal->Value)
+ return None;
+ }
+
+ return ImmVal;
+}
+
+/// Matches and returns the shift immediate value for a SHL instruction given
+/// a shift operand.
+static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
+ Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
+ if (!ShiftImm)
+ return None;
+ // Check the immediate is in range for a SHL.
+ int64_t Imm = *ShiftImm;
+ if (Imm < 0)
+ return None;
+ switch (SrcTy.getElementType().getSizeInBits()) {
+ default:
+ LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
+ return None;
+ case 8:
+ if (Imm > 7)
+ return None;
+ break;
+ case 16:
+ if (Imm > 15)
+ return None;
+ break;
+ case 32:
+ if (Imm > 31)
+ return None;
+ break;
+ case 64:
+ if (Imm > 63)
+ return None;
+ break;
+ }
+ return Imm;
+}
+
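The switch above reduces to a single range check: a splat shift amount C is
selectable as an immediate-form SHL only when 0 <= C < element-size-in-bits.
An equivalent predicate (sketch):

    static bool isLegalVectorShlImm(int64_t Imm, unsigned EltBits) {
      return Imm >= 0 && Imm < (int64_t)EltBits;
    }
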
bool AArch64InstructionSelector::selectVectorSHL(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_SHL);
@@ -1017,21 +1105,29 @@ bool AArch64InstructionSelector::selectVectorSHL(
if (!Ty.isVector())
return false;
+ // Check if we have a vector of constants on RHS that we can select as the
+ // immediate form.
+ Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
+
unsigned Opc = 0;
if (Ty == LLT::vector(2, 64)) {
- Opc = AArch64::USHLv2i64;
+ Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
} else if (Ty == LLT::vector(4, 32)) {
- Opc = AArch64::USHLv4i32;
+ Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
} else if (Ty == LLT::vector(2, 32)) {
- Opc = AArch64::USHLv2i32;
+ Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
return false;
}
MachineIRBuilder MIB(I);
- auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
- constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
+ auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
+ if (ImmVal)
+ Shl.addImm(*ImmVal);
+ else
+ Shl.addUse(Src2Reg);
+ constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
@@ -1765,7 +1861,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
auto *PtrMI = MRI.getVRegDef(PtrReg);
// Try to fold a GEP into our unsigned immediate addressing mode.
- if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
+ if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
int64_t Imm = *COff;
const unsigned Size = MemSizeInBits / 8;
@@ -1883,7 +1979,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- case TargetOpcode::G_GEP: {
+ case TargetOpcode::G_PTR_ADD: {
MachineIRBuilder MIRBuilder(I);
emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
MIRBuilder);
@@ -2065,14 +2161,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
unsigned DstSize = DstTy.getSizeInBits();
unsigned SrcSize = SrcTy.getSizeInBits();
+ if (DstTy.isVector())
+ return false; // Should be handled by imported patterns.
+
assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
AArch64::GPRRegBankID &&
"Unexpected ext regbank");
MachineIRBuilder MIB(I);
MachineInstr *ExtI;
- if (DstTy.isVector())
- return false; // Should be handled by imported patterns.
// First check if we're extending the result of a load which has a dest type
// smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
@@ -3602,22 +3699,51 @@ bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
return false;
// The shuffle's second operand doesn't matter if the mask is all zero.
- const Constant *Mask = I.getOperand(3).getShuffleMask();
- if (!isa<ConstantAggregateZero>(Mask))
+ ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
+ if (!all_of(Mask, [](int Elem) { return Elem == 0; }))
return false;
// We're done, now find out what kind of splat we need.
LLT VecTy = MRI.getType(I.getOperand(0).getReg());
LLT EltTy = VecTy.getElementType();
- if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
- LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
+ if (EltTy.getSizeInBits() < 32) {
+ LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet");
return false;
}
bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
- static const unsigned OpcTable[2][2] = {
- {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
- {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
- unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
+ unsigned Opc = 0;
+ if (IsFP) {
+ switch (EltTy.getSizeInBits()) {
+ case 32:
+ if (VecTy.getNumElements() == 2) {
+ Opc = AArch64::DUPv2i32lane;
+ } else {
+ Opc = AArch64::DUPv4i32lane;
+ assert(VecTy.getNumElements() == 4);
+ }
+ break;
+ case 64:
+ assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
+ Opc = AArch64::DUPv2i64lane;
+ break;
+ }
+ } else {
+ switch (EltTy.getSizeInBits()) {
+ case 32:
+ if (VecTy.getNumElements() == 2) {
+ Opc = AArch64::DUPv2i32gpr;
+ } else {
+ Opc = AArch64::DUPv4i32gpr;
+ assert(VecTy.getNumElements() == 4);
+ }
+ break;
+ case 64:
+ assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
+ Opc = AArch64::DUPv2i64gpr;
+ break;
+ }
+ }
+ assert(Opc && "Did not compute an opcode for a dup");
// For FP splats, we need to widen the scalar reg via undef too.
if (IsFP) {
@@ -3652,15 +3778,12 @@ bool AArch64InstructionSelector::selectShuffleVector(
const LLT Src1Ty = MRI.getType(Src1Reg);
Register Src2Reg = I.getOperand(2).getReg();
const LLT Src2Ty = MRI.getType(Src2Reg);
- const Constant *ShuffleMask = I.getOperand(3).getShuffleMask();
+ ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
LLVMContext &Ctx = MF.getFunction().getContext();
- SmallVector<int, 8> Mask;
- ShuffleVectorInst::getShuffleMask(ShuffleMask, Mask);
-
// G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
// it's originated from a <1 x T> type. Those should have been lowered into
// G_BUILD_VECTOR earlier.
@@ -4164,45 +4287,15 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
[](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}
-/// This is used for computing addresses like this:
-///
-/// ldr x1, [x2, x3, lsl #3]
-///
-/// Where x2 is the base register, and x3 is an offset register. The shift-left
-/// is a constant value specific to this load instruction. That is, we'll never
-/// see anything other than a 3 here (which corresponds to the size of the
-/// element being loaded.)
InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
- MachineOperand &Root, unsigned SizeInBytes) const {
- if (!Root.isReg())
- return None;
- MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
-
- // Make sure that the memory op is a valid size.
- int64_t LegalShiftVal = Log2_32(SizeInBytes);
- if (LegalShiftVal == 0)
- return None;
-
- // We want to find something like this:
- //
- // val = G_CONSTANT LegalShiftVal
- // shift = G_SHL off_reg val
- // ptr = G_GEP base_reg shift
- // x = G_LOAD ptr
- //
- // And fold it into this addressing mode:
- //
- // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
+AArch64InstructionSelector::selectExtendedSHL(
+ MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
+ unsigned SizeInBytes, bool WantsExt) const {
+ assert(Base.isReg() && "Expected base to be a register operand");
+ assert(Offset.isReg() && "Expected offset to be a register operand");
- // Check if we can find the G_GEP.
- MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
- if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
- return None;
-
- // Now, try to match an opcode which will match our specific offset.
- // We want a G_SHL or a G_MUL.
- MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+ MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
if (!OffsetInst)
return None;
@@ -4210,6 +4303,10 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
return None;
+ // Make sure that the memory op is a valid size.
+ int64_t LegalShiftVal = Log2_32(SizeInBytes);
+ if (LegalShiftVal == 0)
+ return None;
if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
return None;
@@ -4254,27 +4351,82 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
if (ImmVal != LegalShiftVal)
return None;
+ unsigned SignExtend = 0;
+ if (WantsExt) {
+ // Check if the offset is defined by an extend.
+ MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
+ auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return None;
+
+ SignExtend = Ext == AArch64_AM::SXTW;
+
+ // Need a 32-bit wide register here.
+ MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
+ OffsetReg = ExtInst->getOperand(1).getReg();
+ OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
+ }
+
// We can use the LHS of the GEP as the base, and the LHS of the shift as an
// offset. Signify that we are shifting by setting the shift flag to 1.
- return {{[=](MachineInstrBuilder &MIB) {
- MIB.addUse(Gep->getOperand(1).getReg());
- },
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
[=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
[=](MachineInstrBuilder &MIB) {
// Need to add both immediates here to make sure that they are both
// added to the instruction.
- MIB.addImm(0);
+ MIB.addImm(SignExtend);
MIB.addImm(1);
}}};
}
/// This is used for computing addresses like this:
///
+/// ldr x1, [x2, x3, lsl #3]
+///
+/// Where x2 is the base register, and x3 is an offset register. The shift-left
+/// is a constant value specific to this load instruction. That is, we'll never
+/// see anything other than a 3 here (which corresponds to the size of the
+/// element being loaded.)
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
+ MachineOperand &Root, unsigned SizeInBytes) const {
+ if (!Root.isReg())
+ return None;
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+ // We want to find something like this:
+ //
+ // val = G_CONSTANT LegalShiftVal
+ // shift = G_SHL off_reg val
+ // ptr = G_PTR_ADD base_reg shift
+ // x = G_LOAD ptr
+ //
+ // And fold it into this addressing mode:
+ //
+ // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
+
+ // Check if we can find the G_PTR_ADD.
+ MachineInstr *PtrAdd =
+ getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+ if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+ return None;
+
+ // Now, try to match an opcode which will match our specific offset.
+ // We want a G_SHL or a G_MUL.
+ MachineInstr *OffsetInst =
+ getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
+ return selectExtendedSHL(Root, PtrAdd->getOperand(1),
+ OffsetInst->getOperand(0), SizeInBytes,
+ /*WantsExt=*/false);
+}
+
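Source-level shape of the addressing mode this helper folds, with a 64-bit
index register and therefore no extend (illustrative C):

    /* Compiles to roughly: ldr x0, [x0, x1, lsl #3] */
    long load_xro(long *base, long idx) { return base[idx]; }
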
+/// This is used for computing addresses like this:
+///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
-/// When possible (or profitable) to fold a G_GEP into the address calculation,
+/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
/// calculation, this will do so. Otherwise, it will return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
@@ -4283,7 +4435,7 @@ AArch64InstructionSelector::selectAddrModeRegisterOffset(
// We need a GEP.
MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
- if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
+ if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
return None;
// If this is used more than once, let's not bother folding.
@@ -4329,6 +4481,74 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
return selectAddrModeRegisterOffset(Root);
}
+/// This is used for computing addresses like this:
+///
+/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
+///
+/// Where we have a 64-bit base register, a 32-bit offset register, and an
+/// extend (which may or may not be signed).
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
+ unsigned SizeInBytes) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+ MachineInstr *PtrAdd =
+ getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+ if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+ return None;
+
+ MachineOperand &LHS = PtrAdd->getOperand(1);
+ MachineOperand &RHS = PtrAdd->getOperand(2);
+ MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
+
+ // The first case is the same as selectAddrModeXRO, except we need an extend.
+ // In this case, we try to find a shift and extend, and fold them into the
+ // addressing mode.
+ //
+ // E.g.
+ //
+ // off_reg = G_Z/S/ANYEXT ext_reg
+ // val = G_CONSTANT LegalShiftVal
+ // shift = G_SHL off_reg val
+ // ptr = G_PTR_ADD base_reg shift
+ // x = G_LOAD ptr
+ //
+ // In this case we can get a load like this:
+ //
+ // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
+ auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
+ SizeInBytes, /*WantsExt=*/true);
+ if (ExtendedShl)
+ return ExtendedShl;
+
+ // There was no shift. We can still try to fold a G_Z/S/ANYEXT on its own.
+ //
+ // e.g.
+ // ldr something, [base_reg, ext_reg, sxtw]
+ if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+ return None;
+
+ // Check if this is an extend. We'll get an extend type if it is.
+ AArch64_AM::ShiftExtendType Ext =
+ getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return None;
+
+ // Need a 32-bit wide register.
+ MachineIRBuilder MIB(*PtrAdd);
+ Register ExtReg =
+ narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
+ unsigned SignExtend = Ext == AArch64_AM::SXTW;
+
+ // Base is LHS, offset is ExtReg.
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
+ [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addImm(SignExtend);
+ MIB.addImm(0);
+ }}};
+}
+
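And the corresponding shape for the new WRO mode, where the 32-bit offset is
sign- or zero-extended before the shift (illustrative C):

    /* Compiles to roughly: ldr x0, [x0, w1, sxtw #3] */
    long load_wro(long *base, int idx) { return base[idx]; }
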
/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
@@ -4491,9 +4711,8 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}
-/// Get the correct ShiftExtendType for an extend instruction.
-static AArch64_AM::ShiftExtendType
-getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
+AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
+ MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
unsigned Opc = MI.getOpcode();
// Handle explicit extend instructions first.
@@ -4540,9 +4759,9 @@ getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
default:
return AArch64_AM::InvalidShiftExtend;
case 0xFF:
- return AArch64_AM::UXTB;
+ return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
case 0xFFFF:
- return AArch64_AM::UXTH;
+ return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
case 0xFFFFFFFF:
return AArch64_AM::UXTW;
}
@@ -4632,25 +4851,29 @@ AArch64InstructionSelector::selectArithExtendedRegister(
}
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
- const MachineInstr &MI) const {
+ const MachineInstr &MI,
+ int OpIdx) const {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
- assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
assert(CstVal && "Expected constant value");
MIB.addImm(CstVal.getValue());
}
void AArch64InstructionSelector::renderLogicalImm32(
- MachineInstrBuilder &MIB, const MachineInstr &I) const {
- assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
+ assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
MIB.addImm(Enc);
}
void AArch64InstructionSelector::renderLogicalImm64(
- MachineInstrBuilder &MIB, const MachineInstr &I) const {
- assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
+ assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
MIB.addImm(Enc);
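For reference, the encoding step the renderers above perform, shown in
isolation (the call is the same one used here; the constant is an example):

    /* 0xFF is a valid 64-bit logical immediate; Enc is the N:immr:imms
       encoding that the AND/ORR/EOR immediate forms expect. */
    uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0xFFull, 64);
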
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 7a1901bd5b1e..95719a35c6da 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -59,7 +59,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
}
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
- .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
+ .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
.clampScalar(0, s1, s64)
.widenScalarToNextPow2(0, 8)
.fewerElementsIf(
@@ -104,7 +104,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.moreElementsToNextPow2(0)
.minScalarSameAs(1, 0);
- getActionDefinitionsBuilder(G_GEP)
+ getActionDefinitionsBuilder(G_PTR_ADD)
.legalFor({{p0, s64}})
.clampScalar(1, s64, s64);
@@ -143,7 +143,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
- .legalFor({{s32, s1}, {s64, s1}});
+ .legalFor({{s32, s1}, {s64, s1}})
+ .minScalar(0, s32);
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
.legalFor({s32, s64, v2s64, v4s32, v2s32});
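Effect of the .minScalar(0, s32) added above: a narrow overflow op is now
widened before selection rather than failing to legalize. A sketch of the
legalizer's behavior (assumed shapes, not a test excerpt):

    ; before: %r:_(s8), %c:_(s1) = G_UADDO %a:_(s8), %b:_(s8)
    ; after:  the add is performed at s32, with the overflow bit recomputed
    ;         for the original 8-bit width
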
@@ -743,7 +744,7 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
// Realign the list to the actual required alignment.
auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
- auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));
+ auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
DstPtr = MRI.createGenericVirtualRegister(PtrTy);
MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
@@ -758,7 +759,7 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));
- auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));
+ auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
MIRBuilder.buildStore(
NewList, ListPtr,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index a0c4a25bb5b9..3156bb446963 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -26,16 +26,19 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
+#include <functional>
#include <iterator>
#include <limits>
@@ -51,6 +54,9 @@ STATISTIC(NumUnscaledPairCreated,
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
+DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
+ "Controls which pairs are considered for renaming");
+
// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
cl::init(20), cl::Hidden);
@@ -76,6 +82,11 @@ using LdStPairFlags = struct LdStPairFlags {
// to be extended, 0 means I, and 1 means the returned iterator.
int SExtIdx = -1;
+ // If not None, RenameReg can be used to rename the result register of the
+ // first store in a pair. Currently this only works when merging stores
+ // forward.
+ Optional<MCPhysReg> RenameReg = None;
+
LdStPairFlags() = default;
void setMergeForward(bool V = true) { MergeForward = V; }
@@ -83,6 +94,10 @@ using LdStPairFlags = struct LdStPairFlags {
void setSExtIdx(int V) { SExtIdx = V; }
int getSExtIdx() const { return SExtIdx; }
+
+ void setRenameReg(MCPhysReg R) { RenameReg = R; }
+ void clearRenameReg() { RenameReg = None; }
+ Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
struct AArch64LoadStoreOpt : public MachineFunctionPass {
@@ -99,6 +114,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Track which register units have been modified and used.
LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+ LiveRegUnits DefinedInBB;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
@@ -215,69 +231,6 @@ static bool isTagStore(const MachineInstr &MI) {
}
}
-// Scaling factor for unscaled load or store.
-static int getMemScale(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Opcode has unknown scale!");
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
- case AArch64::LDRSBWui:
- case AArch64::LDURSBWi:
- case AArch64::STRBBui:
- case AArch64::STURBBi:
- return 1;
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- case AArch64::LDRSHWui:
- case AArch64::LDURSHWi:
- case AArch64::STRHHui:
- case AArch64::STURHHi:
- return 2;
- case AArch64::LDRSui:
- case AArch64::LDURSi:
- case AArch64::LDRSWui:
- case AArch64::LDURSWi:
- case AArch64::LDRWui:
- case AArch64::LDURWi:
- case AArch64::STRSui:
- case AArch64::STURSi:
- case AArch64::STRWui:
- case AArch64::STURWi:
- case AArch64::LDPSi:
- case AArch64::LDPSWi:
- case AArch64::LDPWi:
- case AArch64::STPSi:
- case AArch64::STPWi:
- return 4;
- case AArch64::LDRDui:
- case AArch64::LDURDi:
- case AArch64::LDRXui:
- case AArch64::LDURXi:
- case AArch64::STRDui:
- case AArch64::STURDi:
- case AArch64::STRXui:
- case AArch64::STURXi:
- case AArch64::LDPDi:
- case AArch64::LDPXi:
- case AArch64::STPDi:
- case AArch64::STPXi:
- return 8;
- case AArch64::LDRQui:
- case AArch64::LDURQi:
- case AArch64::STRQui:
- case AArch64::STURQi:
- case AArch64::LDPQi:
- case AArch64::STPQi:
- case AArch64::STGOffset:
- case AArch64::STZGOffset:
- case AArch64::ST2GOffset:
- case AArch64::STZ2GOffset:
- case AArch64::STGPi:
- return 16;
- }
-}
-
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
bool *IsValidLdStrOpc = nullptr) {
if (IsValidLdStrOpc)
@@ -588,7 +541,7 @@ static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
// ST*G and all paired ldst have the same scale in pre/post-indexed variants
// as in the "unsigned offset" variant.
// All other pre/post indexed ldst instructions are unscaled.
- Scale = (IsTagStore || IsPaired) ? getMemScale(MI) : 1;
+ Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
if (IsPaired) {
MinOffset = -64;
@@ -599,8 +552,8 @@ static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
}
}
-static const MachineOperand &getLdStRegOp(const MachineInstr &MI,
- unsigned PairedRegOp = 0) {
+static MachineOperand &getLdStRegOp(MachineInstr &MI,
+ unsigned PairedRegOp = 0) {
assert(PairedRegOp < 2 && "Unexpected register operand idx.");
unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
return MI.getOperand(Idx);
@@ -620,8 +573,8 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
MachineInstr &StoreInst,
const AArch64InstrInfo *TII) {
assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
- int LoadSize = getMemScale(LoadInst);
- int StoreSize = getMemScale(StoreInst);
+ int LoadSize = TII->getMemScale(LoadInst);
+ int StoreSize = TII->getMemScale(StoreInst);
int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst)
? getLdStOffsetOp(StoreInst).getImm()
: getLdStOffsetOp(StoreInst).getImm() * StoreSize;
@@ -731,7 +684,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
unsigned Opc = I->getOpcode();
bool IsScaled = !TII->isUnscaledLdSt(Opc);
- int OffsetStride = IsScaled ? 1 : getMemScale(*I);
+ int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
bool MergeForward = Flags.getMergeForward();
// Insert our new paired instruction after whichever of the paired
@@ -783,6 +736,44 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
return NextI;
}
+// Apply Fn to all instructions between MI and the beginning of the block,
+// until a def for DefReg is reached. Returns true iff Fn returns true for all
+// visited instructions. Stops after visiting Limit instructions.
+static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
+ const TargetRegisterInfo *TRI, unsigned Limit,
+ std::function<bool(MachineInstr &, bool)> &Fn) {
+ auto MBB = MI.getParent();
+ for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(),
+ E = MBB->rend();
+ I != E; I++) {
+ if (!Limit)
+ return false;
+ --Limit;
+
+ bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) {
+ return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
+ TRI->regsOverlap(MOP.getReg(), DefReg);
+ });
+ if (!Fn(*I, isDef))
+ return false;
+ if (isDef)
+ break;
+ }
+ return true;
+}
+
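A hypothetical use of forAllMIsUntilDef, counting the instructions scanned
before the previous def of Reg (returning false from Fn aborts the walk):

    unsigned Count = 0;
    std::function<bool(MachineInstr &, bool)> CountFn =
        [&Count](MachineInstr &Cur, bool IsDef) {
          ++Count;
          return true; // keep walking; the helper stops at the def itself
        };
    bool Complete = forAllMIsUntilDef(MI, Reg, TRI, /*Limit=*/20, CountFn);
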
+static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
+ const TargetRegisterInfo *TRI) {
+
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI))
+ if (MOP.isReg() && MOP.isKill())
+ Units.removeReg(MOP.getReg());
+
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI))
+ if (MOP.isReg() && !MOP.isKill())
+ Units.addReg(MOP.getReg());
+}
+
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
@@ -800,9 +791,76 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
unsigned Opc =
SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
bool IsUnscaled = TII->isUnscaledLdSt(Opc);
- int OffsetStride = IsUnscaled ? getMemScale(*I) : 1;
+ int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
bool MergeForward = Flags.getMergeForward();
+
+ Optional<MCPhysReg> RenameReg = Flags.getRenameReg();
+ if (MergeForward && RenameReg) {
+ MCRegister RegToRename = getLdStRegOp(*I).getReg();
+ DefinedInBB.addReg(*RenameReg);
+
+ // Return the sub/super register for RenameReg, matching the size of
+ // OriginalReg.
+ auto GetMatchingSubReg = [this,
+ RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
+ for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
+ if (TRI->getMinimalPhysRegClass(OriginalReg) ==
+ TRI->getMinimalPhysRegClass(SubOrSuper))
+ return SubOrSuper;
+ llvm_unreachable("Should have found matching sub or super register!");
+ };
+
+ std::function<bool(MachineInstr &, bool)> UpdateMIs =
+ [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
+ if (IsDef) {
+ bool SeenDef = false;
+ for (auto &MOP : MI.operands()) {
+ // Rename the first explicit definition and all implicit
+ // definitions matching RegToRename.
+ if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
+ (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
+ TRI->regsOverlap(MOP.getReg(), RegToRename)) {
+ assert((MOP.isImplicit() ||
+ (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
+ "Need renamable operands");
+ MOP.setReg(GetMatchingSubReg(MOP.getReg()));
+ SeenDef = true;
+ }
+ }
+ } else {
+ for (auto &MOP : MI.operands()) {
+ if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
+ TRI->regsOverlap(MOP.getReg(), RegToRename)) {
+ assert((MOP.isImplicit() ||
+ (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
+ "Need renamable operands");
+ MOP.setReg(GetMatchingSubReg(MOP.getReg()));
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
+ return true;
+ };
+ forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);
+
+#if !defined(NDEBUG)
+ // Make sure the register used for renaming is not used between the paired
+ // instructions. That would trash the content before the new paired
+ // instruction.
+ for (auto &MI :
+ iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
+ std::next(I), std::next(Paired)))
+ assert(all_of(MI.operands(),
+ [this, &RenameReg](const MachineOperand &MOP) {
+ return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
+ !TRI->regsOverlap(MOP.getReg(), *RenameReg);
+ }) &&
+ "Rename register used between paired instruction, trashing the "
+ "content");
+#endif
+ }
+
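The renaming performed above, on a hypothetical sequence (w10 stands in for
whatever register tryToFindRegisterToRename picks further down):

    // before                     after renaming w8 -> w10 up to the def
    //   mov w8, #1                 mov w10, #1
    //   str w8, [x9]               (first store merged forward)
    //   mov w8, #2                 mov w8, #2
    //   str w8, [x9, #4]           stp w10, w8, [x9]
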
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
@@ -818,11 +876,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
// We're trying to pair instructions that differ in how they are scaled. If
// I is scaled then scale the offset of Paired accordingly. Otherwise, do
// the opposite (i.e., make Paired's offset unscaled).
- int MemSize = getMemScale(*Paired);
+ int MemSize = TII->getMemScale(*Paired);
if (PairedIsUnscaled) {
// If the unscaled offset isn't a multiple of the MemSize, we can't
// pair the operations together.
- assert(!(PairedOffset % getMemScale(*Paired)) &&
+ assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
"Offset should be a multiple of the stride!");
PairedOffset /= MemSize;
} else {
@@ -847,9 +905,9 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
// Scale the immediate offset, if necessary.
if (TII->isUnscaledLdSt(RtMI->getOpcode())) {
- assert(!(OffsetImm % getMemScale(*RtMI)) &&
+ assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
"Unscaled offset cannot be scaled.");
- OffsetImm /= getMemScale(*RtMI);
+ OffsetImm /= TII->getMemScale(*RtMI);
}
// Construct the new instruction.
@@ -931,6 +989,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
}
LLVM_DEBUG(dbgs() << "\n");
+ if (MergeForward)
+ for (const MachineOperand &MOP : phys_regs_and_masks(*I))
+ if (MOP.isReg() && MOP.isKill())
+ DefinedInBB.addReg(MOP.getReg());
+
// Erase the old instructions.
I->eraseFromParent();
Paired->eraseFromParent();
@@ -944,8 +1007,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
MachineBasicBlock::iterator NextI = LoadI;
++NextI;
- int LoadSize = getMemScale(*LoadI);
- int StoreSize = getMemScale(*StoreI);
+ int LoadSize = TII->getMemScale(*LoadI);
+ int StoreSize = TII->getMemScale(*StoreI);
Register LdRt = getLdStRegOp(*LoadI).getReg();
const MachineOperand &StMO = getLdStRegOp(*StoreI);
Register StRt = getLdStRegOp(*StoreI).getReg();
@@ -1207,6 +1270,148 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
// FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
}
+static bool
+canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
+ SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+ const TargetRegisterInfo *TRI) {
+ if (!FirstMI.mayStore())
+ return false;
+
+ // Check if we can find an unused register which we can use to rename
+ // the register used by the first load/store.
+ auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
+ MachineFunction &MF = *FirstMI.getParent()->getParent();
+ if (!RegClass || !MF.getRegInfo().tracksLiveness())
+ return false;
+
+ auto RegToRename = getLdStRegOp(FirstMI).getReg();
+ // For now, we only rename if the store operand gets killed at the store.
+ if (!getLdStRegOp(FirstMI).isKill() &&
+ !any_of(FirstMI.operands(),
+ [TRI, RegToRename](const MachineOperand &MOP) {
+ return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
+ MOP.isImplicit() && MOP.isKill() &&
+ TRI->regsOverlap(RegToRename, MOP.getReg());
+ })) {
+ LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n");
+ return false;
+ }
+ auto canRenameMOP = [](const MachineOperand &MOP) {
+ return MOP.isImplicit() ||
+ (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
+ };
+
+ bool FoundDef = false;
+
+ // For each instruction between FirstMI and the previous def for RegToRename,
+ // we:
+ // * check if we can rename RegToRename in this instruction, and
+ // * collect the registers used and the register classes required for
+ //   RegToRename.
+ std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
+ bool IsDef) {
+ LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
+ // Currently we do not try to rename across frame-setup instructions.
+ if (MI.getFlag(MachineInstr::FrameSetup)) {
+ LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions currently ("
+ << MI << ")\n");
+ return false;
+ }
+
+ UsedInBetween.accumulate(MI);
+
+ // For a definition, check that we can rename the definition and exit the
+ // loop.
+ FoundDef = IsDef;
+
+ // For defs, check if we can rename the first def of RegToRename.
+ if (FoundDef) {
+ for (auto &MOP : MI.operands()) {
+ if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
+ !TRI->regsOverlap(MOP.getReg(), RegToRename))
+ continue;
+ if (!canRenameMOP(MOP)) {
+ LLVM_DEBUG(dbgs()
+ << " Cannot rename " << MOP << " in " << MI << "\n");
+ return false;
+ }
+ RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
+ }
+ return true;
+ } else {
+ for (auto &MOP : MI.operands()) {
+ if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
+ !TRI->regsOverlap(MOP.getReg(), RegToRename))
+ continue;
+
+ if (!canRenameMOP(MOP)) {
+ LLVM_DEBUG(dbgs()
+ << " Cannot rename " << MOP << " in " << MI << "\n");
+ return false;
+ }
+ RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
+ }
+ }
+ return true;
+ };
+
+ if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
+ return false;
+
+ if (!FoundDef) {
+ LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
+ return false;
+ }
+ return true;
+}
+
+// Check if we can find a physical register for renaming. This register must:
+// * not be defined up to FirstMI (checked using DefinedInBB),
+// * not be used between MI and the defining instruction of the register to
+//   rename (checked using UsedInBetween), and
+// * be available in all required register classes (checked using
+//   RequiredClasses).
+static Optional<MCPhysReg> tryToFindRegisterToRename(
+ MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB,
+ LiveRegUnits &UsedInBetween,
+ SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+ const TargetRegisterInfo *TRI) {
+ auto &MF = *FirstMI.getParent()->getParent();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Checks if any sub- or super-register of PR is callee saved.
+ auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
+ return any_of(TRI->sub_and_superregs_inclusive(PR),
+ [&MF, TRI](MCPhysReg SubOrSuper) {
+ return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
+ });
+ };
+
+ // Check if PR or one of its sub- or super-registers can be used for all
+ // required register classes.
+ auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
+ return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
+ return any_of(TRI->sub_and_superregs_inclusive(PR),
+ [C, TRI](MCPhysReg SubOrSuper) {
+ return C == TRI->getMinimalPhysRegClass(SubOrSuper);
+ });
+ });
+ };
+
+ auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
+ for (const MCPhysReg &PR : *RegClass) {
+ if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
+ !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
+ CanBeUsedForAllClasses(PR)) {
+ DefinedInBB.addReg(PR);
+ LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
+ << "\n");
+ return {PR};
+ }
+ }
+ LLVM_DEBUG(dbgs() << "No rename register found from "
+ << TRI->getRegClassName(RegClass) << "\n");
+ return None;
+}
+
/// Scan the instructions looking for a load/store that can be combined with the
/// current instruction into a wider equivalent or a load/store pair.
MachineBasicBlock::iterator
@@ -1215,6 +1420,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool FindNarrowMerge) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
+ MachineBasicBlock::iterator MBBIWithRenameReg;
MachineInstr &FirstMI = *I;
++MBBI;
@@ -1223,9 +1429,16 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
Register Reg = getLdStRegOp(FirstMI).getReg();
Register BaseReg = getLdStBaseOp(FirstMI).getReg();
int Offset = getLdStOffsetOp(FirstMI).getImm();
- int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
+ int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
+ Optional<bool> MaybeCanRename = None;
+ SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
+ LiveRegUnits UsedInBetween;
+ UsedInBetween.init(*TRI);
+
+ Flags.clearRenameReg();
+
// Track which register units have been modified and used between the first
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
@@ -1237,6 +1450,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
MachineInstr &MI = *MBBI;
+ UsedInBetween.accumulate(MI);
+
// Don't count transient instructions towards the search limit since there
// may be different numbers of them if e.g. debug information is present.
if (!MI.isTransient())
@@ -1259,7 +1474,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// We're trying to pair instructions that differ in how they are scaled.
// If FirstMI is scaled then scale the offset of MI accordingly.
// Otherwise, do the opposite (i.e., make MI's offset unscaled).
- int MemSize = getMemScale(MI);
+ int MemSize = TII->getMemScale(MI);
if (MIIsUnscaled) {
// If the unscaled offset isn't a multiple of the MemSize, we can't
// pair the operations together: bail and keep looking.
@@ -1329,7 +1544,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
!(MI.mayLoad() &&
!UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
!mayAlias(MI, MemInsns, AA)) {
+
Flags.setMergeForward(false);
+ Flags.clearRenameReg();
return MBBI;
}
@@ -1337,18 +1554,41 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// between the two instructions and none of the instructions between the
// first and the second alias with the first, we can combine the first
// into the second.
- if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg()) &&
- !(MayLoad &&
+ if (!(MayLoad &&
!UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
!mayAlias(FirstMI, MemInsns, AA)) {
- Flags.setMergeForward(true);
- return MBBI;
+
+ if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
+ Flags.setMergeForward(true);
+ Flags.clearRenameReg();
+ return MBBI;
+ }
+
+ if (DebugCounter::shouldExecute(RegRenamingCounter)) {
+ if (!MaybeCanRename)
+ MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
+ RequiredClasses, TRI)};
+
+ if (*MaybeCanRename) {
+ Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
+ FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
+ TRI);
+ if (MaybeRenameReg) {
+ Flags.setRenameReg(*MaybeRenameReg);
+ Flags.setMergeForward(true);
+ MBBIWithRenameReg = MBBI;
+ }
+ }
+ }
}
// Unable to combine these instructions due to interference in between.
// Keep looking.
}
}
+ if (Flags.getRenameReg())
+ return MBBIWithRenameReg;
+
// If the instruction wasn't a matching load or store. Stop searching if we
// encounter a call instruction that might modify memory.
if (MI.isCall())
@@ -1492,7 +1732,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
MachineBasicBlock::iterator MBBI = I;
Register BaseReg = getLdStBaseOp(MemMI).getReg();
- int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
+ int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
// Scan forward looking for post-index opportunities. Updating instructions
// can't be formed if the memory instruction doesn't have the offset we're
@@ -1663,7 +1903,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
// with Offset-1)
bool IsUnscaled = TII->isUnscaledLdSt(MI);
int Offset = getLdStOffsetOp(MI).getImm();
- int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
+ int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
// Allow one more for offset.
if (Offset > 0)
Offset -= OffsetStride;
@@ -1680,7 +1920,13 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
++NumUnscaledPairCreated;
// Keeping the iterator straight is a pain, so we let the merge routine tell
// us what the next instruction is after it's done mucking about.
+ auto Prev = std::prev(MBBI);
MBBI = mergePairedInsns(MBBI, Paired, Flags);
+ // Collect liveness info for instructions between Prev and the new position
+ // MBBI.
+ for (auto I = std::next(Prev); I != MBBI; I++)
+ updateDefinedRegisters(*I, DefinedInBB, TRI);
+
return true;
}
return false;
@@ -1723,7 +1969,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
// The immediate in the load/store is scaled by the size of the memory
// operation. The immediate in the add we're looking for,
// however, is not, so adjust here.
- int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
+ int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
// Look forward to try to find a pre-index instruction. For example,
// ldr x1, [x0, #64]
@@ -1742,6 +1988,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
bool EnableNarrowZeroStOpt) {
+
bool Modified = false;
// Four transformations to do here:
// 1) Find loads that directly read from stores and promote them by
@@ -1786,8 +2033,17 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// ldr x1, [x2, #8]
// ; becomes
// ldp x0, x1, [x2]
+
+ if (MBB.getParent()->getRegInfo().tracksLiveness()) {
+ DefinedInBB.clear();
+ DefinedInBB.addLiveIns(MBB);
+ }
+
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
+ // Track currently live registers up to this point, to help with
+ // searching for a rename register on demand.
+ updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
Modified = true;
else
@@ -1825,11 +2081,14 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
// or store.
ModifiedRegUnits.init(*TRI);
UsedRegUnits.init(*TRI);
+ DefinedInBB.init(*TRI);
bool Modified = false;
bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
- for (auto &MBB : Fn)
- Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);
+ for (auto &MBB : Fn) {
+ auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
+ Modified |= M;
+ }
return Modified;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 0009fb7b5520..6ddb3fdb0046 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include <cassert>
@@ -51,10 +52,16 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
bool HasStackFrame = false;
/// Amount of stack frame size, not including callee-saved registers.
- unsigned LocalStackSize;
+ uint64_t LocalStackSize = 0;
+
+ /// The start and end frame indices for the SVE callee saves.
+ int MinSVECSFrameIndex = 0;
+ int MaxSVECSFrameIndex = 0;
/// Amount of stack frame size used for saving callee-saved registers.
- unsigned CalleeSavedStackSize;
+ unsigned CalleeSavedStackSize = 0;
+ unsigned SVECalleeSavedStackSize = 0;
+ bool HasCalleeSavedStackSize = false;
/// Number of TLS accesses using the special (combinable)
/// _TLS_MODULE_BASE_ symbol.
@@ -117,7 +124,7 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
// Offset from SP-at-entry to the tagged base pointer.
// Tagged base pointer is set up to point to the first (lowest address) tagged
// stack slot.
- unsigned TaggedBasePointerOffset;
+ unsigned TaggedBasePointerOffset = 0;
public:
AArch64FunctionInfo() = default;
@@ -160,15 +167,79 @@ public:
void setCalleeSaveStackHasFreeSpace(bool s) {
CalleeSaveStackHasFreeSpace = s;
}
-
bool isSplitCSR() const { return IsSplitCSR; }
void setIsSplitCSR(bool s) { IsSplitCSR = s; }
- void setLocalStackSize(unsigned Size) { LocalStackSize = Size; }
- unsigned getLocalStackSize() const { return LocalStackSize; }
+ void setLocalStackSize(uint64_t Size) { LocalStackSize = Size; }
+ uint64_t getLocalStackSize() const { return LocalStackSize; }
+
+ void setCalleeSavedStackSize(unsigned Size) {
+ CalleeSavedStackSize = Size;
+ HasCalleeSavedStackSize = true;
+ }
+
+ // When CalleeSavedStackSize has not been set (for example, when some
+ // MachineIR pass is run in isolation), recalculate the CalleeSavedStackSize
+ // directly from the CalleeSavedInfo.
+ // Note: This information can only be recalculated after PEI
+ // has assigned offsets to the callee save objects.
+ unsigned getCalleeSavedStackSize(const MachineFrameInfo &MFI) const {
+ bool ValidateCalleeSavedStackSize = false;
+
+#ifndef NDEBUG
+ // Make sure the calculated size derived from the CalleeSavedInfo
+ // equals the cached size that was calculated elsewhere (e.g. in
+ // determineCalleeSaves).
+ ValidateCalleeSavedStackSize = HasCalleeSavedStackSize;
+#endif
+
+ if (!HasCalleeSavedStackSize || ValidateCalleeSavedStackSize) {
+ assert(MFI.isCalleeSavedInfoValid() && "CalleeSavedInfo not calculated");
+ if (MFI.getCalleeSavedInfo().empty())
+ return 0;
+
+ int64_t MinOffset = std::numeric_limits<int64_t>::max();
+ int64_t MaxOffset = std::numeric_limits<int64_t>::min();
+ for (const auto &Info : MFI.getCalleeSavedInfo()) {
+ int FrameIdx = Info.getFrameIdx();
+ if (MFI.getStackID(FrameIdx) != TargetStackID::Default)
+ continue;
+ int64_t Offset = MFI.getObjectOffset(FrameIdx);
+ int64_t ObjSize = MFI.getObjectSize(FrameIdx);
+ MinOffset = std::min<int64_t>(Offset, MinOffset);
+ MaxOffset = std::max<int64_t>(Offset + ObjSize, MaxOffset);
+ }
+
+ unsigned Size = alignTo(MaxOffset - MinOffset, 16);
+ assert((!HasCalleeSavedStackSize || getCalleeSavedStackSize() == Size) &&
+ "Invalid size calculated for callee saves");
+ return Size;
+ }
+
+ return getCalleeSavedStackSize();
+ }
+
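A worked example of the recomputation above, with a hypothetical frame
layout of two default-stack CSR slots, 8 bytes each, at offsets -16 and -8:

    // MinOffset = -16, MaxOffset = -8 + 8 = 0
    // Size = alignTo(0 - (-16), 16) = 16 bytes
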
+ unsigned getCalleeSavedStackSize() const {
+ assert(HasCalleeSavedStackSize &&
+ "CalleeSavedStackSize has not been calculated");
+ return CalleeSavedStackSize;
+ }
+
+ // Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes'.
+ void setSVECalleeSavedStackSize(unsigned Size) {
+ SVECalleeSavedStackSize = Size;
+ }
+ unsigned getSVECalleeSavedStackSize() const {
+ return SVECalleeSavedStackSize;
+ }
+
+ void setMinMaxSVECSFrameIndex(int Min, int Max) {
+ MinSVECSFrameIndex = Min;
+ MaxSVECSFrameIndex = Max;
+ }
- void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
- unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
+ int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; }
+ int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
unsigned getNumLocalDynamicTLSAccesses() const {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
index d30ea120bae4..230fd514d022 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -62,20 +62,6 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
CombinerHelper Helper(Observer, B, KB, MDT);
switch (MI.getOpcode()) {
- case TargetOpcode::G_CONCAT_VECTORS:
- return Helper.tryCombineConcatVectors(MI);
- case TargetOpcode::G_SHUFFLE_VECTOR:
- return Helper.tryCombineShuffleVector(MI);
- case TargetOpcode::G_LOAD:
- case TargetOpcode::G_SEXTLOAD:
- case TargetOpcode::G_ZEXTLOAD: {
- bool Changed = false;
- Changed |= Helper.tryCombineExtendingLoads(MI);
- Changed |= Helper.tryCombineIndexedLoadStore(MI);
- return Changed;
- }
- case TargetOpcode::G_STORE:
- return Helper.tryCombineIndexedLoadStore(MI);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
switch (MI.getIntrinsicID()) {
case Intrinsic::memcpy:
@@ -93,9 +79,16 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
}
}
- if (Generated.tryCombineAll(Observer, MI, B))
+ if (Generated.tryCombineAll(Observer, MI, B, Helper))
return true;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONCAT_VECTORS:
+ return Helper.tryCombineConcatVectors(MI);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return Helper.tryCombineShuffleVector(MI);
+ }
+
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
index a594ecb71fc9..9135f1b40122 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 8ec73aa3c040..40efac261fd9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -222,8 +222,9 @@ unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
return RegisterBankInfo::copyCost(A, B, Size);
}
-const RegisterBank &AArch64RegisterBankInfo::getRegBankFromRegClass(
- const TargetRegisterClass &RC) const {
+const RegisterBank &
+AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const {
switch (RC.getID()) {
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
@@ -529,7 +530,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Arithmetic ops.
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
- case TargetOpcode::G_GEP:
+ case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_MUL:
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV:
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
index 016fed65eb2a..e956fca1aa10 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -132,8 +132,8 @@ public:
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
unsigned Size) const override;
- const RegisterBank &
- getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+ const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const override;
InstructionMappings
getInstrAlternativeMappings(const MachineInstr &MI) const override;
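
getRegBankFromRegClass now takes an LLT alongside the register class (the matching .cpp change above also picks up the G_GEP -> G_PTR_ADD opcode rename). A toy illustration, assuming nothing about LLVM's actual policy, of why the query may want the value's low-level type: the same register class can hold values that belong on different banks once type information is available:

```c++
#include <cstdio>

enum class Bank { GPR, FPR };
struct LLTLike { unsigned SizeInBits; bool IsVector; };

// Hypothetical policy: a 64-bit register class whose registers may hold
// either scalar integers or packed vectors; the LLT breaks the tie.
Bank regBankFromClass(const char *RegClassName, LLTLike Ty) {
  (void)RegClassName;
  return Ty.IsVector ? Bank::FPR : Bank::GPR;
}

int main() {
  std::printf("%d\n", (int)regBankFromClass("DReg", {64, false})); // 0 = GPR
  std::printf("%d\n", (int)regBankFromClass("DReg", {64, true}));  // 1 = FPR
}
```
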
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index de176088595d..14f839cd4f81 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -43,6 +43,8 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
+ return CSR_Win_AArch64_CFGuard_Check_SaveList;
if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows())
return CSR_Win_AArch64_AAPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::GHC)
@@ -53,6 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_AArch64_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
return CSR_AArch64_AAVPCS_SaveList;
+ if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall)
+ return CSR_AArch64_SVE_AAPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
@@ -123,7 +127,10 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::AArch64_VectorCall)
return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
if (CC == CallingConv::AArch64_SVE_VectorCall)
- return CSR_AArch64_SVE_AAPCS_RegMask;
+ return SCS ? CSR_AArch64_SVE_AAPCS_SCS_RegMask
+ : CSR_AArch64_SVE_AAPCS_RegMask;
+ if (CC == CallingConv::CFGuard_Check)
+ return CSR_Win_AArch64_CFGuard_Check_RegMask;
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
@@ -390,7 +397,6 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
unsigned BaseReg,
int64_t Offset) const {
- assert(Offset <= INT_MAX && "Offset too big to fit in int.");
assert(MI && "Unable to get the legal offset for nil instruction.");
StackOffset SaveOffset(Offset, MVT::i8);
return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal;
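
getCalleeSavedRegs gains two early cases here: the Windows Control Flow Guard check convention and the SVE vector-call convention now map to their own save lists, and getCallPreservedMask grows the matching register masks (including a shadow-call-stack variant for SVE). A standalone sketch of this select-a-fixed-list-per-calling-convention pattern; the enum values and register numbers are made up:

```c++
#include <cstdio>

enum class CallConv { C, CFGuardCheck, SVEVectorCall };

// Hypothetical register numbers; the real lists live in generated CSR tables.
static const int CSR_Default[] = {19, 20, 21, 22, -1};
static const int CSR_CFGuard[] = {19, 20, -1};
static const int CSR_SVE[]     = {8, 9, 10, 11, -1};

const int *getCalleeSavedRegs(CallConv CC) {
  if (CC == CallConv::CFGuardCheck) // most specific conventions first
    return CSR_CFGuard;
  if (CC == CallConv::SVEVectorCall)
    return CSR_SVE;
  return CSR_Default;
}

int main() {
  for (const int *R = getCalleeSavedRegs(CallConv::SVEVectorCall); *R != -1; ++R)
    std::printf("z%d ", *R); // the SVE convention additionally preserves vector state
  std::printf("\n");
}
```
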
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 61fc0795c242..f52feab03953 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -481,7 +481,7 @@ def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> {
// Vector operand versions of the FP registers. Alternate name printing and
-// assmebler matching.
+// assembler matching.
def VectorReg64AsmOperand : AsmOperandClass {
let Name = "VectorReg64";
let PredicateMethod = "isNeonVectorReg";
@@ -858,35 +858,19 @@ def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>;
//******************************************************************************
-// SVE vector register class
-def ZPR : RegisterClass<"AArch64",
- [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
- nxv2f16, nxv4f16, nxv8f16,
- nxv1f32, nxv2f32, nxv4f32,
- nxv1f64, nxv2f64],
- 128, (sequence "Z%u", 0, 31)> {
+// SVE vector register classes
+class ZPRClass<int lastreg> : RegisterClass<"AArch64",
+ [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
+ nxv2f16, nxv4f16, nxv8f16,
+ nxv2f32, nxv4f32,
+ nxv2f64],
+ 128, (sequence "Z%u", 0, lastreg)> {
let Size = 128;
}
-// SVE restricted 4 bit scalable vector register class
-def ZPR_4b : RegisterClass<"AArch64",
- [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
- nxv2f16, nxv4f16, nxv8f16,
- nxv1f32, nxv2f32, nxv4f32,
- nxv1f64, nxv2f64],
- 128, (sequence "Z%u", 0, 15)> {
- let Size = 128;
-}
-
-// SVE restricted 3 bit scalable vector register class
-def ZPR_3b : RegisterClass<"AArch64",
- [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
- nxv2f16, nxv4f16, nxv8f16,
- nxv1f32, nxv2f32, nxv4f32,
- nxv1f64, nxv2f64],
- 128, (sequence "Z%u", 0, 7)> {
- let Size = 128;
-}
+def ZPR : ZPRClass<31>;
+def ZPR_4b : ZPRClass<15>; // Restricted 4 bit SVE vector register class.
+def ZPR_3b : ZPRClass<7>; // Restricted 3 bit SVE vector register class.
class ZPRAsmOperand<string name, int Width, string RegClassSuffix = "">
: AsmOperandClass {
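
The three near-identical ZPR register-class definitions collapse into a single ZPRClass<int lastreg> parameterized by the last allocatable register, with the unused nxv1f32/nxv1f64 types dropped from the value list. A C++ analogue of the deduplication, illustrative only:

```c++
#include <iostream>
#include <string>
#include <vector>

std::vector<std::string> makeZPRClass(int LastReg) {
  std::vector<std::string> Regs;
  for (int I = 0; I <= LastReg; ++I)
    Regs.push_back("Z" + std::to_string(I)); // (sequence "Z%u", 0, lastreg)
  return Regs;
}

int main() {
  auto ZPR    = makeZPRClass(31); // full class
  auto ZPR_4b = makeZPRClass(15); // restricted, 4-bit register field
  auto ZPR_3b = makeZPRClass(7);  // restricted, 3-bit register field
  std::cout << ZPR.size() << " " << ZPR_4b.size() << " " << ZPR_3b.size() << "\n";
}
```
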
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index b573eac76754..c849d7af9a40 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -10,6 +10,72 @@
//
//===----------------------------------------------------------------------===//
+def SDT_AArch64_GLD1 : SDTypeProfile<1, 4, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
+]>;
+
+def SDT_AArch64_GLD1_IMM : SDTypeProfile<1, 4, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
+]>;
+
+def SDT_AArch64_SST1 : SDTypeProfile<0, 5, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
+]>;
+
+def SDT_AArch64_SST1_IMM : SDTypeProfile<0, 5, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
+]>;
+
+def AArch64st1_scatter : SDNode<"AArch64ISD::SST1", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
+def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
+def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
+def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
+def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
+def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
+def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM", SDT_AArch64_SST1_IMM, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
+
+def AArch64ld1_gather : SDNode<"AArch64ISD::GLD1", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1_gather_scaled : SDNode<"AArch64ISD::GLD1_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1_gather_uxtw : SDNode<"AArch64ISD::GLD1_UXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1_gather_sxtw : SDNode<"AArch64ISD::GLD1_SXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1_UXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1_SXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1_gather_imm : SDNode<"AArch64ISD::GLD1_IMM", SDT_AArch64_GLD1_IMM, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+
+def AArch64ld1s_gather : SDNode<"AArch64ISD::GLD1S", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1s_gather_scaled : SDNode<"AArch64ISD::GLD1S_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1s_gather_uxtw : SDNode<"AArch64ISD::GLD1S_UXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1s_gather_sxtw : SDNode<"AArch64ISD::GLD1S_SXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1s_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1s_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1s_gather_imm : SDNode<"AArch64ISD::GLD1S_IMM", SDT_AArch64_GLD1_IMM, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+
+def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>;
+
+def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
+def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
+def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
+def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;
+def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;
+def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;
+def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;
+def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
+def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;
+
+def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
+def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
+def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
+
+def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
+
+def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
+
let Predicates = [HasSVE] in {
def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr">;
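
The new SDTypeProfiles encode the gather/scatter node shapes: for SDT_AArch64_GLD1 there is one vector result and four operands -- the governing predicate (a vector of i1 with the result's element count), the scalar base pointer, the offset vector, and a VT operand carrying the memory element type. A toy restatement of those constraints in plain C++ (not LLVM code):

```c++
#include <cassert>

struct VT {
  bool IsVector, IsPointer, IsVTOperand;
  int NumElts;
  bool EltIsI1;
};

// Mirrors SDT_AArch64_GLD1: result and operand 1 are vectors, operand 2 is
// a pointer, operand 3 is a vector, operand 4 is a VT operand; operand 1's
// elements are i1 and its element count equals the result's.
bool checkGLD1Profile(const VT &Res, const VT Ops[4]) {
  return Res.IsVector && Ops[0].IsVector && Ops[1].IsPointer &&
         Ops[2].IsVector && Ops[3].IsVTOperand &&
         Ops[0].EltIsI1 && Ops[0].NumElts == Res.NumElts;
}

int main() {
  VT Res    = {true, false, false, 4, false};   // e.g. nxv4i32 result
  VT Ops[4] = {{true, false, false, 4, true},   // nxv4i1 governing predicate
               {false, true, false, 0, false},  // scalar base pointer
               {true, false, false, 4, false},  // nxv4i32 offset vector
               {false, false, true, 0, false}}; // memory VT, e.g. nxv4i16
  assert(checkGLD1Profile(Res, Ops));
  return 0;
}
```
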
@@ -18,69 +84,69 @@ let Predicates = [HasSVE] in {
def SETFFR : sve_int_setffr<"setffr">;
def WRFFR : sve_int_wrffr<"wrffr">;
- defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add">;
- defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub">;
- defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd">;
- defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd">;
- defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub">;
- defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub">;
-
- defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and">;
- defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr">;
- defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor">;
- defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic">;
-
- defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add">;
- defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub">;
- defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr">;
-
- defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr">;
- defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor">;
- defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and">;
- defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic">;
-
- defm ADD_ZI : sve_int_arith_imm0<0b000, "add">;
- defm SUB_ZI : sve_int_arith_imm0<0b001, "sub">;
- defm SUBR_ZI : sve_int_arith_imm0<0b011, "subr">;
- defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd">;
- defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd">;
- defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub">;
- defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub">;
-
- defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad">;
- defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb">;
- defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla">;
- defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls">;
+ defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add", add>;
+ defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub", sub>;
+ defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd", saddsat>;
+ defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd", uaddsat>;
+ defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub", ssubsat>;
+ defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub", usubsat>;
+
+ defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and", and>;
+ defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr", or>;
+ defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor", xor>;
+ defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", null_frag>;
+
+ defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", int_aarch64_sve_add>;
+ defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", int_aarch64_sve_sub>;
+ defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", int_aarch64_sve_subr>;
+
+ defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_orr>;
+ defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_eor>;
+ defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>;
+ defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", int_aarch64_sve_bic>;
+
+ defm ADD_ZI : sve_int_arith_imm0<0b000, "add", add>;
+ defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub>;
+ defm SUBR_ZI : sve_int_arith_imm0_subr<0b011, "subr", sub>;
+ defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd", saddsat>;
+ defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>;
+ defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>;
+ defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
+
+ defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad>;
+ defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", int_aarch64_sve_msb>;
+ defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla>;
+ defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls>;
// SVE predicated integer reductions.
- defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv">;
- defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv">;
- defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv">;
- defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv">;
- defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv">;
- defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv">;
- defm ORV_VPZ : sve_int_reduce_2<0b000, "orv">;
- defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv">;
- defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv">;
-
- defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn">;
- defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon">;
- defm AND_ZI : sve_int_log_imm<0b10, "and", "bic">;
-
- defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", simm8>;
- defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", simm8>;
- defm UMAX_ZI : sve_int_arith_imm1<0b01, "umax", imm0_255>;
- defm UMIN_ZI : sve_int_arith_imm1<0b11, "umin", imm0_255>;
-
- defm MUL_ZI : sve_int_arith_imm2<"mul">;
- defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul">;
- defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh">;
- defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh">;
-
- defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv">;
- defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv">;
- defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">;
- defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">;
+ defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", int_aarch64_sve_saddv>;
+ defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", int_aarch64_sve_uaddv, int_aarch64_sve_saddv>;
+ defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_pred>;
+ defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_pred>;
+ defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_pred>;
+ defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", AArch64uminv_pred>;
+ defm ORV_VPZ : sve_int_reduce_2<0b000, "orv", AArch64orv_pred>;
+ defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", AArch64eorv_pred>;
+ defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", AArch64andv_pred>;
+
+ defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn", or>;
+ defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
+ defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;
+
+ defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", smax>;
+ defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", smin>;
+ defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", umax>;
+ defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", umin>;
+
+ defm MUL_ZI : sve_int_arith_imm2<"mul", mul>;
+ defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
+ defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", int_aarch64_sve_smulh>;
+ defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", int_aarch64_sve_umulh>;
+
+ defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", int_aarch64_sve_sdiv>;
+ defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", int_aarch64_sve_udiv>;
+ defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>;
+ defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", int_aarch64_sve_udivr>;
defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>;
defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>;
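
From here on the multiclasses take an extra SelectionDAG node or intrinsic argument, so each defm now also emits instruction-selection patterns instead of assembler support alone. For the predicated merging forms (_ZPmZ), the semantics being selected are: active lanes compute the operation, inactive lanes keep the destination operand. A scalar model of the merging behaviour behind int_aarch64_sve_add, illustrative only:

```c++
#include <array>
#include <cstddef>
#include <cstdio>

// Models: add z0.s, p0/m, z0.s, z1.s
template <std::size_t N>
std::array<int, N> predicatedAddMerge(std::array<bool, N> Pg,
                                      std::array<int, N> Zdn,
                                      std::array<int, N> Zm) {
  std::array<int, N> Out{};
  for (std::size_t I = 0; I < N; ++I)
    Out[I] = Pg[I] ? Zdn[I] + Zm[I] : Zdn[I]; // merge: inactive lanes keep Zdn
  return Out;
}

int main() {
  auto R = predicatedAddMerge<4>({{true, false, true, false}},
                                 {{1, 2, 3, 4}}, {{10, 20, 30, 40}});
  for (int V : R)
    std::printf("%d ", V); // prints: 11 2 33 4
  std::printf("\n");
}
```
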
@@ -88,32 +154,32 @@ let Predicates = [HasSVE] in {
defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>;
- defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">;
- defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">;
- defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth">;
- defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth">;
- defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw">;
- defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw">;
- defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", int_aarch64_sve_abs>;
- defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", int_aarch64_sve_neg>;
-
- defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", null_frag>;
- defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", null_frag>;
+ defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", int_aarch64_sve_sxtb>;
+ defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", int_aarch64_sve_uxtb>;
+ defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", int_aarch64_sve_sxth>;
+ defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", int_aarch64_sve_uxth>;
+ defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", int_aarch64_sve_sxtw>;
+ defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", int_aarch64_sve_uxtw>;
+ defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", int_aarch64_sve_abs>;
+ defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", int_aarch64_sve_neg>;
+
+ defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", int_aarch64_sve_cls>;
+ defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", int_aarch64_sve_clz>;
defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", int_aarch64_sve_cnt>;
- defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", null_frag>;
- defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", null_frag>;
- defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">;
- defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">;
+ defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", int_aarch64_sve_cnot>;
+ defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", int_aarch64_sve_not>;
+ defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>;
+ defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>;
- defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax">;
- defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax">;
- defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin">;
- defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin">;
- defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd">;
- defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd">;
+ defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", int_aarch64_sve_smax>;
+ defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", int_aarch64_sve_umax>;
+ defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", int_aarch64_sve_smin>;
+ defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", int_aarch64_sve_umin>;
+ defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>;
+ defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>;
- defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe">;
- defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte">;
+ defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>;
+ defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>;
defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>;
defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>;
@@ -124,57 +190,57 @@ let Predicates = [HasSVE] in {
defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>;
defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>;
- defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd">;
- defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub">;
- defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul">;
- defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr">;
- defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm">;
- defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm">;
- defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax">;
- defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin">;
- defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd">;
- defm FSCALE_ZPmZ : sve_fp_2op_p_zds<0b1001, "fscale">;
- defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx">;
- defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr">;
- defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv">;
-
- defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
- defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", null_frag>;
- defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", null_frag>;
- defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul", null_frag>;
- defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", null_frag>;
- defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", null_frag>;
-
- defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">;
-
- defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd">;
- defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla">;
-
- defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla">;
- defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls">;
- defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla">;
- defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls">;
-
- defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad">;
- defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb">;
- defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad">;
- defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb">;
-
- defm FTMAD_ZZI : sve_fp_ftmad<"ftmad">;
-
- defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla">;
- defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls">;
-
- defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla">;
- defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul">;
+ defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", int_aarch64_sve_fadd>;
+ defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", int_aarch64_sve_fsub>;
+ defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", int_aarch64_sve_fmul>;
+ defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", int_aarch64_sve_fsubr>;
+ defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", int_aarch64_sve_fmaxnm>;
+ defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", int_aarch64_sve_fminnm>;
+ defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", int_aarch64_sve_fmax>;
+ defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", int_aarch64_sve_fmin>;
+ defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", int_aarch64_sve_fabd>;
+ defm FSCALE_ZPmZ : sve_fp_2op_p_zds_fscale<0b1001, "fscale", int_aarch64_sve_fscale>;
+ defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", int_aarch64_sve_fmulx>;
+ defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", int_aarch64_sve_fdivr>;
+ defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", int_aarch64_sve_fdiv>;
+
+ defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
+ defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
+ defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul>;
+ defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
+ defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
+ defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
+
+ defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>;
+
+ defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
+ defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>;
+
+ defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", int_aarch64_sve_fmla>;
+ defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls", int_aarch64_sve_fmls>;
+ defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", int_aarch64_sve_fnmla>;
+ defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", int_aarch64_sve_fnmls>;
+
+ defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad", int_aarch64_sve_fmad>;
+ defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb", int_aarch64_sve_fmsb>;
+ defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad>;
+ defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb>;
+
+ defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>;
+
+ defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;
+ defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>;
+
+ defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
+ defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;
// SVE floating point reductions.
- defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda">;
- defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv">;
- defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv">;
- defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv">;
- defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv">;
- defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv">;
+ defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", int_aarch64_sve_fadda>;
+ defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", int_aarch64_sve_faddv>;
+ defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", int_aarch64_sve_fmaxnmv>;
+ defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", int_aarch64_sve_fminnmv>;
+ defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", int_aarch64_sve_fmaxv>;
+ defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", int_aarch64_sve_fminv>;
// Splat immediate (unpredicated)
defm DUP_ZI : sve_int_dup_imm<"dup">;
@@ -195,21 +261,21 @@ let Predicates = [HasSVE] in {
defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy">;
// Select elements from either vector (predicated)
- defm SEL_ZPZZ : sve_int_sel_vvv<"sel">;
+ defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
- defm SPLICE_ZPZ : sve_int_perm_splice<"splice">;
- defm COMPACT_ZPZ : sve_int_perm_compact<"compact">;
- defm INSR_ZR : sve_int_perm_insrs<"insr">;
- defm INSR_ZV : sve_int_perm_insrv<"insr">;
- def EXT_ZZI : sve_int_perm_extract_i<"ext">;
+ defm SPLICE_ZPZ : sve_int_perm_splice<"splice", int_aarch64_sve_splice>;
+ defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>;
+ defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;
+ defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
+ defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;
- defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit">;
- defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb">;
- defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh">;
- defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw">;
+ defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", int_aarch64_sve_rbit>;
+ defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", int_aarch64_sve_revb, bswap>;
+ defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
+ defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>;
- defm REV_PP : sve_int_perm_reverse_p<"rev">;
- defm REV_ZZ : sve_int_perm_reverse_z<"rev">;
+ defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>;
+ defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>;
defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>;
@@ -222,9 +288,7 @@ let Predicates = [HasSVE] in {
defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
- def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>;
- def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>;
- def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>;
+ defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>;
def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">;
def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">;
@@ -243,36 +307,36 @@ let Predicates = [HasSVE] in {
def PTEST_PP : sve_int_ptest<0b010000, "ptest">;
def PFALSE : sve_int_pfalse<0b000000, "pfalse">;
- defm PFIRST : sve_int_pfirst<0b00000, "pfirst">;
- defm PNEXT : sve_int_pnext<0b00110, "pnext">;
-
- def AND_PPzPP : sve_int_pred_log<0b0000, "and">;
- def BIC_PPzPP : sve_int_pred_log<0b0001, "bic">;
- def EOR_PPzPP : sve_int_pred_log<0b0010, "eor">;
- def SEL_PPPP : sve_int_pred_log<0b0011, "sel">;
- def ANDS_PPzPP : sve_int_pred_log<0b0100, "ands">;
- def BICS_PPzPP : sve_int_pred_log<0b0101, "bics">;
- def EORS_PPzPP : sve_int_pred_log<0b0110, "eors">;
- def ORR_PPzPP : sve_int_pred_log<0b1000, "orr">;
- def ORN_PPzPP : sve_int_pred_log<0b1001, "orn">;
- def NOR_PPzPP : sve_int_pred_log<0b1010, "nor">;
- def NAND_PPzPP : sve_int_pred_log<0b1011, "nand">;
- def ORRS_PPzPP : sve_int_pred_log<0b1100, "orrs">;
- def ORNS_PPzPP : sve_int_pred_log<0b1101, "orns">;
- def NORS_PPzPP : sve_int_pred_log<0b1110, "nors">;
- def NANDS_PPzPP : sve_int_pred_log<0b1111, "nands">;
-
- defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta">;
- defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb">;
- defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta">;
- defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb">;
- defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta">;
- defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb">;
-
- defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta">;
- defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb">;
- defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta">;
- defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb">;
+ defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
+ defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
+
+ defm AND_PPzPP : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z>;
+ defm BIC_PPzPP : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
+ defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z>;
+ defm SEL_PPPP : sve_int_pred_log<0b0011, "sel", vselect>;
+ defm ANDS_PPzPP : sve_int_pred_log<0b0100, "ands", null_frag>;
+ defm BICS_PPzPP : sve_int_pred_log<0b0101, "bics", null_frag>;
+ defm EORS_PPzPP : sve_int_pred_log<0b0110, "eors", null_frag>;
+ defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z>;
+ defm ORN_PPzPP : sve_int_pred_log<0b1001, "orn", int_aarch64_sve_orn_z>;
+ defm NOR_PPzPP : sve_int_pred_log<0b1010, "nor", int_aarch64_sve_nor_z>;
+ defm NAND_PPzPP : sve_int_pred_log<0b1011, "nand", int_aarch64_sve_nand_z>;
+ defm ORRS_PPzPP : sve_int_pred_log<0b1100, "orrs", null_frag>;
+ defm ORNS_PPzPP : sve_int_pred_log<0b1101, "orns", null_frag>;
+ defm NORS_PPzPP : sve_int_pred_log<0b1110, "nors", null_frag>;
+ defm NANDS_PPzPP : sve_int_pred_log<0b1111, "nands", null_frag>;
+
+ defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta", AArch64clasta_n>;
+ defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb", AArch64clastb_n>;
+ defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta", AArch64clasta_n>;
+ defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb", AArch64clastb_n>;
+ defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta", int_aarch64_sve_clasta>;
+ defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb", int_aarch64_sve_clastb>;
+
+ defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta", AArch64lasta>;
+ defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb", AArch64lastb>;
+ defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta", AArch64lasta>;
+ defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb", AArch64lastb>;
// continuous load with reg+immediate
defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>;
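
The gather multiclasses below likewise gain the ISD node and memory element type, wiring each addressing form to the GLD1* nodes defined at the top of this file. A scalar model of a zeroing gather such as ld1h z0.s, p0/z, [x0, z0.s, uxtw]; offsets are treated as element indices here for simplicity (the real unscaled form takes byte offsets):

```c++
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<uint32_t> gatherLd1h(const uint16_t *Base,
                                 const std::vector<bool> &Pg,
                                 const std::vector<uint32_t> &Offsets) {
  std::vector<uint32_t> Out(Pg.size(), 0); // p0/z: inactive lanes are zeroed
  for (std::size_t I = 0; I < Pg.size(); ++I)
    if (Pg[I])
      Out[I] = Base[Offsets[I]]; // 16-bit load, zero-extended to 32 bits
  return Out;
}

int main() {
  uint16_t Mem[8] = {100, 101, 102, 103, 104, 105, 106, 107};
  auto R = gatherLd1h(Mem, {true, false, true, true}, {7, 0, 1, 3});
  for (uint32_t V : R)
    std::printf("%u ", V); // prints: 107 0 101 103
  std::printf("\n");
}
```
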
@@ -404,115 +468,115 @@ let Predicates = [HasSVE] in {
// Gathers using unscaled 32-bit offsets, e.g.
// ld1h z0.s, p0/z, [x0, z0.s, uxtw]
- defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
- defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
- defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
- defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
- defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
- defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
- defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
- defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
- defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
- defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
+ defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", null_frag, null_frag, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", null_frag, null_frag, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
+ defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
// Gathers using scaled 32-bit offsets, e.g.
// ld1h z0.s, p0/z, [x0, z0.s, uxtw #1]
- defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
- defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
- defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
- defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
- defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
- defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
-
- // Gathers using scaled 32-bit pointers with offset, e.g.
+ defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", null_frag, null_frag, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
+ defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", null_frag, null_frag, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
+
+ // Gathers using 32-bit pointers with scaled offset, e.g.
// ld1h z0.s, p0/z, [z0.s, #16]
- defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31>;
- defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31>;
- defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31>;
- defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31>;
- defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2>;
- defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2>;
- defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2>;
- defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2>;
- defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4>;
- defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4>;
-
- // Gathers using scaled 64-bit pointers with offset, e.g.
+ defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv4i8>;
+ defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, null_frag, nxv4i8>;
+ defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv4i8>;
+ defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, null_frag, nxv4i8>;
+ defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv4i16>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, null_frag, nxv4i16>;
+ defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv4i16>;
+ defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, null_frag, nxv4i16>;
+ defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv4i32>;
+ defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, null_frag, nxv4i32>;
+
+ // Gathers using 64-bit pointers with scaled offset, e.g.
// ld1h z0.d, p0/z, [z0.d, #16]
- defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31>;
- defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31>;
- defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31>;
- defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31>;
- defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2>;
- defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2>;
- defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2>;
- defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2>;
- defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4>;
- defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4>;
- defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4>;
- defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4>;
- defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8>;
- defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8>;
+ defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv2i8>;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, null_frag, nxv2i8>;
+ defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv2i8>;
+ defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, null_frag, nxv2i8>;
+ defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, null_frag, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, null_frag, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, AArch64ld1s_gather_imm, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, null_frag, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, null_frag, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, null_frag, nxv2i64>;
// Gathers using unscaled 64-bit offsets, e.g.
// ld1h z0.d, p0/z, [x0, z0.d]
- defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb">;
- defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb">;
- defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b">;
- defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b">;
- defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh">;
- defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh">;
- defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h">;
- defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h">;
- defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw">;
- defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw">;
- defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w">;
- defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w">;
- defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d">;
- defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d">;
+ defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", AArch64ld1s_gather, nxv2i8>;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", null_frag, nxv2i8>;
+ defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather, nxv2i8>;
+ defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", null_frag, nxv2i8>;
+ defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", AArch64ld1s_gather, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", null_frag, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", null_frag, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", AArch64ld1s_gather, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", null_frag, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", null_frag, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", null_frag, nxv2i64>;
// Gathers using scaled 64-bit offsets, e.g.
// ld1h z0.d, p0/z, [x0, z0.d, lsl #1]
- defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", ZPR64ExtLSL16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", ZPR64ExtLSL16>;
- defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", ZPR64ExtLSL16>;
- defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", ZPR64ExtLSL16>;
- defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", ZPR64ExtLSL32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", ZPR64ExtLSL32>;
- defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", ZPR64ExtLSL32>;
- defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", ZPR64ExtLSL32>;
- defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", ZPR64ExtLSL64>;
- defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", ZPR64ExtLSL64>;
+ defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", AArch64ld1s_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", null_frag, ZPR64ExtLSL16, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", null_frag, ZPR64ExtLSL16, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", AArch64ld1s_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", null_frag, ZPR64ExtLSL32, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", null_frag, ZPR64ExtLSL32, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled, ZPR64ExtLSL64, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", null_frag, ZPR64ExtLSL64, nxv2i64>;
// Gathers using unscaled 32-bit offsets unpacked in 64-bits elements, e.g.
// ld1h z0.d, p0/z, [x0, z0.d, uxtw]
- defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
- defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
- defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
- defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
- defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
- defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
- defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
- defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
- defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
- defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
- defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
- defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
- defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
- defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", null_frag, null_frag, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", null_frag, null_frag, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
// Gathers using scaled 32-bit offsets unpacked in 64-bits elements, e.g.
// ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
- defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh",ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
- defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
- defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
- defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw",ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
- defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
- defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
- defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
- defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
+ defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", null_frag, null_frag, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", null_frag, null_frag, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", null_frag, null_frag, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", null_frag, null_frag, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
// Non-temporal contiguous loads (register + immediate)
defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>;
@@ -550,51 +614,55 @@ let Predicates = [HasSVE] in {
defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>;
defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
- // Scatters using unscaled 32-bit offsets, e.g.
- // st1h z0.s, p0, [x0, z0.s, uxtw]
- // and unpacked:
+ // Scatters using unpacked, unscaled 32-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, uxtw]
- defm SST1B_D : sve_mem_sst_sv_32_unscaled<0b000, "st1b", Z_d, ZPR64, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
- defm SST1B_S : sve_mem_sst_sv_32_unscaled<0b001, "st1b", Z_s, ZPR32, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
- defm SST1H_D : sve_mem_sst_sv_32_unscaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
- defm SST1H_S : sve_mem_sst_sv_32_unscaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
- defm SST1W_D : sve_mem_sst_sv_32_unscaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
- defm SST1W : sve_mem_sst_sv_32_unscaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
- defm SST1D : sve_mem_sst_sv_32_unscaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
-
- // Scatters using scaled 32-bit offsets, e.g.
+ defm SST1B_D : sve_mem_64b_sst_sv_32_unscaled<0b000, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm SST1H_D : sve_mem_64b_sst_sv_32_unscaled<0b010, "st1h", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+  defm SST1W_D   : sve_mem_64b_sst_sv_32_unscaled<0b100, "st1w", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm SST1D : sve_mem_64b_sst_sv_32_unscaled<0b110, "st1d", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
+
+ // Scatters using packed, unscaled 32-bit offsets, e.g.
+ // st1h z0.s, p0, [x0, z0.s, uxtw]
+ defm SST1B_S : sve_mem_32b_sst_sv_32_unscaled<0b001, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm SST1H_S : sve_mem_32b_sst_sv_32_unscaled<0b011, "st1h", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm SST1W : sve_mem_32b_sst_sv_32_unscaled<0b101, "st1w", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
+
+ // Scatters using packed, scaled 32-bit offsets, e.g.
// st1h z0.s, p0, [x0, z0.s, uxtw #1]
- // and unpacked:
+ defm SST1H_S : sve_mem_32b_sst_sv_32_scaled<0b011, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm SST1W : sve_mem_32b_sst_sv_32_scaled<0b101, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
+
+ // Scatters using unpacked, scaled 32-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, uxtw #1]
- defm SST1H_D : sve_mem_sst_sv_32_scaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
- defm SST1H_S : sve_mem_sst_sv_32_scaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
- defm SST1W_D : sve_mem_sst_sv_32_scaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
- defm SST1W : sve_mem_sst_sv_32_scaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
- defm SST1D : sve_mem_sst_sv_32_scaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
+ defm SST1H_D : sve_mem_64b_sst_sv_32_scaled<0b010, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm SST1W_D : sve_mem_64b_sst_sv_32_scaled<0b100, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm SST1D : sve_mem_64b_sst_sv_32_scaled<0b110, "st1d", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
// Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.s, p0, [z0.s, #16]
+ defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", timm0_31, AArch64st1_scatter_imm, nxv4i8>;
+ defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv4i16>;
+ defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv4i32>;
+
+ // Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.d, p0, [z0.d, #16]
- defm SST1B_D : sve_mem_sst_vi_ptrs<0b000, "st1b", Z_d, ZPR64, imm0_31>;
- defm SST1B_S : sve_mem_sst_vi_ptrs<0b001, "st1b", Z_s, ZPR32, imm0_31>;
- defm SST1H_D : sve_mem_sst_vi_ptrs<0b010, "st1h", Z_d, ZPR64, uimm5s2>;
- defm SST1H_S : sve_mem_sst_vi_ptrs<0b011, "st1h", Z_s, ZPR32, uimm5s2>;
- defm SST1W_D : sve_mem_sst_vi_ptrs<0b100, "st1w", Z_d, ZPR64, uimm5s4>;
- defm SST1W : sve_mem_sst_vi_ptrs<0b101, "st1w", Z_s, ZPR32, uimm5s4>;
- defm SST1D : sve_mem_sst_vi_ptrs<0b110, "st1d", Z_d, ZPR64, uimm5s8>;
+ defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", timm0_31, AArch64st1_scatter_imm, nxv2i8>;
+ defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv2i16>;
+ defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv2i32>;
+ defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", tuimm5s8, AArch64st1_scatter_imm, nxv2i64>;
// Scatters using unscaled 64-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d]
- defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b">;
- defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h">;
- defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w">;
- defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d">;
+ defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b", AArch64st1_scatter, nxv2i8>;
+ defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h", AArch64st1_scatter, nxv2i16>;
+ defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w", AArch64st1_scatter, nxv2i32>;
+ defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d", AArch64st1_scatter, nxv2i64>;
// Scatters using scaled 64-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, lsl #1]
- defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", ZPR64ExtLSL16>;
- defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", ZPR64ExtLSL32>;
- defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", ZPR64ExtLSL64>;
+ defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>;
+ defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>;
+ defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>;
// ST(2|3|4) structured stores (register + immediate)
defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>;
@@ -693,58 +761,58 @@ let Predicates = [HasSVE] in {
defm ADR_LSL_ZZZ_S : sve_int_bin_cons_misc_0_a_32_lsl<0b10, "adr">;
defm ADR_LSL_ZZZ_D : sve_int_bin_cons_misc_0_a_64_lsl<0b11, "adr">;
- defm TBL_ZZZ : sve_int_perm_tbl<"tbl">;
-
- defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1">;
- defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2">;
- defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1">;
- defm UZP2_ZZZ : sve_int_perm_bin_perm_zz<0b011, "uzp2">;
- defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1">;
- defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2">;
-
- defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1">;
- defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2">;
- defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1">;
- defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2">;
- defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1">;
- defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2">;
-
- defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs">;
- defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi">;
- defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge">;
- defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt">;
- defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq">;
- defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne">;
-
- defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq">;
- defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne">;
- defm CMPGE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b000, "cmpge">;
- defm CMPGT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b001, "cmpgt">;
- defm CMPLT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b010, "cmplt">;
- defm CMPLE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b011, "cmple">;
- defm CMPHS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b100, "cmphs">;
- defm CMPHI_WIDE_PPzZZ : sve_int_cmp_1_wide<0b101, "cmphi">;
- defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo">;
- defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls">;
-
- defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge">;
- defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt">;
- defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt">;
- defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple">;
- defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq">;
- defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne">;
- defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs">;
- defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi">;
- defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo">;
- defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls">;
-
- defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge">;
- defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt">;
- defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq">;
- defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne">;
- defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo">;
- defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge">;
- defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt">;
+ defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>;
+
+ defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>;
+ defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2", AArch64zip2>;
+ defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1", AArch64uzp1>;
+ defm UZP2_ZZZ : sve_int_perm_bin_perm_zz<0b011, "uzp2", AArch64uzp2>;
+ defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1", AArch64trn1>;
+ defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2", AArch64trn2>;
+
+ defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1>;
+ defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2>;
+ defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1>;
+ defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2", AArch64uzp2>;
+ defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>;
+ defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>;
+
+ defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", int_aarch64_sve_cmphs, SETUGE>;
+ defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", int_aarch64_sve_cmphi, SETUGT>;
+ defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", int_aarch64_sve_cmpge, SETGE>;
+ defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt", int_aarch64_sve_cmpgt, SETGT>;
+ defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq", int_aarch64_sve_cmpeq, SETEQ>;
+ defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne", int_aarch64_sve_cmpne, SETNE>;
+
+ defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq", int_aarch64_sve_cmpeq_wide>;
+ defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne", int_aarch64_sve_cmpne_wide>;
+ defm CMPGE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b000, "cmpge", int_aarch64_sve_cmpge_wide>;
+ defm CMPGT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b001, "cmpgt", int_aarch64_sve_cmpgt_wide>;
+ defm CMPLT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b010, "cmplt", int_aarch64_sve_cmplt_wide>;
+ defm CMPLE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b011, "cmple", int_aarch64_sve_cmple_wide>;
+ defm CMPHS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b100, "cmphs", int_aarch64_sve_cmphs_wide>;
+ defm CMPHI_WIDE_PPzZZ : sve_int_cmp_1_wide<0b101, "cmphi", int_aarch64_sve_cmphi_wide>;
+ defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo", int_aarch64_sve_cmplo_wide>;
+ defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls", int_aarch64_sve_cmpls_wide>;
+
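+ // For the immediate forms below, null_frag marks a compare with no direct
+ // intrinsic pattern of its own; the trailing operand names the commuted
+ // intrinsic, so e.g. cmplt can be selected from cmpgt with its operands
+ // swapped.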
+ defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge", SETGE, int_aarch64_sve_cmpge>;
+ defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt", SETGT, int_aarch64_sve_cmpgt>;
+ defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt", SETLT, null_frag, int_aarch64_sve_cmpgt>;
+ defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple", SETLE, null_frag, int_aarch64_sve_cmpge>;
+ defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq", SETEQ, int_aarch64_sve_cmpeq>;
+ defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne", SETNE, int_aarch64_sve_cmpne>;
+ defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs", SETUGE, int_aarch64_sve_cmphs>;
+ defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi", SETUGT, int_aarch64_sve_cmphi>;
+ defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, null_frag, int_aarch64_sve_cmphi>;
+ defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, null_frag, int_aarch64_sve_cmphs>;
+
+ defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge", int_aarch64_sve_fcmpge>;
+ defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt", int_aarch64_sve_fcmpgt>;
+ defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq", int_aarch64_sve_fcmpeq>;
+ defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne", int_aarch64_sve_fcmpne>;
+ defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo", int_aarch64_sve_fcmpuo>;
+ defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
+ defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge">;
defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt">;
@@ -753,15 +821,15 @@ let Predicates = [HasSVE] in {
defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">;
defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">;
- defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">;
- defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">;
- defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">;
- defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">;
+ defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
+ defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>;
+ defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo>;
+ defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels>;
- defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">;
- defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">;
- defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">;
- defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">;
+ defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
+ defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele>;
+ defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo>;
+ defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels>;
def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>;
def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>;
@@ -772,11 +840,11 @@ let Predicates = [HasSVE] in {
def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">;
def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">;
- defm CNTB_XPiI : sve_int_count<0b000, "cntb">;
- defm CNTH_XPiI : sve_int_count<0b010, "cnth">;
- defm CNTW_XPiI : sve_int_count<0b100, "cntw">;
- defm CNTD_XPiI : sve_int_count<0b110, "cntd">;
- defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp">;
+ defm CNTB_XPiI : sve_int_count<0b000, "cntb", int_aarch64_sve_cntb>;
+ defm CNTH_XPiI : sve_int_count<0b010, "cnth", int_aarch64_sve_cnth>;
+ defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>;
+ defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>;
+ defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>;
defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb">;
defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb">;
@@ -787,76 +855,76 @@ let Predicates = [HasSVE] in {
defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd">;
defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd">;
- defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb">;
- defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb">;
- defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb">;
- defm UQDECB_WPiI : sve_int_pred_pattern_b_u32<0b00011, "uqdecb">;
- defm SQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00100, "sqincb">;
- defm UQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00101, "uqincb">;
- defm SQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00110, "sqdecb">;
- defm UQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00111, "uqdecb">;
-
- defm SQINCH_XPiWdI : sve_int_pred_pattern_b_s32<0b01000, "sqinch">;
- defm UQINCH_WPiI : sve_int_pred_pattern_b_u32<0b01001, "uqinch">;
- defm SQDECH_XPiWdI : sve_int_pred_pattern_b_s32<0b01010, "sqdech">;
- defm UQDECH_WPiI : sve_int_pred_pattern_b_u32<0b01011, "uqdech">;
- defm SQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01100, "sqinch">;
- defm UQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01101, "uqinch">;
- defm SQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01110, "sqdech">;
- defm UQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01111, "uqdech">;
-
- defm SQINCW_XPiWdI : sve_int_pred_pattern_b_s32<0b10000, "sqincw">;
- defm UQINCW_WPiI : sve_int_pred_pattern_b_u32<0b10001, "uqincw">;
- defm SQDECW_XPiWdI : sve_int_pred_pattern_b_s32<0b10010, "sqdecw">;
- defm UQDECW_WPiI : sve_int_pred_pattern_b_u32<0b10011, "uqdecw">;
- defm SQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10100, "sqincw">;
- defm UQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10101, "uqincw">;
- defm SQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10110, "sqdecw">;
- defm UQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10111, "uqdecw">;
-
- defm SQINCD_XPiWdI : sve_int_pred_pattern_b_s32<0b11000, "sqincd">;
- defm UQINCD_WPiI : sve_int_pred_pattern_b_u32<0b11001, "uqincd">;
- defm SQDECD_XPiWdI : sve_int_pred_pattern_b_s32<0b11010, "sqdecd">;
- defm UQDECD_WPiI : sve_int_pred_pattern_b_u32<0b11011, "uqdecd">;
- defm SQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11100, "sqincd">;
- defm UQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11101, "uqincd">;
- defm SQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11110, "sqdecd">;
- defm UQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11111, "uqdecd">;
-
- defm SQINCH_ZPiI : sve_int_countvlv<0b01000, "sqinch", ZPR16>;
- defm UQINCH_ZPiI : sve_int_countvlv<0b01001, "uqinch", ZPR16>;
- defm SQDECH_ZPiI : sve_int_countvlv<0b01010, "sqdech", ZPR16>;
- defm UQDECH_ZPiI : sve_int_countvlv<0b01011, "uqdech", ZPR16>;
+ defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>;
+ defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>;
+ defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>;
+ defm UQDECB_WPiI : sve_int_pred_pattern_b_u32<0b00011, "uqdecb", int_aarch64_sve_uqdecb_n32>;
+ defm SQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00100, "sqincb", int_aarch64_sve_sqincb_n64>;
+ defm UQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00101, "uqincb", int_aarch64_sve_uqincb_n64>;
+ defm SQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00110, "sqdecb", int_aarch64_sve_sqdecb_n64>;
+ defm UQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00111, "uqdecb", int_aarch64_sve_uqdecb_n64>;
+
+ defm SQINCH_XPiWdI : sve_int_pred_pattern_b_s32<0b01000, "sqinch", int_aarch64_sve_sqinch_n32>;
+ defm UQINCH_WPiI : sve_int_pred_pattern_b_u32<0b01001, "uqinch", int_aarch64_sve_uqinch_n32>;
+ defm SQDECH_XPiWdI : sve_int_pred_pattern_b_s32<0b01010, "sqdech", int_aarch64_sve_sqdech_n32>;
+ defm UQDECH_WPiI : sve_int_pred_pattern_b_u32<0b01011, "uqdech", int_aarch64_sve_uqdech_n32>;
+ defm SQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01100, "sqinch", int_aarch64_sve_sqinch_n64>;
+ defm UQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01101, "uqinch", int_aarch64_sve_uqinch_n64>;
+ defm SQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01110, "sqdech", int_aarch64_sve_sqdech_n64>;
+ defm UQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01111, "uqdech", int_aarch64_sve_uqdech_n64>;
+
+ defm SQINCW_XPiWdI : sve_int_pred_pattern_b_s32<0b10000, "sqincw", int_aarch64_sve_sqincw_n32>;
+ defm UQINCW_WPiI : sve_int_pred_pattern_b_u32<0b10001, "uqincw", int_aarch64_sve_uqincw_n32>;
+ defm SQDECW_XPiWdI : sve_int_pred_pattern_b_s32<0b10010, "sqdecw", int_aarch64_sve_sqdecw_n32>;
+ defm UQDECW_WPiI : sve_int_pred_pattern_b_u32<0b10011, "uqdecw", int_aarch64_sve_uqdecw_n32>;
+ defm SQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10100, "sqincw", int_aarch64_sve_sqincw_n64>;
+ defm UQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10101, "uqincw", int_aarch64_sve_uqincw_n64>;
+ defm SQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10110, "sqdecw", int_aarch64_sve_sqdecw_n64>;
+ defm UQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10111, "uqdecw", int_aarch64_sve_uqdecw_n64>;
+
+ defm SQINCD_XPiWdI : sve_int_pred_pattern_b_s32<0b11000, "sqincd", int_aarch64_sve_sqincd_n32>;
+ defm UQINCD_WPiI : sve_int_pred_pattern_b_u32<0b11001, "uqincd", int_aarch64_sve_uqincd_n32>;
+ defm SQDECD_XPiWdI : sve_int_pred_pattern_b_s32<0b11010, "sqdecd", int_aarch64_sve_sqdecd_n32>;
+ defm UQDECD_WPiI : sve_int_pred_pattern_b_u32<0b11011, "uqdecd", int_aarch64_sve_uqdecd_n32>;
+ defm SQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11100, "sqincd", int_aarch64_sve_sqincd_n64>;
+ defm UQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11101, "uqincd", int_aarch64_sve_uqincd_n64>;
+ defm SQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11110, "sqdecd", int_aarch64_sve_sqdecd_n64>;
+ defm UQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11111, "uqdecd", int_aarch64_sve_uqdecd_n64>;
+
+ defm SQINCH_ZPiI : sve_int_countvlv<0b01000, "sqinch", ZPR16, int_aarch64_sve_sqinch, nxv8i16>;
+ defm UQINCH_ZPiI : sve_int_countvlv<0b01001, "uqinch", ZPR16, int_aarch64_sve_uqinch, nxv8i16>;
+ defm SQDECH_ZPiI : sve_int_countvlv<0b01010, "sqdech", ZPR16, int_aarch64_sve_sqdech, nxv8i16>;
+ defm UQDECH_ZPiI : sve_int_countvlv<0b01011, "uqdech", ZPR16, int_aarch64_sve_uqdech, nxv8i16>;
defm INCH_ZPiI : sve_int_countvlv<0b01100, "inch", ZPR16>;
defm DECH_ZPiI : sve_int_countvlv<0b01101, "dech", ZPR16>;
- defm SQINCW_ZPiI : sve_int_countvlv<0b10000, "sqincw", ZPR32>;
- defm UQINCW_ZPiI : sve_int_countvlv<0b10001, "uqincw", ZPR32>;
- defm SQDECW_ZPiI : sve_int_countvlv<0b10010, "sqdecw", ZPR32>;
- defm UQDECW_ZPiI : sve_int_countvlv<0b10011, "uqdecw", ZPR32>;
+ defm SQINCW_ZPiI : sve_int_countvlv<0b10000, "sqincw", ZPR32, int_aarch64_sve_sqincw, nxv4i32>;
+ defm UQINCW_ZPiI : sve_int_countvlv<0b10001, "uqincw", ZPR32, int_aarch64_sve_uqincw, nxv4i32>;
+ defm SQDECW_ZPiI : sve_int_countvlv<0b10010, "sqdecw", ZPR32, int_aarch64_sve_sqdecw, nxv4i32>;
+ defm UQDECW_ZPiI : sve_int_countvlv<0b10011, "uqdecw", ZPR32, int_aarch64_sve_uqdecw, nxv4i32>;
defm INCW_ZPiI : sve_int_countvlv<0b10100, "incw", ZPR32>;
defm DECW_ZPiI : sve_int_countvlv<0b10101, "decw", ZPR32>;
- defm SQINCD_ZPiI : sve_int_countvlv<0b11000, "sqincd", ZPR64>;
- defm UQINCD_ZPiI : sve_int_countvlv<0b11001, "uqincd", ZPR64>;
- defm SQDECD_ZPiI : sve_int_countvlv<0b11010, "sqdecd", ZPR64>;
- defm UQDECD_ZPiI : sve_int_countvlv<0b11011, "uqdecd", ZPR64>;
+ defm SQINCD_ZPiI : sve_int_countvlv<0b11000, "sqincd", ZPR64, int_aarch64_sve_sqincd, nxv2i64>;
+ defm UQINCD_ZPiI : sve_int_countvlv<0b11001, "uqincd", ZPR64, int_aarch64_sve_uqincd, nxv2i64>;
+ defm SQDECD_ZPiI : sve_int_countvlv<0b11010, "sqdecd", ZPR64, int_aarch64_sve_sqdecd, nxv2i64>;
+ defm UQDECD_ZPiI : sve_int_countvlv<0b11011, "uqdecd", ZPR64, int_aarch64_sve_uqdecd, nxv2i64>;
defm INCD_ZPiI : sve_int_countvlv<0b11100, "incd", ZPR64>;
defm DECD_ZPiI : sve_int_countvlv<0b11101, "decd", ZPR64>;
- defm SQINCP_XPWd : sve_int_count_r_s32<0b00000, "sqincp">;
- defm SQINCP_XP : sve_int_count_r_x64<0b00010, "sqincp">;
- defm UQINCP_WP : sve_int_count_r_u32<0b00100, "uqincp">;
- defm UQINCP_XP : sve_int_count_r_x64<0b00110, "uqincp">;
- defm SQDECP_XPWd : sve_int_count_r_s32<0b01000, "sqdecp">;
- defm SQDECP_XP : sve_int_count_r_x64<0b01010, "sqdecp">;
- defm UQDECP_WP : sve_int_count_r_u32<0b01100, "uqdecp">;
- defm UQDECP_XP : sve_int_count_r_x64<0b01110, "uqdecp">;
+ defm SQINCP_XPWd : sve_int_count_r_s32<0b00000, "sqincp", int_aarch64_sve_sqincp_n32>;
+ defm SQINCP_XP : sve_int_count_r_x64<0b00010, "sqincp", int_aarch64_sve_sqincp_n64>;
+ defm UQINCP_WP : sve_int_count_r_u32<0b00100, "uqincp", int_aarch64_sve_uqincp_n32>;
+ defm UQINCP_XP : sve_int_count_r_x64<0b00110, "uqincp", int_aarch64_sve_uqincp_n64>;
+ defm SQDECP_XPWd : sve_int_count_r_s32<0b01000, "sqdecp", int_aarch64_sve_sqdecp_n32>;
+ defm SQDECP_XP : sve_int_count_r_x64<0b01010, "sqdecp", int_aarch64_sve_sqdecp_n64>;
+ defm UQDECP_WP : sve_int_count_r_u32<0b01100, "uqdecp", int_aarch64_sve_uqdecp_n32>;
+ defm UQDECP_XP : sve_int_count_r_x64<0b01110, "uqdecp", int_aarch64_sve_uqdecp_n64>;
defm INCP_XP : sve_int_count_r_x64<0b10000, "incp">;
defm DECP_XP : sve_int_count_r_x64<0b10100, "decp">;
- defm SQINCP_ZP : sve_int_count_v<0b00000, "sqincp">;
- defm UQINCP_ZP : sve_int_count_v<0b00100, "uqincp">;
- defm SQDECP_ZP : sve_int_count_v<0b01000, "sqdecp">;
- defm UQDECP_ZP : sve_int_count_v<0b01100, "uqdecp">;
+ defm SQINCP_ZP : sve_int_count_v<0b00000, "sqincp", int_aarch64_sve_sqincp>;
+ defm UQINCP_ZP : sve_int_count_v<0b00100, "uqincp", int_aarch64_sve_uqincp>;
+ defm SQDECP_ZP : sve_int_count_v<0b01000, "sqdecp", int_aarch64_sve_sqdecp>;
+ defm UQDECP_ZP : sve_int_count_v<0b01100, "uqdecp", int_aarch64_sve_uqdecp>;
defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
@@ -878,63 +946,63 @@ let Predicates = [HasSVE] in {
defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr">;
defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">;
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
- defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd">;
-
- defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr">;
- defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr">;
- defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl">;
- defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr">;
- defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr">;
- defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr">;
-
- defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr">;
- defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">;
- defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">;
-
- def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>;
- def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>;
- def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>;
- def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>;
- def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>;
- def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>;
- def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>;
- def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>;
- def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>;
- def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>;
- def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>;
- def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>;
- def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>;
- def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>;
- def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>;
- def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>;
- def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>;
- def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>;
- def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>;
- def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>;
- def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>;
- def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>;
- def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>;
- def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>;
- def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>;
- def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>;
- def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>;
- def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>;
- def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>;
- def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>;
- def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>;
- def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>;
- def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>;
- def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>;
-
- defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn">;
- defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">;
- defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm">;
- defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz">;
- defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta">;
- defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx">;
- defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti">;
- defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx">;
- defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt">;
+ defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", int_aarch64_sve_asrd>;
+
+ defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", int_aarch64_sve_asr>;
+ defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", int_aarch64_sve_lsr>;
+ defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", int_aarch64_sve_lsl>;
+ defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", null_frag>;
+ defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", null_frag>;
+ defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", null_frag>;
+
+ defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
+ defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
+ defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
+
+ defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv16i1, nxv4f32, ElementSizeS>;
+ defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv16i1, nxv8f16, ElementSizeS>;
+ defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, int_aarch64_sve_scvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
+ defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
+ defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
+ defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
+ defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, int_aarch64_sve_fcvtzs, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
+ defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, int_aarch64_sve_fcvtzs, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
+ defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, int_aarch64_sve_fcvtzu, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
+ defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, int_aarch64_sve_fcvtzu, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
+ defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, nxv8f16, nxv16i1, nxv2f64, ElementSizeD>;
+ defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, nxv2f64, nxv16i1, nxv8f16, ElementSizeD>;
+ defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, nxv4f32, nxv16i1, nxv2f64, ElementSizeD>;
+ defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, nxv2f64, nxv16i1, nxv4f32, ElementSizeD>;
+ defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, nxv2f64, nxv16i1, nxv4i32, ElementSizeD>;
+ defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, nxv2f64, nxv16i1, nxv4i32, ElementSizeD>;
+ defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, nxv8f16, nxv16i1, nxv4i32, ElementSizeS>;
+ defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, nxv4f32, nxv16i1, nxv2i64, ElementSizeD>;
+ defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, nxv8f16, nxv16i1, nxv4i32, ElementSizeS>;
+ defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, nxv8f16, nxv16i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, nxv4f32, nxv16i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, nxv8f16, nxv16i1, nxv2i64, ElementSizeD>;
+ defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
+ defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, nxv4i32, nxv16i1, nxv2f64, ElementSizeD>;
+ defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, nxv4i32, nxv16i1, nxv2f64, ElementSizeD>;
+ defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, nxv2i64, nxv16i1, nxv4f32, ElementSizeD>;
+ defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, nxv4i32, nxv16i1, nxv8f16, ElementSizeS>;
+ defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv16i1, nxv8f16, ElementSizeD>;
+ defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv16i1, nxv8f16, ElementSizeS>;
+ defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv16i1, nxv8f16, ElementSizeD>;
+ defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv16i1, nxv4f32, ElementSizeD>;
+ defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
+
+ defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;
+ defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>;
+ defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>;
+ defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>;
+ defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>;
+ defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>;
+ defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>;
+ defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
+ defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;
// InstAliases
def : InstAlias<"mov $Zd, $Zn",
@@ -1021,6 +1089,22 @@ let Predicates = [HasSVE] in {
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
(FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
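+
+ // Every predicate element width lives in the one PPR register class, so a
+ // single PTEST_PP encoding can serve AArch64ptest nodes of all four
+ // predicate types.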
+ def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)),
+ (PTEST_PP PPR:$pg, PPR:$src)>;
+ def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)),
+ (PTEST_PP PPR:$pg, PPR:$src)>;
+ def : Pat<(AArch64ptest (nxv4i1 PPR:$pg), (nxv4i1 PPR:$src)),
+ (PTEST_PP PPR:$pg, PPR:$src)>;
+ def : Pat<(AArch64ptest (nxv2i1 PPR:$pg), (nxv2i1 PPR:$src)),
+ (PTEST_PP PPR:$pg, PPR:$src)>;
+
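+ // sext_inreg is lowered to the merging, predicated SXT[BHW] instructions:
+ // (PTRUE_* 31) builds an all-lanes-true governing predicate (pattern 31 is
+ // ALL), so the IMPLICIT_DEF merge operand is never observed.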
+ def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;
+
def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>;
@@ -1070,6 +1154,83 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ // Add more complex addressing modes here as required
+ multiclass pred_load<ValueType Ty, ValueType PredTy, SDPatternOperator Load,
+ Instruction RegImmInst> {
+
+ def _default_z : Pat<(Ty (Load GPR64:$base, (PredTy PPR:$gp), (SVEDup0Undef))),
+ (RegImmInst PPR:$gp, GPR64:$base, (i64 0))>;
+ }
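+
+ // For illustration (a sketch following the multiclass above), the first
+ // 2-element instantiation below is equivalent to:
+ //   def : Pat<(nxv2i64 (zext_masked_load_i8 GPR64:$base, (nxv2i1 PPR:$gp),
+ //                                           (SVEDup0Undef))),
+ //             (LD1B_D_IMM PPR:$gp, GPR64:$base, (i64 0))>;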
+
+ // 2-element contiguous loads
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i8, LD1B_D_IMM>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i8, LD1SB_D_IMM>;
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i16, LD1H_D_IMM>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i16, LD1SH_D_IMM>;
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i32, LD1W_D_IMM>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i32, LD1SW_D_IMM>;
+ defm : pred_load<nxv2i64, nxv2i1, nonext_masked_load, LD1D_IMM>;
+ defm : pred_load<nxv2f16, nxv2i1, nonext_masked_load, LD1H_D_IMM>;
+ defm : pred_load<nxv2f32, nxv2i1, nonext_masked_load, LD1W_D_IMM>;
+ defm : pred_load<nxv2f64, nxv2i1, nonext_masked_load, LD1D_IMM>;
+
+ // 4-element contiguous loads
+ defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i8, LD1B_S_IMM>;
+ defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i8, LD1SB_S_IMM>;
+ defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i16, LD1H_S_IMM>;
+ defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i16, LD1SH_S_IMM>;
+ defm : pred_load<nxv4i32, nxv4i1, nonext_masked_load, LD1W_IMM>;
+ defm : pred_load<nxv4f16, nxv4i1, nonext_masked_load, LD1H_S_IMM>;
+ defm : pred_load<nxv4f32, nxv4i1, nonext_masked_load, LD1W_IMM>;
+
+ // 8-element contiguous loads
+ defm : pred_load<nxv8i16, nxv8i1, zext_masked_load_i8, LD1B_H_IMM>;
+ defm : pred_load<nxv8i16, nxv8i1, asext_masked_load_i8, LD1SB_H_IMM>;
+ defm : pred_load<nxv8i16, nxv8i1, nonext_masked_load, LD1H_IMM>;
+ defm : pred_load<nxv8f16, nxv8i1, nonext_masked_load, LD1H_IMM>;
+
+ // 16-element contiguous loads
+ defm : pred_load<nxv16i8, nxv16i1, nonext_masked_load, LD1B_IMM>;
+
+ multiclass pred_store<ValueType Ty, ValueType PredTy, SDPatternOperator Store,
+ Instruction RegImmInst> {
+ def _default : Pat<(Store (Ty ZPR:$vec), GPR64:$base, (PredTy PPR:$gp)),
+ (RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, (i64 0))>;
+ }
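+
+ // For illustration (a sketch following the multiclass above), the first
+ // 2-element instantiation below is equivalent to:
+ //   def : Pat<(trunc_masked_store_i8 (nxv2i64 ZPR:$vec), GPR64:$base,
+ //                                    (nxv2i1 PPR:$gp)),
+ //             (ST1B_D_IMM ZPR:$vec, PPR:$gp, GPR64:$base, (i64 0))>;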
+
+ // 2-element contiguous stores
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i8, ST1B_D_IMM>;
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i16, ST1H_D_IMM>;
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i32, ST1W_D_IMM>;
+ defm : pred_store<nxv2i64, nxv2i1, nontrunc_masked_store, ST1D_IMM>;
+ defm : pred_store<nxv2f16, nxv2i1, nontrunc_masked_store, ST1H_D_IMM>;
+ defm : pred_store<nxv2f32, nxv2i1, nontrunc_masked_store, ST1W_D_IMM>;
+ defm : pred_store<nxv2f64, nxv2i1, nontrunc_masked_store, ST1D_IMM>;
+
+ // 4-element contiguous stores
+ defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i8, ST1B_S_IMM>;
+ defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i16, ST1H_S_IMM>;
+ defm : pred_store<nxv4i32, nxv4i1, nontrunc_masked_store, ST1W_IMM>;
+ defm : pred_store<nxv4f16, nxv4i1, nontrunc_masked_store, ST1H_S_IMM>;
+ defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W_IMM>;
+
+ // 8-element contiguous stores
+ defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H_IMM>;
+ defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;
+ defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;
+
+ // 16-element contiguous stores
+ defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;
+
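+ // Non-temporal loads/stores share the register + immediate addressing of
+ // the contiguous forms above, so the same helpers apply.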
+ defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRI>;
+ defm : pred_load<nxv8i16, nxv8i1, non_temporal_load, LDNT1H_ZRI>;
+ defm : pred_load<nxv4i32, nxv4i1, non_temporal_load, LDNT1W_ZRI>;
+ defm : pred_load<nxv2i64, nxv2i1, non_temporal_load, LDNT1D_ZRI>;
+
+ defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRI>;
+ defm : pred_store<nxv8i16, nxv8i1, non_temporal_store, STNT1H_ZRI>;
+ defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRI>;
+ defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRI>;
}
let Predicates = [HasSVE2] in {
@@ -1286,46 +1447,46 @@ let Predicates = [HasSVE2] in {
defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
// SVE2 bitwise shift right narrow (bottom)
- defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">;
- defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">;
- defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">;
- defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">;
- defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">;
- defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">;
- defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">;
- defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">;
+ defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb", int_aarch64_sve_sqshrunb>;
+ defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb", int_aarch64_sve_sqrshrunb>;
+ defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb", int_aarch64_sve_shrnb>;
+ defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb", int_aarch64_sve_rshrnb>;
+ defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb", int_aarch64_sve_sqshrnb>;
+ defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb", int_aarch64_sve_sqrshrnb>;
+ defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb", int_aarch64_sve_uqshrnb>;
+ defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb", int_aarch64_sve_uqrshrnb>;
// SVE2 bitwise shift right narrow (top)
- defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">;
- defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">;
- defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">;
- defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">;
- defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">;
- defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">;
- defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">;
- defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">;
+ defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt", int_aarch64_sve_sqshrunt>;
+ defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt", int_aarch64_sve_sqrshrunt>;
+ defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt", int_aarch64_sve_shrnt>;
+ defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt", int_aarch64_sve_rshrnt>;
+ defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt", int_aarch64_sve_sqshrnt>;
+ defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt", int_aarch64_sve_sqrshrnt>;
+ defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt", int_aarch64_sve_uqshrnt>;
+ defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt", int_aarch64_sve_uqrshrnt>;
// SVE2 integer add/subtract narrow high part (bottom)
- defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb">;
- defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb">;
- defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb">;
- defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb">;
+ defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb", int_aarch64_sve_addhnb>;
+ defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb", int_aarch64_sve_raddhnb>;
+ defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb", int_aarch64_sve_subhnb>;
+ defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb", int_aarch64_sve_rsubhnb>;
// SVE2 integer add/subtract narrow high part (top)
- defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt">;
- defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt">;
- defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt">;
- defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt">;
+ defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt", int_aarch64_sve_addhnt>;
+ defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt", int_aarch64_sve_raddhnt>;
+ defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt", int_aarch64_sve_subhnt>;
+ defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt", int_aarch64_sve_rsubhnt>;
// SVE2 saturating extract narrow (bottom)
- defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb">;
- defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb">;
- defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb">;
+ defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb", int_aarch64_sve_sqxtnb>;
+ defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb", int_aarch64_sve_uqxtnb>;
+ defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb", int_aarch64_sve_sqxtunb>;
// SVE2 saturating extract narrow (top)
- defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt">;
- defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt">;
- defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt">;
+ defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt", int_aarch64_sve_sqxtnt>;
+ defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt", int_aarch64_sve_uqxtnt>;
+ defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt", int_aarch64_sve_sqxtunt>;
// SVE2 character match
defm MATCH_PPzZZ : sve2_char_match<0b0, "match">;
@@ -1353,32 +1514,32 @@ let Predicates = [HasSVE2] in {
defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
// SVE2 floating-point base 2 logarithm as integer
- defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
+ defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>;
// SVE2 floating-point convert precision
- defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">;
- defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">;
- defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">;
- def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
+ defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding_top<"fcvtxnt", "int_aarch64_sve_fcvtxnt">;
+ defm FCVTX_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtx", "int_aarch64_sve_fcvtx">;
+ defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt", "int_aarch64_sve_fcvtnt">;
+ defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt", "int_aarch64_sve_fcvtlt">;
// SVE2 floating-point pairwise operations
- defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">;
- defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp">;
- defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp">;
- defm FMAXP_ZPmZZ : sve2_fp_pairwise_pred<0b110, "fmaxp">;
- defm FMINP_ZPmZZ : sve2_fp_pairwise_pred<0b111, "fminp">;
+ defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp", int_aarch64_sve_faddp>;
+ defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp", int_aarch64_sve_fmaxnmp>;
+ defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp", int_aarch64_sve_fminnmp>;
+ defm FMAXP_ZPmZZ : sve2_fp_pairwise_pred<0b110, "fmaxp", int_aarch64_sve_fmaxp>;
+ defm FMINP_ZPmZZ : sve2_fp_pairwise_pred<0b111, "fminp", int_aarch64_sve_fminp>;
// SVE2 floating-point multiply-add long (indexed)
- def FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b00, "fmlalb">;
- def FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b01, "fmlalt">;
- def FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b10, "fmlslb">;
- def FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b11, "fmlslt">;
+ defm FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b00, "fmlalb", int_aarch64_sve_fmlalb_lane>;
+ defm FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b01, "fmlalt", int_aarch64_sve_fmlalt_lane>;
+ defm FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b10, "fmlslb", int_aarch64_sve_fmlslb_lane>;
+ defm FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b11, "fmlslt", int_aarch64_sve_fmlslt_lane>;
// SVE2 floating-point multiply-add long
- def FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b00, "fmlalb">;
- def FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b01, "fmlalt">;
- def FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb">;
- def FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt">;
+ defm FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b00, "fmlalb", int_aarch64_sve_fmlalb>;
+ defm FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b01, "fmlalt", int_aarch64_sve_fmlalt>;
+ defm FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb", int_aarch64_sve_fmlslb>;
+ defm FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt", int_aarch64_sve_fmlslt>;
// SVE2 bitwise ternary operations
defm EOR3_ZZZZ_D : sve2_int_bitwise_ternary_op<0b000, "eor3">;
@@ -1427,15 +1588,15 @@ let Predicates = [HasSVE2] in {
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">;
// SVE2 integer compare scalar count and limit
- defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">;
- defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">;
- defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">;
- defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi">;
-
- defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege">;
- defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt">;
- defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">;
- defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">;
+ defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege>;
+ defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;
+ defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs>;
+ defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi>;
+
+ defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege>;
+ defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;
+ defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs>;
+ defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi>;
// SVE2 pointer conflict compare
defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td
deleted file mode 100644
index f1e76e2c20d3..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td
+++ /dev/null
@@ -1,850 +0,0 @@
-//=- AArch64SchedExynosM1.td - Samsung Exynos M1 Sched Defs --*- tablegen -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the machine model for the Samsung Exynos M1 to support
-// instruction scheduling and other instruction cost heuristics.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// The Exynos-M1 is a traditional superscalar microprocessor with a
-// 4-wide in-order stage for decode and dispatch and a wider issue stage.
-// The execution units, as well as the loads and stores, execute out of order.
-
-def ExynosM1Model : SchedMachineModel {
- let IssueWidth = 4; // Up to 4 uops per cycle.
- let MicroOpBufferSize = 96; // ROB size.
- let LoopMicroOpBufferSize = 24; // Based on the instruction queue size.
- let LoadLatency = 4; // Optimistic load cases.
- let MispredictPenalty = 14; // Minimum branch misprediction penalty.
- let CompleteModel = 1; // Use the default model otherwise.
-
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
-}
-
-//===----------------------------------------------------------------------===//
-// Define each kind of processor resource and the number available on the Exynos-M1,
-// which has 9 pipelines, each with its own out-of-order dispatch queue.
-
-let SchedModel = ExynosM1Model in {
-
-def M1UnitA : ProcResource<2>; // Simple integer
-def M1UnitC : ProcResource<1>; // Simple and complex integer
-def M1UnitD : ProcResource<1>; // Integer division (inside C, serialized)
-def M1UnitB : ProcResource<2>; // Branch
-def M1UnitL : ProcResource<1>; // Load
-def M1UnitS : ProcResource<1>; // Store
-def M1PipeF0 : ProcResource<1>; // FP #0
-let Super = M1PipeF0 in {
- def M1UnitFMAC : ProcResource<1>; // FP multiplication
- def M1UnitNAL0 : ProcResource<1>; // Simple vector
- def M1UnitNMISC : ProcResource<1>; // Miscellanea
- def M1UnitFCVT : ProcResource<1>; // FP conversion
- def M1UnitNCRYPT : ProcResource<1>; // Cryptographic
-}
-def M1PipeF1 : ProcResource<1>; // FP #1
-let Super = M1PipeF1 in {
- def M1UnitFADD : ProcResource<1>; // Simple FP
- def M1UnitNAL1 : ProcResource<1>; // Simple vector
- def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized)
- def M1UnitFST : ProcResource<1>; // FP store
-}
-
-def M1UnitALU : ProcResGroup<[M1UnitA,
- M1UnitC]>; // All integer
-def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
- M1UnitNAL1]>; // All simple vector
-
-//===----------------------------------------------------------------------===//
-// Coarse scheduling model.
-
-def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
-def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; }
-def M1WriteAA : SchedWriteRes<[M1UnitALU]> { let Latency = 2;
- let ResourceCycles = [2]; }
-def M1WriteAB : SchedWriteRes<[M1UnitALU,
- M1UnitC]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M1WriteAC : SchedWriteRes<[M1UnitALU,
- M1UnitALU,
- M1UnitC]> { let Latency = 2;
- let NumMicroOps = 3; }
-def M1WriteAD : SchedWriteRes<[M1UnitALU,
- M1UnitC]> { let Latency = 2;
- let NumMicroOps = 2; }
-def M1WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M1WriteA1]>,
- SchedVar<ExynosLogicPred, [M1WriteA1]>,
- SchedVar<NoSchedPred, [M1WriteAA]>]>;
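-// A SchedWriteVariant resolves to the first SchedVar whose predicate holds
-// for the instruction; NoSchedPred is the catch-all fallback.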
-def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
-def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
-
-def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
-def M1WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M1WriteAC]>,
- SchedVar<NoSchedPred, [M1WriteAB]>]>;
-
-def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
-def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
-def M1WriteLA : SchedWriteRes<[M1UnitL]> { let Latency = 6;
- let ResourceCycles = [2]; }
-def M1WriteLB : SchedWriteRes<[M1UnitL,
- M1UnitA]> { let Latency = 4;
- let NumMicroOps = 2; }
-def M1WriteLC : SchedWriteRes<[M1UnitL,
- M1UnitA]> { let Latency = 5;
- let NumMicroOps = 2; }
-def M1WriteLD : SchedWriteRes<[M1UnitL,
- M1UnitA]> { let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [2, 1]; }
-def M1WriteLH : SchedWriteRes<[]> { let Latency = 5;
- let NumMicroOps = 0; }
-def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteLC]>,
- SchedVar<NoSchedPred, [M1WriteL5]>]>;
-
-def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
-def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; }
-def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
-def M1WriteSA : SchedWriteRes<[M1UnitS,
- M1UnitFST,
- M1UnitA]> { let Latency = 3;
- let NumMicroOps = 2; }
-def M1WriteSB : SchedWriteRes<[M1UnitS,
- M1UnitFST,
- M1UnitS,
- M1UnitFST,
- M1UnitA]> { let Latency = 3;
- let NumMicroOps = 3; }
-def M1WriteSC : SchedWriteRes<[M1UnitS,
- M1UnitA]> { let Latency = 2;
- let NumMicroOps = 2; }
-def M1WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteSC]>,
- SchedVar<NoSchedPred, [M1WriteS1]>]>;
-
-def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
- SchedVar<NoSchedPred, [ReadDefault]>]>;
-
-// Branch instructions.
-def : WriteRes<WriteBr, []> { let Latency = 0; }
-def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; }
-
-// Arithmetic and logical integer instructions.
-def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; }
-
-// Move instructions.
-def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; }
-
-// Divide and multiply instructions.
-def : WriteRes<WriteID32, [M1UnitC,
- M1UnitD]> { let Latency = 13;
- let ResourceCycles = [1, 13]; }
-def : WriteRes<WriteID64, [M1UnitC,
- M1UnitD]> { let Latency = 21;
- let ResourceCycles = [1, 21]; }
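-// The ResourceCycles above hold the serialized divider (M1UnitD) for the
-// entire latency, modelling a non-pipelined divide.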
-// TODO: Long multiplication takes 5 cycles and also uses the ALU.
-def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
-def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4;
- let ResourceCycles = [2]; }
-
-// Miscellaneous instructions.
-def : WriteRes<WriteExtr, [M1UnitALU,
- M1UnitALU]> { let Latency = 2;
- let NumMicroOps = 2; }
-
-// Addressing modes.
-def : WriteRes<WriteAdr, []> { let Latency = 1;
- let NumMicroOps = 0; }
-def : SchedAlias<ReadAdrBase, M1ReadAdrBase>;
-
-// Load instructions.
-def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; }
-def : WriteRes<WriteLDHi, []> { let Latency = 4;
- let NumMicroOps = 0; }
-def : SchedAlias<WriteLDIdx, M1WriteLX>;
-
-// Store instructions.
-def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; }
-def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; }
-def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; }
-def : SchedAlias<WriteSTIdx, M1WriteSX>;
-
-// FP data instructions.
-def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; }
-def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; }
-def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15;
- let ResourceCycles = [15]; }
-def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; }
-
-// FP miscellaneous instructions.
-def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; }
-def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; }
-def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; }
-
-// FP load instructions.
-def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }
-
-// FP store instructions.
-def : WriteRes<WriteVST, [M1UnitS,
- M1UnitFST]> { let Latency = 1;
- let NumMicroOps = 1; }
-
-// ASIMD FP instructions.
-def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }
-
-// Other miscellaneous instructions.
-def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
-def : WriteRes<WriteBarrier, []> { let Latency = 1; }
-def : WriteRes<WriteHint, []> { let Latency = 1; }
-def : WriteRes<WriteSys, []> { let Latency = 1; }
-
-//===----------------------------------------------------------------------===//
-// Fast forwarding.
-
-// TODO: Add FP register forwarding rules.
-def : ReadAdvance<ReadI, 0>;
-def : ReadAdvance<ReadISReg, 0>;
-def : ReadAdvance<ReadIEReg, 0>;
-def : ReadAdvance<ReadIM, 0>;
-// TODO: The forwarding for WriteIM32 actually saves 2 cycles.
-def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>;
-def : ReadAdvance<ReadID, 0>;
-def : ReadAdvance<ReadExtrHi, 0>;
-def : ReadAdvance<ReadAdrBase, 0>;
-def : ReadAdvance<ReadVLD, 0>;
-
-//===----------------------------------------------------------------------===//
-// Finer scheduling model.
-
-def M1WriteNEONA : SchedWriteRes<[M1UnitNALU,
- M1UnitNALU,
- M1UnitFADD]> { let Latency = 9;
- let NumMicroOps = 3; }
-def M1WriteNEONB : SchedWriteRes<[M1UnitNALU,
- M1UnitFST]> { let Latency = 5;
- let NumMicroOps = 2; }
-def M1WriteNEONC : SchedWriteRes<[M1UnitNALU,
- M1UnitFST]> { let Latency = 6;
- let NumMicroOps = 2; }
-def M1WriteNEOND : SchedWriteRes<[M1UnitNALU,
- M1UnitFST,
- M1UnitL]> { let Latency = 10;
- let NumMicroOps = 3; }
-def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT,
- M1UnitFST]> { let Latency = 8;
- let NumMicroOps = 2; }
-def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT,
- M1UnitFST,
- M1UnitL]> { let Latency = 13;
- let NumMicroOps = 3; }
-def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC,
- M1UnitFST]> { let Latency = 6;
- let NumMicroOps = 2; }
-def M1WriteNEONH : SchedWriteRes<[M1UnitNALU,
- M1UnitFST]> { let Latency = 3;
- let NumMicroOps = 2; }
-def M1WriteNEONI : SchedWriteRes<[M1UnitFST,
- M1UnitL]> { let Latency = 9;
- let NumMicroOps = 2; }
-def M1WriteNEONJ : SchedWriteRes<[M1UnitNMISC,
- M1UnitFMAC]> { let Latency = 6;
- let NumMicroOps = 2; }
-def M1WriteNEONK : SchedWriteRes<[M1UnitNMISC,
- M1UnitFMAC]> { let Latency = 7;
- let NumMicroOps = 2; }
-def M1WriteNEONL : SchedWriteRes<[M1UnitNALU]> { let Latency = 2;
- let ResourceCycles = [2]; }
-def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; }
-def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; }
-def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; }
-def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; }
-def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; }
-// TODO
-def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15;
- let ResourceCycles = [15]; }
-def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23;
- let ResourceCycles = [23]; }
-def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; }
-def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; }
-def M1WriteNAL11 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; }
-def M1WriteNAL12 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 2; }
-def M1WriteNAL13 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 3; }
-def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
-def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; }
-def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; }
-def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; }
-def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; }
-def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; }
-def M1WriteTB : SchedWriteRes<[M1UnitC,
- M1UnitALU]> { let Latency = 2;
- let NumMicroOps = 2; }
-def M1WriteVLDA : SchedWriteRes<[M1UnitL,
- M1UnitL]> { let Latency = 6;
- let NumMicroOps = 2; }
-def M1WriteVLDB : SchedWriteRes<[M1UnitL,
- M1UnitL,
- M1UnitL]> { let Latency = 7;
- let NumMicroOps = 3; }
-def M1WriteVLDC : SchedWriteRes<[M1UnitL,
- M1UnitL,
- M1UnitL,
- M1UnitL]> { let Latency = 8;
- let NumMicroOps = 4; }
-def M1WriteVLDD : SchedWriteRes<[M1UnitL,
- M1UnitNALU]> { let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [2, 1]; }
-def M1WriteVLDE : SchedWriteRes<[M1UnitL,
- M1UnitNALU]> { let Latency = 6;
- let NumMicroOps = 2; }
-def M1WriteVLDF : SchedWriteRes<[M1UnitL,
- M1UnitL]> { let Latency = 10;
- let NumMicroOps = 2;
- let ResourceCycles = [1, 1]; }
-def M1WriteVLDG : SchedWriteRes<[M1UnitL,
- M1UnitNALU,
- M1UnitNALU]> { let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [2, 1, 1]; }
-def M1WriteVLDH : SchedWriteRes<[M1UnitL,
- M1UnitNALU,
- M1UnitNALU]> { let Latency = 6;
- let NumMicroOps = 3; }
-def M1WriteVLDI : SchedWriteRes<[M1UnitL,
- M1UnitL,
- M1UnitL]> { let Latency = 12;
- let NumMicroOps = 3;
- let ResourceCycles = [2, 2, 2]; }
-def M1WriteVLDJ : SchedWriteRes<[M1UnitL,
- M1UnitNALU,
- M1UnitNALU,
- M1UnitNALU]> { let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [2, 1, 1, 1]; }
-def M1WriteVLDK : SchedWriteRes<[M1UnitL,
- M1UnitNALU,
- M1UnitNALU,
- M1UnitNALU,
- M1UnitNALU]> { let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [2, 1, 1, 1, 1]; }
-def M1WriteVLDL : SchedWriteRes<[M1UnitL,
- M1UnitNALU,
- M1UnitNALU,
- M1UnitL,
- M1UnitNALU]> { let Latency = 7;
- let NumMicroOps = 5;
- let ResourceCycles = [1, 1, 1, 1, 1]; }
-def M1WriteVLDM : SchedWriteRes<[M1UnitL,
- M1UnitNALU,
- M1UnitNALU,
- M1UnitL,
- M1UnitNALU,
- M1UnitNALU]> { let Latency = 7;
- let NumMicroOps = 6;
- let ResourceCycles = [1, 1, 1, 1, 1, 1]; }
-def M1WriteVLDN : SchedWriteRes<[M1UnitL,
- M1UnitL,
- M1UnitL,
- M1UnitL]> { let Latency = 14;
- let NumMicroOps = 4;
- let ResourceCycles = [2, 1, 2, 1]; }
-def M1WriteVSTA : WriteSequence<[WriteVST], 2>;
-def M1WriteVSTB : WriteSequence<[WriteVST], 3>;
-def M1WriteVSTC : WriteSequence<[WriteVST], 4>;
-def M1WriteVSTD : SchedWriteRes<[M1UnitS,
- M1UnitFST,
- M1UnitFST]> { let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [7, 1, 1]; }
-def M1WriteVSTE : SchedWriteRes<[M1UnitS,
- M1UnitFST,
- M1UnitS,
- M1UnitFST,
- M1UnitFST]> { let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [7, 1, 1, 1, 1]; }
-def M1WriteVSTF : SchedWriteRes<[M1UnitNALU,
- M1UnitS,
- M1UnitFST,
- M1UnitS,
- M1UnitFST,
- M1UnitFST,
- M1UnitFST]> { let Latency = 15;
- let NumMicroOps = 5;
- let ResourceCycles = [1, 7, 1, 7, 1, 1, 1]; }
-def M1WriteVSTG : SchedWriteRes<[M1UnitNALU,
- M1UnitS,
- M1UnitFST,
- M1UnitS,
- M1UnitFST,
- M1UnitS,
- M1UnitFST,
- M1UnitFST,
- M1UnitFST]> { let Latency = 16;
- let NumMicroOps = 6;
- let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1]; }
-def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
- M1UnitS,
- M1UnitFST,
- M1UnitFST,
- M1UnitFST]> { let Latency = 14;
- let NumMicroOps = 4;
- let ResourceCycles = [1, 7, 1, 7, 1]; }
-def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
- M1UnitS,
- M1UnitFST,
- M1UnitS,
- M1UnitFST,
- M1UnitS,
- M1UnitFST,
- M1UnitS,
- M1UnitFST,
- M1UnitFST,
- M1UnitFST]> { let Latency = 17;
- let NumMicroOps = 7;
- let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
-
-// Special cases.
-def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
-def M1WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M1WriteNALU1]>,
- SchedVar<NoSchedPred, [M1WriteA1]>]>;
-
-// Fast forwarding.
-def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
-
-// Branch instructions
-def : InstRW<[M1WriteB1], (instrs Bcc)>;
-def : InstRW<[M1WriteA1], (instrs BL)>;
-def : InstRW<[M1WriteBX], (instrs BLR)>;
-def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
-def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
-
-// Arithmetic and logical integer instructions.
-def : InstRW<[M1WriteAX], (instregex ".+rx(64)?$")>;
-def : InstRW<[M1WriteAX], (instregex ".+rs$")>;
-
-// Move instructions.
-def : InstRW<[M1WriteCOPY], (instrs COPY)>;
-
-// Divide and multiply instructions.
-
-// Miscellaneous instructions.
-
-// Load instructions.
-def : InstRW<[M1WriteLB,
- WriteLDHi,
- WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
-def : InstRW<[M1WriteLC,
- ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
-def : InstRW<[M1WriteL5,
- ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
-def : InstRW<[M1WriteLC,
- ReadAdrBase], (instrs PRFMroW)>;
-def : InstRW<[M1WriteL5,
- ReadAdrBase], (instrs PRFMroX)>;
-
-// Store instructions.
-def : InstRW<[M1WriteSC,
- ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
-def : InstRW<[WriteST,
- ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
-
-// FP data instructions.
-def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
-def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>;
-def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>;
-def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>;
-def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>;
-def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>;
-def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>;
-def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>;
-def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
-def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>;
-def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>;
-def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
-def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;
-
-// FP miscellaneous instructions.
-def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
-def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
-def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
-def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
-def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev1")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^FRECPXv1")>;
-def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)S(16|32|64)")>;
-def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>;
-def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;
-
-// FP load instructions.
-def : InstRW<[WriteVLD], (instregex "^LDR[DSQ]l")>;
-def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
-def : InstRW<[WriteVLD,
- WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
-def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[M1WriteLD,
- ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
-def : InstRW<[WriteVLD,
- ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
-def : InstRW<[M1WriteLD,
- ReadAdrBase], (instregex "^LDRQro[WX]")>;
-def : InstRW<[WriteVLD,
- M1WriteLH], (instregex "^LDN?P[DS]i")>;
-def : InstRW<[M1WriteLA,
- M1WriteLH], (instregex "^LDN?PQi")>;
-def : InstRW<[M1WriteLC,
- M1WriteLH,
- WriteAdr], (instregex "^LDP[DS](post|pre)")>;
-def : InstRW<[M1WriteLD,
- M1WriteLH,
- WriteAdr], (instregex "^LDPQ(post|pre)")>;
-
-// FP store instructions.
-def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
-def : InstRW<[WriteVST,
- WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
-def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
-def : InstRW<[M1WriteSA,
- ReadAdrBase], (instregex "^STR[BDHS]roW")>;
-def : InstRW<[WriteVST,
- ReadAdrBase], (instregex "^STR[BDHS]roX")>;
-def : InstRW<[M1WriteSA,
- ReadAdrBase], (instregex "^STRQro[WX]")>;
-def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
-def : InstRW<[WriteVST,
- WriteAdr], (instregex "^STP[DS](post|pre)")>;
-def : InstRW<[M1WriteSB,
- WriteAdr], (instregex "^STPQ(post|pre)")>;
-
-// ASIMD instructions.
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>;
-def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
-def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>;
-def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>;
-def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>;
-def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>;
-def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
-def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
-def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
-def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>;
-def : InstRW<[M1WriteNALU1], (instregex "^SHL[dv]")>;
-def : InstRW<[M1WriteNALU1], (instregex "^[SU]SH[LR][dv]")>;
-def : InstRW<[M1WriteNALU1], (instregex "^S[RS]I[dv]")>;
-def : InstRW<[M1WriteNAL13], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>;
-def : InstRW<[M1WriteNAL13], (instregex "^[SU]RSH[LR][dv]")>;
-def : InstRW<[M1WriteNAL13], (instregex "^[SU]QR?SHLU?[bdhsv]")>;
-
-// ASIMD FP instructions.
-def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>;
-def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>;
-def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
-def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>;
-def : InstRW<[M1WriteFVAR15], (instregex "FDIVv.f32")>;
-def : InstRW<[M1WriteFVAR23], (instregex "FDIVv2f64")>;
-def : InstRW<[M1WriteFVAR15], (instregex "FSQRTv.f32")>;
-def : InstRW<[M1WriteFVAR23], (instregex "FSQRTv2f64")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>;
-def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
-def : InstRW<[M1WriteNEONJ], (instregex "^FMULX?v.i")>;
-def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v.f")>;
-def : InstRW<[M1WriteNEONK], (instregex "^FML[AS]v.i")>;
-def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v.f")>;
-def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>;
-
-// ASIMD miscellaneous instructions.
-def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>;
-def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>;
-def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>;
-def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>;
-def : InstRW<[M1WriteNALU1], (instregex "^EXTv8")>;
-def : InstRW<[M1WriteNEONL], (instregex "^EXTv16")>;
-def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>;
-def : InstRW<[M1WriteNALU1], (instregex "^CPY")>;
-def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>;
-def : InstRW<[M1WriteNALU1], (instregex "^MOVI[Dv]")>;
-def : InstRW<[M1WriteNALU1], (instregex "^FMOVv")>;
-def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev[248]")>;
-def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>;
-def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>;
-def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>;
-def : InstRW<[WriteSequence<[M1WriteNAL11], 2>],
- (instregex "^TB[LX]v8i8Two")>;
-def : InstRW<[WriteSequence<[M1WriteNAL11], 3>],
- (instregex "^TB[LX]v8i8Three")>;
-def : InstRW<[WriteSequence<[M1WriteNAL11], 4>],
- (instregex "^TB[LX]v8i8Four")>;
-def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>;
-def : InstRW<[WriteSequence<[M1WriteNAL12], 2>],
- (instregex "^TB[LX]v16i8Two")>;
-def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
- (instregex "^TB[LX]v16i8Three")>;
-def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
- (instregex "^TB[LX]v16i8Four")>;
-def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>;
-def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>;
-def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>;
-def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>;
-def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;
-
-// ASIMD load instructions.
-def : InstRW<[M1WriteVLDD], (instregex "LD1i(8|16|32)$")>;
-def : InstRW<[M1WriteVLDD,
- WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
-def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>;
-def : InstRW<[M1WriteVLDE,
- WriteAdr], (instregex "LD1i(64)_POST$")>;
-
-def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s)$")>;
-def : InstRW<[WriteVLD,
- WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[WriteVLD], (instregex "LD1Rv(1d)$")>;
-def : InstRW<[WriteVLD,
- WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
-def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLD,
- WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[WriteVLD,
- WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLD,
- WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVLDA,
- WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVLDA,
- WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVLDB,
- WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVLDB,
- WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVLDC,
- WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVLDC,
- WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDG], (instregex "LD2i(8|16)$")>;
-def : InstRW<[M1WriteVLDG,
- WriteAdr], (instregex "LD2i(8|16)_POST$")>;
-def : InstRW<[M1WriteVLDG], (instregex "LD2i(32)$")>;
-def : InstRW<[M1WriteVLDG,
- WriteAdr], (instregex "LD2i(32)_POST$")>;
-def : InstRW<[M1WriteVLDH], (instregex "LD2i(64)$")>;
-def : InstRW<[M1WriteVLDH,
- WriteAdr], (instregex "LD2i(64)_POST$")>;
-
-def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDA,
- WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(1d)$")>;
-def : InstRW<[M1WriteVLDA,
- WriteAdr], (instregex "LD2Rv(1d)_POST$")>;
-def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVLDA,
- WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDF,
- WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVLDF,
- WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(2d)$")>;
-def : InstRW<[M1WriteVLDF,
- WriteAdr], (instregex "LD2Twov(2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDJ], (instregex "LD3i(8|16)$")>;
-def : InstRW<[M1WriteVLDJ,
- WriteAdr], (instregex "LD3i(8|16)_POST$")>;
-def : InstRW<[M1WriteVLDJ], (instregex "LD3i(32)$")>;
-def : InstRW<[M1WriteVLDJ,
- WriteAdr], (instregex "LD3i(32)_POST$")>;
-def : InstRW<[M1WriteVLDL], (instregex "LD3i(64)$")>;
-def : InstRW<[M1WriteVLDL,
- WriteAdr], (instregex "LD3i(64)_POST$")>;
-
-def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDB,
- WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(1d)$")>;
-def : InstRW<[M1WriteVLDB,
- WriteAdr], (instregex "LD3Rv(1d)_POST$")>;
-def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVLDB,
- WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(2d)$")>;
-def : InstRW<[M1WriteVLDB,
- WriteAdr], (instregex "LD3Rv(2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDI,
- WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVLDI,
- WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(2d)$")>;
-def : InstRW<[M1WriteVLDI,
- WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDK], (instregex "LD4i(8|16)$")>;
-def : InstRW<[M1WriteVLDK,
- WriteAdr], (instregex "LD4i(8|16)_POST$")>;
-def : InstRW<[M1WriteVLDK], (instregex "LD4i(32)$")>;
-def : InstRW<[M1WriteVLDK,
- WriteAdr], (instregex "LD4i(32)_POST$")>;
-def : InstRW<[M1WriteVLDM], (instregex "LD4i(64)$")>;
-def : InstRW<[M1WriteVLDM,
- WriteAdr], (instregex "LD4i(64)_POST$")>;
-
-def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDC,
- WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(1d)$")>;
-def : InstRW<[M1WriteVLDC,
- WriteAdr], (instregex "LD4Rv(1d)_POST$")>;
-def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVLDC,
- WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(2d)$")>;
-def : InstRW<[M1WriteVLDC,
- WriteAdr], (instregex "LD4Rv(2d)_POST$")>;
-
-def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVLDN,
- WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVLDN,
- WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(2d)$")>;
-def : InstRW<[M1WriteVLDN,
- WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
-
-// ASIMD store instructions.
-def : InstRW<[M1WriteVSTD], (instregex "ST1i(8|16|32)$")>;
-def : InstRW<[M1WriteVSTD,
- WriteAdr], (instregex "ST1i(8|16|32)_POST$")>;
-def : InstRW<[M1WriteVSTD], (instregex "ST1i(64)$")>;
-def : InstRW<[M1WriteVSTD,
- WriteAdr], (instregex "ST1i(64)_POST$")>;
-
-def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[WriteVST,
- WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVST,
- WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVSTA,
- WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVSTA,
- WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVSTB,
- WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVSTB,
- WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteVSTC,
- WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteVSTC,
- WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[M1WriteVSTD], (instregex "ST2i(8|16|32)$")>;
-def : InstRW<[M1WriteVSTD,
- WriteAdr], (instregex "ST2i(8|16|32)_POST$")>;
-def : InstRW<[M1WriteVSTD], (instregex "ST2i(64)$")>;
-def : InstRW<[M1WriteVSTD,
- WriteAdr], (instregex "ST2i(64)_POST$")>;
-
-def : InstRW<[M1WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVSTD,
- WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVSTE,
- WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(2d)$")>;
-def : InstRW<[M1WriteVSTE,
- WriteAdr], (instregex "ST2Twov(2d)_POST$")>;
-
-def : InstRW<[M1WriteVSTH], (instregex "ST3i(8|16)$")>;
-def : InstRW<[M1WriteVSTH,
- WriteAdr], (instregex "ST3i(8|16)_POST$")>;
-def : InstRW<[M1WriteVSTH], (instregex "ST3i(32)$")>;
-def : InstRW<[M1WriteVSTH,
- WriteAdr], (instregex "ST3i(32)_POST$")>;
-def : InstRW<[M1WriteVSTF], (instregex "ST3i(64)$")>;
-def : InstRW<[M1WriteVSTF,
- WriteAdr], (instregex "ST3i(64)_POST$")>;
-
-def : InstRW<[M1WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVSTF,
- WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVSTG,
- WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(2d)$")>;
-def : InstRW<[M1WriteVSTG,
- WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
-
-def : InstRW<[M1WriteVSTH], (instregex "ST4i(8|16)$")>;
-def : InstRW<[M1WriteVSTH,
- WriteAdr], (instregex "ST4i(8|16)_POST$")>;
-def : InstRW<[M1WriteVSTH], (instregex "ST4i(32)$")>;
-def : InstRW<[M1WriteVSTH,
- WriteAdr], (instregex "ST4i(32)_POST$")>;
-def : InstRW<[M1WriteVSTF], (instregex "ST4i(64)$")>;
-def : InstRW<[M1WriteVSTF,
- WriteAdr], (instregex "ST4i(64)_POST$")>;
-
-def : InstRW<[M1WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteVSTF,
- WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(16b|8h|4s)$")>;
-def : InstRW<[M1WriteVSTI,
- WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
-def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(2d)$")>;
-def : InstRW<[M1WriteVSTI,
- WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
-
-// Cryptography instructions.
-def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
-def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
-
-def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
-def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
-def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>;
-def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>;
-def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;
-
-// CRC instructions.
-def : InstRW<[M1WriteC2], (instregex "^CRC32")>;
-
-} // SchedModel = ExynosM1Model
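A note on the idiom that recurs throughout the removed M1 model above: SchedWriteVariant picks a concrete write at scheduling time by testing each SchedVar predicate in order, with NoSchedPred as the fall-through default. A minimal sketch of the pattern, using hypothetical Hypo* names; only the predicate and unit names are taken from the model:

def HypoWriteFast : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
def HypoWriteSlow : SchedWriteRes<[M1UnitALU]> { let Latency = 2;
                                                 let ResourceCycles = [2]; }
// The first SchedVar whose predicate holds wins, so NoSchedPred must be last.
def HypoWriteVar : SchedWriteVariant<[SchedVar<ScaledIdxPred, [HypoWriteFast]>,
                                      SchedVar<NoSchedPred, [HypoWriteSlow]>]>;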
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index c9d29d75d9db..d1734c455b2b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -130,8 +130,10 @@ def M3WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
SchedVar<ExynosLogicPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAV : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+ SchedVar<ExynosArithPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
+ SchedVar<ExynosLogicPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M3WriteA1]>,
SchedVar<ExynosLogicPred, [M3WriteA1]>,
@@ -165,6 +167,8 @@ def M3WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
def M3WriteLX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteL5]>,
SchedVar<NoSchedPred, [M3WriteL4]>]>;
+def M3WriteLY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteLE]>,
+ SchedVar<NoSchedPred, [M3WriteL5]>]>;
def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; }
def M3WriteSA : SchedWriteRes<[M3UnitA,
@@ -174,6 +178,12 @@ def M3WriteSA : SchedWriteRes<[M3UnitA,
def M3WriteSB : SchedWriteRes<[M3UnitA,
M3UnitS]> { let Latency = 2;
let NumMicroOps = 2; }
+def M3WriteSC : SchedWriteRes<[M3UnitA,
+ M3UnitS,
+ M3UnitFST]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M3WriteSY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteSA]>,
+ SchedVar<NoSchedPred, [WriteVST]>]>;
def M3ReadAdrBase : SchedReadVariant<[SchedVar<ExynosScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
@@ -557,7 +567,7 @@ def : InstRW<[M3WriteLE,
ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
def : InstRW<[WriteVLD,
ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
-def : InstRW<[M3WriteLE,
+def : InstRW<[M3WriteLY,
ReadAdrBase], (instregex "^LDRQro[WX]")>;
def : InstRW<[WriteVLD,
M3WriteLH], (instregex "^LDN?P[DS]i")>;
@@ -577,14 +587,16 @@ def : InstRW<[WriteVST,
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
def : InstRW<[M3WriteSA,
ReadAdrBase], (instregex "^STR[BDHS]roW")>;
+def : InstRW<[M3WriteSA,
+ ReadAdrBase], (instregex "^STRQroW")>;
def : InstRW<[WriteVST,
ReadAdrBase], (instregex "^STR[BDHS]roX")>;
-def : InstRW<[M3WriteSA,
- ReadAdrBase], (instregex "^STRQro[WX]")>;
+def : InstRW<[M3WriteSY,
+ ReadAdrBase], (instregex "^STRQroX")>;
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STP[DS](post|pre)")>;
-def : InstRW<[M3WriteSA,
+def : InstRW<[M3WriteSC,
WriteAdr], (instregex "^STPQ(post|pre)")>;
// ASIMD instructions.
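In the M3 hunk above, the former `^STRQro[WX]` pattern is split so that only the X-form store goes through the new M3WriteSY variant; restating the two arms from the def added earlier in this hunk (no new behavior), the extra address micro-op is paid only when the index register is scaled:

def M3WriteSY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteSA]>,
                                   SchedVar<NoSchedPred, [WriteVST]>]>;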
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
index c8bf05f16131..d2284f9fa0b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -156,8 +156,10 @@ def M4WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M4WriteZ0]>,
SchedVar<ExynosArithPred, [M4WriteA1]>,
SchedVar<ExynosLogicExPred, [M4WriteA1]>,
SchedVar<NoSchedPred, [M4WriteAA]>]>;
-def M4WriteAV : SchedWriteVariant<[SchedVar<ExynosResetPred, [M4WriteZ0]>,
- SchedVar<NoSchedPred, [M4WriteAA]>]>;
+def M4WriteAV : SchedWriteVariant<[SchedVar<ExynosResetPred, [M4WriteZ0]>,
+ SchedVar<ExynosArithPred, [M4WriteA1]>,
+ SchedVar<ExynosLogicExPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAA]>]>;
def M4WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M4WriteA1]>,
SchedVar<ExynosLogicExPred, [M4WriteA1]>,
SchedVar<NoSchedPred, [M4WriteAA]>]>;
@@ -173,8 +175,10 @@ def M4WriteC3 : SchedWriteRes<[M4UnitC]> { let Latency = 3; }
def M4WriteCA : SchedWriteRes<[M4UnitC]> { let Latency = 4;
let ResourceCycles = [2]; }
-def M4WriteD12 : SchedWriteRes<[M4UnitD]> { let Latency = 12; }
-def M4WriteD21 : SchedWriteRes<[M4UnitD]> { let Latency = 21; }
+def M4WriteD12 : SchedWriteRes<[M4UnitD]> { let Latency = 12;
+ let ResourceCycles = [12]; }
+def M4WriteD21 : SchedWriteRes<[M4UnitD]> { let Latency = 21;
+ let ResourceCycles = [21]; }
def M4WriteE2 : SchedWriteRes<[M4UnitE]> { let Latency = 2; }
@@ -198,8 +202,10 @@ def M4WriteLE : SchedWriteRes<[M4UnitA,
let NumMicroOps = 2; }
def M4WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
-def M4WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M4WriteL5]>,
- SchedVar<NoSchedPred, [M4WriteL4]>]>;
+def M4WriteLX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M4WriteL5]>,
+ SchedVar<NoSchedPred, [M4WriteL4]>]>;
+def M4WriteLY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M4WriteLE]>,
+ SchedVar<NoSchedPred, [M4WriteL5]>]>;
def M4WriteS1 : SchedWriteRes<[M4UnitS]> { let Latency = 1; }
def M4WriteSA : SchedWriteRes<[M4UnitS0]> { let Latency = 3; }
@@ -458,6 +464,8 @@ def M4WriteVSTI : SchedWriteRes<[M4UnitNSHF,
let ResourceCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; }
def M4WriteVSTJ : SchedWriteRes<[M4UnitA,
M4UnitS,
+ M4UnitFST,
+ M4UnitS,
M4UnitFST]> { let Latency = 1;
let NumMicroOps = 2; }
def M4WriteVSTK : SchedWriteRes<[M4UnitA,
@@ -472,6 +480,8 @@ def M4WriteVSTL : SchedWriteRes<[M4UnitNSHF,
M4UnitFST]> { let Latency = 4;
let NumMicroOps = 4;
let ResourceCycles = [1, 1, 2, 1, 2, 1]; }
+def M4WriteVSTY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M4WriteVSTK]>,
+ SchedVar<NoSchedPred, [WriteVST]>]>;
// Special cases.
def M4WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M4WriteNALU1]>,
@@ -537,7 +547,7 @@ def : SchedAlias<WriteFMul, M4WriteFMAC3>;
// FP miscellaneous instructions.
def : SchedAlias<WriteFCvt, M4WriteFCVT2>;
def : SchedAlias<WriteFImm, M4WriteNALU1>;
-def : SchedAlias<WriteFCopy, M4WriteCOPY>;
+def : SchedAlias<WriteFCopy, M4WriteNALU1>;
// FP load instructions.
def : SchedAlias<WriteVLD, M4WriteL5>;
@@ -672,7 +682,7 @@ def : InstRW<[M4WriteLE,
ReadAdrBase], (instregex "^LDR[BHSDQ]roW")>;
def : InstRW<[WriteVLD,
ReadAdrBase], (instregex "^LDR[BHSD]roX")>;
-def : InstRW<[M4WriteLE,
+def : InstRW<[M4WriteLY,
ReadAdrBase], (instrs LDRQroX)>;
def : InstRW<[WriteVLD,
M4WriteLH], (instregex "^LDN?P[SD]i")>;
@@ -696,16 +706,16 @@ def : InstRW<[WriteVST], (instregex "^STUR[BHSDQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STR[BHSDQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BHSDQ]ui")>;
-def : InstRW<[M4WriteVSTJ,
+def : InstRW<[M4WriteVSTK,
ReadAdrBase], (instregex "^STR[BHSD]roW")>;
def : InstRW<[M4WriteVSTK,
ReadAdrBase], (instrs STRQroW)>;
def : InstRW<[WriteVST,
ReadAdrBase], (instregex "^STR[BHSD]roX")>;
-def : InstRW<[M4WriteVSTK,
+def : InstRW<[M4WriteVSTY,
ReadAdrBase], (instrs STRQroX)>;
def : InstRW<[WriteVST], (instregex "^STN?P[SD]i")>;
-def : InstRW<[M4WriteVSTA], (instregex "^STN?PQi")>;
+def : InstRW<[M4WriteVSTJ], (instregex "^STN?PQi")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STP[SD](post|pre)")>;
def : InstRW<[M4WriteVSTJ,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
new file mode 100644
index 000000000000..df7402591e7b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
@@ -0,0 +1,1012 @@
+//=- AArch64SchedExynosM5.td - Samsung Exynos M5 Sched Defs --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the Samsung Exynos M5 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// The Exynos-M5 is an advanced superscalar microprocessor with a 6-wide,
+// in-order stage for decode and dispatch, and a wider issue stage.
+// The execution units, as well as loads and stores, operate out of order.
+
+def ExynosM5Model : SchedMachineModel {
+ let IssueWidth = 6; // Up to 6 uops per cycle.
+ let MicroOpBufferSize = 228; // ROB size.
+ let LoopMicroOpBufferSize = 60; // Based on the instruction queue size.
+ let LoadLatency = 4; // Optimistic load cases.
+ let MispredictPenalty = 15; // Minimum branch misprediction penalty.
+ let CompleteModel = 1; // Use the default model otherwise.
+
+ list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
+}
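A SchedMachineModel only takes effect once a processor definition names it; for AArch64 that hookup lives in AArch64.td, outside this diff. A sketch of the shape of such a definition follows; the CPU feature list is assumed for illustration and is not taken from this change:

def : ProcessorModel<"exynos-m5", ExynosM5Model,
                     [ProcExynosM5]>; // Feature list assumed, not from this diff.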
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on the Exynos-M5.
+
+let SchedModel = ExynosM5Model in {
+
+def M5UnitA : ProcResource<2>; // Simple integer
+def M5UnitC : ProcResource<2>; // Simple and complex integer
+let Super = M5UnitC, BufferSize = 1 in
+def M5UnitD : ProcResource<1>; // Integer division (inside C0, serialized)
+def M5UnitE : ProcResource<2>; // Simple 32-bit integer
+let Super = M5UnitC in
+def M5UnitF : ProcResource<2>; // CRC (inside C)
+def M5UnitB : ProcResource<1>; // Branch
+def M5UnitL0 : ProcResource<1>; // Load
+def M5UnitS0 : ProcResource<1>; // Store
+def M5PipeLS : ProcResource<1>; // Load/Store
+let Super = M5PipeLS in {
+ def M5UnitL1 : ProcResource<1>;
+ def M5UnitS1 : ProcResource<1>;
+}
+def M5PipeF0 : ProcResource<1>; // FP #0
+let Super = M5PipeF0 in {
+ def M5UnitFMAC0 : ProcResource<1>; // FP multiplication
+ def M5UnitFADD0 : ProcResource<1>; // Simple FP
+ def M5UnitNALU0 : ProcResource<1>; // Simple vector
+ def M5UnitNDOT0 : ProcResource<1>; // Dot product vector
+ def M5UnitNHAD : ProcResource<1>; // Horizontal vector
+ def M5UnitNMSC : ProcResource<1>; // FP and vector miscellanea
+ def M5UnitNMUL0 : ProcResource<1>; // Vector multiplication
+ def M5UnitNSHT0 : ProcResource<1>; // Vector shifting
+ def M5UnitNSHF0 : ProcResource<1>; // Vector shuffling
+ def M5UnitNCRY0 : ProcResource<1>; // Cryptographic
+}
+def M5PipeF1 : ProcResource<1>; // FP #1
+let Super = M5PipeF1 in {
+ def M5UnitFMAC1 : ProcResource<1>; // FP multiplication
+ def M5UnitFADD1 : ProcResource<1>; // Simple FP
+ def M5UnitFCVT0 : ProcResource<1>; // FP conversion
+ def M5UnitFDIV0 : ProcResource<2>; // FP division (serialized)
+ def M5UnitFSQR0 : ProcResource<2>; // FP square root (serialized)
+ def M5UnitFST0 : ProcResource<1>; // FP store
+ def M5UnitNALU1 : ProcResource<1>; // Simple vector
+ def M5UnitNDOT1 : ProcResource<1>; // Dot product vector
+ def M5UnitNSHT1 : ProcResource<1>; // Vector shifting
+ def M5UnitNSHF1 : ProcResource<1>; // Vector shuffling
+}
+def M5PipeF2 : ProcResource<1>; // FP #2
+let Super = M5PipeF2 in {
+ def M5UnitFMAC2 : ProcResource<1>; // FP multiplication
+ def M5UnitFADD2 : ProcResource<1>; // Simple FP
+ def M5UnitFCVT1 : ProcResource<1>; // FP conversion
+ def M5UnitFDIV1 : ProcResource<2>; // FP division (serialized)
+ def M5UnitFSQR1 : ProcResource<2>; // FP square root (serialized)
+ def M5UnitFST1 : ProcResource<1>; // FP store
+ def M5UnitNALU2 : ProcResource<1>; // Simple vector
+ def M5UnitNDOT2 : ProcResource<1>; // Dot product vector
+ def M5UnitNMUL1 : ProcResource<1>; // Vector multiplication
+ def M5UnitNSHT2 : ProcResource<1>; // Vector shifting
+ def M5UnitNCRY1 : ProcResource<1>; // Cryptographic
+}
+
+def M5UnitAX : ProcResGroup<[M5UnitA,
+ M5UnitC]>;
+def M5UnitAW : ProcResGroup<[M5UnitA,
+ M5UnitC,
+ M5UnitE]>;
+def M5UnitL : ProcResGroup<[M5UnitL0,
+ M5UnitL1]>;
+def M5UnitS : ProcResGroup<[M5UnitS0,
+ M5UnitS1]>;
+def M5UnitFMAC : ProcResGroup<[M5UnitFMAC0,
+ M5UnitFMAC1,
+ M5UnitFMAC2]>;
+def M5UnitFADD : ProcResGroup<[M5UnitFADD0,
+ M5UnitFADD1,
+ M5UnitFADD2]>;
+def M5UnitFCVT : ProcResGroup<[M5UnitFCVT0,
+ M5UnitFCVT1]>;
+def M5UnitFDIV : ProcResGroup<[M5UnitFDIV0,
+ M5UnitFDIV1]>;
+def M5UnitFSQR : ProcResGroup<[M5UnitFSQR0,
+ M5UnitFSQR1]>;
+def M5UnitFST : ProcResGroup<[M5UnitFST0,
+ M5UnitFST1]>;
+def M5UnitNALU : ProcResGroup<[M5UnitNALU0,
+ M5UnitNALU1,
+ M5UnitNALU2]>;
+def M5UnitNDOT : ProcResGroup<[M5UnitNDOT0,
+ M5UnitNDOT1,
+ M5UnitNDOT2]>;
+def M5UnitNMUL : ProcResGroup<[M5UnitNMUL0,
+ M5UnitNMUL1]>;
+def M5UnitNSHT : ProcResGroup<[M5UnitNSHT0,
+ M5UnitNSHT1,
+ M5UnitNSHT2]>;
+def M5UnitNSHF : ProcResGroup<[M5UnitNSHF0,
+ M5UnitNSHF1]>;
+def M5UnitNCRY : ProcResGroup<[M5UnitNCRY0,
+ M5UnitNCRY1]>;
+
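A ProcResGroup declares an any-of-these-units resource: a write that names the group is modeled as issuing on whichever member unit is available, while naming a single member pins the micro-op to that pipe. A minimal sketch against the groups above, with hypothetical Hypo* write names:

def HypoWriteAnyNALU : SchedWriteRes<[M5UnitNALU]> { let Latency = 1; } // NALU0, 1, or 2.
def HypoWritePipeF0 : SchedWriteRes<[M5UnitNALU0]> { let Latency = 1; } // Pipe F0 only.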
+//===----------------------------------------------------------------------===//
+// Resource details.
+
+def M5WriteZ0 : SchedWriteRes<[]> { let Latency = 0; }
+def M5WriteZ1 : SchedWriteRes<[]> { let Latency = 1;
+ let NumMicroOps = 0; }
+def M5WriteZ4 : SchedWriteRes<[]> { let Latency = 4;
+ let NumMicroOps = 0; }
+
+def M5WriteA1W : SchedWriteRes<[M5UnitAW]> { let Latency = 1; }
+def M5WriteA1X : SchedWriteRes<[M5UnitAX]> { let Latency = 1; }
+def M5WriteAAW : SchedWriteRes<[M5UnitAW]> { let Latency = 2;
+ let ResourceCycles = [2]; }
+def M5WriteAAX : SchedWriteRes<[M5UnitAX]> { let Latency = 2;
+ let ResourceCycles = [2]; }
+def M5WriteAB : SchedWriteRes<[M5UnitAX,
+ M5UnitC,
+ M5UnitE]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M5WriteAC : SchedWriteRes<[M5UnitAX,
+ M5UnitAX,
+ M5UnitC]> { let Latency = 3;
+ let NumMicroOps = 3; }
+def M5WriteAD : SchedWriteRes<[M5UnitAW,
+ M5UnitC]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M5WriteAFW : SchedWriteRes<[M5UnitAW]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M5WriteAFX : SchedWriteRes<[M5UnitAX]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M5WriteAUW : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M5WriteZ0]>,
+ SchedVar<ExynosArithPred, [M5WriteA1W]>,
+ SchedVar<ExynosLogicExPred, [M5WriteA1W]>,
+ SchedVar<NoSchedPred, [M5WriteAAW]>]>;
+def M5WriteAUX : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M5WriteZ0]>,
+ SchedVar<ExynosArithPred, [M5WriteA1X]>,
+ SchedVar<ExynosLogicExPred, [M5WriteA1X]>,
+ SchedVar<NoSchedPred, [M5WriteAAX]>]>;
+def M5WriteAVW : SchedWriteVariant<[SchedVar<ExynosResetPred, [M5WriteZ0]>,
+ SchedVar<ExynosArithPred, [M5WriteA1W]>,
+ SchedVar<ExynosLogicExPred, [M5WriteA1W]>,
+ SchedVar<NoSchedPred, [M5WriteAAW]>]>;
+def M5WriteAVX : SchedWriteVariant<[SchedVar<ExynosResetPred, [M5WriteZ0]>,
+ SchedVar<ExynosArithPred, [M5WriteA1X]>,
+ SchedVar<ExynosLogicExPred, [M5WriteA1X]>,
+ SchedVar<NoSchedPred, [M5WriteAAX]>]>;
+def M5WriteAXW : SchedWriteVariant<[SchedVar<ExynosArithPred, [M5WriteA1W]>,
+ SchedVar<ExynosLogicExPred, [M5WriteA1W]>,
+ SchedVar<NoSchedPred, [M5WriteAAW]>]>;
+def M5WriteAXX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M5WriteA1X]>,
+ SchedVar<ExynosLogicExPred, [M5WriteA1X]>,
+ SchedVar<NoSchedPred, [M5WriteAAX]>]>;
+def M5WriteAYW : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M5WriteA1W]>,
+ SchedVar<NoSchedPred, [M5WriteAFW]>]>;
+def M5WriteAYX : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M5WriteA1X]>,
+ SchedVar<NoSchedPred, [M5WriteAFX]>]>;
+
+def M5WriteB1 : SchedWriteRes<[M5UnitB]> { let Latency = 1; }
+def M5WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M5WriteAC]>,
+ SchedVar<NoSchedPred, [M5WriteAB]>]>;
+
+def M5WriteC1 : SchedWriteRes<[M5UnitC]> { let Latency = 1; }
+def M5WriteC2 : SchedWriteRes<[M5UnitC]> { let Latency = 2; }
+def M5WriteCA : SchedWriteRes<[M5UnitC]> { let Latency = 3;
+ let ResourceCycles = [2]; }
+
+def M5WriteD10 : SchedWriteRes<[M5UnitD]> { let Latency = 10;
+ let ResourceCycles = [10]; }
+def M5WriteD16 : SchedWriteRes<[M5UnitD]> { let Latency = 16;
+ let ResourceCycles = [16]; }
+
+def M5WriteF2 : SchedWriteRes<[M5UnitF]> { let Latency = 2; }
+
+def M5WriteL4 : SchedWriteRes<[M5UnitL]> { let Latency = 4; }
+def M5WriteL5 : SchedWriteRes<[M5UnitL]> { let Latency = 5; }
+def M5WriteL6 : SchedWriteRes<[M5UnitL]> { let Latency = 6; }
+def M5WriteLA : SchedWriteRes<[M5UnitL,
+ M5UnitL]> { let Latency = 6;
+ let NumMicroOps = 1; }
+def M5WriteLB : SchedWriteRes<[M5UnitAX,
+ M5UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M5WriteLC : SchedWriteRes<[M5UnitAX,
+ M5UnitL,
+ M5UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M5WriteLD : SchedWriteRes<[M5UnitAX,
+ M5UnitL]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def M5WriteLE : SchedWriteRes<[M5UnitAX,
+ M5UnitL]> { let Latency = 7;
+ let NumMicroOps = 2; }
+def M5WriteLFW : SchedWriteRes<[M5UnitAW,
+ M5UnitAW,
+ M5UnitAW,
+ M5UnitAW,
+ M5UnitL]> { let Latency = 15;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1, 1, 1, 1, 15]; }
+def M5WriteLFX : SchedWriteRes<[M5UnitAX,
+ M5UnitAX,
+ M5UnitAX,
+ M5UnitAX,
+ M5UnitL]> { let Latency = 15;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1, 1, 1, 1, 15]; }
+def M5WriteLGW : SchedWriteRes<[M5UnitAW,
+ M5UnitL]> { let Latency = 13;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1, 13]; }
+def M5WriteLGX : SchedWriteRes<[M5UnitAX,
+ M5UnitL]> { let Latency = 13;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1, 13]; }
+def M5WriteLH : SchedWriteRes<[]> { let Latency = 6;
+ let NumMicroOps = 0; }
+def M5WriteLX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M5WriteL5]>,
+ SchedVar<NoSchedPred, [M5WriteL4]>]>;
+def M5WriteLY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M5WriteLE]>,
+ SchedVar<NoSchedPred, [M5WriteL6]>]>;
+
+def M5WriteS1 : SchedWriteRes<[M5UnitS]> { let Latency = 1; }
+def M5WriteSA : SchedWriteRes<[M5UnitS0]> { let Latency = 4; }
+def M5WriteSB : SchedWriteRes<[M5UnitAX,
+ M5UnitS]> { let Latency = 2;
+ let NumMicroOps = 1; }
+def M5WriteSX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M5WriteSB]>,
+ SchedVar<NoSchedPred, [M5WriteS1]>]>;
+
+def M5ReadAdrBase : SchedReadVariant<[SchedVar<
+ MCSchedPredicate<
+ CheckAny<
+ [ScaledIdxFn,
+ ExynosScaledIdxFn]>>, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
+
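Both arms of M5ReadAdrBase above resolve to ReadDefault, so the variant is effectively a placeholder; what it establishes is the predicate plumbing, in which MCSchedPredicate lifts TableGen instruction predicates and CheckAny ORs them together. A sketch of the same composition with a distinct read on the scaled path; the advance value is assumed for illustration:

def HypoReadAdr : SchedReadAdvance<1>; // Assumed value.
def HypoReadAdrBase
    : SchedReadVariant<[SchedVar<MCSchedPredicate<
                                   CheckAny<[ScaledIdxFn,
                                             ExynosScaledIdxFn]>>,
                                 [HypoReadAdr]>,
                        SchedVar<NoSchedPred, [ReadDefault]>]>;
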
+def M5WriteNEONB : SchedWriteRes<[M5UnitNALU,
+ M5UnitS0]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M5WriteNEONH : SchedWriteRes<[M5UnitNALU,
+ M5UnitS0]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M5WriteNEONI : SchedWriteRes<[M5UnitS0,
+ M5UnitNSHF]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M5WriteNEONK : SchedWriteRes<[M5UnitNSHF,
+ M5UnitFCVT0,
+ M5UnitS0]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M5WriteNEONN : SchedWriteRes<[M5UnitNMSC,
+ M5UnitNMSC]> { let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [7, 7]; }
+def M5WriteNEONO : SchedWriteRes<[M5UnitNMSC,
+ M5UnitNMSC,
+ M5UnitNMSC]> { let Latency = 8;
+ let NumMicroOps = 3;
+ let ResourceCycles = [10, 10, 10]; }
+def M5WriteNEONP : SchedWriteRes<[M5UnitNSHF,
+ M5UnitS0,
+ M5UnitFCVT]> { let Latency = 7;
+ let NumMicroOps = 2; }
+def M5WriteNEONQ : SchedWriteRes<[M5UnitNMSC,
+ M5UnitC]> { let Latency = 3;
+ let NumMicroOps = 1; }
+def M5WriteNEONU : SchedWriteRes<[M5UnitFSQR,
+ M5UnitFSQR]> { let Latency = 7;
+ let ResourceCycles = [4, 4]; }
+def M5WriteNEONV : SchedWriteRes<[M5UnitFDIV,
+ M5UnitFDIV]> { let Latency = 7;
+ let ResourceCycles = [6, 6]; }
+def M5WriteNEONW : SchedWriteRes<[M5UnitFDIV,
+ M5UnitFDIV]> { let Latency = 12;
+ let ResourceCycles = [9, 9]; }
+def M5WriteNEONX : SchedWriteRes<[M5UnitFSQR,
+ M5UnitFSQR]> { let Latency = 8;
+ let ResourceCycles = [5, 5]; }
+def M5WriteNEONY : SchedWriteRes<[M5UnitFSQR,
+ M5UnitFSQR]> { let Latency = 12;
+ let ResourceCycles = [9, 9]; }
+def M5WriteNEONZ : SchedWriteVariant<[SchedVar<ExynosQFormPred, [M5WriteNEONO]>,
+ SchedVar<NoSchedPred, [M5WriteNEONN]>]>;
+
+def M5WriteFADD2 : SchedWriteRes<[M5UnitFADD]> { let Latency = 2; }
+
+def M5WriteFCVT2 : SchedWriteRes<[M5UnitFCVT]> { let Latency = 2; }
+def M5WriteFCVT2A : SchedWriteRes<[M5UnitFCVT0]> { let Latency = 2; }
+def M5WriteFCVT3 : SchedWriteRes<[M5UnitFCVT]> { let Latency = 3; }
+def M5WriteFCVT3A : SchedWriteRes<[M5UnitFCVT0]> { let Latency = 3; }
+def M5WriteFCVTA : SchedWriteRes<[M5UnitFCVT0,
+ M5UnitS0]> { let Latency = 3;
+ let NumMicroOps = 1; }
+def M5WriteFCVTB : SchedWriteRes<[M5UnitFCVT,
+ M5UnitS0]> { let Latency = 4;
+ let NumMicroOps = 1; }
+def M5WriteFCVTC : SchedWriteRes<[M5UnitFCVT,
+ M5UnitS0]> { let Latency = 6;
+ let NumMicroOps = 1; }
+
+def M5WriteFDIV5 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 5;
+ let ResourceCycles = [2]; }
+def M5WriteFDIV7 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 7;
+ let ResourceCycles = [4]; }
+def M5WriteFDIV12 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 12;
+ let ResourceCycles = [9]; }
+
+def M5WriteFMAC3 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 3; }
+def M5WriteFMAC4 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 4; }
+def M5WriteFMAC5 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 5; }
+
+def M5WriteFSQR5 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 5;
+ let ResourceCycles = [2]; }
+def M5WriteFSQR7 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 7;
+ let ResourceCycles = [4]; }
+def M5WriteFSQR8 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 8;
+ let ResourceCycles = [5]; }
+def M5WriteFSQR12 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 12;
+ let ResourceCycles = [9]; }
+
+def M5WriteNALU1 : SchedWriteRes<[M5UnitNALU]> { let Latency = 1; }
+def M5WriteNALU2 : SchedWriteRes<[M5UnitNALU]> { let Latency = 2; }
+
+def M5WriteNDOT2 : SchedWriteRes<[M5UnitNDOT]> { let Latency = 2; }
+
+def M5WriteNCRY2 : SchedWriteRes<[M5UnitNCRY]> { let Latency = 2; }
+def M5WriteNCRY1A : SchedWriteRes<[M5UnitNCRY0]> { let Latency = 1; }
+def M5WriteNCRY2A : SchedWriteRes<[M5UnitNCRY0]> { let Latency = 2; }
+def M5WriteNCRY3A : SchedWriteRes<[M5UnitNCRY0]> { let Latency = 3; }
+def M5WriteNCRY5A : SchedWriteRes<[M5UnitNCRY]> { let Latency = 5; }
+
+def M5WriteNHAD1 : SchedWriteRes<[M5UnitNHAD]> { let Latency = 1; }
+def M5WriteNHAD3 : SchedWriteRes<[M5UnitNHAD]> { let Latency = 3; }
+
+def M5WriteNMSC1 : SchedWriteRes<[M5UnitNMSC]> { let Latency = 1; }
+def M5WriteNMSC2 : SchedWriteRes<[M5UnitNMSC]> { let Latency = 2; }
+
+def M5WriteNMUL3 : SchedWriteRes<[M5UnitNMUL]> { let Latency = 3; }
+
+def M5WriteNSHF1 : SchedWriteRes<[M5UnitNSHF]> { let Latency = 1; }
+def M5WriteNSHF2 : SchedWriteRes<[M5UnitNSHF]> { let Latency = 2; }
+def M5WriteNSHFA : SchedWriteRes<[M5UnitNSHF]> { let Latency = 2; }
+def M5WriteNSHFB : SchedWriteRes<[M5UnitNSHF]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def M5WriteNSHFC : SchedWriteRes<[M5UnitNSHF]> { let Latency = 6;
+ let NumMicroOps = 3; }
+def M5WriteNSHFD : SchedWriteRes<[M5UnitNSHF]> { let Latency = 8;
+ let NumMicroOps = 4; }
+
+def M5WriteNSHT2 : SchedWriteRes<[M5UnitNSHT]> { let Latency = 2; }
+def M5WriteNSHT4A : SchedWriteRes<[M5UnitNSHT1]> { let Latency = 4; }
+
+def M5WriteVLDA : SchedWriteRes<[M5UnitL,
+ M5UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M5WriteVLDB : SchedWriteRes<[M5UnitL,
+ M5UnitL,
+ M5UnitL]> { let Latency = 7;
+ let NumMicroOps = 3; }
+def M5WriteVLDC : SchedWriteRes<[M5UnitL,
+ M5UnitL,
+ M5UnitL,
+ M5UnitL]> { let Latency = 7;
+ let NumMicroOps = 4; }
+def M5WriteVLDD : SchedWriteRes<[M5UnitL,
+ M5UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 1]; }
+def M5WriteVLDF : SchedWriteRes<[M5UnitL,
+ M5UnitL]> { let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [6, 5]; }
+def M5WriteVLDG : SchedWriteRes<[M5UnitL,
+ M5UnitNSHF,
+ M5UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1, 1]; }
+def M5WriteVLDI : SchedWriteRes<[M5UnitL,
+ M5UnitL,
+ M5UnitL]> { let Latency = 13;
+ let NumMicroOps = 3; }
+def M5WriteVLDJ : SchedWriteRes<[M5UnitL,
+ M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitNSHF]> { let Latency = 8;
+ let NumMicroOps = 4; }
+def M5WriteVLDK : SchedWriteRes<[M5UnitL,
+ M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitNSHF]> { let Latency = 8;
+ let NumMicroOps = 5; }
+def M5WriteVLDL : SchedWriteRes<[M5UnitL,
+ M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitL,
+ M5UnitNSHF]> { let Latency = 8;
+ let NumMicroOps = 5; }
+def M5WriteVLDM : SchedWriteRes<[M5UnitL,
+ M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitL,
+ M5UnitNSHF,
+ M5UnitNSHF]> { let Latency = 8;
+ let NumMicroOps = 6; }
+def M5WriteVLDN : SchedWriteRes<[M5UnitL,
+ M5UnitL,
+ M5UnitL,
+ M5UnitL]> { let Latency = 15;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 2, 2, 2]; }
+
+def M5WriteVST1 : SchedWriteRes<[M5UnitS,
+ M5UnitFST]> { let Latency = 1;
+ let NumMicroOps = 1; }
+def M5WriteVSTA : SchedWriteRes<[M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M5WriteVSTB : SchedWriteRes<[M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST]> { let Latency = 3;
+ let NumMicroOps = 3; }
+def M5WriteVSTC : SchedWriteRes<[M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST]> { let Latency = 4;
+ let NumMicroOps = 4; }
+def M5WriteVSTD : SchedWriteRes<[M5UnitS,
+ M5UnitFST]> { let Latency = 2; }
+def M5WriteVSTE : SchedWriteRes<[M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST]> { let Latency = 2;
+ let NumMicroOps = 1; }
+def M5WriteVSTF : SchedWriteRes<[M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitS,
+ M5UnitFST]> { let Latency = 4;
+ let NumMicroOps = 3; }
+def M5WriteVSTG : SchedWriteRes<[M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST]> { let Latency = 4;
+ let NumMicroOps = 5; }
+def M5WriteVSTH : SchedWriteRes<[M5UnitS0,
+ M5UnitFST]> { let Latency = 1;
+ let NumMicroOps = 1; }
+def M5WriteVSTI : SchedWriteRes<[M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST]> { let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; }
+def M5WriteVSTJ : SchedWriteRes<[M5UnitA,
+ M5UnitS0,
+ M5UnitFST]> { let Latency = 1;
+ let NumMicroOps = 1; }
+def M5WriteVSTK : SchedWriteRes<[M5UnitAX,
+ M5UnitS,
+ M5UnitFST]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def M5WriteVSTL : SchedWriteRes<[M5UnitNSHF,
+ M5UnitNSHF,
+ M5UnitS,
+ M5UnitFST,
+ M5UnitS,
+ M5UnitFST]> { let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 2, 1, 2, 1]; }
+def M5WriteVSTY : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M5WriteVSTK]>,
+ SchedVar<NoSchedPred, [WriteVST]>]>;
+
+// Special cases.
+def M5WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M5WriteNALU2]>,
+ SchedVar<NoSchedPred, [M5WriteZ0]>]>;
+def M5WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M5WriteZ0]>,
+ SchedVar<NoSchedPred, [M5WriteNALU1]>]>;
+
+// Fast forwarding.
+def M5ReadFM1 : SchedReadAdvance<+1, [M5WriteF2]>;
+def M5ReadAESM2 : SchedReadAdvance<+2, [M5WriteNCRY2]>;
+def M5ReadFMACM1 : SchedReadAdvance<+1, [M5WriteFMAC4,
+ M5WriteFMAC5]>;
+def M5ReadNMULM1 : SchedReadAdvance<+1, [M5WriteNMUL3]>;
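SchedReadAdvance moves a consumer's ready cycle earlier by the given amount, but only for operands produced by the listed writes. With M5ReadFMACM1 above, a dependent chain of fused multiply-adds sees the accumulator operand with an effective latency of

  4 (M5WriteFMAC4 latency) - 1 (read advance) = 3 cycles,

while a consumer reading the same result through any other operand still observes the full 4 cycles.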
+
+//===----------------------------------------------------------------------===//
+// Coarse scheduling model.
+
+// Branch instructions.
+def : SchedAlias<WriteBr, M5WriteZ0>;
+def : SchedAlias<WriteBrReg, M5WriteC1>;
+
+// Arithmetic and logical integer instructions.
+def : SchedAlias<WriteI, M5WriteA1W>;
+def : SchedAlias<WriteIEReg, M5WriteA1W>; // FIXME: M5WriteAX crashes TableGen.
+def : SchedAlias<WriteISReg, M5WriteA1W>; // FIXME: M5WriteAX crashes TableGen.
+def : SchedAlias<WriteIS, M5WriteA1W>;
+
+// Move instructions.
+def : SchedAlias<WriteImm, M5WriteA1W>;
+
+// Divide and multiply instructions.
+def : SchedAlias<WriteID32, M5WriteD10>;
+def : SchedAlias<WriteID64, M5WriteD16>;
+def : SchedAlias<WriteIM32, M5WriteC2>;
+def : SchedAlias<WriteIM64, M5WriteCA>;
+
+// Miscellaneous instructions.
+def : SchedAlias<WriteExtr, M5WriteAYW>;
+
+// Addressing modes.
+def : SchedAlias<WriteAdr, M5WriteZ1>;
+def : SchedAlias<ReadAdrBase, M5ReadAdrBase>;
+
+// Load instructions.
+def : SchedAlias<WriteLD, M5WriteL4>;
+def : SchedAlias<WriteLDHi, M5WriteZ4>;
+def : SchedAlias<WriteLDIdx, M5WriteLX>;
+
+// Store instructions.
+def : SchedAlias<WriteST, M5WriteS1>;
+def : SchedAlias<WriteSTP, M5WriteS1>;
+def : SchedAlias<WriteSTX, M5WriteS1>;
+def : SchedAlias<WriteSTIdx, M5WriteSX>;
+
+// Atomic load and store instructions.
+def : SchedAlias<WriteAtomic, M5WriteLGW>;
+
+// FP data instructions.
+def : SchedAlias<WriteF, M5WriteFADD2>;
+def : SchedAlias<WriteFCmp, M5WriteNMSC2>;
+def : SchedAlias<WriteFDiv, M5WriteFDIV12>;
+def : SchedAlias<WriteFMul, M5WriteFMAC3>;
+
+// FP miscellaneous instructions.
+def : SchedAlias<WriteFCvt, M5WriteFCVT2>;
+def : SchedAlias<WriteFImm, M5WriteNALU1>;
+def : SchedAlias<WriteFCopy, M5WriteNALU2>;
+
+// FP load instructions.
+def : SchedAlias<WriteVLD, M5WriteL6>;
+
+// FP store instructions.
+def : SchedAlias<WriteVST, M5WriteVST1>;
+
+// ASIMD FP instructions.
+def : SchedAlias<WriteV, M5WriteNALU1>;
+
+// Other miscellaneous instructions.
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Generic fast forwarding.
+
+// TODO: Add FP register forwarding rules.
+
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+// TODO: The forwarding for 32 bits actually saves 2 cycles.
+def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+//===----------------------------------------------------------------------===//
+// Finer scheduling model.
+
+// Branch instructions
+def : InstRW<[M5WriteB1], (instrs Bcc)>;
+def : InstRW<[M5WriteAFX], (instrs BL)>;
+def : InstRW<[M5WriteBX], (instrs BLR)>;
+def : InstRW<[M5WriteC1], (instregex "^CBN?Z[WX]")>;
+def : InstRW<[M5WriteAD], (instregex "^TBN?ZW")>;
+def : InstRW<[M5WriteAB], (instregex "^TBN?ZX")>;
+
+// Arithmetic and logical integer instructions.
+def : InstRW<[M5WriteA1W], (instregex "^(ADC|SBC)S?Wr$")>;
+def : InstRW<[M5WriteA1X], (instregex "^(ADC|SBC)S?Xr$")>;
+def : InstRW<[M5WriteAXW], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)Wrs$")>;
+def : InstRW<[M5WriteAXX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)Xrs$")>;
+def : InstRW<[M5WriteAUW], (instrs ORRWrs)>;
+def : InstRW<[M5WriteAUX], (instrs ORRXrs)>;
+def : InstRW<[M5WriteAXW], (instregex "^(ADD|AND|BIC|SUB)SWrs$")>;
+def : InstRW<[M5WriteAXX], (instregex "^(ADD|AND|BIC|SUB)SXrs$")>;
+def : InstRW<[M5WriteAXW], (instregex "^(ADD|SUB)S?Wrx(64)?$")>;
+def : InstRW<[M5WriteAXX], (instregex "^(ADD|SUB)S?Xrx(64)?$")>;
+def : InstRW<[M5WriteAVW], (instrs ADDWri, ORRWri)>;
+def : InstRW<[M5WriteAVX], (instrs ADDXri, ORRXri)>;
+def : InstRW<[M5WriteA1W], (instregex "^CCM[NP]W[ir]$")>;
+def : InstRW<[M5WriteA1X], (instregex "^CCM[NP]X[ir]$")>;
+def : InstRW<[M5WriteA1W], (instrs CSELWr, CSINCWr, CSINVWr, CSNEGWr)>;
+def : InstRW<[M5WriteA1X], (instrs CSELXr, CSINCXr, CSINVXr, CSNEGXr)>;
+
+// Move instructions.
+def : InstRW<[M5WriteCOPY], (instrs COPY)>;
+def : InstRW<[M5WriteZ0], (instrs ADR, ADRP)>;
+def : InstRW<[M5WriteZ0], (instregex "^MOV[NZ][WX]i$")>;
+
+// Shift instructions.
+def : InstRW<[M5WriteA1W], (instrs ASRVWr, LSLVWr, LSRVWr, RORVWr)>;
+def : InstRW<[M5WriteA1X], (instrs ASRVXr, LSLVXr, LSRVXr, RORVXr)>;
+
+// Miscellaneous instructions.
+def : InstRW<[M5WriteAYW], (instrs EXTRWrri)>;
+def : InstRW<[M5WriteAYX], (instrs EXTRXrri)>;
+def : InstRW<[M5WriteA1W], (instrs BFMWri, SBFMWri, UBFMWri)>;
+def : InstRW<[M5WriteA1X], (instrs BFMXri, SBFMXri, UBFMXri)>;
+def : InstRW<[M5WriteA1W], (instrs CLSWr, CLZWr)>;
+def : InstRW<[M5WriteA1X], (instrs CLSXr, CLZXr)>;
+def : InstRW<[M5WriteA1W], (instrs RBITWr, REVWr, REV16Wr)>;
+def : InstRW<[M5WriteA1X], (instrs RBITXr, REVXr, REV16Xr, REV32Xr)>;
+
+// Load instructions.
+def : InstRW<[M5WriteLD,
+ WriteLDHi,
+ WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
+def : InstRW<[M5WriteL5,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
+def : InstRW<[WriteLDIdx,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
+def : InstRW<[M5WriteL5,
+ ReadAdrBase], (instrs PRFMroW)>;
+def : InstRW<[WriteLDIdx,
+ ReadAdrBase], (instrs PRFMroX)>;
+
+// Store instructions.
+def : InstRW<[M5WriteSB,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
+
+// Atomic load and store instructions.
+def : InstRW<[M5WriteLGW], (instregex "^CAS(A|AL|L)?[BHW]$")>;
+def : InstRW<[M5WriteLGX], (instregex "^CAS(A|AL|L)?X$")>;
+def : InstRW<[M5WriteLFW], (instregex "^CASP(A|AL|L)?W$")>;
+def : InstRW<[M5WriteLFX], (instregex "^CASP(A|AL|L)?X$")>;
+def : InstRW<[M5WriteLGW], (instregex "^LD(ADD|CLR|EOR|SET|[SU]MAX|[SU]MIN)(A|AL|L)?[BHW]$")>;
+def : InstRW<[M5WriteLGX], (instregex "^LD(ADD|CLR|EOR|SET|[SU]MAX|[SU]MIN)(A|AL|L)?X$")>;
+def : InstRW<[M5WriteLGW], (instregex "^SWP(A|AL|L)?[BHW]$")>;
+def : InstRW<[M5WriteLGX], (instregex "^SWP(A|AL|L)?X$")>;
+
+// FP data instructions.
+def : InstRW<[M5WriteNSHF1], (instrs FABSHr, FABSSr, FABSDr)>;
+def : InstRW<[M5WriteFADD2], (instregex "^F(ADD|SUB)[HSD]rr")>;
+def : InstRW<[M5WriteFADD2], (instregex "^FADDPv.i(16|32|64)")>;
+def : InstRW<[M5WriteNEONQ], (instregex "^FCCMPE?[HSD]rr")>;
+def : InstRW<[M5WriteNMSC2], (instregex "^FCMPE?[HSD]r[ir]")>;
+def : InstRW<[M5WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(16|32|64|v1)")>;
+def : InstRW<[M5WriteFDIV5], (instrs FDIVHrr)>;
+def : InstRW<[M5WriteFDIV7], (instrs FDIVSrr)>;
+def : InstRW<[M5WriteFDIV12], (instrs FDIVDrr)>;
+def : InstRW<[M5WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?[HSD]rr")>;
+def : InstRW<[M5WriteFMAC3], (instregex "^FN?MUL[HSD]rr")>;
+def : InstRW<[M5WriteFMAC3], (instrs FMULX16, FMULX32, FMULX64)>;
+def : InstRW<[M5WriteFMAC4,
+ M5ReadFMACM1], (instregex "^FN?M(ADD|SUB)[HSD]rrr")>;
+def : InstRW<[M5WriteNALU2], (instrs FNEGHr, FNEGSr, FNEGDr)>;
+def : InstRW<[M5WriteFCVT3A], (instregex "^FRINT.+r")>;
+def : InstRW<[M5WriteNEONH], (instregex "^FCSEL[HSD]rrr")>;
+def : InstRW<[M5WriteFSQR5], (instrs FSQRTHr)>;
+def : InstRW<[M5WriteFSQR8], (instrs FSQRTSr)>;
+def : InstRW<[M5WriteFSQR12], (instrs FSQRTDr)>;
+
+// FP miscellaneous instructions.
+def : InstRW<[M5WriteFCVT2], (instregex "^FCVT[HSD][HSD]r")>;
+def : InstRW<[M5WriteFCVTC], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>;
+def : InstRW<[M5WriteFCVTB], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
+def : InstRW<[M5WriteNALU1], (instregex "^FMOV[HSD]i")>;
+def : InstRW<[M5WriteNALU2], (instregex "^FMOV[HSD]r")>;
+def : InstRW<[M5WriteSA], (instregex "^FMOV[WX][HSD]r")>;
+def : InstRW<[M5WriteFCVTA], (instregex "^FMOV[HSD][WX]r")>;
+def : InstRW<[M5WriteNEONI], (instregex "^FMOVXDHighr")>;
+def : InstRW<[M5WriteNEONK], (instregex "^FMOVDXHighr")>;
+def : InstRW<[M5WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1(f16|i32|i64)")>;
+def : InstRW<[M5WriteNMSC1], (instregex "^FRECPXv1")>;
+def : InstRW<[M5WriteFMAC4], (instregex "^F(RECP|RSQRT)S(16|32|64)")>;
+
+// FP load instructions.
+def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>;
+def : InstRW<[WriteVLD], (instregex "^LDUR[BHSDQ]i")>;
+def : InstRW<[WriteVLD,
+ WriteAdr], (instregex "^LDR[BHSDQ](post|pre)")>;
+def : InstRW<[WriteVLD], (instregex "^LDR[BHSDQ]ui")>;
+def : InstRW<[M5WriteLE,
+ ReadAdrBase], (instregex "^LDR[BHSDQ]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BHSD]roX")>;
+def : InstRW<[M5WriteLY,
+ ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[WriteVLD,
+ M5WriteLH], (instregex "^LDN?P[SD]i")>;
+def : InstRW<[M5WriteLA,
+ M5WriteLH], (instregex "^LDN?PQi")>;
+def : InstRW<[M5WriteLB,
+ M5WriteLH,
+ WriteAdr], (instregex "^LDP[SD](post|pre)")>;
+def : InstRW<[M5WriteLC,
+ M5WriteLH,
+ WriteAdr], (instregex "^LDPQ(post|pre)")>;
+
+// FP store instructions.
+def : InstRW<[WriteVST], (instregex "^STUR[BHSDQ]i")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STR[BHSDQ](post|pre)")>;
+def : InstRW<[WriteVST], (instregex "^STR[BHSDQ]ui")>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BHSD]ro[WX]")>;
+def : InstRW<[M5WriteVSTK,
+ ReadAdrBase], (instregex "^STRQroW")>;
+def : InstRW<[M5WriteVSTY,
+ ReadAdrBase], (instregex "^STRQroX")>;
+def : InstRW<[WriteVST], (instregex "^STN?P[SD]i")>;
+def : InstRW<[M5WriteVSTH], (instregex "^STN?PQi")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STP[SD](post|pre)")>;
+def : InstRW<[M5WriteVSTJ,
+ WriteAdr], (instregex "^STPQ(post|pre)")>;
+
+// ASIMD instructions.
+def : InstRW<[M5WriteNHAD1], (instregex "^[SU]ABDL?v")>;
+def : InstRW<[M5WriteNHAD3], (instregex "^[SU]ABAL?v")>;
+def : InstRW<[M5WriteNMSC1], (instregex "^ABSv")>;
+def : InstRW<[M5WriteNALU2], (instregex "^(ADD|NEG|SUB)v")>;
+def : InstRW<[M5WriteNHAD3], (instregex "^[SU]?ADDL?Pv")>;
+def : InstRW<[M5WriteNHAD3], (instregex "^[SU]H(ADD|SUB)v")>;
+def : InstRW<[M5WriteNHAD3], (instregex "^[SU](ADD|SUB)[LW]v")>;
+def : InstRW<[M5WriteNHAD3], (instregex "^R?(ADD|SUB)HN2?v")>;
+def : InstRW<[M5WriteNHAD3], (instregex "^[SU]Q(ADD|SUB)v")>;
+def : InstRW<[M5WriteNHAD3], (instregex "^(SU|US)QADDv")>;
+def : InstRW<[M5WriteNHAD3], (instregex "^[SU]RHADDv")>;
+def : InstRW<[M5WriteNMSC1], (instregex "^SQ(ABS|NEG)v")>;
+def : InstRW<[M5WriteNHAD3], (instregex "^[SU]?ADDL?Vv")>;
+def : InstRW<[M5WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
+def : InstRW<[M5WriteNALU2], (instregex "^CMTSTv")>;
+def : InstRW<[M5WriteNALU2], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>;
+def : InstRW<[M5WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>;
+def : InstRW<[M5WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>;
+def : InstRW<[M5WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>;
+def : InstRW<[M5WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>;
+def : InstRW<[M5WriteNMUL3,
+ M5ReadNMULM1], (instregex "^ML[AS]v")>;
+def : InstRW<[M5WriteNMUL3,
+ M5ReadNMULM1], (instregex "^SQRDML[AS]H")>;
+def : InstRW<[M5WriteNMUL3], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M5WriteNMUL3,
+ M5ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>;
+def : InstRW<[M5WriteNMUL3,
+ M5ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M5WriteNMUL3,
+ M5ReadNMULM1], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>;
+def : InstRW<[M5WriteNDOT2], (instregex "^[SU]DOT(lane)?v")>;
+def : InstRW<[M5WriteNHAD3], (instregex "^[SU]ADALPv")>;
+def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>;
+def : InstRW<[M5WriteNSHT2], (instregex "^SHL[dv]")>;
+def : InstRW<[M5WriteNSHT2], (instregex "^S[LR]I[dv]")>;
+def : InstRW<[M5WriteNSHT2], (instregex "^[SU]SH[LR][dv]")>;
+def : InstRW<[M5WriteNSHT2], (instregex "^[SU]?SHLLv")>;
+def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]?Q?R?SHRU?N[bhsv]")>;
+def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]RSH[LR][dv]")>;
+def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]QR?SHLU?[bhsdv]")>;
+
+// ASIMD FP instructions.
+def : InstRW<[M5WriteNSHF2], (instregex "^FABSv.f(16|32|64)")>;
+def : InstRW<[M5WriteFADD2], (instregex "^F(ABD|ADD|SUB)v.f(16|32|64)")>;
+def : InstRW<[M5WriteFADD2], (instregex "^FADDPv.f(16|32|64)")>;
+def : InstRW<[M5WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
+def : InstRW<[M5WriteFCVT2], (instregex "^FCVT(L|N|XN)v")>;
+def : InstRW<[M5WriteFCVT2A], (instregex "^FCVT[AMNPZ][SU]v")>;
+def : InstRW<[M5WriteFCVT2], (instregex "^[SU]CVTFv.[fi](16|32|64)")>;
+def : InstRW<[M5WriteFDIV7], (instrs FDIVv4f16)>;
+def : InstRW<[M5WriteNEONV], (instrs FDIVv8f16)>;
+def : InstRW<[M5WriteFDIV7], (instrs FDIVv2f32)>;
+def : InstRW<[M5WriteNEONV], (instrs FDIVv4f32)>;
+def : InstRW<[M5WriteNEONW], (instrs FDIVv2f64)>;
+def : InstRW<[M5WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>;
+def : InstRW<[M5WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
+def : InstRW<[M5WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>;
+def : InstRW<[M5WriteFMAC3], (instregex "^FMULX?v.[fi](16|32|64)")>;
+def : InstRW<[M5WriteFMAC4,
+ M5ReadFMACM1], (instregex "^FML[AS]v.[fi](16|32|64)")>;
+def : InstRW<[M5WriteNALU2], (instregex "^FNEGv.f(16|32|64)")>;
+def : InstRW<[M5WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>;
+def : InstRW<[M5WriteFSQR7], (instrs FSQRTv4f16)>;
+def : InstRW<[M5WriteNEONU], (instrs FSQRTv8f16)>;
+def : InstRW<[M5WriteFSQR8], (instrs FSQRTv2f32)>;
+def : InstRW<[M5WriteNEONX], (instrs FSQRTv4f32)>;
+def : InstRW<[M5WriteNEONY], (instrs FSQRTv2f64)>;
+
+// ASIMD miscellaneous instructions.
+def : InstRW<[M5WriteNALU2], (instregex "^RBITv")>;
+def : InstRW<[M5WriteNALU2], (instregex "^(BIF|BIT|BSL)v")>;
+def : InstRW<[M5WriteNALU2], (instregex "^CL[STZ]v")>;
+def : InstRW<[M5WriteNEONB], (instregex "^DUPv.+gpr")>;
+def : InstRW<[M5WriteNSHF2], (instregex "^CPY")>;
+def : InstRW<[M5WriteNSHF2], (instregex "^DUPv.+lane")>;
+def : InstRW<[M5WriteNSHF2], (instregex "^EXTv")>;
+def : InstRW<[M5WriteNSHT4A], (instregex "^XTNv")>;
+def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]?QXTU?Nv")>;
+def : InstRW<[M5WriteNEONB], (instregex "^INSv.+gpr")>;
+def : InstRW<[M5WriteNSHF2], (instregex "^INSv.+lane")>;
+def : InstRW<[M5WriteMOVI], (instregex "^(MOV|MVN)I")>;
+def : InstRW<[M5WriteNALU1], (instregex "^FMOVv.f(16|32|64)")>;
+def : InstRW<[M5WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(16|32|64)")>;
+def : InstRW<[M5WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>;
+def : InstRW<[M5WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(16|32|64)")>;
+def : InstRW<[M5WriteNSHF2], (instregex "^REV(16|32|64)v")>;
+def : InstRW<[M5WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>;
+def : InstRW<[M5WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>;
+def : InstRW<[M5WriteNSHFC], (instregex "^TB[LX]v(8|16)i8Three")>;
+def : InstRW<[M5WriteNSHFD], (instregex "^TB[LX]v(8|16)i8Four")>;
+def : InstRW<[M5WriteNEONP], (instregex "^[SU]MOVv")>;
+def : InstRW<[M5WriteNSHF2], (instregex "^(TRN|UZP|ZIP)[12]v")>;
+
+// ASIMD load instructions.
+def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[WriteVLD,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+def : InstRW<[M5WriteVLDA], (instregex "LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[M5WriteVLDA,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+def : InstRW<[M5WriteVLDB], (instregex "LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[M5WriteVLDB,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+def : InstRW<[M5WriteVLDC], (instregex "LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[M5WriteVLDC,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+def : InstRW<[M5WriteVLDD], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[M5WriteVLDD,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[WriteVLD,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+def : InstRW<[M5WriteVLDF], (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)$")>;
+def : InstRW<[M5WriteVLDF,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)_POST$")>;
+def : InstRW<[M5WriteVLDG], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[M5WriteVLDG,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[M5WriteVLDA], (instregex "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[M5WriteVLDA,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+def : InstRW<[M5WriteVLDI], (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)$")>;
+def : InstRW<[M5WriteVLDI,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)_POST$")>;
+def : InstRW<[M5WriteVLDJ], (instregex "LD3i(8|16|32)$")>;
+def : InstRW<[M5WriteVLDJ,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD3i(8|16|32)_POST$")>;
+def : InstRW<[M5WriteVLDL], (instregex "LD3i64$")>;
+def : InstRW<[M5WriteVLDL,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD3i64_POST$")>;
+def : InstRW<[M5WriteVLDB], (instregex "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[M5WriteVLDB,
+ M5WriteA1X], (instregex "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+def : InstRW<[M5WriteVLDN], (instregex "LD4Fourv(8b|16b|4h|8h|2s|4s|2d)$")>;
+def : InstRW<[M5WriteVLDN,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD4Fourv(8b|16b|4h|8h|2s|4s|2d)_POST$")>;
+def : InstRW<[M5WriteVLDK], (instregex "LD4i(8|16|32)$")>;
+def : InstRW<[M5WriteVLDK,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD4i(8|16|32)_POST$")>;
+def : InstRW<[M5WriteVLDM], (instregex "LD4i64$")>;
+def : InstRW<[M5WriteVLDM,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD4i64_POST$")>;
+def : InstRW<[M5WriteVLDC], (instregex "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[M5WriteVLDC,
+ M5WriteA1X,
+ WriteAdr], (instregex "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD store instructions.
+def : InstRW<[WriteVST], (instregex "ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[WriteVST,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+def : InstRW<[M5WriteVSTA], (instregex "ST1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[M5WriteVSTA,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+def : InstRW<[M5WriteVSTB], (instregex "ST1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[M5WriteVSTB,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+def : InstRW<[M5WriteVSTC], (instregex "ST1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[M5WriteVSTC,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+def : InstRW<[WriteVST], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[WriteVST,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[M5WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[M5WriteVSTD,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[M5WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M5WriteVSTE,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M5WriteVSTD], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[M5WriteVSTD,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[M5WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[M5WriteVSTF,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[M5WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M5WriteVSTG,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M5WriteVSTA], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[M5WriteVSTA,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[M5WriteVSTL], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M5WriteVSTL,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[M5WriteVSTI], (instregex "ST4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M5WriteVSTI,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST4Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M5WriteVSTA], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[M5WriteVSTA,
+ M5WriteA1X,
+ WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+
+// Cryptography instructions.
+def : InstRW<[M5WriteNCRY2], (instregex "^AES[DE]")>;
+def : InstRW<[M5WriteNCRY2,
+ M5ReadAESM2], (instregex "^AESI?MC")>;
+def : InstRW<[M5WriteNCRY2A], (instregex "^PMULv")>;
+def : InstRW<[M5WriteNCRY1A], (instregex "^PMULLv(1|8)i")>;
+def : InstRW<[M5WriteNCRY3A], (instregex "^PMULLv(2|16)i")>;
+def : InstRW<[M5WriteNCRY2A], (instregex "^SHA1(H|SU[01])")>;
+def : InstRW<[M5WriteNCRY5A], (instregex "^SHA1[CMP]")>;
+def : InstRW<[M5WriteNCRY2A], (instrs SHA256SU0rr)>;
+def : InstRW<[M5WriteNCRY5A], (instrs SHA256SU1rrr)>;
+def : InstRW<[M5WriteNCRY5A], (instregex "^SHA256H2?")>;
+
+// CRC instructions.
+def : InstRW<[M5WriteF2,
+ M5ReadFM1], (instregex "^CRC32C?[BHWX]")>;
+
+} // SchedModel = ExynosM5Model
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
index 0c1d82d354c0..fcda2394bacf 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
@@ -50,6 +50,9 @@ def ExynosArithFn : TIIPredicate<
MCReturnStatement<ExynosCheckShift>>,
MCOpcodeSwitchCase<
IsArithUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>,
+ MCOpcodeSwitchCase<
+ IsArithImmOp.ValidOpcodes,
MCReturnStatement<TruePred>>],
MCReturnStatement<FalsePred>>>;
def ExynosArithPred : MCSchedPredicate<ExynosArithFn>;
@@ -63,6 +66,9 @@ def ExynosLogicFn : TIIPredicate<
MCReturnStatement<ExynosCheckShift>>,
MCOpcodeSwitchCase<
IsLogicUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>,
+ MCOpcodeSwitchCase<
+ IsLogicImmOp.ValidOpcodes,
MCReturnStatement<TruePred>>],
MCReturnStatement<FalsePred>>>;
def ExynosLogicPred : MCSchedPredicate<ExynosLogicFn>;
@@ -81,6 +87,9 @@ def ExynosLogicExFn : TIIPredicate<
CheckShiftBy8]>]>>>,
MCOpcodeSwitchCase<
IsLogicUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>,
+ MCOpcodeSwitchCase<
+ IsLogicImmOp.ValidOpcodes,
MCReturnStatement<TruePred>>],
MCReturnStatement<FalsePred>>>;
def ExynosLogicExPred : MCSchedPredicate<ExynosLogicExFn>;
@@ -100,7 +109,10 @@ def ExynosScaledIdxFn : TIIPredicate<"isExynosScaledAddr",
def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
// Identify FP instructions.
-def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
+def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckHForm,
+ CheckSForm,
+ CheckDForm,
+ CheckQForm]>>;
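+// (With CheckHForm and CheckSForm added, scalar half- and single-precision
+// results now also classify as FP for scheduling purposes.)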
// Identify 128-bit NEON instructions.
def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredicates.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
index 0ef0f3f8675a..fc13b23b4cf8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -41,7 +41,7 @@ let FunctionMapper = "AArch64_AM::getMemExtendType" in {
// Check for scaling in the register offset addressing mode.
let FunctionMapper = "AArch64_AM::getMemDoShift" in
-def CheckMemScaled : CheckImmOperandSimple<3>;
+def CheckMemScaled : CheckImmOperandSimple<4>;
// Check the shifting type in arithmetic and logic instructions.
let FunctionMapper = "AArch64_AM::getShiftType" in {
@@ -60,6 +60,76 @@ foreach I = {0-3, 8} in {
// Generic predicates.
+// Identify whether an instruction is the 16-bit NEON form based on its result.
+def CheckHForm : CheckAll<[CheckIsRegOperand<0>,
+ CheckAny<[CheckRegOperand<0, H0>,
+ CheckRegOperand<0, H1>,
+ CheckRegOperand<0, H2>,
+ CheckRegOperand<0, H3>,
+ CheckRegOperand<0, H4>,
+ CheckRegOperand<0, H5>,
+ CheckRegOperand<0, H6>,
+ CheckRegOperand<0, H7>,
+ CheckRegOperand<0, H8>,
+ CheckRegOperand<0, H9>,
+ CheckRegOperand<0, H10>,
+ CheckRegOperand<0, H11>,
+ CheckRegOperand<0, H12>,
+ CheckRegOperand<0, H13>,
+ CheckRegOperand<0, H14>,
+ CheckRegOperand<0, H15>,
+ CheckRegOperand<0, H16>,
+ CheckRegOperand<0, H17>,
+ CheckRegOperand<0, H18>,
+ CheckRegOperand<0, H19>,
+ CheckRegOperand<0, H20>,
+ CheckRegOperand<0, H21>,
+ CheckRegOperand<0, H22>,
+ CheckRegOperand<0, H23>,
+ CheckRegOperand<0, H24>,
+ CheckRegOperand<0, H25>,
+ CheckRegOperand<0, H26>,
+ CheckRegOperand<0, H27>,
+ CheckRegOperand<0, H28>,
+ CheckRegOperand<0, H29>,
+ CheckRegOperand<0, H30>,
+ CheckRegOperand<0, H31>]>]>;
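+// (All 32 H registers must be listed because CheckRegOperand matches one
+// specific register; CheckSForm below enumerates the S registers likewise.)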
+
+// Identify whether an instruction is the 32-bit NEON form based on its result.
+def CheckSForm : CheckAll<[CheckIsRegOperand<0>,
+ CheckAny<[CheckRegOperand<0, S0>,
+ CheckRegOperand<0, S1>,
+ CheckRegOperand<0, S2>,
+ CheckRegOperand<0, S3>,
+ CheckRegOperand<0, S4>,
+ CheckRegOperand<0, S5>,
+ CheckRegOperand<0, S6>,
+ CheckRegOperand<0, S7>,
+ CheckRegOperand<0, S8>,
+ CheckRegOperand<0, S9>,
+ CheckRegOperand<0, S10>,
+ CheckRegOperand<0, S11>,
+ CheckRegOperand<0, S12>,
+ CheckRegOperand<0, S13>,
+ CheckRegOperand<0, S14>,
+ CheckRegOperand<0, S15>,
+ CheckRegOperand<0, S16>,
+ CheckRegOperand<0, S17>,
+ CheckRegOperand<0, S18>,
+ CheckRegOperand<0, S19>,
+ CheckRegOperand<0, S20>,
+ CheckRegOperand<0, S21>,
+ CheckRegOperand<0, S22>,
+ CheckRegOperand<0, S23>,
+ CheckRegOperand<0, S24>,
+ CheckRegOperand<0, S25>,
+ CheckRegOperand<0, S26>,
+ CheckRegOperand<0, S27>,
+ CheckRegOperand<0, S28>,
+ CheckRegOperand<0, S29>,
+ CheckRegOperand<0, S30>,
+ CheckRegOperand<0, S31>]>]>;
+
// Identify whether an instruction is the 64-bit NEON form based on its result.
def CheckDForm : CheckAll<[CheckIsRegOperand<0>,
CheckAny<[CheckRegOperand<0, D0>,
@@ -249,9 +319,10 @@ def IsLoadRegOffsetOp : CheckOpcode<[PRFMroW, PRFMroX,
LDRBroW, LDRBroX,
LDRHroW, LDRHroX,
LDRSroW, LDRSroX,
- LDRDroW, LDRDroX]>;
+ LDRDroW, LDRDroX,
+ LDRQroW, LDRQroX]>;
-// Identify whether an instruction is a load
+// Identify whether an instruction is a store
// using the register offset addressing mode.
def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX,
STRHHroW, STRHHroX,
@@ -260,7 +331,8 @@ def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX,
STRBroW, STRBroX,
STRHroW, STRHroX,
STRSroW, STRSroX,
- STRDroW, STRDroX]>;
+ STRDroW, STRDroX,
+ STRQroW, STRQroX]>;
// Identify whether an instruction is a load or
// store using the register offset addressing mode.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 674ea19b082f..e2a293c06877 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -1517,25 +1517,6 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv")>;
// ASIMD move, FP immed
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;
-// ASIMD table lookup, D-form
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>;
-
-// ASIMD table lookup, Q-form
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>;
-def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>;
-
-// ASIMD transpose
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>;
-
-// ASIMD unzip/zip
-def : InstRW<[THX2T99Write_5Cyc_F01],
- (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
-
// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
def : InstRW<[THX2T99Write_5Cyc_F01],
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h
index 13f12a6c9c30..f95b5dc5246e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H
#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
@@ -45,8 +46,7 @@ public:
StackOffset() : Bytes(0), ScalableBytes(0) {}
StackOffset(int64_t Offset, MVT::SimpleValueType T) : StackOffset() {
- assert(MVT(T).getSizeInBits() % 8 == 0 &&
- "Offset type is not a multiple of bytes");
+ assert(MVT(T).isByteSized() && "Offset type is not a multiple of bytes");
*this += Part(Offset, T);
}
@@ -56,11 +56,11 @@ public:
StackOffset &operator=(const StackOffset &) = default;
StackOffset &operator+=(const StackOffset::Part &Other) {
- int64_t OffsetInBytes = Other.first * (Other.second.getSizeInBits() / 8);
- if (Other.second.isScalableVector())
- ScalableBytes += OffsetInBytes;
+ const TypeSize Size = Other.second.getSizeInBits();
+ if (Size.isScalable())
+ ScalableBytes += Other.first * ((int64_t)Size.getKnownMinSize() / 8);
else
- Bytes += OffsetInBytes;
+ Bytes += Other.first * ((int64_t)Size.getFixedSize() / 8);
return *this;
}
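// Illustrative note (not part of this change): StackOffset(2, MVT::i64)
// adds 2 * 8 = 16 fixed bytes, while StackOffset(1, MVT::nxv2i64) adds
// 1 * 16 = 16 scalable bytes, which are multiplied by vscale at runtime.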
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index e6dbe01d3807..975502818fcd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
index 3cc556f74aea..73bd434ef123 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
@@ -97,23 +97,49 @@ FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
switch (Opcode) {
+ case AArch64::LDRBBui:
+ case AArch64::LDRHHui:
case AArch64::LDRWui:
- case AArch64::LDRSHWui:
case AArch64::LDRXui:
+
case AArch64::LDRBui:
- case AArch64::LDRBBui:
case AArch64::LDRHui:
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
+
+ case AArch64::LDRSHWui:
+ case AArch64::LDRSHXui:
+
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSBXui:
+
+ case AArch64::LDRSWui:
+
+ case AArch64::STRBBui:
+ case AArch64::STRHHui:
case AArch64::STRWui:
case AArch64::STRXui:
+
case AArch64::STRBui:
- case AArch64::STRBBui:
case AArch64::STRHui:
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
+
+ case AArch64::LDPWi:
+ case AArch64::LDPXi:
+ case AArch64::LDPSi:
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+
+ case AArch64::LDPSWi:
+
+ case AArch64::STPWi:
+ case AArch64::STPXi:
+ case AArch64::STPSi:
+ case AArch64::STPDi:
+ case AArch64::STPQi:
return true;
default:
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 558bea368eff..3636d8d2b628 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -88,18 +88,16 @@ void AArch64Subtarget::initializeProperties() {
case CortexA76:
PrefFunctionLogAlignment = 4;
break;
- case Cyclone:
+ case AppleA7:
+ case AppleA10:
+ case AppleA11:
+ case AppleA12:
+ case AppleA13:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;
MaxPrefetchIterationsAhead = 3;
break;
- case ExynosM1:
- MaxInterleaveFactor = 4;
- MaxJumpTableSize = 8;
- PrefFunctionLogAlignment = 4;
- PrefLoopLogAlignment = 3;
- break;
case ExynosM3:
MaxInterleaveFactor = 4;
MaxJumpTableSize = 20;
@@ -257,6 +255,10 @@ unsigned AArch64Subtarget::classifyGlobalFunctionReference(
!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return AArch64II::MO_GOT;
+ // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
+ if (getTargetTriple().isOSWindows())
+ return ClassifyGlobalReference(GV, TM);
+
return AArch64II::MO_NO_FLAG;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
index f3212fae8e5e..79c2c161d3cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -38,6 +38,11 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
enum ARMProcFamilyEnum : uint8_t {
Others,
+ AppleA7,
+ AppleA10,
+ AppleA11,
+ AppleA12,
+ AppleA13,
CortexA35,
CortexA53,
CortexA55,
@@ -47,8 +52,6 @@ public:
CortexA73,
CortexA75,
CortexA76,
- Cyclone,
- ExynosM1,
ExynosM3,
Falkor,
Kryo,
@@ -476,6 +479,8 @@ public:
bool enableEarlyIfConversion() const override;
+ bool enableAdvancedRASplitCost() const override { return true; }
+
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
bool isCallingConvWin64(CallingConv::ID CC) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 05249a4ea6a8..6e82d326e519 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -1492,5 +1492,5 @@ def : ROSysReg<"TRBIDR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b111>;
// Cyclone specific system registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::ProcCyclone} }] in
+let Requires = [{ {AArch64::ProcAppleA7} }] in
def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index b3ed96e815be..115a7da8a6d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Pass.h"
@@ -39,6 +40,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/CFGuard.h"
#include "llvm/Transforms/Scalar.h"
#include <memory>
#include <string>
@@ -152,7 +154,7 @@ static cl::opt<bool>
cl::desc("Enable the AAcrh64 branch target pass"),
cl::init(true));
-extern "C" void LLVMInitializeAArch64Target() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
@@ -247,7 +249,10 @@ getEffectiveAArch64CodeModel(const Triple &TT, Optional<CodeModel::Model> CM,
// The default MCJIT memory managers make no guarantees about where they can
// find an executable page; JITed code needs to be able to refer to globals
// no matter how far away they are.
- if (JIT)
+ // We should set CodeModel::Small for Windows ARM64 in JIT mode, since with
+ // the large code model LLVM generates a sequence of 4 MOV instructions, and
+ // Windows doesn't support relocating such a long branch sequence (4 MOVs).
+ if (JIT && !TT.isOSWindows())
return CodeModel::Large;
return CodeModel::Small;
}
@@ -283,6 +288,17 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
this->Options.TrapUnreachable = true;
}
+ if (this->Options.TLSSize == 0) // default
+ this->Options.TLSSize = 24;
+ if ((getCodeModel() == CodeModel::Small ||
+ getCodeModel() == CodeModel::Kernel) &&
+ this->Options.TLSSize > 32)
+ // for the small (and kernel) code model, the maximum TLS size is 4GiB
+ this->Options.TLSSize = 32;
+ else if (getCodeModel() == CodeModel::Tiny && this->Options.TLSSize > 24)
+ // for the tiny code model, the maximum TLS size is 1MiB (< 16MiB)
+ this->Options.TLSSize = 24;
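+ // (TLSSize is expressed in bits of addressable TLS area: 24 -> 16MiB,
+ // 32 -> 4GiB.)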
+
// Enable GlobalISel at or below EnableGlobalISelAtO, unless this is
// MachO/CodeModel::Large, which GlobalISel does not support.
if (getOptLevel() <= EnableGlobalISelAtO &&
@@ -459,6 +475,10 @@ void AArch64PassConfig::addIRPasses() {
addPass(createAArch64StackTaggingPass(/* MergeInit = */ TM->getOptLevel() !=
CodeGenOpt::None));
+
+ // Add Control Flow Guard checks.
+ if (TM->getTargetTriple().isOSWindows())
+ addPass(createCFGuardCheckPass());
}
// Pass Pipeline Configuration
@@ -563,7 +583,7 @@ void AArch64PassConfig::addPreRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
addPass(createAArch64AdvSIMDScalar());
// The AdvSIMD pass may produce copies that can be rewritten to
- // be register coaleascer friendly.
+ // be register coalescer friendly.
addPass(&PeepholeOptimizerID);
}
}
@@ -609,13 +629,18 @@ void AArch64PassConfig::addPreEmitPass() {
if (EnableA53Fix835769)
addPass(createAArch64A53Fix835769());
+
+ if (EnableBranchTargets)
+ addPass(createAArch64BranchTargetsPass());
+
// Relax conditional branch instructions if they're otherwise out of
// range of their destination.
if (BranchRelaxation)
addPass(&BranchRelaxationPassID);
- if (EnableBranchTargets)
- addPass(createAArch64BranchTargetsPass());
+ // Identify valid longjmp targets for Windows Control Flow Guard.
+ if (TM->getTargetTriple().isOSWindows())
+ addPass(createCFGuardLongjmpPass());
if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
addPass(createAArch64CompressJumpTablesPass());
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index dc916a7b3407..4724d6b8daea 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
using namespace llvm;
@@ -80,8 +81,8 @@ int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return std::max(1, Cost);
}
-int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -146,8 +147,8 @@ int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
-int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -156,6 +157,12 @@ int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
if (BitSize == 0)
return TTI::TCC_Free;
+ // Most (all?) AArch64 intrinsics do not support folding immediates into the
+ // selected instruction, so we compute the materialization cost for the
+ // immediate directly.
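+ // (This assumes the aarch64_* intrinsic IDs form a contiguous range in the
+ // Intrinsic::ID enumeration.)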
+ if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
+ return AArch64TTIImpl::getIntImmCost(Imm, Ty);
+
switch (IID) {
default:
return TTI::TCC_Free;
@@ -478,7 +485,8 @@ int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
int AArch64TTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
@@ -632,12 +640,12 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
}
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
- unsigned Alignment, unsigned AddressSpace,
+ MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) {
auto LT = TLI->getTypeLegalizationCost(DL, Ty);
if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
- LT.second.is128BitVector() && Alignment < 16) {
+ LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
// Unaligned stores are extremely inefficient. We don't split all
// unaligned 128-bit stores because the negative impact that has shown in
// practice on inlined block copy code.
@@ -703,8 +711,8 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
if (!I->isVectorTy())
continue;
if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
- Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
- getMemoryOpCost(Instruction::Load, I, 128, 0);
+ Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
+ getMemoryOpCost(Instruction::Load, I, Align(128), 0);
}
return Cost;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 32c59f41e1c3..6f4569a49783 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -73,9 +73,10 @@ public:
using BaseT::getIntImmCost;
int getIntImmCost(int64_t Val);
int getIntImmCost(const APInt &Imm, Type *Ty);
- int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
- int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
/// @}
@@ -124,7 +125,8 @@ public:
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr);
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
@@ -134,7 +136,7 @@ public:
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
- int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
@@ -147,6 +149,29 @@ public:
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
+ bool isLegalMaskedLoadStore(Type *DataType, MaybeAlign Alignment) {
+ if (!isa<VectorType>(DataType) || !ST->hasSVE())
+ return false;
+
+ Type *Ty = DataType->getVectorElementType();
+ if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
+ return true;
+
+ if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
+ Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
+ return true;
+
+ return false;
+ }
+
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) {
+ return isLegalMaskedLoadStore(DataType, Alignment);
+ }
+
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) {
+ return isLegalMaskedLoadStore(DataType, Alignment);
+ }
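+
+ // (These TTI hooks let passes such as the vectorizer decide whether a
+ // masked load/store intrinsic may be emitted for a given data type.)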
+
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 4fb409f020d9..be4c96022472 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1221,8 +1221,9 @@ public:
// Calculate its FP value.
APFloat RealVal(APFloat::IEEEdouble());
- if (RealVal.convertFromString(Desc->Repr, APFloat::rmTowardZero) !=
- APFloat::opOK)
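+ // Note: convertFromString() now returns Expected<opStatus>; errorToBool()
+ // consumes a failure Error and returns true in that case.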
+ auto StatusOrErr =
+ RealVal.convertFromString(Desc->Repr, APFloat::rmTowardZero);
+ if (errorToBool(StatusOrErr.takeError()) || *StatusOrErr != APFloat::opOK)
llvm_unreachable("FP immediate is not exact");
if (getFPImm().bitwiseIsEqual(RealVal))
@@ -2577,8 +2578,13 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
} else {
// Parse FP representation.
APFloat RealVal(APFloat::IEEEdouble());
- auto Status =
+ auto StatusOrErr =
RealVal.convertFromString(Tok.getString(), APFloat::rmTowardZero);
+ if (errorToBool(StatusOrErr.takeError())) {
+ TokError("invalid floating point representation");
+ return MatchOperand_ParseFail;
+ }
+
if (isNegative)
RealVal.changeSign();
@@ -2589,7 +2595,7 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
AArch64Operand::CreateToken(".0", false, S, getContext()));
} else
Operands.push_back(AArch64Operand::CreateFPImm(
- RealVal, Status == APFloat::opOK, S, getContext()));
+ RealVal, *StatusOrErr == APFloat::opOK, S, getContext()));
}
Parser.Lex(); // Eat the token.
@@ -5509,7 +5515,7 @@ AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
}
/// Force static initialization.
-extern "C" void LLVMInitializeAArch64AsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64AsmParser() {
RegisterMCAsmParser<AArch64AsmParser> X(getTheAArch64leTarget());
RegisterMCAsmParser<AArch64AsmParser> Y(getTheAArch64beTarget());
RegisterMCAsmParser<AArch64AsmParser> Z(getTheARM64Target());
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 145ffef6f6f9..d6db88603429 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -251,7 +251,6 @@ static MCDisassembler *createAArch64Disassembler(const Target &T,
DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &OS,
raw_ostream &CS) const {
CommentStream = &CS;
@@ -278,7 +277,7 @@ createAArch64ExternalSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
SymbolLookUp, DisInfo);
}
-extern "C" void LLVMInitializeAArch64Disassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Disassembler() {
TargetRegistry::RegisterMCDisassembler(getTheAArch64leTarget(),
createAArch64Disassembler);
TargetRegistry::RegisterMCDisassembler(getTheAArch64beTarget(),
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index 2ba5a695701f..374a89edcb74 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -25,8 +25,7 @@ public:
MCDisassembler::DecodeStatus
getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes,
- uint64_t Address, raw_ostream &VStream,
- raw_ostream &CStream) const override;
+ uint64_t Address, raw_ostream &CStream) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 21ce5785ea5e..9db746733aa3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -150,6 +150,19 @@ static unsigned AdrImmBits(unsigned Value) {
return (hi19 << 5) | (lo2 << 29);
}
+static bool valueFitsIntoFixupKind(unsigned Kind, uint64_t Value) {
+ unsigned NumBits;
+ switch (Kind) {
+ case FK_Data_1: NumBits = 8; break;
+ case FK_Data_2: NumBits = 16; break;
+ case FK_Data_4: NumBits = 32; break;
+ case FK_Data_8: NumBits = 64; break;
+ default: return true;
+ }
+ return isUIntN(NumBits, Value) ||
+ isIntN(NumBits, static_cast<int64_t>(Value));
+}
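+// For example, a value of 0x1ff attached to an FK_Data_1 fixup fits neither
+// a uint8_t nor an int8_t, so adjustFixupValue() below reports
+// "fixup value too large for data type!".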
+
static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
uint64_t Value, MCContext &Ctx,
const Triple &TheTriple, bool IsResolved) {
@@ -309,11 +322,14 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
if (Value & 0x3)
Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
return (Value >> 2) & 0x3ffffff;
- case FK_NONE:
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
+ if (!valueFitsIntoFixupKind(Fixup.getTargetKind(), Value))
+ Ctx.reportError(Fixup.getLoc(), "fixup value too large for data type!");
+ LLVM_FALLTHROUGH;
+ case FK_NONE:
case FK_SecRel_2:
case FK_SecRel_4:
return Value;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 1a16468484ad..469892213ef8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -56,9 +56,9 @@ void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
-void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot,
- const MCSubtargetInfo &STI) {
+void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Check for special encodings and print the canonical alias instead.
unsigned Opcode = MI->getOpcode();
@@ -282,6 +282,12 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
+ if (Opcode == AArch64::SPACE) {
+ O << '\t' << MAI.getCommentString() << " SPACE";
+ printAnnotation(O, Annot);
+ return;
+ }
+
// Instruction TSB is specified as a one operand instruction, but 'csync' is
// not encoded, so for printing it is treated as a special case here:
if (Opcode == AArch64::TSB) {
@@ -290,7 +296,7 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
if (!printAliasInstr(MI, STI, O))
- printInstruction(MI, STI, O);
+ printInstruction(MI, Address, STI, O);
printAnnotation(O, Annot);
@@ -698,9 +704,10 @@ static const LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
return nullptr;
}
-void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+void AArch64AppleInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned Opcode = MI->getOpcode();
StringRef Layout;
@@ -748,7 +755,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- AArch64InstPrinter::printInst(MI, O, Annot, STI);
+ AArch64InstPrinter::printInst(MI, Address, Annot, STI, O);
}
bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 5311f73ca21c..993f379b5343 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -25,13 +25,13 @@ public:
AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
- virtual void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
+ virtual void printInstruction(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
@@ -197,11 +197,11 @@ public:
AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
- void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O) override;
+ void printInstruction(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O) override;
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 8cb7a1672983..8f4d9cb94d60 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -601,8 +601,12 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call);
Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup));
return;
- } else if (MI.getOpcode() == AArch64::CompilerBarrier) {
- // This just prevents the compiler from reordering accesses, no actual code.
+ }
+
+ if (MI.getOpcode() == AArch64::CompilerBarrier ||
+ MI.getOpcode() == AArch64::SPACE) {
+ // CompilerBarrier just prevents the compiler from reordering accesses, and
+ // SPACE just increases basic block size; in both cases, no actual code is
+ // emitted.
return;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 1d583ec0087b..7dc3665baabc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -238,7 +238,8 @@ static MCRegisterInfo *createAArch64MCRegisterInfo(const Triple &Triple) {
}
static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TheTriple) {
+ const Triple &TheTriple,
+ const MCTargetOptions &Options) {
MCAsmInfo *MAI;
if (TheTriple.isOSBinFormatMachO())
MAI = new AArch64MCAsmInfoDarwin(TheTriple.getArch() == Triple::aarch64_32);
@@ -365,7 +366,7 @@ static MCInstrAnalysis *createAArch64InstrAnalysis(const MCInstrInfo *Info) {
}
// Force static initialization.
-extern "C" void LLVMInitializeAArch64TargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64TargetMC() {
for (Target *T : {&getTheAArch64leTarget(), &getTheAArch64beTarget(),
&getTheAArch64_32Target(), &getTheARM64Target(),
&getTheARM64_32Target()}) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8ccf6aa675ba..a172b8d7e6b0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -18,7 +18,7 @@ def SVEPatternOperand : AsmOperandClass {
let DiagnosticType = "InvalidSVEPattern";
}
-def sve_pred_enum : Operand<i32>, ImmLeaf<i32, [{
+def sve_pred_enum : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 32);
}]> {
@@ -202,6 +202,19 @@ def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64
return AArch64_AM::isSVEAddSubImm<int64_t>(Imm);
}]>;
+def SVEAddSubImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8>", []>;
+def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", []>;
+def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
+def SVEAddSubImm64Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
+
+def SVELogicalImm8Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i8>", []>;
+def SVELogicalImm16Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i16>", []>;
+def SVELogicalImm32Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i32>", []>;
+def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
+
+def SVEArithUImmPat : ComplexPattern<i32, 1, "SelectSVEArithImm", []>;
+def SVEArithSImmPat : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>;
+
class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass {
let Name = "SVEExactFPImmOperand" # Suffix;
let DiagnosticType = "Invalid" # Name;
@@ -225,7 +238,7 @@ def sve_fpimm_zero_one
: SVEExactFPImmOperand<"ZeroOne", "AArch64ExactFPImm::zero",
"AArch64ExactFPImm::one">;
-def sve_incdec_imm : Operand<i32>, ImmLeaf<i32, [{
+def sve_incdec_imm : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
}]> {
let ParserMatchClass = Imm1_16Operand;
@@ -233,16 +246,21 @@ def sve_incdec_imm : Operand<i32>, ImmLeaf<i32, [{
let DecoderMethod = "DecodeSVEIncDecImm";
}
+// This allows i32 immediate extraction from i64-based arithmetic.
+def sve_cnt_mul_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, false>">;
+def sve_cnt_shl_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, true>">;
+
//===----------------------------------------------------------------------===//
// SVE PTrue - These are used extensively throughout the pattern matching so
// it's important we define them first.
//===----------------------------------------------------------------------===//
-class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty>
+class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
+ ValueType vt, SDPatternOperator op>
: I<(outs pprty:$Pd), (ins sve_pred_enum:$pattern),
asm, "\t$Pd, $pattern",
"",
- []>, Sched<[]> {
+ [(set (vt pprty:$Pd), (op sve_pred_enum:$pattern))]>, Sched<[]> {
bits<4> Pd;
bits<5> pattern;
let Inst{31-24} = 0b00100101;
@@ -258,11 +276,11 @@ class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty>
let Defs = !if(!eq (opc{0}, 1), [NZCV], []);
}
-multiclass sve_int_ptrue<bits<3> opc, string asm> {
- def _B : sve_int_ptrue<0b00, opc, asm, PPR8>;
- def _H : sve_int_ptrue<0b01, opc, asm, PPR16>;
- def _S : sve_int_ptrue<0b10, opc, asm, PPR32>;
- def _D : sve_int_ptrue<0b11, opc, asm, PPR64>;
+multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> {
+ def _B : sve_int_ptrue<0b00, opc, asm, PPR8, nxv16i1, op>;
+ def _H : sve_int_ptrue<0b01, opc, asm, PPR16, nxv8i1, op>;
+ def _S : sve_int_ptrue<0b10, opc, asm, PPR32, nxv4i1, op>;
+ def _D : sve_int_ptrue<0b11, opc, asm, PPR64, nxv2i1, op>;
def : InstAlias<asm # "\t$Pd",
(!cast<Instruction>(NAME # _B) PPR8:$Pd, 0b11111), 1>;
@@ -274,9 +292,12 @@ multiclass sve_int_ptrue<bits<3> opc, string asm> {
(!cast<Instruction>(NAME # _D) PPR64:$Pd, 0b11111), 1>;
}
+def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>;
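+// (AArch64ptrue takes the 5-bit predicate-pattern immediate as an i32
+// operand and produces one predicate vector; PTRUES is wired to null_frag,
+// i.e. it gets no ISel pattern.)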
+
let Predicates = [HasSVE] in {
- defm PTRUE : sve_int_ptrue<0b000, "ptrue">;
- defm PTRUES : sve_int_ptrue<0b001, "ptrues">;
+ defm PTRUE : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>;
+ defm PTRUES : sve_int_ptrue<0b001, "ptrues", null_frag>;
}
//===----------------------------------------------------------------------===//
@@ -288,11 +309,75 @@ class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1)),
(inst $Op1)>;
+class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
+ ValueType it, ComplexPattern cpx, Instruction inst>
+ : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))),
+ (inst $Op1, i32:$imm, i32:$shift)>;
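+// (The Reverse form matches the splatted immediate as the first operand,
+// presumably for reversed-operand instructions such as reverse subtract.)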
+
+class SVE_1_Op_Imm_OptLsl_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
+ ValueType it, ComplexPattern cpx, Instruction inst>
+ : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))),
+ (inst $Op1, i32:$imm, i32:$shift)>;
+
+class SVE_1_Op_Imm_Arith_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
+ ValueType it, ComplexPattern cpx, Instruction inst>
+ : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))),
+ (inst $Op1, i32:$imm)>;
+
+class SVE_1_Op_Imm_Log_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
+ ValueType it, ComplexPattern cpx, Instruction inst>
+ : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i64:$imm)))))),
+ (inst $Op1, i64:$imm)>;
+
+class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, Instruction inst>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
+ (inst $Op1, $Op2)>;
+
+class SVE_2_Op_Pat_Reduce_To_Neon<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, Instruction inst, SubRegIndex sub>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
+ (INSERT_SUBREG (vtd (IMPLICIT_DEF)), (inst $Op1, $Op2), sub)>;
+
class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst>
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
(inst $Op1, $Op2, $Op3)>;
+class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, ValueType vt4,
+ Instruction inst>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
+ (inst $Op1, $Op2, $Op3, $Op4)>;
+
+class SVE_2_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, Operand ImmTy, Instruction inst>
+: Pat<(vtd (op vt1:$Op1, (vt2 ImmTy:$Op2))),
+ (inst $Op1, ImmTy:$Op2)>;
+
+class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, Operand ImmTy,
+ Instruction inst>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2, (vt3 ImmTy:$Op3))),
+ (inst $Op1, $Op2, ImmTy:$Op3)>;
+
+class SVE_4_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, ValueType vt4,
+ Operand ImmTy, Instruction inst>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))),
+ (inst $Op1, $Op2, $Op3, ImmTy:$Op4)>;
+
+def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;
+
+//
+// Common but less generic patterns.
+//
+
+class SVE_1_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ Instruction inst, Instruction ptrue>
+: Pat<(vtd (op vt1:$Op1)),
+ (inst (IMPLICIT_DEF), (ptrue 31), $Op1)>;
+
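// Usage sketch (illustrative names): a helper such as SVE_2_Op_Pat is
// instantiated with concrete types, an operator, and an instruction, e.g.
//   def : SVE_2_Op_Pat<nxv2i64, int_sve_foo, nxv2i64, nxv2i1, FOO_D>;
// which TableGen expands to
//   def : Pat<(nxv2i64 (int_sve_foo nxv2i64:$Op1, nxv2i1:$Op2)),
//             (FOO_D $Op1, $Op2)>;
// int_sve_foo and FOO_D are hypothetical placeholders.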
//===----------------------------------------------------------------------===//
// SVE Predicate Misc Group
//===----------------------------------------------------------------------===//
@@ -355,15 +440,22 @@ class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
let Defs = [NZCV];
}
-multiclass sve_int_pfirst<bits<5> opc, string asm> {
- def : sve_int_pfirst_next<0b01, opc, asm, PPR8>;
+multiclass sve_int_pfirst<bits<5> opc, string asm, SDPatternOperator op> {
+ def _B : sve_int_pfirst_next<0b01, opc, asm, PPR8>;
+
+ def : SVE_2_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, !cast<Instruction>(NAME # _B)>;
}
-multiclass sve_int_pnext<bits<5> opc, string asm> {
+multiclass sve_int_pnext<bits<5> opc, string asm, SDPatternOperator op> {
def _B : sve_int_pfirst_next<0b00, opc, asm, PPR8>;
def _H : sve_int_pfirst_next<0b01, opc, asm, PPR16>;
def _S : sve_int_pfirst_next<0b10, opc, asm, PPR32>;
def _D : sve_int_pfirst_next<0b11, opc, asm, PPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -394,25 +486,66 @@ class sve_int_count_r<bits<2> sz8_64, bits<5> opc, string asm,
let Constraints = "$Rdn = $_Rdn";
}
-multiclass sve_int_count_r_s32<bits<5> opc, string asm> {
+multiclass sve_int_count_r_s32<bits<5> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64as32>;
def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64as32>;
def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64as32>;
def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64as32>;
+
+ def : Pat<(i32 (op GPR32:$Rn, (nxv16i1 PPRAny:$Pg))),
+ (EXTRACT_SUBREG (!cast<Instruction>(NAME # _B) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>;
+ def : Pat<(i64 (sext (i32 (op GPR32:$Rn, (nxv16i1 PPRAny:$Pg))))),
+ (!cast<Instruction>(NAME # _B) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32))>;
+
+ def : Pat<(i32 (op GPR32:$Rn, (nxv8i1 PPRAny:$Pg))),
+ (EXTRACT_SUBREG (!cast<Instruction>(NAME # _H) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>;
+ def : Pat<(i64 (sext (i32 (op GPR32:$Rn, (nxv8i1 PPRAny:$Pg))))),
+ (!cast<Instruction>(NAME # _H) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32))>;
+
+ def : Pat<(i32 (op GPR32:$Rn, (nxv4i1 PPRAny:$Pg))),
+ (EXTRACT_SUBREG (!cast<Instruction>(NAME # _S) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>;
+ def : Pat<(i64 (sext (i32 (op GPR32:$Rn, (nxv4i1 PPRAny:$Pg))))),
+ (!cast<Instruction>(NAME # _S) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32))>;
+
+ def : Pat<(i32 (op GPR32:$Rn, (nxv2i1 PPRAny:$Pg))),
+ (EXTRACT_SUBREG (!cast<Instruction>(NAME # _D) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32)), sub_32)>;
+ def : Pat<(i64 (sext (i32 (op GPR32:$Rn, (nxv2i1 PPRAny:$Pg))))),
+ (!cast<Instruction>(NAME # _D) PPRAny:$Pg, (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32))>;
}
-multiclass sve_int_count_r_u32<bits<5> opc, string asm> {
+multiclass sve_int_count_r_u32<bits<5> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_count_r<0b00, opc, asm, GPR32z, PPR8, GPR32z>;
def _H : sve_int_count_r<0b01, opc, asm, GPR32z, PPR16, GPR32z>;
def _S : sve_int_count_r<0b10, opc, asm, GPR32z, PPR32, GPR32z>;
def _D : sve_int_count_r<0b11, opc, asm, GPR32z, PPR64, GPR32z>;
+
+ def : Pat<(i32 (op GPR32:$Rn, (nxv16i1 PPRAny:$Pg))),
+ (!cast<Instruction>(NAME # _B) PPRAny:$Pg, $Rn)>;
+ def : Pat<(i32 (op GPR32:$Rn, (nxv8i1 PPRAny:$Pg))),
+ (!cast<Instruction>(NAME # _H) PPRAny:$Pg, $Rn)>;
+ def : Pat<(i32 (op GPR32:$Rn, (nxv4i1 PPRAny:$Pg))),
+ (!cast<Instruction>(NAME # _S) PPRAny:$Pg, $Rn)>;
+ def : Pat<(i32 (op GPR32:$Rn, (nxv2i1 PPRAny:$Pg))),
+ (!cast<Instruction>(NAME # _D) PPRAny:$Pg, $Rn)>;
}
-multiclass sve_int_count_r_x64<bits<5> opc, string asm> {
+multiclass sve_int_count_r_x64<bits<5> opc, string asm,
+ SDPatternOperator op = null_frag> {
def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64z>;
def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64z>;
def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64z>;
def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64z>;
+
+ def : Pat<(i64 (op GPR64:$Rn, (nxv16i1 PPRAny:$Pg))),
+ (!cast<Instruction>(NAME # _B) PPRAny:$Pg, $Rn)>;
+ def : Pat<(i64 (op GPR64:$Rn, (nxv8i1 PPRAny:$Pg))),
+ (!cast<Instruction>(NAME # _H) PPRAny:$Pg, $Rn)>;
+ def : Pat<(i64 (op GPR64:$Rn, (nxv4i1 PPRAny:$Pg))),
+ (!cast<Instruction>(NAME # _S) PPRAny:$Pg, $Rn)>;
+ def : Pat<(i64 (op GPR64:$Rn, (nxv2i1 PPRAny:$Pg))),
+ (!cast<Instruction>(NAME # _D) PPRAny:$Pg, $Rn)>;
}
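// Editorial note on the _s32 patterns above: the instruction reads and writes
// a 64-bit register, so the i32 form wraps the input with INSERT_SUBREG/sub_32
// and extracts sub_32 from the result, while the (sext (i32 ...)) form reuses
// the already sign-extended 64-bit result and folds the extension away, e.g.
// (illustrative, for a SQINCP-style op):
//   i64 %r = sext (i32 (op %w, %pg))   ~>   sqincp x0, p0.b, w0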
class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
@@ -437,11 +570,16 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
let ElementSize = ElementSizeNone;
}
-multiclass sve_int_count_v<bits<5> opc, string asm> {
+multiclass sve_int_count_v<bits<5> opc, string asm,
+ SDPatternOperator op = null_frag> {
def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>;
def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>;
def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, !cast<Instruction>(NAME # _D)>;
+
def : InstAlias<asm # "\t$Zdn, $Pm",
(!cast<Instruction>(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 0>;
def : InstAlias<asm # "\t$Zdn, $Pm",
@@ -470,11 +608,17 @@ class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
let Inst{4-0} = Rd;
}
-multiclass sve_int_pcount_pred<bits<4> opc, string asm> {
+multiclass sve_int_pcount_pred<bits<4> opc, string asm,
+ SDPatternOperator int_op> {
def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>;
def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>;
def _S : sve_int_pcount_pred<0b10, opc, asm, PPR32>;
def _D : sve_int_pcount_pred<0b11, opc, asm, PPR64>;
+
+ def : SVE_2_Op_Pat<i64, int_op, nxv16i1, nxv16i1, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<i64, int_op, nxv8i1, nxv8i1, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<i64, int_op, nxv4i1, nxv4i1, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<i64, int_op, nxv2i1, nxv2i1, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -499,13 +643,22 @@ class sve_int_count<bits<3> opc, string asm>
let Inst{4-0} = Rd;
}
-multiclass sve_int_count<bits<3> opc, string asm> {
+multiclass sve_int_count<bits<3> opc, string asm, SDPatternOperator op> {
def NAME : sve_int_count<opc, asm>;
def : InstAlias<asm # "\t$Rd, $pattern",
(!cast<Instruction>(NAME) GPR64:$Rd, sve_pred_enum:$pattern, 1), 1>;
def : InstAlias<asm # "\t$Rd",
(!cast<Instruction>(NAME) GPR64:$Rd, 0b11111, 1), 2>;
+
+ def : Pat<(i64 (mul (op sve_pred_enum:$pattern), (sve_cnt_mul_imm i32:$imm))),
+ (!cast<Instruction>(NAME) sve_pred_enum:$pattern, sve_incdec_imm:$imm)>;
+
+ def : Pat<(i64 (shl (op sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm)))),
+ (!cast<Instruction>(NAME) sve_pred_enum:$pattern, sve_incdec_imm:$imm)>;
+
+ def : Pat<(i64 (op sve_pred_enum:$pattern)),
+ (!cast<Instruction>(NAME) sve_pred_enum:$pattern, 1)>;
}
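// Example (illustrative): with op bound to a CNTD-style count node, the three
// patterns above select
//   (op all)            ~>  cntd x0
//   (mul (op all), 16)  ~>  cntd x0, all, mul #16
//   (shl (op all), 3)   ~>  cntd x0, all, mul #8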
class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty>
@@ -531,13 +684,18 @@ class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty>
let ElementSize = ElementSizeNone;
}
-multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> {
+multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty,
+ SDPatternOperator op = null_frag,
+ ValueType vt = OtherVT> {
def NAME : sve_int_countvlv<opc, asm, zprty>;
def : InstAlias<asm # "\t$Zdn, $pattern",
(!cast<Instruction>(NAME) zprty:$Zdn, sve_pred_enum:$pattern, 1), 1>;
def : InstAlias<asm # "\t$Zdn",
(!cast<Instruction>(NAME) zprty:$Zdn, 0b11111, 1), 2>;
+
+ def : Pat<(vt (op (vt zprty:$Zn), (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))),
+ (!cast<Instruction>(NAME) $Zn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4)>;
}
class sve_int_pred_pattern_a<bits<3> opc, string asm>
@@ -596,31 +754,48 @@ class sve_int_pred_pattern_b<bits<5> opc, string asm, RegisterOperand dt,
let Constraints = "$Rdn = $_Rdn";
}
-multiclass sve_int_pred_pattern_b_s32<bits<5> opc, string asm> {
+multiclass sve_int_pred_pattern_b_s32<bits<5> opc, string asm,
+ SDPatternOperator op> {
def NAME : sve_int_pred_pattern_b<opc, asm, GPR64z, GPR64as32>;
def : InstAlias<asm # "\t$Rd, $Rn, $pattern",
(!cast<Instruction>(NAME) GPR64z:$Rd, GPR64as32:$Rn, sve_pred_enum:$pattern, 1), 1>;
def : InstAlias<asm # "\t$Rd, $Rn",
(!cast<Instruction>(NAME) GPR64z:$Rd, GPR64as32:$Rn, 0b11111, 1), 2>;
+
+ // NOTE: Register allocation doesn't like tied operands of differing register
+  // classes, hence the extra INSERT_SUBREG complication.
+
+ def : Pat<(i32 (op GPR32:$Rn, (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))),
+ (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32), sve_pred_enum:$pattern, sve_incdec_imm:$imm4), sub_32)>;
+ def : Pat<(i64 (sext (i32 (op GPR32:$Rn, (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))))),
+ (!cast<Instruction>(NAME) (INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32), sve_pred_enum:$pattern, sve_incdec_imm:$imm4)>;
}
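// Sketch (illustrative) of the _s32 flow above, for a SQINCB-style op on w0:
//   INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub_32   // widen w0 into a 64-bit vreg
//   <inst> ..., sve_pred_enum, imm4             // tied 64-bit instruction
//   EXTRACT_SUBREG ..., sub_32                  // narrow back for the i32 use
// while the (sext ...) pattern keeps the 64-bit result directly.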
-multiclass sve_int_pred_pattern_b_u32<bits<5> opc, string asm> {
+multiclass sve_int_pred_pattern_b_u32<bits<5> opc, string asm,
+ SDPatternOperator op> {
def NAME : sve_int_pred_pattern_b<opc, asm, GPR32z, GPR32z>;
def : InstAlias<asm # "\t$Rdn, $pattern",
(!cast<Instruction>(NAME) GPR32z:$Rdn, sve_pred_enum:$pattern, 1), 1>;
def : InstAlias<asm # "\t$Rdn",
(!cast<Instruction>(NAME) GPR32z:$Rdn, 0b11111, 1), 2>;
+
+ def : Pat<(i32 (op GPR32:$Rn, (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))),
+ (!cast<Instruction>(NAME) $Rn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4)>;
}
-multiclass sve_int_pred_pattern_b_x64<bits<5> opc, string asm> {
+multiclass sve_int_pred_pattern_b_x64<bits<5> opc, string asm,
+ SDPatternOperator op> {
def NAME : sve_int_pred_pattern_b<opc, asm, GPR64z, GPR64z>;
def : InstAlias<asm # "\t$Rdn, $pattern",
(!cast<Instruction>(NAME) GPR64z:$Rdn, sve_pred_enum:$pattern, 1), 1>;
def : InstAlias<asm # "\t$Rdn",
(!cast<Instruction>(NAME) GPR64z:$Rdn, 0b11111, 1), 2>;
+
+ def : Pat<(i64 (op GPR64:$Rn, (sve_pred_enum:$pattern), (sve_incdec_imm:$imm4))),
+ (!cast<Instruction>(NAME) $Rn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4)>;
}
@@ -741,7 +916,7 @@ class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve_int_perm_tbl<string asm> {
+multiclass sve_int_perm_tbl<string asm, SDPatternOperator op> {
def _B : sve_int_perm_tbl<0b00, 0b10, asm, ZPR8, Z_b>;
def _H : sve_int_perm_tbl<0b01, 0b10, asm, ZPR16, Z_h>;
def _S : sve_int_perm_tbl<0b10, 0b10, asm, ZPR32, Z_s>;
@@ -755,6 +930,15 @@ multiclass sve_int_perm_tbl<string asm> {
(!cast<Instruction>(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, ZPR32:$Zm), 0>;
def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
(!cast<Instruction>(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zm), 0>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_int_perm_tbl<string asm> {
@@ -804,11 +988,20 @@ class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
-multiclass sve_int_perm_reverse_z<string asm> {
+multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> {
def _B : sve_int_perm_reverse_z<0b00, asm, ZPR8>;
def _H : sve_int_perm_reverse_z<0b01, asm, ZPR16>;
def _S : sve_int_perm_reverse_z<0b10, asm, ZPR32>;
def _D : sve_int_perm_reverse_z<0b11, asm, ZPR64>;
+
+ def : SVE_1_Op_Pat<nxv16i8, op, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Pat<nxv8i16, op, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Pat<nxv4i32, op, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Pat<nxv2i64, op, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_1_Op_Pat<nxv8f16, op, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Pat<nxv4f32, op, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Pat<nxv2f64, op, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty>
@@ -826,11 +1019,16 @@ class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty>
let Inst{3-0} = Pd;
}
-multiclass sve_int_perm_reverse_p<string asm> {
+multiclass sve_int_perm_reverse_p<string asm, SDPatternOperator op> {
def _B : sve_int_perm_reverse_p<0b00, asm, PPR8>;
def _H : sve_int_perm_reverse_p<0b01, asm, PPR16>;
def _S : sve_int_perm_reverse_p<0b10, asm, PPR32>;
def _D : sve_int_perm_reverse_p<0b11, asm, PPR64>;
+
+ def : SVE_1_Op_Pat<nxv16i1, op, nxv16i1, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Pat<nxv8i1, op, nxv8i1, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Pat<nxv4i1, op, nxv4i1, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Pat<nxv2i1, op, nxv2i1, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_unpk<bits<2> sz16_64, bits<2> opc, string asm,
@@ -875,14 +1073,18 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = Destructive;
- let ElementSize = ElementSizeNone;
}
-multiclass sve_int_perm_insrs<string asm> {
+multiclass sve_int_perm_insrs<string asm, SDPatternOperator op> {
def _B : sve_int_perm_insrs<0b00, asm, ZPR8, GPR32>;
def _H : sve_int_perm_insrs<0b01, asm, ZPR16, GPR32>;
def _S : sve_int_perm_insrs<0b10, asm, ZPR32, GPR32>;
def _D : sve_int_perm_insrs<0b11, asm, ZPR64, GPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, i32, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, i32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -901,14 +1103,17 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = Destructive;
- let ElementSize = ElementSizeNone;
}
-multiclass sve_int_perm_insrv<string asm> {
+multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8>;
def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16>;
def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32>;
def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64>;
+
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -934,6 +1139,13 @@ class sve_int_perm_extract_i<string asm>
let ElementSize = ElementSizeNone;
}
+multiclass sve_int_perm_extract_i<string asm, SDPatternOperator op> {
+ def NAME : sve_int_perm_extract_i<asm>;
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, imm0_255,
+ !cast<Instruction>(NAME)>;
+}
+
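// Example (illustrative): binding op to the EXT intrinsic, the pattern above
// selects (nxv16i8 (op z0, z1, 3)) to "ext z0.b, z0.b, z1.b, #3",
// with imm0_255 covering the byte offset.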
class sve2_int_perm_extract_i_cons<string asm>
: I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, imm0_255:$imm8),
asm, "\t$Zd, $Zn, $imm8",
@@ -972,12 +1184,22 @@ class sve_int_sel_vvv<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
-multiclass sve_int_sel_vvv<string asm> {
+multiclass sve_int_sel_vvv<string asm, SDPatternOperator op> {
def _B : sve_int_sel_vvv<0b00, asm, ZPR8>;
def _H : sve_int_sel_vvv<0b01, asm, ZPR16>;
def _S : sve_int_sel_vvv<0b10, asm, ZPR32>;
def _D : sve_int_sel_vvv<0b11, asm, ZPR64>;
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
def : InstAlias<"mov $Zd, $Pg/m, $Zn",
(!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>;
def : InstAlias<"mov $Zd, $Pg/m, $Zn",
@@ -1019,6 +1241,16 @@ class sve_int_pred_log<bits<4> opc, string asm>
!strconcat(asm, "\t$Pd, $Pg/z, $Pn, $Pm"));
let Defs = !if(!eq (opc{2}, 1), [NZCV], []);
+
+}
+
+multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op> {
+ def NAME : sve_int_pred_log<opc, asm>;
+
+ def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, nxv16i1, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, nxv8i1, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, nxv4i1, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, nxv2i1, !cast<Instruction>(NAME)>;
}
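// Example (illustrative): with op bound to a predicate AND intrinsic, each
// SVE_3_Op_Pat above maps
//   (nxv16i1 (op Pg, Pn, Pm))  ~>  and p0.b, p1/z, p2.b, p3.b
// one pattern per predicate element width, all selecting the same
// .b-encoded instruction.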
@@ -1044,9 +1276,14 @@ class sve_int_log_imm<bits<2> opc, string asm>
let ElementSize = ElementSizeNone;
}
-multiclass sve_int_log_imm<bits<2> opc, string asm, string alias> {
+multiclass sve_int_log_imm<bits<2> opc, string asm, string alias, SDPatternOperator op> {
def NAME : sve_int_log_imm<opc, asm>;
+ def : SVE_1_Op_Imm_Log_Pat<nxv16i8, op, ZPR8, i32, SVELogicalImm8Pat, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Imm_Log_Pat<nxv8i16, op, ZPR16, i32, SVELogicalImm16Pat, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Imm_Log_Pat<nxv4i32, op, ZPR32, i32, SVELogicalImm32Pat, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Imm_Log_Pat<nxv2i64, op, ZPR64, i64, SVELogicalImm64Pat, !cast<Instruction>(NAME)>;
+
def : InstAlias<asm # "\t$Zdn, $Zdn, $imm",
(!cast<Instruction>(NAME) ZPR8:$Zdn, sve_logical_imm8:$imm), 4>;
def : InstAlias<asm # "\t$Zdn, $Zdn, $imm",
@@ -1120,11 +1357,16 @@ class sve_int_bin_cons_arit_0<bits<2> sz8_64, bits<3> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm> {
+multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_bin_cons_arit_0<0b00, opc, asm, ZPR8>;
def _H : sve_int_bin_cons_arit_0<0b01, opc, asm, ZPR16>;
def _S : sve_int_bin_cons_arit_0<0b10, opc, asm, ZPR32>;
def _D : sve_int_bin_cons_arit_0<0b11, opc, asm, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -1185,14 +1427,30 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_fp_2op_p_zds<bits<4> opc, string asm> {
+multiclass sve_fp_2op_p_zds<bits<4> opc, string asm,
+ SDPatternOperator op> {
+ def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>;
+ def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>;
+ def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve_fp_2op_p_zds_fscale<bits<4> opc, string asm,
+ SDPatternOperator op> {
def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>;
def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>;
def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
-: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm0_7:$imm3),
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm32_0_7:$imm3),
asm, "\t$Zdn, $_Zdn, $Zm, $imm3",
"",
[]>, Sched<[]> {
@@ -1212,10 +1470,17 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
let ElementSize = ElementSizeNone;
}
-multiclass sve_fp_ftmad<string asm> {
+multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
def _H : sve_fp_ftmad<0b01, asm, ZPR16>;
def _S : sve_fp_ftmad<0b10, asm, ZPR32>;
def _D : sve_fp_ftmad<0b11, asm, ZPR64>;
+
+ def : Pat<(nxv8f16 (op (nxv8f16 ZPR16:$Zn), (nxv8f16 ZPR16:$Zm), (i32 imm32_0_7:$imm))),
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zn, ZPR16:$Zm, imm32_0_7:$imm)>;
+ def : Pat<(nxv4f32 (op (nxv4f32 ZPR32:$Zn), (nxv4f32 ZPR32:$Zm), (i32 imm32_0_7:$imm))),
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zn, ZPR32:$Zm, imm32_0_7:$imm)>;
+ def : Pat<(nxv2f64 (op (nxv2f64 ZPR64:$Zn), (nxv2f64 ZPR64:$Zm), (i32 imm32_0_7:$imm))),
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, imm32_0_7:$imm)>;
}
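// Example (illustrative): for an FTMAD-style op,
//   (nxv8f16 (op z0, z1, 3))  ~>  ftmad z0.h, z0.h, z1.h, #3
// with imm32_0_7 restricting the trig-selector immediate to 0-7.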
@@ -1223,13 +1488,11 @@ multiclass sve_fp_ftmad<string asm> {
// SVE Floating Point Arithmetic - Unpredicated Group
//===----------------------------------------------------------------------===//
-class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm,
- ZPRRegOp zprty,
- ValueType vt, ValueType vt2, SDPatternOperator op>
+class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
asm, "\t$Zd, $Zn, $Zm",
"",
- [(set (vt zprty:$Zd), (op (vt zprty:$Zn), (vt2 zprty:$Zm)))]>, Sched<[]> {
+ []>, Sched<[]> {
bits<5> Zd;
bits<5> Zm;
bits<5> Zn;
@@ -1244,9 +1507,24 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm,
}
multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
- def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16, nxv8f16, nxv8f16, op>;
- def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32, nxv4f32, nxv4f32, op>;
- def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64, nxv2f64, nxv2f64, op>;
+ def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
+ def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
+ def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+}
+
+multiclass sve_fp_3op_u_zd_ftsmul<bits<3> opc, string asm, SDPatternOperator op> {
+ def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
+ def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
+ def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -1277,10 +1555,14 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty>
let ElementSize = zprty.ElementSize;
}
-multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> {
+multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm, SDPatternOperator op> {
def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>;
def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>;
def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>;
+
+ def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
@@ -1308,10 +1590,14 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> {
+multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm, SDPatternOperator op> {
def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>;
def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>;
def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>;
+
+ def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -1338,26 +1624,34 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bit opc, string asm,
let ElementSize = ElementSizeNone;
}
-multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> {
- def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH> {
+multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm,
+ SDPatternOperator op> {
+ def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH32b> {
bits<3> Zm;
bits<3> iop;
let Inst{22} = iop{2};
let Inst{20-19} = iop{1-0};
let Inst{18-16} = Zm;
}
- def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS> {
+ def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS32b> {
bits<3> Zm;
bits<2> iop;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
- def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD> {
+ def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD32b> {
bits<4> Zm;
bit iop;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
+
+ def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b:$idx))),
+ (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b:$idx)>;
+ def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b:$idx))),
+ (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
+ def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b:$idx))),
+ (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
}
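// Example (illustrative): with op bound to an indexed FMLA intrinsic,
//   (nxv4f32 (op za, zn, zm, (i32 1)))  ~>  fmla z0.s, z1.s, z2.s[1]
// The VectorIndex*32b operands are presumably i32-typed variants, so the
// explicit (i32 ...$idx) in the source pattern type-checks.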
@@ -1379,26 +1673,33 @@ class sve_fp_fmul_by_indexed_elem<bits<2> sz, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_fp_fmul_by_indexed_elem<string asm> {
- def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH> {
+multiclass sve_fp_fmul_by_indexed_elem<string asm, SDPatternOperator op> {
+ def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH32b> {
bits<3> Zm;
bits<3> iop;
let Inst{22} = iop{2};
let Inst{20-19} = iop{1-0};
let Inst{18-16} = Zm;
}
- def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS> {
+ def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS32b> {
bits<3> Zm;
bits<2> iop;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
- def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD> {
+ def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD32b> {
bits<4> Zm;
bit iop;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
+
+ def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b:$idx))),
+ (!cast<Instruction>(NAME # _H) $Op1, $Op2, VectorIndexH32b:$idx)>;
+ def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b:$idx))),
+ (!cast<Instruction>(NAME # _S) $Op1, $Op2, VectorIndexS32b:$idx)>;
+ def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b:$idx))),
+ (!cast<Instruction>(NAME # _D) $Op1, $Op2, VectorIndexD32b:$idx)>;
}
//===----------------------------------------------------------------------===//
@@ -1430,10 +1731,17 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty>
let ElementSize = zprty.ElementSize;
}
-multiclass sve_fp_fcmla<string asm> {
+multiclass sve_fp_fcmla<string asm, SDPatternOperator op> {
def _H : sve_fp_fcmla<0b01, asm, ZPR16>;
def _S : sve_fp_fcmla<0b10, asm, ZPR32>;
def _D : sve_fp_fcmla<0b11, asm, ZPR64>;
+
+ def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, nxv8f16:$Op4, (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
+ def : Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, nxv4f32:$Op4, (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
+ def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, nxv2f64:$Op4, (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
}
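// Example (illustrative): for an FCMLA intrinsic,
//   (nxv4f32 (op pg, za, zn, zm, (i32 90)))  ~>  fcmla z0.s, p0/m, z1.s, z2.s, #90
// where complexrotateop presumably encodes the 0/90/180/270 degree rotations.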
//===----------------------------------------------------------------------===//
@@ -1463,19 +1771,24 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm,
let ElementSize = ElementSizeNone;
}
-multiclass sve_fp_fcmla_by_indexed_elem<string asm> {
- def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS> {
+multiclass sve_fp_fcmla_by_indexed_elem<string asm, SDPatternOperator op> {
+ def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS32b> {
bits<3> Zm;
bits<2> iop;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
- def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD> {
+ def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD32b> {
bits<4> Zm;
bits<1> iop;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
+
+ def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b:$idx), (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b:$idx, complexrotateop:$imm)>;
+ def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b:$idx), (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b:$idx, complexrotateop:$imm)>;
}
//===----------------------------------------------------------------------===//
@@ -1506,10 +1819,17 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty>
let ElementSize = zprty.ElementSize;
}
-multiclass sve_fp_fcadd<string asm> {
+multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
def _H : sve_fp_fcadd<0b01, asm, ZPR16>;
def _S : sve_fp_fcadd<0b10, asm, ZPR32>;
def _D : sve_fp_fcadd<0b11, asm, ZPR64>;
+
+ def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 complexrotateopodd:$imm))),
+ (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>;
+ def : Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 complexrotateopodd:$imm))),
+ (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>;
+ def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 complexrotateopodd:$imm))),
+ (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>;
}
//===----------------------------------------------------------------------===//
@@ -1537,18 +1857,26 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_fp_convert_down_narrow<string asm> {
+multiclass sve2_fp_convert_down_narrow<string asm, string op> {
def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv16i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
+ def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
-multiclass sve2_fp_convert_up_long<string asm> {
+multiclass sve2_fp_convert_up_long<string asm, string op> {
def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
+
+ def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv16i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ def : SVE_3_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv16i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
-multiclass sve2_fp_convert_down_odd_rounding<string asm> {
+multiclass sve2_fp_convert_down_odd_rounding_top<string asm, string op> {
def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
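// Editorial sketch: op is passed as a string so each multiclass can append
// the type suffix of the matching intrinsic, e.g. op = "int_aarch64_sve_fcvt"
// (assumed spelling, for illustration) resolves to int_aarch64_sve_fcvt_f16f32
// through !cast<SDPatternOperator>(op # _f16f32).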
//===----------------------------------------------------------------------===//
@@ -1578,10 +1906,14 @@ class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm> {
+multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm, SDPatternOperator op> {
def _H : sve2_fp_pairwise_pred<0b01, opc, asm, ZPR16>;
def _S : sve2_fp_pairwise_pred<0b10, opc, asm, ZPR32>;
def _D : sve2_fp_pairwise_pred<0b11, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -1590,7 +1922,7 @@ multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm> {
class sve2_fp_mla_long_by_indexed_elem<bits<2> opc, string asm>
: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm,
- VectorIndexH:$iop),
+ VectorIndexH32b:$iop),
asm, "\t$Zda, $Zn, $Zm$iop",
"",
[]>, Sched<[]> {
@@ -1614,6 +1946,12 @@ class sve2_fp_mla_long_by_indexed_elem<bits<2> opc, string asm>
let ElementSize = ElementSizeNone;
}
+multiclass sve2_fp_mla_long_by_indexed_elem<bits<2> opc, string asm,
+ SDPatternOperator op> {
+ def NAME : sve2_fp_mla_long_by_indexed_elem<opc, asm>;
+ def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8f16, nxv8f16, i32, VectorIndexH32b, !cast<Instruction>(NAME)>;
+}
+
//===----------------------------------------------------------------------===//
// SVE2 Floating Point Widening Multiply-Add Group
//===----------------------------------------------------------------------===//
@@ -1640,6 +1978,11 @@ class sve2_fp_mla_long<bits<2> opc, string asm>
let ElementSize = ElementSizeNone;
}
+multiclass sve2_fp_mla_long<bits<2> opc, string asm, SDPatternOperator op> {
+ def NAME : sve2_fp_mla_long<opc, asm>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8f16, nxv8f16, !cast<Instruction>(NAME)>;
+}
+
//===----------------------------------------------------------------------===//
// SVE Stack Allocation Group
//===----------------------------------------------------------------------===//
@@ -1700,11 +2043,22 @@ class sve_int_perm_bin_perm_zz<bits<3> opc, bits<2> sz8_64, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm> {
+multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_perm_bin_perm_zz<opc, 0b00, asm, ZPR8>;
def _H : sve_int_perm_bin_perm_zz<opc, 0b01, asm, ZPR16>;
def _S : sve_int_perm_bin_perm_zz<opc, 0b10, asm, ZPR32>;
def _D : sve_int_perm_bin_perm_zz<opc, 0b11, asm, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f16, op, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -1734,16 +2088,39 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
let ElementSize = size;
}
-multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
+multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
+ RegisterOperand i_zprtype,
+ RegisterOperand o_zprtype,
+ SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
+ def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
+
+ def : SVE_3_Op_Pat<vt1, op, vt1, vt2, vt3, !cast<Instruction>(NAME)>;
+}
+
+multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_fp_flogb<string asm> {
+multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>;
def _S : sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>;
def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>;
+
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve2_fp_convert_down_odd_rounding<string asm, string op> {
+ def _DtoS : sve_fp_2op_p_zd<0b0001010, asm, ZPR64, ZPR32, ElementSizeD>;
+ def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
//===----------------------------------------------------------------------===//
@@ -1767,10 +2144,14 @@ class sve_fp_2op_u_zd<bits<2> sz, bits<3> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve_fp_2op_u_zd<bits<3> opc, string asm> {
+multiclass sve_fp_2op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
def _H : sve_fp_2op_u_zd<0b01, opc, asm, ZPR16>;
def _S : sve_fp_2op_u_zd<0b10, opc, asm, ZPR32>;
def _D : sve_fp_2op_u_zd<0b11, opc, asm, ZPR64>;
+
+ def : SVE_1_Op_Pat<nxv8f16, op, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Pat<nxv4f32, op, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Pat<nxv2f64, op, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -1799,38 +2180,61 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_bin_pred_log<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_log<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>;
def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>;
def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>;
def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_bin_pred_arit_log<0b00, 0b00, opc, asm, ZPR8>;
def _H : sve_int_bin_pred_arit_log<0b01, 0b00, opc, asm, ZPR16>;
def _S : sve_int_bin_pred_arit_log<0b10, 0b00, opc, asm, ZPR32>;
def _D : sve_int_bin_pred_arit_log<0b11, 0b00, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>;
def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>;
def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>;
def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>;
def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>;
def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>;
def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
// Special case for divides, which are not defined for 8b/16b elements.
-multiclass sve_int_bin_pred_arit_2_div<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_arit_2_div<bits<3> opc, string asm, SDPatternOperator op> {
def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>;
def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
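// Example (illustrative): binding op to an SDIV intrinsic gives
//   (nxv4i32 (op pg, zn, zm))  ~>  sdiv z0.s, p0/m, z0.s, z1.s
// with only .s and .d forms defined, matching the comment above.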
//===----------------------------------------------------------------------===//
@@ -1862,11 +2266,16 @@ class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm> {
+multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
def _B : sve_int_mladdsub_vvv_pred<0b00, opc, asm, ZPR8>;
def _H : sve_int_mladdsub_vvv_pred<0b01, opc, asm, ZPR16>;
def _S : sve_int_mladdsub_vvv_pred<0b10, opc, asm, ZPR32>;
def _D : sve_int_mladdsub_vvv_pred<0b11, opc, asm, ZPR64>;
+
+ def : SVE_4_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_4_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
@@ -1894,11 +2303,16 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> {
+multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>;
def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>;
def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>;
def _D : sve_int_mlas_vvv_pred<0b11, opc, asm, ZPR64>;
+
+ def : SVE_4_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_4_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -2687,17 +3101,21 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
let Inst{4-0} = Zd;
}
-multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm> {
+multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
- vecshiftR8>;
+ tvecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
- vecshiftR16> {
+ tvecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
+ def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
@@ -2723,17 +3141,21 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm> {
+multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
- vecshiftR8>;
+ tvecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
- vecshiftR16> {
+ tvecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
@@ -2754,10 +3176,15 @@ class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve2_int_addsub_narrow_high_bottom<bits<2> opc, string asm> {
+multiclass sve2_int_addsub_narrow_high_bottom<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_addsub_narrow_high_bottom<0b01, opc, asm, ZPR8, ZPR16>;
def _H : sve2_int_addsub_narrow_high_bottom<0b10, opc, asm, ZPR16, ZPR32>;
def _S : sve2_int_addsub_narrow_high_bottom<0b11, opc, asm, ZPR32, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_addsub_narrow_high_top<bits<2> sz, bits<2> opc, string asm,
@@ -2780,10 +3207,15 @@ class sve2_int_addsub_narrow_high_top<bits<2> sz, bits<2> opc, string asm,
let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_addsub_narrow_high_top<bits<2> opc, string asm> {
+multiclass sve2_int_addsub_narrow_high_top<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_addsub_narrow_high_top<0b01, opc, asm, ZPR8, ZPR16>;
def _H : sve2_int_addsub_narrow_high_top<0b10, opc, asm, ZPR16, ZPR32>;
def _S : sve2_int_addsub_narrow_high_top<0b11, opc, asm, ZPR32, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_sat_extract_narrow_bottom<bits<3> tsz8_64, bits<2> opc, string asm,
@@ -2803,10 +3235,15 @@ class sve2_int_sat_extract_narrow_bottom<bits<3> tsz8_64, bits<2> opc, string as
let Inst{4-0} = Zd;
}
-multiclass sve2_int_sat_extract_narrow_bottom<bits<2> opc, string asm> {
+multiclass sve2_int_sat_extract_narrow_bottom<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_sat_extract_narrow_bottom<0b001, opc, asm, ZPR8, ZPR16>;
def _H : sve2_int_sat_extract_narrow_bottom<0b010, opc, asm, ZPR16, ZPR32>;
def _S : sve2_int_sat_extract_narrow_bottom<0b100, opc, asm, ZPR32, ZPR64>;
+
+ def : SVE_1_Op_Pat<nxv16i8, op, nxv8i16, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Pat<nxv8i16, op, nxv4i32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Pat<nxv4i32, op, nxv2i64, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_sat_extract_narrow_top<bits<3> tsz8_64, bits<2> opc, string asm,
@@ -2828,10 +3265,15 @@ class sve2_int_sat_extract_narrow_top<bits<3> tsz8_64, bits<2> opc, string asm,
let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_sat_extract_narrow_top<bits<2> opc, string asm> {
+multiclass sve2_int_sat_extract_narrow_top<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_sat_extract_narrow_top<0b001, opc, asm, ZPR8, ZPR16>;
def _H : sve2_int_sat_extract_narrow_top<0b010, opc, asm, ZPR16, ZPR32>;
def _S : sve2_int_sat_extract_narrow_top<0b100, opc, asm, ZPR32, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv8i16, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv4i32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv2i64, !cast<Instruction>(NAME # _S)>;
}
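// Editorial note: the _top narrowing forms take the destination as an extra
// source operand (SVE_2_Op_Pat here, versus SVE_1_Op_Pat for _bottom) because
// they only overwrite the top element halves; e.g. an SQXTNT-style op would
// select to "sqxtnt z0.b, z1.h".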
//===----------------------------------------------------------------------===//
@@ -2875,19 +3317,31 @@ multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>;
def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
@@ -2907,10 +3361,15 @@ multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -3010,11 +3469,28 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
let ElementSize = ElementSizeNone;
}
-multiclass sve_int_arith_imm0<bits<3> opc, string asm> {
- def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>;
+multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
+ def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>;
+ def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
+ def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
+ def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
+
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8, i32, SVEAddSubImm8Pat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubImm16Pat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubImm32Pat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve_int_arith_imm0_subr<bits<3> opc, string asm, SDPatternOperator op> {
+ def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>;
def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
+
+ def : SVE_1_Op_Imm_OptLsl_Reverse_Pat<nxv16i8, op, ZPR8, i32, SVEAddSubImm8Pat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_OptLsl_Reverse_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubImm16Pat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_OptLsl_Reverse_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubImm32Pat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_OptLsl_Reverse_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
}
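
The addsub_imm8_opt_lsl operands accept an 8-bit unsigned immediate, optionally shifted left by 8, and the _subr variant reuses the same encodings with the reversed (immediate minus vector) operand order via the Reverse patterns. A quick validity check of that immediate form, as an illustrative sketch (the helper name is made up here, not part of the patch):

#include <cstdint>

// True if v is expressible as imm8 or imm8 << 8 (hypothetical helper).
bool isSVEAddSubImm(uint64_t v) {
  return v <= 0xff || ((v & 0xff) == 0 && v <= 0xff00);
}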
class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
@@ -3037,18 +3513,40 @@ class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
let ElementSize = ElementSizeNone;
}
-multiclass sve_int_arith_imm1<bits<2> opc, string asm, Operand immtype> {
- def _B : sve_int_arith_imm<0b00, { 0b1010, opc }, asm, ZPR8, immtype>;
- def _H : sve_int_arith_imm<0b01, { 0b1010, opc }, asm, ZPR16, immtype>;
- def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, immtype>;
- def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, immtype>;
+multiclass sve_int_arith_imm1<bits<2> opc, string asm, SDPatternOperator op> {
+ def _B : sve_int_arith_imm<0b00, { 0b1010, opc }, asm, ZPR8, simm8>;
+ def _H : sve_int_arith_imm<0b01, { 0b1010, opc }, asm, ZPR16, simm8>;
+ def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, simm8>;
+ def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, simm8>;
+
+ def : SVE_1_Op_Imm_Arith_Pat<nxv16i8, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pat<nxv8i16, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pat<nxv4i32, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pat<nxv2i64, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_arith_imm2<string asm> {
+multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperator op> {
+ def _B : sve_int_arith_imm<0b00, { 0b1010, opc }, asm, ZPR8, imm0_255>;
+ def _H : sve_int_arith_imm<0b01, { 0b1010, opc }, asm, ZPR16, imm0_255>;
+ def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>;
+ def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>;
+
+ def : SVE_1_Op_Imm_Arith_Pat<nxv16i8, op, ZPR8, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pat<nxv8i16, op, ZPR16, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pat<nxv4i32, op, ZPR32, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pat<nxv2i64, op, ZPR64, i64, SVEArithUImmPat, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> {
def _B : sve_int_arith_imm<0b00, 0b110000, asm, ZPR8, simm8>;
def _H : sve_int_arith_imm<0b01, 0b110000, asm, ZPR16, simm8>;
def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8>;
def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8>;
+
+ def : SVE_1_Op_Imm_Arith_Pat<nxv16i8, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pat<nxv8i16, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pat<nxv4i32, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pat<nxv2i64, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -3072,9 +3570,14 @@ class sve_int_bin_cons_log<bits<2> opc, string asm>
let Inst{4-0} = Zd;
}
-multiclass sve_int_bin_cons_log<bits<2> opc, string asm> {
+multiclass sve_int_bin_cons_log<bits<2> opc, string asm, SDPatternOperator op> {
def NAME : sve_int_bin_cons_log<opc, asm>;
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
+
def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
(!cast<Instruction>(NAME) ZPR8:$Zd, ZPR8:$Zn, ZPR8:$Zm), 1>;
def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
@@ -3284,23 +3787,37 @@ class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm,
let Defs = [NZCV];
}
-multiclass sve_int_cmp_0<bits<3> opc, string asm> {
+multiclass sve_int_cmp_0<bits<3> opc, string asm, SDPatternOperator op,
+ CondCode cc> {
def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR8>;
def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR16>;
def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR32>;
def _D : sve_int_cmp<0b0, 0b11, opc, asm, PPR64, ZPR64, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_cmp_0_wide<bits<3> opc, string asm> {
+multiclass sve_int_cmp_0_wide<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR64>;
def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR64>;
def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i8, nxv2i64, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i16, nxv2i64, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i32, nxv2i64, !cast<Instruction>(NAME # _S)>;
}
-multiclass sve_int_cmp_1_wide<bits<3> opc, string asm> {
+multiclass sve_int_cmp_1_wide<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_cmp<0b1, 0b00, opc, asm, PPR8, ZPR8, ZPR64>;
def _H : sve_int_cmp<0b1, 0b01, opc, asm, PPR16, ZPR16, ZPR64>;
def _S : sve_int_cmp<0b1, 0b10, opc, asm, PPR32, ZPR32, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i8, nxv2i64, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i16, nxv2i64, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i32, nxv2i64, !cast<Instruction>(NAME # _S)>;
}
@@ -3332,13 +3849,70 @@ class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let Inst{3-0} = Pd;
let Defs = [NZCV];
+ let ElementSize = pprty.ElementSize;
}
-multiclass sve_int_scmp_vi<bits<3> opc, string asm> {
+multiclass sve_int_scmp_vi<bits<3> opc, string asm, CondCode cc,
+ SDPatternOperator op = null_frag,
+ SDPatternOperator inv_op = null_frag> {
def _B : sve_int_scmp_vi<0b00, opc, asm, PPR8, ZPR8, simm5_32b>;
def _H : sve_int_scmp_vi<0b01, opc, asm, PPR16, ZPR16, simm5_32b>;
def _S : sve_int_scmp_vi<0b10, opc, asm, PPR32, ZPR32, simm5_32b>;
def _D : sve_int_scmp_vi<0b11, opc, asm, PPR64, ZPR64, simm5_64b>;
+
+ // IR version
+ def : Pat<(nxv16i1 (setcc (nxv16i8 ZPR:$Zs1),
+ (nxv16i8 (AArch64dup (simm5_32b:$imm))),
+ cc)),
+ (!cast<Instruction>(NAME # "_B") (PTRUE_B 31), ZPR:$Zs1, simm5_32b:$imm)>;
+ def : Pat<(nxv8i1 (setcc (nxv8i16 ZPR:$Zs1),
+ (nxv8i16 (AArch64dup (simm5_32b:$imm))),
+ cc)),
+ (!cast<Instruction>(NAME # "_H") (PTRUE_H 31), ZPR:$Zs1, simm5_32b:$imm)>;
+ def : Pat<(nxv4i1 (setcc (nxv4i32 ZPR:$Zs1),
+ (nxv4i32 (AArch64dup (simm5_32b:$imm))),
+ cc)),
+ (!cast<Instruction>(NAME # "_S") (PTRUE_S 31), ZPR:$Zs1, simm5_32b:$imm)>;
+ def : Pat<(nxv2i1 (setcc (nxv2i64 ZPR:$Zs1),
+ (nxv2i64 (AArch64dup (simm5_64b:$imm))),
+ cc)),
+ (!cast<Instruction>(NAME # "_D") (PTRUE_D 31), ZPR:$Zs1, simm5_64b:$imm)>;
+
+ // Intrinsic version
+ def : Pat<(nxv16i1 (op (nxv16i1 PPR_3b:$Pg),
+ (nxv16i8 ZPR:$Zs1),
+ (nxv16i8 (AArch64dup (simm5_32b:$imm))))),
+ (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+ def : Pat<(nxv8i1 (op (nxv8i1 PPR_3b:$Pg),
+ (nxv8i16 ZPR:$Zs1),
+ (nxv8i16 (AArch64dup (simm5_32b:$imm))))),
+ (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+ def : Pat<(nxv4i1 (op (nxv4i1 PPR_3b:$Pg),
+ (nxv4i32 ZPR:$Zs1),
+ (nxv4i32 (AArch64dup (simm5_32b:$imm))))),
+ (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+ def : Pat<(nxv2i1 (op (nxv2i1 PPR_3b:$Pg),
+ (nxv2i64 ZPR:$Zs1),
+ (nxv2i64 (AArch64dup (simm5_64b:$imm))))),
+ (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, simm5_64b:$imm)>;
+
+ // Inverted intrinsic version
+ def : Pat<(nxv16i1 (inv_op (nxv16i1 PPR_3b:$Pg),
+ (nxv16i8 (AArch64dup (simm5_32b:$imm))),
+ (nxv16i8 ZPR:$Zs1))),
+ (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+ def : Pat<(nxv8i1 (inv_op (nxv8i1 PPR_3b:$Pg),
+ (nxv8i16 (AArch64dup (simm5_32b:$imm))),
+ (nxv8i16 ZPR:$Zs1))),
+ (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+ def : Pat<(nxv4i1 (inv_op (nxv4i1 PPR_3b:$Pg),
+ (nxv4i32 (AArch64dup (simm5_32b:$imm))),
+ (nxv4i32 ZPR:$Zs1))),
+ (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+ def : Pat<(nxv2i1 (inv_op (nxv2i1 PPR_3b:$Pg),
+ (nxv2i64 (AArch64dup (simm5_64b:$imm))),
+ (nxv2i64 ZPR:$Zs1))),
+ (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, simm5_64b:$imm)>;
}
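
The three groups of patterns above select the same compare-with-immediate instructions at different levels: the IR versions match a plain setcc against a splatted immediate under an all-active predicate (PTRUE ... 31), the intrinsic versions take an explicit governing predicate, and the inverted versions match the commuted operand order. A scalar model of the predicated form, offered as an illustrative assumption rather than patch code:

#include <cstdint>

// Lane-wise signed "greater than immediate" compare under a predicate;
// inactive lanes produce an inactive result lane (hypothetical helper).
void cmpGtImm(bool *pd, const bool *pg, const int64_t *zn, int64_t imm,
              int lanes) {
  for (int i = 0; i < lanes; ++i)
    pd[i] = pg[i] && zn[i] > imm;
}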
@@ -3369,11 +3943,67 @@ class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty,
let Defs = [NZCV];
}
-multiclass sve_int_ucmp_vi<bits<2> opc, string asm> {
+multiclass sve_int_ucmp_vi<bits<2> opc, string asm, CondCode cc,
+ SDPatternOperator op = null_frag,
+ SDPatternOperator inv_op = null_frag> {
def _B : sve_int_ucmp_vi<0b00, opc, asm, PPR8, ZPR8, imm0_127>;
def _H : sve_int_ucmp_vi<0b01, opc, asm, PPR16, ZPR16, imm0_127>;
def _S : sve_int_ucmp_vi<0b10, opc, asm, PPR32, ZPR32, imm0_127>;
- def _D : sve_int_ucmp_vi<0b11, opc, asm, PPR64, ZPR64, imm0_127>;
+ def _D : sve_int_ucmp_vi<0b11, opc, asm, PPR64, ZPR64, imm0_127_64b>;
+
+ // IR version
+ def : Pat<(nxv16i1 (setcc (nxv16i8 ZPR:$Zs1),
+ (nxv16i8 (AArch64dup (imm0_127:$imm))),
+ cc)),
+ (!cast<Instruction>(NAME # "_B") (PTRUE_B 31), ZPR:$Zs1, imm0_127:$imm)>;
+ def : Pat<(nxv8i1 (setcc (nxv8i16 ZPR:$Zs1),
+ (nxv8i16 (AArch64dup (imm0_127:$imm))),
+ cc)),
+ (!cast<Instruction>(NAME # "_H") (PTRUE_H 31), ZPR:$Zs1, imm0_127:$imm)>;
+ def : Pat<(nxv4i1 (setcc (nxv4i32 ZPR:$Zs1),
+ (nxv4i32 (AArch64dup (imm0_127:$imm))),
+ cc)),
+ (!cast<Instruction>(NAME # "_S") (PTRUE_S 31), ZPR:$Zs1, imm0_127:$imm)>;
+ def : Pat<(nxv2i1 (setcc (nxv2i64 ZPR:$Zs1),
+ (nxv2i64 (AArch64dup (imm0_127_64b:$imm))),
+ cc)),
+ (!cast<Instruction>(NAME # "_D") (PTRUE_D 31), ZPR:$Zs1, imm0_127_64b:$imm)>;
+
+ // Intrinsic version
+ def : Pat<(nxv16i1 (op (nxv16i1 PPR_3b:$Pg),
+ (nxv16i8 ZPR:$Zs1),
+ (nxv16i8 (AArch64dup (imm0_127:$imm))))),
+ (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+ def : Pat<(nxv8i1 (op (nxv8i1 PPR_3b:$Pg),
+ (nxv8i16 ZPR:$Zs1),
+ (nxv8i16 (AArch64dup (imm0_127:$imm))))),
+ (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+ def : Pat<(nxv4i1 (op (nxv4i1 PPR_3b:$Pg),
+ (nxv4i32 ZPR:$Zs1),
+ (nxv4i32 (AArch64dup (imm0_127:$imm))))),
+ (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+ def : Pat<(nxv2i1 (op (nxv2i1 PPR_3b:$Pg),
+ (nxv2i64 ZPR:$Zs1),
+ (nxv2i64 (AArch64dup (imm0_127_64b:$imm))))),
+ (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, imm0_127_64b:$imm)>;
+
+ // Inverted intrinsic version
+ def : Pat<(nxv16i1 (inv_op (nxv16i1 PPR_3b:$Pg),
+ (nxv16i8 (AArch64dup (imm0_127:$imm))),
+ (nxv16i8 ZPR:$Zs1))),
+ (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+ def : Pat<(nxv8i1 (inv_op (nxv8i1 PPR_3b:$Pg),
+ (nxv8i16 (AArch64dup (imm0_127:$imm))),
+ (nxv8i16 ZPR:$Zs1))),
+ (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+ def : Pat<(nxv4i1 (inv_op (nxv4i1 PPR_3b:$Pg),
+ (nxv4i32 (AArch64dup (imm0_127:$imm))),
+ (nxv4i32 ZPR:$Zs1))),
+ (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+ def : Pat<(nxv2i1 (inv_op (nxv2i1 PPR_3b:$Pg),
+ (nxv2i64 (AArch64dup (imm0_127_64b:$imm))),
+ (nxv2i64 ZPR:$Zs1))),
+ (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, imm0_127_64b:$imm)>;
}
@@ -3401,7 +4031,8 @@ class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt>
}
class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
- RegisterClass gprty, PPRRegOp pprty>
+ RegisterClass gprty, PPRRegOp pprty,
+ ValueType vt, SDPatternOperator op>
: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm),
asm, "\t$Pd, $Rn, $Rm",
"", []>, Sched<[]> {
@@ -3421,18 +4052,28 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
let Defs = [NZCV];
}
-multiclass sve_int_while4_rr<bits<3> opc, string asm> {
- def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
- def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
- def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
- def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>;
+multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> {
+ def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8, nxv16i1, op>;
+ def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16, nxv8i1, op>;
+ def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32, nxv4i1, op>;
+ def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64, nxv2i1, op>;
+
+ def : SVE_2_Op_Pat<nxv16i1, op, i32, i32, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i1, op, i32, i32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i1, op, i32, i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i1, op, i32, i32, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_while8_rr<bits<3> opc, string asm> {
- def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
- def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
- def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
- def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
+multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op> {
+ def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8, nxv16i1, op>;
+ def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16, nxv8i1, op>;
+ def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32, nxv4i1, op>;
+ def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64, nxv2i1, op>;
+
+ def : SVE_2_Op_Pat<nxv16i1, op, i64, i64, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i1, op, i64, i64, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i1, op, i64, i64, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i1, op, i64, i64, !cast<Instruction>(NAME # _D)>;
}
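
The while-family instructions these multiclasses now select for construct a predicate from two scalar counters, the 4-variant taking 32-bit and the 8-variant 64-bit operands, as the i32/i64 pattern types show. A minimal scalar sketch of a whilelo-style unsigned compare (illustrative; the helper is hypothetical):

#include <cstdint>

// Lane i of the result predicate is active while (rn + i) <u rm.
void whileLo(bool *pd, uint64_t rn, uint64_t rm, int lanes) {
  for (int i = 0; i < lanes; ++i)
    pd[i] = rn + static_cast<uint64_t>(i) < rm;
}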
class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
@@ -3485,10 +4126,14 @@ class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,
let Inst{4-0} = Vd;
}
-multiclass sve_fp_fast_red<bits<3> opc, string asm> {
+multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16>;
def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32>;
def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64>;
+
+ def : SVE_2_Op_Pat<f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
@@ -3518,10 +4163,14 @@ class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,
let Constraints = "$Vdn = $_Vdn";
}
-multiclass sve_fp_2op_p_vd<bits<3> opc, string asm> {
+multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {
def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16>;
def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32>;
def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64>;
+
+ def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -3551,10 +4200,14 @@ class sve_fp_3op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
let Inst{3-0} = Pd;
}
-multiclass sve_fp_3op_p_pd<bits<3> opc, string asm> {
+multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
def _H : sve_fp_3op_p_pd<0b01, opc, asm, PPR16, ZPR16>;
def _S : sve_fp_3op_p_pd<0b10, opc, asm, PPR32, ZPR32>;
def _D : sve_fp_3op_p_pd<0b11, opc, asm, PPR64, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
@@ -3735,7 +4388,8 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm> {
}
}
-multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
+multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm,
+ SDPatternOperator op = null_frag> {
def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
ElementSizeB>;
def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
@@ -3751,6 +4405,11 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
@@ -3777,17 +4436,28 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>;
def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>;
def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>;
def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
def _H : sve_int_bin_pred_shift<0b01, 0b1, opc, asm, ZPR16, ZPR64>;
def _S : sve_int_bin_pred_shift<0b10, 0b1, opc, asm, ZPR32, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv2i64, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv2i64, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv2i64, !cast<Instruction>(NAME # _S)>;
}
//===----------------------------------------------------------------------===//
@@ -4109,32 +4779,84 @@ class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
let mayStore = 1;
}
-multiclass sve_mem_sst_sv_32_scaled<bits<3> opc, string asm,
- RegisterOperand listty,
- ZPRRegOp zprty,
+multiclass sve_mem_32b_sst_sv_32_scaled<bits<3> opc, string asm,
+ SDPatternOperator sxtw_op,
+ SDPatternOperator uxtw_op,
RegisterOperand sxtw_opnd,
- RegisterOperand uxtw_opnd > {
- def _UXTW_SCALED : sve_mem_sst_sv<opc, 0, 1, asm, listty, uxtw_opnd>;
- def _SXTW_SCALED : sve_mem_sst_sv<opc, 1, 1, asm, listty, sxtw_opnd>;
+ RegisterOperand uxtw_opnd,
+ ValueType vt > {
+ def _UXTW_SCALED : sve_mem_sst_sv<opc, 0, 1, asm, Z_s, uxtw_opnd>;
+ def _SXTW_SCALED : sve_mem_sst_sv<opc, 1, 1, asm, Z_s, sxtw_opnd>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _UXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ (!cast<Instruction>(NAME # _UXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _SXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+ (!cast<Instruction>(NAME # _SXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+
+ def : Pat<(uxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
+ (!cast<Instruction>(NAME # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ def : Pat<(sxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
+ (!cast<Instruction>(NAME # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
-multiclass sve_mem_sst_sv_32_unscaled<bits<3> opc, string asm,
- RegisterOperand listty,
- ZPRRegOp zprty,
- RegisterOperand sxtw_opnd,
- RegisterOperand uxtw_opnd> {
- def _UXTW : sve_mem_sst_sv<opc, 0, 0, asm, listty, uxtw_opnd>;
- def _SXTW : sve_mem_sst_sv<opc, 1, 0, asm, listty, sxtw_opnd>;
+multiclass sve_mem_64b_sst_sv_32_scaled<bits<3> opc, string asm,
+ SDPatternOperator sxtw_op,
+ SDPatternOperator uxtw_op,
+ RegisterOperand sxtw_opnd,
+ RegisterOperand uxtw_opnd,
+ ValueType vt > {
+ def _UXTW_SCALED : sve_mem_sst_sv<opc, 0, 1, asm, Z_d, uxtw_opnd>;
+ def _SXTW_SCALED : sve_mem_sst_sv<opc, 1, 1, asm, Z_d, sxtw_opnd>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _UXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _SXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+
+ def : Pat<(uxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
+ (!cast<Instruction>(NAME # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ def : Pat<(sxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
+ (!cast<Instruction>(NAME # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+}
+
+multiclass sve_mem_64b_sst_sv_32_unscaled<bits<3> opc, string asm,
+ SDPatternOperator sxtw_op,
+ SDPatternOperator uxtw_op,
+ RegisterOperand sxtw_opnd,
+ RegisterOperand uxtw_opnd,
+ ValueType vt> {
+ def _UXTW : sve_mem_sst_sv<opc, 0, 0, asm, Z_d, uxtw_opnd>;
+ def _SXTW : sve_mem_sst_sv<opc, 1, 0, asm, Z_d, sxtw_opnd>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _UXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _SXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+
+ def : Pat<(uxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
+ (!cast<Instruction>(NAME # _UXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ def : Pat<(sxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
+ (!cast<Instruction>(NAME # _SXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+}
+
+multiclass sve_mem_32b_sst_sv_32_unscaled<bits<3> opc, string asm,
+ SDPatternOperator sxtw_op,
+ SDPatternOperator uxtw_op,
+ RegisterOperand sxtw_opnd,
+ RegisterOperand uxtw_opnd,
+ ValueType vt> {
+ def _UXTW : sve_mem_sst_sv<opc, 0, 0, asm, Z_s, uxtw_opnd>;
+ def _SXTW : sve_mem_sst_sv<opc, 1, 0, asm, Z_s, sxtw_opnd>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _UXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ (!cast<Instruction>(NAME # _UXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _SXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+ (!cast<Instruction>(NAME # _SXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+
+ def : Pat<(uxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
+ (!cast<Instruction>(NAME # _UXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ def : Pat<(sxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
+ (!cast<Instruction>(NAME # _SXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
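
These scatter-store multiclasses differ only in whether each lane's 32-bit offset is sign- or zero-extended to 64 bits (sxtw vs. uxtw) and whether it is scaled by the element size. A scalar model of the unscaled sxtw form, under that reading (hypothetical helper, not patch code):

#include <cstdint>
#include <cstring>

// Each active lane stores data[i] to base + sign_extend(offsets[i]).
void scatterStoreSxtw(const uint32_t *data, const bool *pg, uint8_t *base,
                      const int32_t *offsets, int lanes) {
  for (int i = 0; i < lanes; ++i)
    if (pg[i])
      std::memcpy(base + static_cast<int64_t>(offsets[i]), &data[i],
                  sizeof(uint32_t));
}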
class sve_mem_sst_sv2<bits<2> msz, bit scaled, string asm,
@@ -4161,19 +4883,28 @@ class sve_mem_sst_sv2<bits<2> msz, bit scaled, string asm,
}
multiclass sve_mem_sst_sv_64_scaled<bits<2> msz, string asm,
- RegisterOperand zprext> {
- def "" : sve_mem_sst_sv2<msz, 1, asm, zprext>;
+ SDPatternOperator op,
+ RegisterOperand zprext,
+ ValueType vt> {
+ def _SCALED_REAL : sve_mem_sst_sv2<msz, 1, asm, zprext>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
- (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
+ (!cast<Instruction>(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
+ def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt),
+ (!cast<Instruction>(NAME # _SCALED_REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
}
-multiclass sve_mem_sst_sv_64_unscaled<bits<2> msz, string asm> {
- def "" : sve_mem_sst_sv2<msz, 0, asm, ZPR64ExtLSL8>;
+multiclass sve_mem_sst_sv_64_unscaled<bits<2> msz, string asm,
+ SDPatternOperator op,
+ ValueType vt> {
+ def _REAL : sve_mem_sst_sv2<msz, 0, asm, ZPR64ExtLSL8>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
- (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
+ (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
+
+ def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
+ (!cast<Instruction>(NAME # _REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty,
@@ -4199,16 +4930,38 @@ class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty,
let mayStore = 1;
}
-multiclass sve_mem_sst_vi_ptrs<bits<3> opc, string asm, RegisterOperand listty,
- ZPRRegOp zprty, Operand imm_ty> {
- def _IMM : sve_mem_sst_vi<opc, asm, zprty, listty, imm_ty>;
+multiclass sve_mem_32b_sst_vi_ptrs<bits<3> opc, string asm,
+ Operand imm_ty,
+ SDPatternOperator op,
+ ValueType vt> {
+ def _IMM : sve_mem_sst_vi<opc, asm, ZPR32, Z_s, imm_ty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
+ (!cast<Instruction>(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $imm5]",
+ (!cast<Instruction>(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
+ (!cast<Instruction>(NAME # _IMM) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
+
+ def : Pat<(op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), (nxv4i32 ZPR:$ptrs), imm_ty:$index, vt),
+ (!cast<Instruction>(NAME # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
+}
+
+multiclass sve_mem_64b_sst_vi_ptrs<bits<3> opc, string asm,
+ Operand imm_ty,
+ SDPatternOperator op,
+ ValueType vt> {
+ def _IMM : sve_mem_sst_vi<opc, asm, ZPR64, Z_d, imm_ty>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
- (!cast<Instruction>(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 0>;
+ (!cast<Instruction>(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $imm5]",
- (!cast<Instruction>(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5), 0>;
+ (!cast<Instruction>(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
- (!cast<Instruction>(NAME # _IMM) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 1>;
+ (!cast<Instruction>(NAME # _IMM) Z_s:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
+
+ def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt),
+ (!cast<Instruction>(NAME # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
}
class sve_mem_z_spill<string asm>
@@ -4287,11 +5040,17 @@ class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm,
let Inst{3-0} = Pd;
}
-multiclass sve_int_perm_bin_perm_pp<bits<3> opc, string asm> {
+multiclass sve_int_perm_bin_perm_pp<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_perm_bin_perm_pp<opc, 0b00, asm, PPR8>;
def _H : sve_int_perm_bin_perm_pp<opc, 0b01, asm, PPR16>;
def _S : sve_int_perm_bin_perm_pp<opc, 0b10, asm, PPR32>;
def _D : sve_int_perm_bin_perm_pp<opc, 0b11, asm, PPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_punpk<bit opc, string asm>
@@ -4397,11 +5156,16 @@ class sve_int_perm_clast_rz<bits<2> sz8_64, bit ab, string asm,
let Constraints = "$Rdn = $_Rdn";
}
-multiclass sve_int_perm_clast_rz<bit ab, string asm> {
+multiclass sve_int_perm_clast_rz<bit ab, string asm, SDPatternOperator op> {
def _B : sve_int_perm_clast_rz<0b00, ab, asm, ZPR8, GPR32>;
def _H : sve_int_perm_clast_rz<0b01, ab, asm, ZPR16, GPR32>;
def _S : sve_int_perm_clast_rz<0b10, ab, asm, ZPR32, GPR32>;
def _D : sve_int_perm_clast_rz<0b11, ab, asm, ZPR64, GPR64>;
+
+ def : SVE_3_Op_Pat<i32, op, nxv16i1, i32, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<i32, op, nxv8i1, i32, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<i32, op, nxv4i1, i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<i64, op, nxv2i1, i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_clast_vz<bits<2> sz8_64, bit ab, string asm,
@@ -4425,11 +5189,15 @@ class sve_int_perm_clast_vz<bits<2> sz8_64, bit ab, string asm,
let Constraints = "$Vdn = $_Vdn";
}
-multiclass sve_int_perm_clast_vz<bit ab, string asm> {
+multiclass sve_int_perm_clast_vz<bit ab, string asm, SDPatternOperator op> {
def _B : sve_int_perm_clast_vz<0b00, ab, asm, ZPR8, FPR8>;
def _H : sve_int_perm_clast_vz<0b01, ab, asm, ZPR16, FPR16>;
def _S : sve_int_perm_clast_vz<0b10, ab, asm, ZPR32, FPR32>;
def _D : sve_int_perm_clast_vz<0b11, ab, asm, ZPR64, FPR64>;
+
+ def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
@@ -4455,11 +5223,20 @@ class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
let ElementSize = ElementSizeNone;
}
-multiclass sve_int_perm_clast_zz<bit ab, string asm> {
+multiclass sve_int_perm_clast_zz<bit ab, string asm, SDPatternOperator op> {
def _B : sve_int_perm_clast_zz<0b00, ab, asm, ZPR8>;
def _H : sve_int_perm_clast_zz<0b01, ab, asm, ZPR16>;
def _S : sve_int_perm_clast_zz<0b10, ab, asm, ZPR32>;
def _D : sve_int_perm_clast_zz<0b11, ab, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm,
@@ -4481,11 +5258,16 @@ class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm,
let Inst{4-0} = Rd;
}
-multiclass sve_int_perm_last_r<bit ab, string asm> {
+multiclass sve_int_perm_last_r<bit ab, string asm, SDPatternOperator op> {
def _B : sve_int_perm_last_r<0b00, ab, asm, ZPR8, GPR32>;
def _H : sve_int_perm_last_r<0b01, ab, asm, ZPR16, GPR32>;
def _S : sve_int_perm_last_r<0b10, ab, asm, ZPR32, GPR32>;
def _D : sve_int_perm_last_r<0b11, ab, asm, ZPR64, GPR64>;
+
+ def : SVE_2_Op_Pat<i32, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<i32, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_last_v<bits<2> sz8_64, bit ab, string asm,
@@ -4507,11 +5289,16 @@ class sve_int_perm_last_v<bits<2> sz8_64, bit ab, string asm,
let Inst{4-0} = Vd;
}
-multiclass sve_int_perm_last_v<bit ab, string asm> {
+multiclass sve_int_perm_last_v<bit ab, string asm, SDPatternOperator op> {
def _B : sve_int_perm_last_v<0b00, ab, asm, ZPR8, FPR8>;
def _H : sve_int_perm_last_v<0b01, ab, asm, ZPR16, FPR16>;
def _S : sve_int_perm_last_v<0b10, ab, asm, ZPR32, FPR32>;
def _D : sve_int_perm_last_v<0b11, ab, asm, ZPR64, FPR64>;
+
+ def : SVE_2_Op_Pat<f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
@@ -4534,11 +5321,20 @@ class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let ElementSize = ElementSizeNone;
}
-multiclass sve_int_perm_splice<string asm> {
+multiclass sve_int_perm_splice<string asm, SDPatternOperator op> {
def _B : sve_int_perm_splice<0b00, asm, ZPR8>;
def _H : sve_int_perm_splice<0b01, asm, ZPR16>;
def _S : sve_int_perm_splice<0b10, asm, ZPR32>;
def _D : sve_int_perm_splice<0b11, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
@@ -4588,26 +5384,46 @@ class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_perm_rev_rbit<string asm> {
+multiclass sve_int_perm_rev_rbit<string asm, SDPatternOperator op> {
def _B : sve_int_perm_rev<0b00, 0b11, asm, ZPR8>;
def _H : sve_int_perm_rev<0b01, 0b11, asm, ZPR16>;
def _S : sve_int_perm_rev<0b10, 0b11, asm, ZPR32>;
def _D : sve_int_perm_rev<0b11, 0b11, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_perm_rev_revb<string asm> {
+multiclass sve_int_perm_rev_revb<string asm,
+ SDPatternOperator int_op,
+ SDPatternOperator ir_op> {
def _H : sve_int_perm_rev<0b01, 0b00, asm, ZPR16>;
def _S : sve_int_perm_rev<0b10, 0b00, asm, ZPR32>;
def _D : sve_int_perm_rev<0b11, 0b00, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv8i16, int_op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, int_op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, int_op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_1_Op_AllActive_Pat<nxv8i16, ir_op, nxv8i16, !cast<Instruction>(NAME # _H), PTRUE_H>;
+ def : SVE_1_Op_AllActive_Pat<nxv4i32, ir_op, nxv4i32, !cast<Instruction>(NAME # _S), PTRUE_S>;
+ def : SVE_1_Op_AllActive_Pat<nxv2i64, ir_op, nxv2i64, !cast<Instruction>(NAME # _D), PTRUE_D>;
}
-multiclass sve_int_perm_rev_revh<string asm> {
+multiclass sve_int_perm_rev_revh<string asm, SDPatternOperator op> {
def _S : sve_int_perm_rev<0b10, 0b01, asm, ZPR32>;
def _D : sve_int_perm_rev<0b11, 0b01, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_perm_rev_revw<string asm> {
+multiclass sve_int_perm_rev_revw<string asm, SDPatternOperator op> {
def _D : sve_int_perm_rev<0b11, 0b10, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4700,9 +5516,14 @@ class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
-multiclass sve_int_perm_compact<string asm> {
+multiclass sve_int_perm_compact<string asm, SDPatternOperator op> {
def _S : sve_int_perm_compact<0b0, asm, ZPR32>;
def _D : sve_int_perm_compact<0b1, asm, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
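
COMPACT, which these patterns now select for, packs the active elements down to the low-numbered lanes; the sketch below assumes the trailing lanes are zeroed, which matches the usual reading of the instruction (hypothetical helper):

#include <cstdint>

// Move the active elements of zn to the bottom of zd, zero the rest.
void compact(uint32_t *zd, const bool *pg, const uint32_t *zn, int lanes) {
  int j = 0;
  for (int i = 0; i < lanes; ++i)
    if (pg[i])
      zd[j++] = zn[i];
  for (; j < lanes; ++j)
    zd[j] = 0;
}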
@@ -4736,14 +5557,14 @@ class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
RegisterOperand listty, ZPRRegOp zprty> {
- def _REAL : sve_mem_cld_si_base<dtype, nf, asm, listty>;
+ def "" : sve_mem_cld_si_base<dtype, nf, asm, listty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
- (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+ (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
}
multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty,
@@ -5044,8 +5865,11 @@ class sve_mem_32b_gld_sv<bits<4> opc, bit xs, bit scaled, string asm,
}
multiclass sve_mem_32b_gld_sv_32_scaled<bits<4> opc, string asm,
+ SDPatternOperator sxtw_op,
+ SDPatternOperator uxtw_op,
RegisterOperand sxtw_opnd,
- RegisterOperand uxtw_opnd> {
+ RegisterOperand uxtw_opnd,
+ ValueType vt> {
def _UXTW_SCALED_REAL : sve_mem_32b_gld_sv<opc, 0, 1, asm, uxtw_opnd>;
def _SXTW_SCALED_REAL : sve_mem_32b_gld_sv<opc, 1, 1, asm, sxtw_opnd>;
@@ -5053,11 +5877,19 @@ multiclass sve_mem_32b_gld_sv_32_scaled<bits<4> opc, string asm,
(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+
+ def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)),
+ (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
+ def : Pat<(nxv4i32 (sxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)),
+ (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
}
multiclass sve_mem_32b_gld_vs_32_unscaled<bits<4> opc, string asm,
+ SDPatternOperator sxtw_op,
+ SDPatternOperator uxtw_op,
RegisterOperand sxtw_opnd,
- RegisterOperand uxtw_opnd> {
+ RegisterOperand uxtw_opnd,
+ ValueType vt> {
def _UXTW_REAL : sve_mem_32b_gld_sv<opc, 0, 0, asm, uxtw_opnd>;
def _SXTW_REAL : sve_mem_32b_gld_sv<opc, 1, 0, asm, sxtw_opnd>;
@@ -5065,6 +5897,11 @@ multiclass sve_mem_32b_gld_vs_32_unscaled<bits<4> opc, string asm,
(!cast<Instruction>(NAME # _UXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+
+ def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)),
+ (!cast<Instruction>(NAME # _UXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ def : Pat<(nxv4i32 (sxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)),
+ (!cast<Instruction>(NAME # _SXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
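
The gather loads mirror the scatter case: each active lane loads from the scalar base plus a per-lane extended offset. The sketch below assumes zeroing behaviour for inactive lanes, consistent with a /z governing predicate (hypothetical helper, not patch code):

#include <cstdint>
#include <cstring>

// Each active lane loads 32 bits from base + zero_extend(offsets[i]);
// inactive lanes are zeroed.
void gatherLoadUxtw(uint32_t *zd, const bool *pg, const uint8_t *base,
                    const uint32_t *offsets, int lanes) {
  for (int i = 0; i < lanes; ++i) {
    zd[i] = 0;
    if (pg[i])
      std::memcpy(&zd[i], base + static_cast<uint64_t>(offsets[i]),
                  sizeof(uint32_t));
  }
}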
@@ -5092,7 +5929,8 @@ class sve_mem_32b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
let Uses = !if(!eq(opc{0}, 1), [FFR], []);
}
-multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty> {
+multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty,
+ SDPatternOperator op, ValueType vt> {
def _IMM_REAL : sve_mem_32b_gld_vi<opc, asm, imm_ty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
@@ -5101,6 +5939,9 @@ multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty> {
(!cast<Instruction>(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
+
+ def : Pat<(nxv4i32 (op (nxv4i1 PPR:$gp), (nxv4i32 ZPR:$ptrs), imm_ty:$index, vt)),
+ (!cast<Instruction>(NAME # _IMM_REAL) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
}
class sve_mem_prfm_si<bits<2> msz, string asm>
@@ -5336,8 +6177,11 @@ class sve_mem_64b_gld_sv<bits<4> opc, bit xs, bit scaled, bit lsl, string asm,
}
multiclass sve_mem_64b_gld_sv_32_scaled<bits<4> opc, string asm,
+ SDPatternOperator sxtw_op,
+ SDPatternOperator uxtw_op,
RegisterOperand sxtw_opnd,
- RegisterOperand uxtw_opnd> {
+ RegisterOperand uxtw_opnd,
+ ValueType vt> {
def _UXTW_SCALED_REAL : sve_mem_64b_gld_sv<opc, 0, 1, 0, asm, uxtw_opnd>;
def _SXTW_SCALED_REAL : sve_mem_64b_gld_sv<opc, 1, 1, 0, asm, sxtw_opnd>;
@@ -5345,11 +6189,19 @@ multiclass sve_mem_64b_gld_sv_32_scaled<bits<4> opc, string asm,
(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+
+ def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
+ (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
+ def : Pat<(nxv2i64 (sxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
+ (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
}
multiclass sve_mem_64b_gld_vs_32_unscaled<bits<4> opc, string asm,
+ SDPatternOperator sxtw_op,
+ SDPatternOperator uxtw_op,
RegisterOperand sxtw_opnd,
- RegisterOperand uxtw_opnd> {
+ RegisterOperand uxtw_opnd,
+ ValueType vt> {
def _UXTW_REAL : sve_mem_64b_gld_sv<opc, 0, 0, 0, asm, uxtw_opnd>;
def _SXTW_REAL : sve_mem_64b_gld_sv<opc, 1, 0, 0, asm, sxtw_opnd>;
@@ -5357,21 +6209,34 @@ multiclass sve_mem_64b_gld_vs_32_unscaled<bits<4> opc, string asm,
(!cast<Instruction>(NAME # _UXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+
+ def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
+ (!cast<Instruction>(NAME # _UXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ def : Pat<(nxv2i64 (sxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
+ (!cast<Instruction>(NAME # _SXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
multiclass sve_mem_64b_gld_sv2_64_scaled<bits<4> opc, string asm,
- RegisterOperand zprext> {
+ SDPatternOperator op,
+ RegisterOperand zprext, ValueType vt> {
def _SCALED_REAL : sve_mem_64b_gld_sv<opc, 1, 1, 1, asm, zprext>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
+
+ def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
+ (!cast<Instruction>(NAME # _SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
}
-multiclass sve_mem_64b_gld_vs2_64_unscaled<bits<4> opc, string asm> {
+multiclass sve_mem_64b_gld_vs2_64_unscaled<bits<4> opc, string asm,
+ SDPatternOperator op, ValueType vt> {
def _REAL : sve_mem_64b_gld_sv<opc, 1, 0, 1, asm, ZPR64ExtLSL8>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
+
+ def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
+ (!cast<Instruction>(NAME # _REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
class sve_mem_64b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
@@ -5398,7 +6263,8 @@ class sve_mem_64b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
let Uses = !if(!eq(opc{0}, 1), [FFR], []);
}
-multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty> {
+multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty,
+ SDPatternOperator op, ValueType vt> {
def _IMM_REAL : sve_mem_64b_gld_vi<opc, asm, imm_ty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
@@ -5407,6 +6273,9 @@ multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty> {
(!cast<Instruction>(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
+
+ def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt)),
+ (!cast<Instruction>(NAME # _IMM_REAL) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
}
// bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl)
@@ -5550,10 +6419,14 @@ class sve_int_bin_cons_misc_0_b<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
-multiclass sve_int_bin_cons_misc_0_b<string asm> {
+multiclass sve_int_bin_cons_misc_0_b<string asm, SDPatternOperator op> {
def _H : sve_int_bin_cons_misc_0_b<0b01, asm, ZPR16>;
def _S : sve_int_bin_cons_misc_0_b<0b10, asm, ZPR32>;
def _D : sve_int_bin_cons_misc_0_b<0b11, asm, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_bin_cons_misc_0_c<bits<8> opc, string asm, ZPRRegOp zprty>
@@ -5573,6 +6446,16 @@ class sve_int_bin_cons_misc_0_c<bits<8> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
+multiclass sve_int_bin_cons_misc_0_c_fexpa<string asm, SDPatternOperator op> {
+ def _H : sve_int_bin_cons_misc_0_c<0b01000000, asm, ZPR16>;
+ def _S : sve_int_bin_cons_misc_0_c<0b10000000, asm, ZPR32>;
+ def _D : sve_int_bin_cons_misc_0_c<0b11000000, asm, ZPR64>;
+
+ def : SVE_1_Op_Pat<nxv8f16, op, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Pat<nxv4f32, op, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Pat<nxv2f64, op, nxv2i64, !cast<Instruction>(NAME # _D)>;
+}
+
//===----------------------------------------------------------------------===//
// SVE Integer Reduction Group
//===----------------------------------------------------------------------===//
@@ -5597,31 +6480,51 @@ class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
let Inst{4-0} = Vd;
}
-multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm> {
+multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
+
+ def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
}
-multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm> {
+multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm, SDPatternOperator op, SDPatternOperator opSaddv> {
def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64>;
+
+ def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<i64, opSaddv, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
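
Both the SADDV and UADDV reductions widen into a 64-bit scalar, which is why every pattern above yields an i64 even for byte and halfword elements (the instructions all write FPR64). As a scalar illustration, assuming the obvious semantics:

#include <cstdint>

// Unsigned add reduction of the active byte lanes into a 64-bit total.
uint64_t uaddv(const bool *pg, const uint8_t *zn, int lanes) {
  uint64_t sum = 0;
  for (int i = 0; i < lanes; ++i)
    if (pg[i])
      sum += zn[i];
  return sum;
}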
-multiclass sve_int_reduce_1<bits<3> opc, string asm> {
+multiclass sve_int_reduce_1<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8>;
def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16>;
def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32>;
def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64>;
+
+ def : SVE_2_Op_Pat_Reduce_To_Neon<v16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B), bsub>;
+ def : SVE_2_Op_Pat_Reduce_To_Neon<v8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H), hsub>;
+ def : SVE_2_Op_Pat_Reduce_To_Neon<v4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S), ssub>;
+ def : SVE_2_Op_Pat_Reduce_To_Neon<v2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D), dsub>;
}
-multiclass sve_int_reduce_2<bits<3> opc, string asm> {
+multiclass sve_int_reduce_2<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8>;
def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16>;
def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>;
def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>;
+
+ def : SVE_2_Op_Pat_Reduce_To_Neon<v16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B), bsub>;
+ def : SVE_2_Op_Pat_Reduce_To_Neon<v8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H), hsub>;
+ def : SVE_2_Op_Pat_Reduce_To_Neon<v4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S), ssub>;
+ def : SVE_2_Op_Pat_Reduce_To_Neon<v2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D), dsub>;
}
class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index 7f02da6a9516..a6796742117b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -31,7 +31,7 @@ Target &llvm::getTheARM64_32Target() {
return TheARM64_32Target;
}
-extern "C" void LLVMInitializeAArch64TargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64TargetInfo() {
// Now register the "arm64" name for use with "-march". We don't want it to
// take possession of the Triple::aarch64 tags though.
TargetRegistry::RegisterTarget(getTheARM64Target(), "arm64",
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 7a4fcac09ec4..87980cddb7c0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -250,7 +250,13 @@ enum CondCode { // Meaning (integer) Meaning (floating-point)
AL = 0xe, // Always (unconditional) Always (unconditional)
NV = 0xf, // Always (unconditional) Always (unconditional)
// Note the NV exists purely to disassemble 0b1111. Execution is "always".
- Invalid
+ Invalid,
+
+ // Common aliases used for SVE.
+ ANY_ACTIVE = NE, // (!Z)
+ FIRST_ACTIVE = MI, // ( N)
+ LAST_ACTIVE = LO, // (!C)
+ NONE_ACTIVE = EQ // ( Z)
};
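
These aliases work because SVE predicate tests report through NZCV with fixed meanings: N is set when the first tested lane is active, Z when no lane is active, and C is cleared when the last lane is active, so the existing integer condition codes can be reused unchanged. A small sketch of that mapping, assuming the flag layout just described:

struct NZCV { bool N, Z, C, V; };

// Flags produced by an SVE predicate test, per the aliases above.
bool anyActive(NZCV f)   { return !f.Z; } // ANY_ACTIVE   == NE
bool firstActive(NZCV f) { return  f.N; } // FIRST_ACTIVE == MI
bool lastActive(NZCV f)  { return !f.C; } // LAST_ACTIVE  == LO
bool noneActive(NZCV f)  { return  f.Z; } // NONE_ACTIVE  == EQ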
inline static const char *getCondCodeName(CondCode Code) {
@@ -643,6 +649,17 @@ namespace AArch64II {
};
} // end namespace AArch64II
+namespace AArch64 {
+// The number of bits in an SVE register is architecturally defined
+// to be a multiple of this value. If <M x t> has this number of bits,
+// an <n x M x t> vector can be stored in an SVE register without any
+// redundant bits. If <M x t> has this number of bits divided by P,
+// an <n x M x t> vector is stored in an SVE register by placing element
+// i at index i*P of an <n x (M*P) x t> vector. The other elements of
+// the <n x (M*P) x t> vector (such as index 1) are undefined.
+static constexpr unsigned SVEBitsPerBlock = 128;
+const unsigned NeonBitsPerVector = 128;
+} // end namespace AArch64
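
To make the widening rule concrete: with SVEBitsPerBlock = 128, an <n x 2 x f32> value carries 64 data bits per block (P = 2), so it is stored as an <n x 4 x f32> container with element i at container index 2*i and the odd lanes undefined. The arithmetic can be checked at compile time; a purely illustrative sketch:

constexpr unsigned kSVEBitsPerBlock = 128;

// P such that M lanes of EltBits bits each fill a block when widened P-fold.
constexpr unsigned widenFactor(unsigned M, unsigned EltBits) {
  return kSVEBitsPerBlock / (M * EltBits);
}

static_assert(widenFactor(2, 32) == 2, "nxv2f32 widens into nxv4f32");
static_assert(widenFactor(4, 32) == 1, "nxv4f32 already fills a block");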
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
index b64422ae5427..fbed51de0ea4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -11,6 +11,8 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/IntrinsicsR600.h" // TODO: Sink this.
+#include "llvm/IR/IntrinsicsAMDGPU.h" // TODO: Sink this.
namespace llvm {
@@ -154,6 +156,9 @@ extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowPass(PassRegistry &);
extern char &SILowerControlFlowID;
+void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
+extern char &SIRemoveShortExecBranchesID;
+
void initializeSIInsertSkipsPass(PassRegistry &);
extern char &SIInsertSkipsPassID;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 4c1dbd4c5304..ff2bda6bed53 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -118,7 +118,7 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
for (GlobalVariable &GV : M.globals()) {
// TODO: Region address
- unsigned AS = GV.getType()->getAddressSpace();
+ unsigned AS = GV.getAddressSpace();
if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 71121ade0a49..6fb507083cef 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index f2d903c8e7b1..9e07b4d252b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer -------------------===//
+//===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -69,15 +69,14 @@ using namespace llvm::AMDGPU::HSAMD;
// We want to use these instructions, and using fp32 denormals also causes
// instructions to run at the double precision rate for the device so it's
// probably best to just report no single precision denormals.
-static uint32_t getFPMode(const MachineFunction &F) {
- const GCNSubtarget& ST = F.getSubtarget<GCNSubtarget>();
- // TODO: Is there any real use for the flush in only / flush out only modes?
+static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) {
+ // TODO: Is there any real use for the flush in only / flush out only modes?
uint32_t FP32Denormals =
- ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+ Mode.FP32Denormals ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
uint32_t FP64Denormals =
- ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+ Mode.FP64FP16Denormals ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
@@ -91,7 +90,7 @@ createAMDGPUAsmPrinterPass(TargetMachine &tm,
return new AMDGPUAsmPrinter(tm, std::move(Streamer));
}
-extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
+extern "C" void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(getTheAMDGPUTarget(),
llvm::createR600AsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(getTheGCNTarget(),
@@ -793,6 +792,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
IsSGPR = false;
Width = 3;
} else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
+ IsSGPR = true;
Width = 3;
} else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
@@ -806,6 +806,12 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
IsSGPR = false;
IsAGPR = true;
Width = 4;
+ } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 5;
+ } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 5;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
"trap handler registers should not be used");
@@ -1026,11 +1032,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
&STM, ProgInfo.NumVGPRsForWavesPerEU);
+ const SIModeRegisterDefaults Mode = MFI->getMode();
+
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
// register.
- ProgInfo.FloatMode = getFPMode(MF);
+ ProgInfo.FloatMode = getFPMode(Mode);
- const SIModeRegisterDefaults Mode = MFI->getMode();
ProgInfo.IEEEMode = Mode.IEEE;
// Make clamp modifier on NaN input return 0.
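A worked value for the refactored getFPMode, assuming the two-bit field encoding from SIDefines.h (SP round in bits 1:0, DP round in 3:2, SP denorm in 5:4, DP denorm in 7:6) with round-to-nearest == 0, flush-in/flush-out == 0, and no-flush == 3:

    // Common default: FP32 denormals off, FP64/FP16 denormals on.
    uint32_t FloatMode = 0 | (0 << 2) | (0 << 4) | (3 << 6); // 0xC0 == 192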
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index ba8343142c63..59aa0ea98aa7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#define DEBUG_TYPE "amdgpu-atomic-optimizer"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 58c44acde1a7..c657ca71bfdf 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -356,7 +356,7 @@ Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B,
Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
B.buildConstant(OffsetReg, Offset);
- B.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
+ B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
return DstReg;
}
@@ -368,8 +368,7 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B,
MachineFunction &MF = B.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
- PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
Register PtrReg = lowerParameterPtr(B, ParamTy, Offset);
@@ -517,11 +516,26 @@ static void packSplitRegsToOrigType(MachineIRBuilder &B,
return;
}
+ MachineRegisterInfo &MRI = *B.getMRI();
+
assert(LLTy.isVector() && !PartLLT.isVector());
LLT DstEltTy = LLTy.getElementType();
+
+ // Pointer information was discarded. We'll need to coerce some register types
+ // to avoid violating type constraints.
+ LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();
+
+ assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());
+
if (DstEltTy == PartLLT) {
// Vector was trivially scalarized.
+
+ if (RealDstEltTy.isPointer()) {
+ for (Register Reg : Regs)
+ MRI.setType(Reg, RealDstEltTy);
+ }
+
B.buildBuildVector(OrigRegs[0], Regs);
} else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
// Deal with vector with 64-bit elements decomposed to 32-bit
@@ -532,8 +546,9 @@ static void packSplitRegsToOrigType(MachineIRBuilder &B,
assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
- auto Merge = B.buildMerge(DstEltTy,
- Regs.take_front(PartsPerElt));
+ auto Merge = B.buildMerge(RealDstEltTy, Regs.take_front(PartsPerElt));
+ // Fix the type in case this is really a vector of pointers.
+ MRI.setType(Merge.getReg(0), RealDstEltTy);
EltMerges.push_back(Merge.getReg(0));
Regs = Regs.drop_front(PartsPerElt);
}
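A sketch of what the pointer fix produces for a hypothetical <2 x p1> value rebuilt from four s32 parts (PartsPerElt = 2); register names are illustrative:

    ; each pair of parts is merged directly as p1, then the vector is rebuilt,
    ; so no s64/p1 element-type mismatch violates the G_BUILD_VECTOR constraint
    %e0:_(p1) = G_MERGE_VALUES %part0(s32), %part1(s32)
    %e1:_(p1) = G_MERGE_VALUES %part2(s32), %part3(s32)
    %orig:_(<2 x p1>) = G_BUILD_VECTOR %e0(p1), %e1(p1)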
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 1640a4a59ee2..cf908766caa0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <cassert>
@@ -69,6 +70,7 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
bool HasUnsafeFPMath = false;
+ bool HasFP32Denormals = false;
/// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
/// binary operation \p V.
@@ -574,7 +576,6 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
Value *NewFDiv = nullptr;
- bool HasDenormals = ST->hasFP32Denormals();
if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
NewFDiv = UndefValue::get(VT);
@@ -585,7 +586,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
Value *DenEltI = Builder.CreateExtractElement(Den, I);
Value *NewElt;
- if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasDenormals)) {
+ if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasFP32Denormals)) {
NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
} else {
NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
@@ -594,7 +595,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
}
} else {
- if (!shouldKeepFDivF32(Num, UnsafeDiv, HasDenormals))
+ if (!shouldKeepFDivF32(Num, UnsafeDiv, HasFP32Denormals))
NewFDiv = Builder.CreateCall(Decl, { Num, Den });
}
@@ -1033,6 +1034,7 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DA = &getAnalysis<LegacyDivergenceAnalysis>();
HasUnsafeFPMath = hasUnsafeFPMath(F);
+ HasFP32Denormals = ST->hasFP32Denormals(F);
bool MadeChange = false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index f2be1ca44d34..d420aa02ac28 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -30,14 +30,14 @@ def gi_vop3mods :
GIComplexOperandMatcher<s32, "selectVOP3Mods">,
GIComplexPatternEquiv<VOP3Mods>;
+def gi_vop3mods_nnan :
+ GIComplexOperandMatcher<s32, "selectVOP3Mods_nnan">,
+ GIComplexPatternEquiv<VOP3Mods_nnan>;
+
def gi_vop3omods :
GIComplexOperandMatcher<s32, "selectVOP3OMods">,
GIComplexPatternEquiv<VOP3OMods>;
-def gi_vop3omods0clamp0omod :
- GIComplexOperandMatcher<s32, "selectVOP3Mods0Clamp0OMod">,
- GIComplexPatternEquiv<VOP3Mods0Clamp0OMod>;
-
def gi_vop3opselmods0 :
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods0">,
GIComplexPatternEquiv<VOP3OpSelMods0>;
@@ -117,6 +117,8 @@ def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax_glue>;
def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;
def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32>;
+def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
+
class GISelSop2Pat <
SDPatternOperator node,
@@ -206,3 +208,15 @@ foreach Ty = [i64, p0, p1, p4] in {
def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
GISDNodeXFormEquiv<as_i32timm>;
+
+def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm">,
+ GISDNodeXFormEquiv<as_i16timm>;
+
+def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
+ GISDNodeXFormEquiv<NegateImm>;
+
+def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">,
+ GISDNodeXFormEquiv<bitcast_fpimm_to_i32>;
+
+def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">,
+ GISDNodeXFormEquiv<IMMPopCount>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index 85d1ad349157..2e92ae51660b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -15,29 +15,34 @@ namespace AMDGPU {
enum PartialMappingIdx {
None = -1,
- PM_SGPR1 = 2,
- PM_SGPR16 = 6,
- PM_SGPR32 = 7,
- PM_SGPR64 = 8,
- PM_SGPR128 = 9,
- PM_SGPR256 = 10,
- PM_SGPR512 = 11,
- PM_SGPR1024 = 12,
- PM_VGPR1 = 13,
- PM_VGPR16 = 17,
- PM_VGPR32 = 18,
- PM_VGPR64 = 19,
- PM_VGPR128 = 20,
- PM_VGPR256 = 21,
- PM_VGPR512 = 22,
- PM_VGPR1024 = 23,
- PM_SGPR96 = 24,
- PM_VGPR96 = 25
+ PM_SGPR1 = 1,
+ PM_SGPR16 = 5,
+ PM_SGPR32 = 6,
+ PM_SGPR64 = 7,
+ PM_SGPR128 = 8,
+ PM_SGPR256 = 9,
+ PM_SGPR512 = 10,
+ PM_SGPR1024 = 11,
+ PM_VGPR1 = 12,
+ PM_VGPR16 = 16,
+ PM_VGPR32 = 17,
+ PM_VGPR64 = 18,
+ PM_VGPR128 = 19,
+ PM_VGPR256 = 20,
+ PM_VGPR512 = 21,
+ PM_VGPR1024 = 22,
+ PM_SGPR96 = 23,
+ PM_VGPR96 = 24,
+ PM_AGPR96 = 25,
+ PM_AGPR32 = 31,
+ PM_AGPR64 = 32,
+ PM_AGPR128 = 33,
+ PM_AGPR512 = 34,
+ PM_AGPR1024 = 35
};
const RegisterBankInfo::PartialMapping PartMappings[] {
// StartIdx, Length, RegBank
- {0, 1, SCCRegBank},
{0, 1, VCCRegBank},
{0, 1, SGPRRegBank}, // SGPR begin
@@ -58,43 +63,61 @@ const RegisterBankInfo::PartialMapping PartMappings[] {
{0, 512, VGPRRegBank},
{0, 1024, VGPRRegBank},
{0, 96, SGPRRegBank},
- {0, 96, VGPRRegBank}
+ {0, 96, VGPRRegBank},
+ {0, 96, AGPRRegBank},
+
+ {0, 32, AGPRRegBank}, // AGPR begin
+ {0, 64, AGPRRegBank},
+ {0, 128, AGPRRegBank},
+ {0, 512, AGPRRegBank},
+ {0, 1024, AGPRRegBank}
};
const RegisterBankInfo::ValueMapping ValMappings[] {
- // SCC
- {&PartMappings[0], 1},
-
// VCC
- {&PartMappings[1], 1},
+ {&PartMappings[0], 1},
// SGPRs
- {&PartMappings[2], 1}, // 1
+ {&PartMappings[1], 1}, // 1
{nullptr, 0}, // Illegal power of 2 sizes
{nullptr, 0},
{nullptr, 0},
- {&PartMappings[3], 1}, // 16
- {&PartMappings[4], 1}, // 32
- {&PartMappings[5], 1}, // 64
- {&PartMappings[6], 1}, // 128
- {&PartMappings[7], 1}, // 256
- {&PartMappings[8], 1}, // 512
- {&PartMappings[9], 1}, // 1024
+ {&PartMappings[2], 1}, // 16
+ {&PartMappings[3], 1}, // 32
+ {&PartMappings[4], 1}, // 64
+ {&PartMappings[5], 1}, // 128
+ {&PartMappings[6], 1}, // 256
+ {&PartMappings[7], 1}, // 512
+ {&PartMappings[8], 1}, // 1024
// VGPRs
- {&PartMappings[10], 1}, // 1
+ {&PartMappings[9], 1}, // 1
{nullptr, 0},
{nullptr, 0},
{nullptr, 0},
- {&PartMappings[11], 1}, // 16
- {&PartMappings[12], 1}, // 32
- {&PartMappings[13], 1}, // 64
- {&PartMappings[14], 1}, // 128
- {&PartMappings[15], 1}, // 256
- {&PartMappings[16], 1}, // 512
- {&PartMappings[17], 1}, // 1024
+ {&PartMappings[10], 1}, // 16
+ {&PartMappings[11], 1}, // 32
+ {&PartMappings[12], 1}, // 64
+ {&PartMappings[13], 1}, // 128
+ {&PartMappings[14], 1}, // 256
+ {&PartMappings[15], 1}, // 512
+ {&PartMappings[16], 1}, // 1024
+ {&PartMappings[17], 1},
{&PartMappings[18], 1},
- {&PartMappings[19], 1}
+ {&PartMappings[19], 1},
+
+ // AGPRs
+ {nullptr, 0},
+ {nullptr, 0},
+ {nullptr, 0},
+ {nullptr, 0},
+ {nullptr, 0},
+ {&PartMappings[20], 1}, // 32
+ {&PartMappings[21], 1}, // 64
+ {&PartMappings[22], 1}, // 128
+ {nullptr, 0},
+ {&PartMappings[23], 1}, // 512
+ {&PartMappings[24], 1} // 1024
};
const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
@@ -120,9 +143,9 @@ const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {
};
enum ValueMappingIdx {
- SCCStartIdx = 0,
- SGPRStartIdx = 2,
- VGPRStartIdx = 13
+ SGPRStartIdx = 1,
+ VGPRStartIdx = 12,
+ AGPRStartIdx = 26
};
const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
@@ -130,21 +153,38 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
unsigned Idx;
switch (Size) {
case 1:
- if (BankID == AMDGPU::SCCRegBankID)
- return &ValMappings[0];
if (BankID == AMDGPU::VCCRegBankID)
- return &ValMappings[1];
+ return &ValMappings[0];
- // 1-bit values not from a compare etc.
Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR1 : PM_VGPR1;
break;
case 96:
- assert(BankID != AMDGPU::VCCRegBankID);
- Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR96 : PM_VGPR96;
+ switch (BankID) {
+ case AMDGPU::VGPRRegBankID:
+ Idx = PM_VGPR96;
+ break;
+ case AMDGPU::SGPRRegBankID:
+ Idx = PM_SGPR96;
+ break;
+ case AMDGPU::AGPRRegBankID:
+ Idx = PM_AGPR96;
+ break;
+ default: llvm_unreachable("Invalid register bank");
+ }
break;
default:
- assert(BankID != AMDGPU::VCCRegBankID);
- Idx = BankID == AMDGPU::VGPRRegBankID ? VGPRStartIdx : SGPRStartIdx;
+ switch (BankID) {
+ case AMDGPU::VGPRRegBankID:
+ Idx = VGPRStartIdx;
+ break;
+ case AMDGPU::SGPRRegBankID:
+ Idx = SGPRStartIdx;
+ break;
+ case AMDGPU::AGPRRegBankID:
+ Idx = AGPRStartIdx;
+ break;
+ default: llvm_unreachable("Invalid register bank");
+ }
Idx += Log2_32_Ceil(Size);
break;
}
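The lookup arithmetic after the renumbering, traced for one case:

    // getValueMapping(AMDGPU::SGPRRegBankID, 64):
    //   Idx = SGPRStartIdx + Log2_32_Ceil(64) = 1 + 6 = 7 = PM_SGPR64,
    // so the call returns &ValMappings[7], the {0, 64, SGPRRegBank} mapping.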
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
new file mode 100644
index 000000000000..16d7f2c4f9e5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -0,0 +1,45 @@
+//===- AMDGPUGlobalISelUtils.cpp ---------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUGlobalISelUtils.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+std::tuple<Register, unsigned, MachineInstr *>
+AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
+ MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+ if (!Def)
+ return std::make_tuple(Reg, 0, nullptr);
+
+ if (Def->getOpcode() == TargetOpcode::G_CONSTANT) {
+ unsigned Offset;
+ const MachineOperand &Op = Def->getOperand(1);
+ if (Op.isImm())
+ Offset = Op.getImm();
+ else
+ Offset = Op.getCImm()->getZExtValue();
+
+ return std::make_tuple(Register(), Offset, Def);
+ }
+
+ int64_t Offset;
+ if (Def->getOpcode() == TargetOpcode::G_ADD) {
+ // TODO: Handle G_OR used for add case
+ if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(Offset)))
+ return std::make_tuple(Def->getOperand(1).getReg(), Offset, Def);
+
+ // FIXME: matcher should ignore copies
+ if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset))))
+ return std::make_tuple(Def->getOperand(1).getReg(), Offset, Def);
+ }
+
+ return std::make_tuple(Reg, 0, Def);
+}
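Two hypothetical decompositions the shared helper now performs (register names illustrative):

    ; %addr:_(s64) = G_ADD %base, %c   with %c:_(s64) = G_CONSTANT i64 16
    ;   -> (%base, 16, def of %addr)
    ; %r defined directly by G_CONSTANT i64 42
    ;   -> (Register(), 42, the G_CONSTANT); callers must check the returned
    ;      base for validity in this pure-constant case.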
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
new file mode 100644
index 000000000000..1507ade79547
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -0,0 +1,29 @@
+//===- AMDGPUGlobalISelUtils -------------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H
+
+#include "llvm/CodeGen/Register.h"
+#include <tuple>
+
+namespace llvm {
+
+class MachineInstr;
+class MachineRegisterInfo;
+
+namespace AMDGPU {
+
+/// Returns the base register, constant offset, and offset def point.
+std::tuple<Register, unsigned, MachineInstr *>
+getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg);
+
+}
+}
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 9f5bcd8ff5f0..511d62943189 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -421,7 +421,12 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
if (HiddenArgNumBytes >= 32) {
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
- else
+ else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
+ // The printf runtime binding pass should have ensured that hostcall and
+ // printf are not used in the same module.
+ assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer);
+ } else
emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
}
@@ -854,7 +859,12 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
if (HiddenArgNumBytes >= 32) {
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
- else
+ else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
+ // The printf runtime binding pass should have ensured that hostcall and
+ // printf are not used in the same module.
+ assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
+ emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args);
+ } else
emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index f330bd7ebcdd..2b6308dc1549 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -19,12 +19,12 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/InitializePasses.h"
#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/Dominators.h"
#endif
@@ -127,6 +128,10 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const GCNSubtarget *Subtarget;
+
+ // Default FP mode for the current function.
+ AMDGPU::SIModeRegisterDefaults Mode;
+
bool EnableLateStructurizeCFG;
public:
@@ -166,6 +171,22 @@ private:
return isInlineImmediate(N, true);
}
+ bool isInlineImmediate16(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate32(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate64(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate(const APFloat &Imm) const {
+ return Subtarget->getInstrInfo()->isInlineConstant(Imm);
+ }
+
bool isVGPRImm(const SDNode *N) const;
bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
@@ -240,10 +261,6 @@ private:
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
- bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp,
- SDValue &Omod) const;
-
bool SelectVOP3OMods(SDValue In, SDValue &Src,
SDValue &Clamp, SDValue &Omod) const;
@@ -392,6 +409,7 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
}
#endif
Subtarget = &MF.getSubtarget<GCNSubtarget>();
+ Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction(), *Subtarget);
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -2103,7 +2121,7 @@ void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
- assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
+ assert((IsFMA || !Mode.FP32Denormals) &&
"fmad selected with denormals enabled");
// TODO: We can select this with f32 denormals enabled if all the sources are
// converted from f16 (in which case fmad isn't legal).
@@ -2437,14 +2455,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
-bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
- SDValue &SrcMods,
- SDValue &Clamp,
- SDValue &Omod) const {
- Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
- return SelectVOP3Mods(In, Src, SrcMods);
-}
-
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
SDValue &Clamp, SDValue &Omod) const {
Src = In;
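The new helpers simply defer to the existing inline-literal predicates; for instance, assuming the usual integer inline-constant range of -16..64:

    // isInlineImmediate16(64) -> true  (in the inline range)
    // isInlineImmediate16(65) -> false (would need a literal constant)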
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1115d8c23620..23cc9404532d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -451,9 +451,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
- setBooleanContents(ZeroOrNegativeOneBooleanContent);
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
-
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
@@ -1399,16 +1396,19 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT VT = Op.getValueType();
+ SDLoc SL(Op);
// If this is a 2 element vector, we really want to scalarize and not create
// weird 1 element vectors.
- if (VT.getVectorNumElements() == 2)
- return scalarizeVectorLoad(Load, DAG);
+ if (VT.getVectorNumElements() == 2) {
+ SDValue Ops[2];
+ std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG);
+ return DAG.getMergeValues(Ops, SL);
+ }
SDValue BasePtr = Load->getBasePtr();
EVT MemVT = Load->getMemoryVT();
- SDLoc SL(Op);
const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
@@ -1584,8 +1584,11 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
// float fqneg = -fq;
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
+
// float fr = mad(fqneg, fb, fa);
- unsigned OpCode = Subtarget->hasFP32Denormals() ?
+ unsigned OpCode = MFI->getMode().FP32Denormals ?
(unsigned)AMDGPUISD::FMAD_FTZ :
(unsigned)ISD::FMAD;
SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
@@ -1666,8 +1669,11 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
}
if (isTypeLegal(MVT::i64)) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
// Compute denominator reciprocal.
- unsigned FMAD = Subtarget->hasFP32Denormals() ?
+ unsigned FMAD = MFI->getMode().FP32Denormals ?
(unsigned)AMDGPUISD::FMAD_FTZ :
(unsigned)ISD::FMAD;
@@ -2158,7 +2164,8 @@ SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) con
// Don't handle v2f16. The extra instructions to scalarize and repack around the
// compare and vselect end up producing worse code than scalarizing the whole
// operation.
-SDValue AMDGPUTargetLowering::LowerFROUND32_16(SDValue Op, SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::LowerFROUND_LegalFTRUNC(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
EVT VT = Op.getValueType();
@@ -2247,8 +2254,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const
SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- if (VT == MVT::f32 || VT == MVT::f16)
- return LowerFROUND32_16(Op, DAG);
+ if (isOperationLegal(ISD::FTRUNC, VT))
+ return LowerFROUND_LegalFTRUNC(Op, DAG);
if (VT == MVT::f64)
return LowerFROUND64(Op, DAG);
@@ -2496,15 +2503,25 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- assert(Op.getOperand(0).getValueType() == MVT::i64 &&
- "operation should be legal");
-
// TODO: Factor out code common with LowerSINT_TO_FP.
-
EVT DestVT = Op.getValueType();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ if (SrcVT == MVT::i16) {
+ if (DestVT == MVT::f16)
+ return Op;
+ SDLoc DL(Op);
+
+ // Promote src to i32
+ SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src);
+ return DAG.getNode(ISD::UINT_TO_FP, DL, DestVT, Ext);
+ }
+
+ assert(SrcVT == MVT::i64 && "operation should be legal");
+
if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
SDLoc DL(Op);
- SDValue Src = Op.getOperand(0);
SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op));
@@ -2523,12 +2540,25 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- assert(Op.getOperand(0).getValueType() == MVT::i64 &&
- "operation should be legal");
+ EVT DestVT = Op.getValueType();
+
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ if (SrcVT == MVT::i16) {
+ if (DestVT == MVT::f16)
+ return Op;
+
+ SDLoc DL(Op);
+ // Promote src to i32
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, Src);
+ return DAG.getNode(ISD::SINT_TO_FP, DL, DestVT, Ext);
+ }
+
+ assert(SrcVT == MVT::i64 && "operation should be legal");
// TODO: Factor out code common with LowerUINT_TO_FP.
- EVT DestVT = Op.getValueType();
if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
@@ -2865,11 +2895,13 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
// the bytes again are not eliminated in the case of an unaligned copy.
if (!allowsMisalignedMemoryAccesses(
VT, AS, Align, LN->getMemOperand()->getFlags(), &IsFast)) {
+ SDValue Ops[2];
+
if (VT.isVector())
- return scalarizeVectorLoad(LN, DAG);
+ std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LN, DAG);
+ else
+ std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
- SDValue Ops[2];
- std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
return DAG.getMergeValues(Ops, SDLoc(N));
}
@@ -3565,8 +3597,8 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
// select (setcc x, y), k, x -> select (setccinv x, y), x, k
SDLoc SL(N);
- ISD::CondCode NewCC = getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
- LHS.getValueType().isInteger());
+ ISD::CondCode NewCC =
+ getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType());
SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
@@ -4343,7 +4375,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
NODE_NAME_CASE(BUFFER_ATOMIC_PK_FADD)
- NODE_NAME_CASE(ATOMIC_FADD)
NODE_NAME_CASE(ATOMIC_PK_FADD)
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
@@ -4435,12 +4466,14 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
unsigned TrailZ = LHSKnown.countMinTrailingZeros() +
RHSKnown.countMinTrailingZeros();
Known.Zero.setLowBits(std::min(TrailZ, 32u));
+ // Skip the extra check if all bits are already known to be zero.
+ if (TrailZ >= 32)
+ break;
// Truncate to 24 bits.
LHSKnown = LHSKnown.trunc(24);
RHSKnown = RHSKnown.trunc(24);
- bool Negative = false;
if (Opc == AMDGPUISD::MUL_I24) {
unsigned LHSValBits = 24 - LHSKnown.countMinSignBits();
unsigned RHSValBits = 24 - RHSKnown.countMinSignBits();
@@ -4448,16 +4481,16 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
if (MaxValBits >= 32)
break;
bool LHSNegative = LHSKnown.isNegative();
- bool LHSPositive = LHSKnown.isNonNegative();
+ bool LHSNonNegative = LHSKnown.isNonNegative();
+ bool LHSPositive = LHSKnown.isStrictlyPositive();
bool RHSNegative = RHSKnown.isNegative();
- bool RHSPositive = RHSKnown.isNonNegative();
- if ((!LHSNegative && !LHSPositive) || (!RHSNegative && !RHSPositive))
- break;
- Negative = (LHSNegative && RHSPositive) || (LHSPositive && RHSNegative);
- if (Negative)
- Known.One.setHighBits(32 - MaxValBits);
- else
+ bool RHSNonNegative = RHSKnown.isNonNegative();
+ bool RHSPositive = RHSKnown.isStrictlyPositive();
+
+ if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative))
Known.Zero.setHighBits(32 - MaxValBits);
+ else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative))
+ Known.One.setHighBits(32 - MaxValBits);
} else {
unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros();
unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros();
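Tracing the reworked MUL_I24 sign logic shows why the mixed-sign arm needs strictly positive rather than merely non-negative operands:

    // LHS known negative, RHS known strictly positive: the product is negative,
    // so the high (32 - MaxValBits) bits are known ones.
    // If RHS were only known non-negative it could be zero, and LHS * 0 == 0
    // has those high bits zero, so setting ones would be unsound.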
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index dea0d1d4343a..a90b7f5653dc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -52,7 +52,7 @@ protected:
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFROUND32_16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND_LegalFTRUNC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
@@ -531,7 +531,6 @@ enum NodeType : unsigned {
BUFFER_ATOMIC_CMPSWAP,
BUFFER_ATOMIC_FADD,
BUFFER_ATOMIC_PK_FADD,
- ATOMIC_FADD,
ATOMIC_PK_FADD,
LAST_AMDGPU_ISD_NUMBER
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index a83ec23ec054..64d761997b0c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -17,21 +17,21 @@
///
//===----------------------------------------------------------------------===//
-
#include "AMDGPU.h"
-#include "llvm/Transforms/IPO.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Inliner.h"
using namespace llvm;
@@ -39,7 +39,7 @@ using namespace llvm;
#define DEBUG_TYPE "inline"
static cl::opt<int>
-ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(1500),
+ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000),
cl::desc("Cost of alloca argument"));
// If the amount of scratch memory to eliminate exceeds our ability to allocate
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index cf0ce5659951..50c451be4b86 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -244,9 +244,9 @@ def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
// Single or double precision division fixup.
// Special case divide fixup and flags (src0 = Quotient, src1 =
// Denominator, src2 = Numerator).
-def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
+def AMDGPUdiv_fixup_impl : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
-def AMDGPUfmad_ftz : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
+def AMDGPUfmad_ftz_impl : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
// Look Up 2.0 / pi src0 with segment select src1[4:0]
def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
@@ -290,10 +290,10 @@ def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;
// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
// when performing the multiply. The result is a 32-bit value.
-def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
+def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
-def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
+def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
@@ -434,6 +434,10 @@ def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2),
(AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)]>;
+def AMDGPUdiv_fixup : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(int_amdgcn_div_fixup node:$src0, node:$src1, node:$src2),
+ (AMDGPUdiv_fixup_impl node:$src0, node:$src1, node:$src2)]>;
+
def AMDGPUffbh_i32 : PatFrags<(ops node:$src),
[(int_amdgcn_sffbh node:$src),
(AMDGPUffbh_i32_impl node:$src)]>;
@@ -457,3 +461,15 @@ def AMDGPUpk_i16_i32 : PatFrags<(ops node:$src0, node:$src1),
def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_cvt_pk_u16 node:$src0, node:$src1),
(AMDGPUpk_u16_u32_impl node:$src0, node:$src1)]>;
+
+def AMDGPUfmad_ftz : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(int_amdgcn_fmad_ftz node:$src0, node:$src1, node:$src2),
+ (AMDGPUfmad_ftz_impl node:$src0, node:$src1, node:$src2)]>;
+
+def AMDGPUmul_u24 : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_mul_u24 node:$src0, node:$src1),
+ (AMDGPUmul_u24_impl node:$src0, node:$src1)]>;
+
+def AMDGPUmul_i24 : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_mul_i24 node:$src0, node:$src1),
+ (AMDGPUmul_i24_impl node:$src0, node:$src1)]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3cfa9d57ec46..c0ea35817ec8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -13,6 +13,7 @@
#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
+#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
@@ -69,28 +70,6 @@ void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits &KB,
InstructionSelector::setupMF(MF, KB, CoverageInfo);
}
-static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
- if (Register::isPhysicalRegister(Reg))
- return Reg == AMDGPU::SCC;
-
- auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
- const TargetRegisterClass *RC =
- RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
- if (RC) {
- // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
- // context of the register bank has been lost.
- // Has a hack getRegClassForSizeOnBank uses exactly SGPR_32RegClass, which
- // won't ever beconstrained any further.
- if (RC != &AMDGPU::SGPR_32RegClass)
- return false;
- const LLT Ty = MRI.getType(Reg);
- return Ty.isValid() && Ty.getSizeInBits() == 1;
- }
-
- const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
- return RB->getID() == AMDGPU::SCCRegBankID;
-}
-
bool AMDGPUInstructionSelector::isVCC(Register Reg,
const MachineRegisterInfo &MRI) const {
if (Register::isPhysicalRegister(Reg))
@@ -133,12 +112,26 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
return false;
+ const TargetRegisterClass *SrcRC
+ = TRI.getConstrainedRegClassForOperand(Src, *MRI);
+
+ Register MaskedReg = MRI->createVirtualRegister(SrcRC);
+
+ // We can't trust the high bits at this point, so clear them.
+
+ // TODO: Skip masking high bits if def is known boolean.
+
+ unsigned AndOpc = TRI.isSGPRClass(SrcRC) ?
+ AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
+ BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
+ .addImm(1)
+ .addReg(SrcReg);
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
.addImm(0)
- .addReg(SrcReg);
+ .addReg(MaskedReg);
if (!MRI->getRegClassOrNull(SrcReg))
- MRI->setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, *MRI));
+ MRI->setRegClass(SrcReg, SrcRC);
I.eraseFromParent();
return true;
}
@@ -195,11 +188,6 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
}
const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
- if (RB.getID() == AMDGPU::SCCRegBankID) {
- LLVM_DEBUG(dbgs() << "illegal scc phi\n");
- return false;
- }
-
DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, *MRI);
if (!DefRC) {
LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
@@ -207,6 +195,7 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
}
}
+ // TODO: Verify that all registers have the same bank
I.setDesc(TII.get(TargetOpcode::PHI));
return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
}
@@ -290,6 +279,11 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
I.setDesc(TII.get(InstOpc));
+ // Dead implicit-def of scc
+ I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
+ true, // isImp
+ false, // isKill
+ true)); // isDead
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
@@ -393,21 +387,25 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
return true;
}
-bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
+ MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
const DebugLoc &DL = I.getDebugLoc();
Register Dst0Reg = I.getOperand(0).getReg();
Register Dst1Reg = I.getOperand(1).getReg();
- const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO;
-
- if (!isSCC(Dst1Reg, MRI)) {
- // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
- // carry out despite the _i32 name. These were renamed in VI to _U32.
- // FIXME: We should probably rename the opcodes here.
- unsigned NewOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
- I.setDesc(TII.get(NewOpc));
+ const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
+ I.getOpcode() == AMDGPU::G_UADDE;
+ const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
+ I.getOpcode() == AMDGPU::G_USUBE;
+
+ if (isVCC(Dst1Reg, *MRI)) {
+ // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
+ // unsigned carry out despite the _i32 name. These were renamed in VI to _U32.
+ // FIXME: We should probably rename the opcodes here.
+ unsigned NoCarryOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+ unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
+ I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
I.addOperand(*MF, MachineOperand::CreateImm(0));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -415,19 +413,32 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
Register Src0Reg = I.getOperand(2).getReg();
Register Src1Reg = I.getOperand(3).getReg();
- unsigned NewOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
- BuildMI(*BB, &I, DL, TII.get(NewOpc), Dst0Reg)
+
+ if (HasCarryIn) {
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+ .addReg(I.getOperand(4).getReg());
+ }
+
+ unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
+ unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+
+ BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
.add(I.getOperand(2))
.add(I.getOperand(3));
BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
.addReg(AMDGPU::SCC);
- if (!MRI.getRegClassOrNull(Dst1Reg))
- MRI.setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
+ if (!MRI->getRegClassOrNull(Dst1Reg))
+ MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
+
+ if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
+ !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
+ !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
+ return false;
- if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, MRI) ||
- !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, MRI) ||
- !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, MRI))
+ if (HasCarryIn &&
+ !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
+ AMDGPU::SReg_32RegClass, *MRI))
return false;
I.eraseFromParent();
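For the scalar path with a carry-in, the selection threads the carry through SCC; a sketch for G_UADDE (register names illustrative):

    $scc = COPY %carry_in                    ; move the incoming carry into SCC
    %dst0:sreg_32 = S_ADDC_U32 %src0, %src1  ; add with SCC, redefining SCC
    %dst1:sreg_32 = COPY $scc                ; capture the carry-out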
@@ -436,15 +447,29 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
+ Register DstReg = I.getOperand(0).getReg();
+ Register SrcReg = I.getOperand(1).getReg();
+ LLT DstTy = MRI->getType(DstReg);
+ LLT SrcTy = MRI->getType(SrcReg);
+ const unsigned SrcSize = SrcTy.getSizeInBits();
+ const unsigned DstSize = DstTy.getSizeInBits();
+
+ // TODO: Should handle any multiple of 32 offset.
unsigned Offset = I.getOperand(2).getImm();
- if (Offset % 32 != 0)
+ if (Offset % DstSize != 0)
return false;
- unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
+ const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
+ const TargetRegisterClass *SrcRC =
+ TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI);
+ if (!SrcRC)
+ return false;
+
+ ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
+
const DebugLoc &DL = I.getDebugLoc();
- MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
- I.getOperand(0).getReg())
- .addReg(I.getOperand(1).getReg(), 0, SubReg);
+ MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
+ .addReg(SrcReg, 0, SubRegs[Offset / DstSize]);
for (const MachineOperand &MO : Copy->operands()) {
const TargetRegisterClass *RC =
@@ -465,7 +490,7 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
const unsigned SrcSize = SrcTy.getSizeInBits();
if (SrcSize < 32)
- return false;
+ return selectImpl(MI, *CoverageInfo);
const DebugLoc &DL = MI.getDebugLoc();
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
@@ -538,7 +563,7 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
return true;
}
-bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_PTR_ADD(MachineInstr &I) const {
return selectG_ADD_SUB(I);
}
@@ -723,7 +748,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
Register CCReg = I.getOperand(0).getReg();
- if (isSCC(CCReg, *MRI)) {
+ if (!isVCC(CCReg, *MRI)) {
int Opcode = getS_CMPOpcode(Pred, Size);
if (Opcode == -1)
return false;
@@ -797,38 +822,6 @@ static unsigned extractSWZ(unsigned AuxiliaryData) {
return (AuxiliaryData >> 3) & 1;
}
-// Returns Base register, constant offset, and offset def point.
-static std::tuple<Register, unsigned, MachineInstr *>
-getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
- MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
- if (!Def)
- return std::make_tuple(Reg, 0, nullptr);
-
- if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
- unsigned Offset;
- const MachineOperand &Op = Def->getOperand(1);
- if (Op.isImm())
- Offset = Op.getImm();
- else
- Offset = Op.getCImm()->getZExtValue();
-
- return std::make_tuple(Register(), Offset, Def);
- }
-
- int64_t Offset;
- if (Def->getOpcode() == AMDGPU::G_ADD) {
- // TODO: Handle G_OR used for add case
- if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset)))
- return std::make_tuple(Def->getOperand(0).getReg(), Offset, Def);
-
- // FIXME: matcher should ignore copies
- if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset))))
- return std::make_tuple(Def->getOperand(0).getReg(), Offset, Def);
- }
-
- return std::make_tuple(Reg, 0, Def);
-}
-
static unsigned getBufferStoreOpcode(LLT Ty,
const unsigned MemSize,
const bool Offen) {
@@ -925,7 +918,7 @@ AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
MachineInstr *OffsetDef;
std::tie(BaseReg, TotalConstOffset, OffsetDef)
- = getBaseWithConstantOffset(*MRI, OrigOffset);
+ = AMDGPU::getBaseWithConstantOffset(*MRI, OrigOffset);
unsigned ImmOffset = TotalConstOffset;
@@ -1029,6 +1022,90 @@ bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
+static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
+ switch (MF.getFunction().getCallingConv()) {
+ case CallingConv::AMDGPU_PS:
+ return 1;
+ case CallingConv::AMDGPU_VS:
+ return 2;
+ case CallingConv::AMDGPU_GS:
+ return 3;
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_LS:
+ case CallingConv::AMDGPU_ES:
+ report_fatal_error("ds_ordered_count unsupported for this calling conv");
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::C:
+ case CallingConv::Fast:
+ default:
+ // Assume other calling conventions are various compute callable functions
+ return 0;
+ }
+}
+
+bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
+ MachineInstr &MI, Intrinsic::ID IntrID) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineFunction *MF = MBB->getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ unsigned IndexOperand = MI.getOperand(7).getImm();
+ bool WaveRelease = MI.getOperand(8).getImm() != 0;
+ bool WaveDone = MI.getOperand(9).getImm() != 0;
+
+ if (WaveDone && !WaveRelease)
+ report_fatal_error("ds_ordered_count: wave_done requires wave_release");
+
+ unsigned OrderedCountIndex = IndexOperand & 0x3f;
+ IndexOperand &= ~0x3f;
+ unsigned CountDw = 0;
+
+ if (STI.getGeneration() >= AMDGPUSubtarget::GFX10) {
+ CountDw = (IndexOperand >> 24) & 0xf;
+ IndexOperand &= ~(0xf << 24);
+
+ if (CountDw < 1 || CountDw > 4) {
+ report_fatal_error(
+ "ds_ordered_count: dword count must be between 1 and 4");
+ }
+ }
+
+ if (IndexOperand)
+ report_fatal_error("ds_ordered_count: bad index operand");
+
+ unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
+ unsigned ShaderType = getDSShaderTypeValue(*MF);
+
+ unsigned Offset0 = OrderedCountIndex << 2;
+ unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
+ (Instruction << 4);
+
+ if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
+ Offset1 |= (CountDw - 1) << 6;
+
+ unsigned Offset = Offset0 | (Offset1 << 8);
+
+ Register M0Val = MI.getOperand(2).getReg();
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(M0Val);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register ValReg = MI.getOperand(3).getReg();
+ MachineInstrBuilder DS =
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
+ .addReg(ValReg)
+ .addImm(Offset)
+ .cloneMemRefs(MI);
+
+ if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))
+ return false;
+
+ bool Ret = constrainSelectedInstRegOperands(*DS, TII, TRI, RBI);
+ MI.eraseFromParent();
+ return Ret;
+}
+
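A worked encoding for the new selector, pre-GFX10 (so no dword-count field): amdgcn_ds_ordered_add from a pixel shader, index 0, wave_release = 1, wave_done = 0:

    // Offset0 = 0 << 2                                            = 0
    // Offset1 = 1 | (0 << 1) | (1 /*PS*/ << 2) | (0 /*add*/ << 4) = 5
    // Offset  = Offset0 | (Offset1 << 8)                          = 0x500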
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
@@ -1084,6 +1161,9 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectStoreIntrinsic(I, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
return selectStoreIntrinsic(I, true);
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap:
+ return selectDSOrderedIntrinsic(I, IntrinsicID);
default:
return selectImpl(I, *CoverageInfo);
}
@@ -1098,7 +1178,7 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
assert(Size <= 32 || Size == 64);
const MachineOperand &CCOp = I.getOperand(1);
Register CCReg = CCOp.getReg();
- if (isSCC(CCReg, *MRI)) {
+ if (!isVCC(CCReg, *MRI)) {
unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
AMDGPU::S_CSELECT_B32;
MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
@@ -1170,10 +1250,19 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
if (!DstTy.isScalar())
return false;
- const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+ const LLT S1 = LLT::scalar(1);
+
const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
- if (SrcRB != DstRB)
- return false;
+ const RegisterBank *DstRB;
+ if (DstTy == S1) {
+ // This is a special case. We don't treat s1 for legalization artifacts as
+ // vcc booleans.
+ DstRB = SrcRB;
+ } else {
+ DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+ if (SrcRB != DstRB)
+ return false;
+ }
unsigned DstSize = DstTy.getSizeInBits();
unsigned SrcSize = SrcTy.getSizeInBits();
@@ -1214,6 +1303,20 @@ static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
return SignedMask >= -16 && SignedMask <= 64;
}
+// Like RegisterBankInfo::getRegBank, but don't assume vcc for s1.
+const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
+ Register Reg, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+ if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>())
+ return RB;
+
+ // Ignore the type, since we don't use vcc in artifacts.
+ if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
+ return &RBI.getRegBankFromRegClass(*RC, LLT());
+ return nullptr;
+}
+
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
const DebugLoc &DL = I.getDebugLoc();
@@ -1223,57 +1326,17 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
const LLT DstTy = MRI->getType(DstReg);
const LLT SrcTy = MRI->getType(SrcReg);
- const LLT S1 = LLT::scalar(1);
const unsigned SrcSize = SrcTy.getSizeInBits();
const unsigned DstSize = DstTy.getSizeInBits();
if (!DstTy.isScalar())
return false;
- const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
-
- if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
- if (SrcTy != S1 || DstSize > 64) // Invalid
- return false;
-
- unsigned Opcode =
- DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
- const TargetRegisterClass *DstRC =
- DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;
-
- // FIXME: Create an extra copy to avoid incorrectly constraining the result
- // of the scc producer.
- Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
- .addReg(SrcReg);
- BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
- .addReg(TmpReg);
-
- // The instruction operands are backwards from what you would expect.
- BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
- .addImm(0)
- .addImm(Signed ? -1 : 1);
- I.eraseFromParent();
- return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
- }
-
- if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
- if (SrcTy != S1) // Invalid
- return false;
-
- MachineInstr *ExtI =
- BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
- .addImm(0) // src0_modifiers
- .addImm(0) // src0
- .addImm(0) // src1_modifiers
- .addImm(Signed ? -1 : 1) // src1
- .addUse(SrcReg);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
- }
-
if (I.getOpcode() == AMDGPU::G_ANYEXT)
return selectCOPY(I);
+ // Artifact casts should never use vcc.
+ const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
+
if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
// 64-bit should have been split up in RegBankSelect
@@ -1352,49 +1415,6 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
return false;
}
-static int64_t getFPTrueImmVal(unsigned Size, bool Signed) {
- switch (Size) {
- case 16:
- return Signed ? 0xBC00 : 0x3C00;
- case 32:
- return Signed ? 0xbf800000 : 0x3f800000;
- case 64:
- return Signed ? 0xbff0000000000000 : 0x3ff0000000000000;
- default:
- llvm_unreachable("Invalid FP type size");
- }
-}
-
-bool AMDGPUInstructionSelector::selectG_SITOFP_UITOFP(MachineInstr &I) const {
- MachineBasicBlock *MBB = I.getParent();
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- Register Src = I.getOperand(1).getReg();
- if (!isSCC(Src, MRI))
- return selectImpl(I, *CoverageInfo);
-
- bool Signed = I.getOpcode() == AMDGPU::G_SITOFP;
- Register DstReg = I.getOperand(0).getReg();
- const LLT DstTy = MRI.getType(DstReg);
- const unsigned DstSize = DstTy.getSizeInBits();
- const DebugLoc &DL = I.getDebugLoc();
-
- BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
- .addReg(Src);
-
- unsigned NewOpc =
- DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
- auto MIB = BuildMI(*MBB, I, DL, TII.get(NewOpc), DstReg)
- .addImm(0)
- .addImm(getFPTrueImmVal(DstSize, Signed));
-
- if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
- return false;
-
- I.eraseFromParent();
- return true;
-}
-
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineOperand &ImmOp = I.getOperand(1);
@@ -1478,7 +1498,7 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
assert(PtrMI);
- if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
+ if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
return;
GEPInfo GEPInfo(*PtrMI);
@@ -1568,12 +1588,15 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
// GlobalISel, we should push that decision into RegBankSelect. Assume for now
// RegBankSelect knows what it's doing if the branch condition is scc, even
// though it currently does not.
- if (isSCC(CondReg, *MRI)) {
+ if (!isVCC(CondReg, *MRI)) {
+ if (MRI->getType(CondReg) != LLT::scalar(32))
+ return false;
+
CondPhysReg = AMDGPU::SCC;
BrOpcode = AMDGPU::S_CBRANCH_SCC1;
// FIXME: Hack for isSCC tests
ConstrainRC = &AMDGPU::SGPR_32RegClass;
- } else if (isVCC(CondReg, *MRI)) {
+ } else {
// FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
  // We sort of know, based on the register bank, that a VCC producer ands
// inactive lanes with 0. What if there was a logical operation with vcc
@@ -1582,8 +1605,7 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
CondPhysReg = TRI.getVCC();
BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
ConstrainRC = TRI.getBoolRC();
- } else
- return false;
+ }
if (!MRI->getRegClassOrNull(CondReg))
MRI->setRegClass(CondReg, ConstrainRC);
@@ -1670,6 +1692,80 @@ bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
return true;
}
+bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
+ MachineInstr &MI) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register IdxReg = MI.getOperand(2).getReg();
+
+ LLT DstTy = MRI->getType(DstReg);
+ LLT SrcTy = MRI->getType(SrcReg);
+
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+ const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
+ const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
+
+ // The index must be scalar. If it wasn't, RegBankSelect should have moved this
+ // into a waterfall loop.
+ if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
+ return false;
+
+ const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB,
+ *MRI);
+ const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(DstTy, *DstRB,
+ *MRI);
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
+ !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
+ return false;
+
+ MachineBasicBlock *BB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ const bool Is64 = DstTy.getSizeInBits() == 64;
+
+ unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
+
+ if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
+ if (DstTy.getSizeInBits() != 32 && !Is64)
+ return false;
+
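+ // S_MOVRELS_* reads its dynamic index from m0, so initialize it first.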
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(IdxReg);
+
+ unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
+ BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg)
+ .addReg(SrcReg, 0, SubReg)
+ .addReg(SrcReg, RegState::Implicit);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (SrcRB->getID() != AMDGPU::VGPRRegBankID || DstTy.getSizeInBits() != 32)
+ return false;
+
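+ // Without VGPR index mode, V_MOVRELS_B32 likewise takes its index in m0.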
+ if (!STI.useVGPRIndexMode()) {
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(IdxReg);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
+ .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, RegState::Implicit);
+ MI.eraseFromParent();
+ return true;
+ }
+
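+ // With VGPR index mode, bracket the indexed V_MOV_B32 with
+ // S_SET_GPR_IDX_ON/S_SET_GPR_IDX_OFF.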
+ BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_ON))
+ .addReg(IdxReg)
+ .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE);
+ BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), DstReg)
+ .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, RegState::Implicit)
+ .addReg(AMDGPU::M0, RegState::Implicit);
+ BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_OFF));
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
if (I.isPHI())
return selectPHI(I);
@@ -1694,7 +1790,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectG_ADD_SUB(I);
case TargetOpcode::G_UADDO:
case TargetOpcode::G_USUBO:
- return selectG_UADDO_USUBO(I);
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_USUBE:
+ return selectG_UADDO_USUBO_UADDE_USUBE(I);
case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_BITCAST:
case TargetOpcode::G_PTRTOINT:
@@ -1710,8 +1808,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectG_MERGE_VALUES(I);
case TargetOpcode::G_UNMERGE_VALUES:
return selectG_UNMERGE_VALUES(I);
- case TargetOpcode::G_GEP:
- return selectG_GEP(I);
+ case TargetOpcode::G_PTR_ADD:
+ return selectG_PTR_ADD(I);
case TargetOpcode::G_IMPLICIT_DEF:
return selectG_IMPLICIT_DEF(I);
case TargetOpcode::G_INSERT:
@@ -1747,21 +1845,17 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
+ if (selectImpl(I, *CoverageInfo))
+ return true;
return selectG_SZA_EXT(I);
- case TargetOpcode::G_SITOFP:
- case TargetOpcode::G_UITOFP:
- return selectG_SITOFP_UITOFP(I);
case TargetOpcode::G_BRCOND:
return selectG_BRCOND(I);
case TargetOpcode::G_FRAME_INDEX:
return selectG_FRAME_INDEX(I);
- case TargetOpcode::G_FENCE:
- // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
- // is checking for G_CONSTANT
- I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
- return true;
case TargetOpcode::G_PTR_MASK:
return selectG_PTR_MASK(I);
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ return selectG_EXTRACT_VECTOR_ELT(I);
default:
return selectImpl(I, *CoverageInfo);
}
@@ -1821,20 +1915,6 @@ AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const {
- Register Src;
- unsigned Mods;
- std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
-
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
- }};
-}
-
-InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(Root); },
@@ -1856,6 +1936,20 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
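+ // This pattern is only safe when the source cannot be a NaN: require the
+ // global no-NaNs option or a proof from isKnownNeverNaN.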
+ if (!TM.Options.NoNaNsFPMath && !isKnownNeverNaN(Src, *MRI))
+ return None;
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const {
// FIXME: Handle clamp and op_sel
return {{
@@ -1961,7 +2055,7 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
return Default;
const MachineInstr *OpDef = MRI->getVRegDef(Root.getReg());
- if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
+ if (!OpDef || OpDef->getOpcode() != AMDGPU::G_PTR_ADD)
return Default;
Optional<int64_t> Offset =
@@ -2175,10 +2269,65 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
}
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
- const MachineInstr &MI) const {
- const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
- assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
- Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), *MRI);
assert(CstVal && "Expected constant value");
MIB.addImm(CstVal.getValue());
}
+
+void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
+}
+
+void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(OpIdx == -1);
+
+ const MachineOperand &Op = MI.getOperand(1);
+ if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
+ MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
+ else {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ MIB.addImm(Op.getCImm()->getSExtValue());
+ }
+}
+
+void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ MIB.addImm(MI.getOperand(1).getCImm()->getValue().countPopulation());
+}
+
+/// This only really exists to satisfy DAG type checking machinery, so it is a
+/// no-op here.
+void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ MIB.addImm(MI.getOperand(OpIdx).getImm());
+}
+
+bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm());
+}
+
+bool AMDGPUInstructionSelector::isInlineImmediate32(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral32(Imm, STI.hasInv2PiInlineImm());
+}
+
+bool AMDGPUInstructionSelector::isInlineImmediate64(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral64(Imm, STI.hasInv2PiInlineImm());
+}
+
+bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
+ return TII.isInlineConstant(Imm);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index d3c83a6a872a..38ca7fd4104b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -38,6 +38,7 @@ class MachineInstr;
class MachineIRBuilder;
class MachineOperand;
class MachineRegisterInfo;
+class RegisterBank;
class SIInstrInfo;
class SIMachineFunctionInfo;
class SIRegisterInfo;
@@ -69,6 +70,10 @@ private:
bool isInstrUniform(const MachineInstr &MI) const;
bool isVCC(Register Reg, const MachineRegisterInfo &MRI) const;
+ const RegisterBank *getArtifactRegBank(
+ Register Reg, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
/// tblgen-erated 'select' implementation.
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
@@ -79,15 +84,14 @@ private:
bool selectPHI(MachineInstr &I) const;
bool selectG_TRUNC(MachineInstr &I) const;
bool selectG_SZA_EXT(MachineInstr &I) const;
- bool selectG_SITOFP_UITOFP(MachineInstr &I) const;
bool selectG_CONSTANT(MachineInstr &I) const;
bool selectG_AND_OR_XOR(MachineInstr &I) const;
bool selectG_ADD_SUB(MachineInstr &I) const;
- bool selectG_UADDO_USUBO(MachineInstr &I) const;
+ bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;
bool selectG_EXTRACT(MachineInstr &I) const;
bool selectG_MERGE_VALUES(MachineInstr &I) const;
bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
- bool selectG_GEP(MachineInstr &I) const;
+ bool selectG_PTR_ADD(MachineInstr &I) const;
bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
bool selectG_INSERT(MachineInstr &I) const;
bool selectG_INTRINSIC(MachineInstr &I) const;
@@ -96,6 +100,7 @@ private:
splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const;
bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
+ bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
@@ -112,6 +117,7 @@ private:
bool selectG_BRCOND(MachineInstr &I) const;
bool selectG_FRAME_INDEX(MachineInstr &I) const;
bool selectG_PTR_MASK(MachineInstr &I) const;
+ bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;
std::pair<Register, unsigned>
selectVOP3ModsImpl(Register Src) const;
@@ -125,11 +131,11 @@ private:
InstructionSelector::ComplexRendererFns
selectVOP3Mods0(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
- selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const;
- InstructionSelector::ComplexRendererFns
selectVOP3OMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectVOP3Mods_nnan(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3OpSelMods0(MachineOperand &Root) const;
@@ -164,8 +170,25 @@ private:
InstructionSelector::ComplexRendererFns
selectDS1Addr1Offset(MachineOperand &Root) const;
- void renderTruncImm32(MachineInstrBuilder &MIB,
- const MachineInstr &MI) const;
+ void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx = -1) const;
+
+ void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
+ void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
+ void renderBitcastImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
+ void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
+ bool isInlineImmediate16(int64_t Imm) const;
+ bool isInlineImmediate32(int64_t Imm) const;
+ bool isInlineImmediate64(int64_t Imm) const;
+ bool isInlineImmediate(const APFloat &Imm) const;
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 846e7f577a28..7e71dbdd1240 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -77,28 +77,39 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
def TruePredicate : Predicate<"">;
+// Add a predicate to the list if it does not already exist, to deduplicate it.
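+// The accumulator is seeded with the new predicate; each existing entry is
+// kept only if its name differs, so the result contains no duplicate of pred.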
+class PredConcat<list<Predicate> lst, Predicate pred> {
+ list<Predicate> ret =
+ !foldl([pred], lst, acc, cur,
+ !listconcat(acc, !if(!eq(!cast<string>(cur),!cast<string>(pred)),
+ [], [cur])));
+}
+
class PredicateControl {
Predicate SubtargetPredicate = TruePredicate;
- list<Predicate> AssemblerPredicates = [];
Predicate AssemblerPredicate = TruePredicate;
Predicate WaveSizePredicate = TruePredicate;
list<Predicate> OtherPredicates = [];
- list<Predicate> Predicates = !listconcat([SubtargetPredicate,
- AssemblerPredicate,
- WaveSizePredicate],
- AssemblerPredicates,
- OtherPredicates);
+ list<Predicate> Predicates = PredConcat<
+ PredConcat<PredConcat<OtherPredicates,
+ SubtargetPredicate>.ret,
+ AssemblerPredicate>.ret,
+ WaveSizePredicate>.ret;
}
+
class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
PredicateControl;
-def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;
-def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;
-def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">;
-def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
-def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
-def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
+let RecomputePerFunction = 1 in {
+def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
+def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals">;
+def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
+def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
+def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals">;
+def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
+}
+
def FMA : Predicate<"Subtarget->hasFMA()">;
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
@@ -147,20 +158,30 @@ def brtarget : Operand<OtherVT>;
class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
(ops node:$src0),
(op $src0),
- [{ return N->hasOneUse(); }]
->;
+ [{ return N->hasOneUse(); }]> {
+
+ let GISelPredicateCode = [{
+ return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
+ }];
+}
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
(ops node:$src0, node:$src1),
(op $src0, $src1),
- [{ return N->hasOneUse(); }]
->;
+ [{ return N->hasOneUse(); }]> {
+ let GISelPredicateCode = [{
+ return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
+ }];
+}
class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
(ops node:$src0, node:$src1, node:$src2),
(op $src0, $src1, $src2),
- [{ return N->hasOneUse(); }]
->;
+ [{ return N->hasOneUse(); }]> {
+ let GISelPredicateCode = [{
+ return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
+ }];
+}
let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
@@ -315,15 +336,10 @@ class Aligned<int Bytes> {
int MinAlignment = Bytes;
}
-class LoadFrag <SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>;
-
-class StoreFrag<SDPatternOperator op> : PatFrag <
- (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
->;
-
class StoreHi16<SDPatternOperator op> : PatFrag <
- (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
->;
+ (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
+ let IsStore = 1;
+}
def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global, AddrSpaces.Constant ]>;
@@ -345,48 +361,6 @@ def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
-class GlobalLoadAddress : CodePatPred<[{
- auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
-
-class FlatLoadAddress : CodePatPred<[{
- const auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
-
-class GlobalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
-class PrivateAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
-}]>;
-
-class LocalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
-
-class RegionAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
-}]>;
-
-class FlatStoreAddress : CodePatPred<[{
- const auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
-// TODO: Remove these when stores to new PatFrag format.
-class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;
-class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;
-class RegionStore <SDPatternOperator op> : StoreFrag <op>, RegionAddress;
-class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
-class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress;
-
-
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
@@ -464,6 +438,10 @@ def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
let MemoryVT = i16;
}
+def store_hi16_#as : StoreHi16 <truncstorei16>;
+def truncstorei8_hi16_#as : StoreHi16<truncstorei8>;
+def truncstorei16_hi16_#as : StoreHi16<truncstorei16>;
+
defm atomic_store_#as : binary_atomic_op<atomic_store>;
} // End let AddressSpaces = ...
@@ -497,18 +475,7 @@ defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
-
-
-def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
-def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;
-
-def store_atomic_global : GlobalStore<atomic_store>;
-def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
-def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;
-
-def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
-def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
-def atomic_store_local : LocalStore <atomic_store>;
+defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> {
@@ -535,30 +502,6 @@ def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
let IsTruncStore = 0;
}
-
-def atomic_store_flat : FlatStore <atomic_store>;
-def truncstorei8_hi16_flat : StoreHi16<truncstorei8>, FlatStoreAddress;
-def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;
-
-
-class local_binary_atomic_op<SDNode atomic_op> :
- PatFrag<(ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
-
-class region_binary_atomic_op<SDNode atomic_op> :
- PatFrag<(ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
-}]>;
-
-
-def mskor_global : PatFrag<(ops node:$val, node:$ptr),
- (AMDGPUstore_mskor node:$val, node:$ptr), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
@@ -569,31 +512,6 @@ defm atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}
-class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
-
-// Legacy.
-def AMDGPUatomic_cmp_swap_global : PatFrag<
- (ops node:$ptr, node:$value),
- (AMDGPUatomic_cmp_swap node:$ptr, node:$value)>, GlobalAddress;
-
-def atomic_cmp_swap_global : PatFrag<
- (ops node:$ptr, node:$cmp, node:$value),
- (atomic_cmp_swap node:$ptr, node:$cmp, node:$value)>, GlobalAddress;
-
-
-def atomic_cmp_swap_global_noret : PatFrag<
- (ops node:$ptr, node:$cmp, node:$value),
- (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
-
-def atomic_cmp_swap_global_ret : PatFrag<
- (ops node:$ptr, node:$cmp, node:$value),
- (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
-
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//
@@ -686,12 +604,12 @@ multiclass BFIPatterns <Instruction BFI_INT,
def : AMDGPUPat <
(or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
(REG_SEQUENCE RC64,
- (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
- (i32 (EXTRACT_SUBREG $y, sub0)),
- (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
- (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
- (i32 (EXTRACT_SUBREG $y, sub1)),
- (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
+ (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub0))), sub0,
+ (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub1))), sub1)
>;
// SHA-256 Ch function
@@ -705,12 +623,12 @@ multiclass BFIPatterns <Instruction BFI_INT,
def : AMDGPUPat <
(xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
(REG_SEQUENCE RC64,
- (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
- (i32 (EXTRACT_SUBREG $y, sub0)),
- (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
- (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
- (i32 (EXTRACT_SUBREG $y, sub1)),
- (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
+ (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub0))), sub0,
+ (BFI_INT (i32 (EXTRACT_SUBREG RC64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub1))), sub1)
>;
def : AMDGPUPat <
@@ -721,7 +639,7 @@ multiclass BFIPatterns <Instruction BFI_INT,
def : AMDGPUPat <
(f32 (fcopysign f32:$src0, f64:$src1)),
(BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
- (i32 (EXTRACT_SUBREG $src1, sub1)))
+ (i32 (EXTRACT_SUBREG RC64:$src1, sub1)))
>;
def : AMDGPUPat <
@@ -729,8 +647,8 @@ multiclass BFIPatterns <Instruction BFI_INT,
(REG_SEQUENCE RC64,
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
(BFI_INT (LoadImm32 (i32 0x7fffffff)),
- (i32 (EXTRACT_SUBREG $src0, sub1)),
- (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
+ (i32 (EXTRACT_SUBREG RC64:$src0, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$src1, sub1))), sub1)
>;
def : AMDGPUPat <
@@ -738,7 +656,7 @@ multiclass BFIPatterns <Instruction BFI_INT,
(REG_SEQUENCE RC64,
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
(BFI_INT (LoadImm32 (i32 0x7fffffff)),
- (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$src0, sub1)),
$src1), sub1)
>;
}
@@ -755,21 +673,21 @@ multiclass SHA256MaPattern <Instruction BFI_INT, Instruction XOR, RegisterClass
def : AMDGPUPat <
(or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))),
(REG_SEQUENCE RC64,
- (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub0)),
- (i32 (EXTRACT_SUBREG $y, sub0))),
- (i32 (EXTRACT_SUBREG $z, sub0)),
- (i32 (EXTRACT_SUBREG $y, sub0))), sub0,
- (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub1)),
- (i32 (EXTRACT_SUBREG $y, sub1))),
- (i32 (EXTRACT_SUBREG $z, sub1)),
- (i32 (EXTRACT_SUBREG $y, sub1))), sub1)
+ (BFI_INT (XOR (i32 (EXTRACT_SUBREG RC64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub0))),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub0)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub0))), sub0,
+ (BFI_INT (XOR (i32 (EXTRACT_SUBREG RC64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub1))),
+ (i32 (EXTRACT_SUBREG RC64:$z, sub1)),
+ (i32 (EXTRACT_SUBREG RC64:$y, sub1))), sub1)
>;
}
// Bitfield extract patterns
-def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
- return isMask_32(N->getZExtValue());
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+ return isMask_32(Imm);
}]>;
def IMMPopCount : SDNodeXForm<imm, [{
@@ -819,30 +737,6 @@ class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
(BIT_ALIGN $src0, $src0, $src1)
>;
-multiclass IntMed3Pat<Instruction med3Inst,
- SDPatternOperator min,
- SDPatternOperator max,
- SDPatternOperator min_oneuse,
- SDPatternOperator max_oneuse,
- ValueType vt = i32> {
-
- // This matches 16 permutations of
- // min(max(a, b), max(min(a, b), c))
- def : AMDGPUPat <
- (min (max_oneuse vt:$src0, vt:$src1),
- (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
- (med3Inst vt:$src0, vt:$src1, vt:$src2)
->;
-
- // This matches 16 permutations of
- // max(min(x, y), min(max(x, y), z))
- def : AMDGPUPat <
- (max (min_oneuse vt:$src0, vt:$src1),
- (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
- (med3Inst $src0, $src1, $src2)
->;
-}
-
// Special conversion patterns
def cvt_rpi_i32_f32 : PatFrag <
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 5aba35a19ced..3f99d5cfb7f9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -244,7 +244,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
S32, S64, S16, V2S16
};
- setAction({G_BRCOND, S1}, Legal);
+ setAction({G_BRCOND, S1}, Legal); // VCC branches
+ setAction({G_BRCOND, S32}, Legal); // SCC branches
// TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
// elements for v3s16
@@ -272,6 +273,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
}
+ // FIXME: Not really legal. Placeholder for custom lowering.
+ getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_SREM, G_UREM})
+ .legalFor({S32, S64})
+ .clampScalar(0, S32, S64)
+ .widenScalarToNextPow2(0, 32)
+ .scalarize(0);
+
getActionDefinitionsBuilder({G_UMULH, G_SMULH})
.legalFor({S32})
.clampScalar(0, S32, S32)
@@ -289,7 +297,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder({G_UADDO, G_USUBO,
G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
- .legalFor({{S32, S1}})
+ .legalFor({{S32, S1}, {S32, S32}})
.clampScalar(0, S32, S32)
.scalarize(0); // TODO: Implement.
@@ -354,6 +362,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.hasVOP3PInsts()) {
MinNumMaxNum.customFor(FPTypesPK16)
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.clampMaxNumElements(0, S16, 2)
.clampScalar(0, S16, S64)
.scalarize(0);
@@ -438,13 +447,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
{S96, S32},
// FIXME: Hack
{S64, LLT::scalar(33)},
- {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
- .scalarize(0);
+ {S32, S8}, {S32, LLT::scalar(24)}})
+ .scalarize(0)
+ .clampScalar(0, S32, S64);
// TODO: Split s1->s64 during regbankselect for VALU.
auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
- .legalFor({{S32, S32}, {S64, S32}, {S16, S32}, {S32, S1}, {S16, S1}, {S64, S1}})
+ .legalFor({{S32, S32}, {S64, S32}, {S16, S32}})
.lowerFor({{S32, S64}})
+ .lowerIf(typeIs(1, S1))
.customFor({{S64, S64}});
if (ST.has16BitInsts())
IToFP.legalFor({{S16, S16}});
@@ -462,10 +473,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
- .legalFor({S32, S64})
- .scalarize(0);
+ .scalarize(0)
+ .lower();
- if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+ if (ST.has16BitInsts()) {
+ getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
+ .legalFor({S16, S32, S64})
+ .clampScalar(0, S16, S64)
+ .scalarize(0);
+ } else if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
.legalFor({S32, S64})
.clampScalar(0, S32, S64)
@@ -478,7 +494,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
}
- getActionDefinitionsBuilder(G_GEP)
+ getActionDefinitionsBuilder(G_PTR_ADD)
.legalForCartesianProduct(AddrSpaces64, {S64})
.legalForCartesianProduct(AddrSpaces32, {S32})
.scalarize(0);
@@ -491,9 +507,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
auto &CmpBuilder =
getActionDefinitionsBuilder(G_ICMP)
+ // The compare output type differs based on the register bank of the output,
+ // so make both s1 and s32 legal.
+ //
+ // Scalar compares producing output in scc will be promoted to s32, as that
+ // is the allocatable register type that will be needed for the copy from
+ // scc. This will be promoted during RegBankSelect, and we assume something
+ // before that won't try to use s32 result types.
+ //
+ // Vector compares producing an output in vcc/SGPR will use s1 in VCC reg
+ // bank.
.legalForCartesianProduct(
{S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
- .legalFor({{S1, S32}, {S1, S64}});
+ .legalForCartesianProduct(
+ {S32}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr});
if (ST.has16BitInsts()) {
CmpBuilder.legalFor({{S1, S16}});
}
@@ -502,7 +529,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.widenScalarToNextPow2(1)
.clampScalar(1, S32, S64)
.scalarize(0)
- .legalIf(all(typeIs(0, S1), isPointer(1)));
+ .legalIf(all(typeInSet(0, {S1, S32}), isPointer(1)));
getActionDefinitionsBuilder(G_FCMP)
.legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
@@ -639,6 +666,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// Split vector extloads.
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+ unsigned Align = Query.MMODescrs[0].AlignInBits;
+
+ if (MemSize < DstTy.getSizeInBits())
+ MemSize = std::max(MemSize, Align);
+
if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize)
return true;
@@ -653,7 +685,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (NumRegs == 3 && !ST.hasDwordx3LoadStores())
return true;
- unsigned Align = Query.MMODescrs[0].AlignInBits;
if (Align < MemSize) {
const SITargetLowering *TLI = ST.getTargetLowering();
return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8);
@@ -795,14 +826,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
unsigned Align = Query.MMODescrs[0].AlignInBits;
- // No extending vector loads.
- if (Size > MemSize && Ty0.isVector())
- return false;
-
// FIXME: Widening store from alignment not valid.
if (MemSize < Size)
MemSize = std::max(MemSize, Align);
+ // No extending vector loads.
+ if (Size > MemSize && Ty0.isVector())
+ return false;
+
switch (MemSize) {
case 8:
case 16:
@@ -848,7 +879,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
{G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
- G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
+ G_ATOMICRMW_UMIN})
.legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
{S64, GlobalPtr}, {S64, LocalPtr}});
if (ST.hasFlatAddressSpace()) {
@@ -858,14 +889,24 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
.legalFor({{S32, LocalPtr}});
+ // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling and output
+ // demarshalling.
+ getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
+ .customFor({{S32, GlobalPtr}, {S64, GlobalPtr},
+ {S32, FlatPtr}, {S64, FlatPtr}})
+ .legalFor({{S32, LocalPtr}, {S64, LocalPtr},
+ {S32, RegionPtr}, {S64, RegionPtr}});
+
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
.lower();
// TODO: Pointer types, any 32-bit or 64-bit vector
+
+ // Condition should be s32 for scalar, s1 for vector.
getActionDefinitionsBuilder(G_SELECT)
.legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16,
GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
- LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
+ LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1, S32})
.clampScalar(0, S16, S64)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.fewerElementsIf(numElementsNotEven(0), scalarize(0))
@@ -875,7 +916,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampMaxNumElements(0, PrivatePtr, 2)
.scalarize(0)
.widenScalarToNextPow2(0)
- .legalIf(all(isPointer(0), typeIs(1, S1)));
+ .legalIf(all(isPointer(0), typeInSet(1, {S1, S32})));
// TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
// be more flexible with the shift amount type.
@@ -888,7 +929,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
} else
Shifts.legalFor({{S16, S32}, {S16, S16}});
- Shifts.clampScalar(1, S16, S32);
+ // TODO: Support 16-bit shift amounts
+ Shifts.clampScalar(1, S32, S32);
Shifts.clampScalar(0, S16, S64);
Shifts.widenScalarToNextPow2(0, 16);
} else {
@@ -1075,6 +1117,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+ getActionDefinitionsBuilder({G_READ_REGISTER, G_WRITE_REGISTER}).lower();
+
+ getActionDefinitionsBuilder(G_READCYCLECOUNTER)
+ .legalFor({S64});
+
+ getActionDefinitionsBuilder({G_VASTART, G_VAARG, G_BRJT, G_JUMP_TABLE,
+ G_DYN_STACKALLOC, G_INDEXED_LOAD, G_INDEXED_SEXTLOAD,
+ G_INDEXED_ZEXTLOAD, G_INDEXED_STORE})
+ .unsupported();
+
computeTables();
verify(*ST.getInstrInfo());
}
@@ -1116,6 +1168,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
return legalizeFMad(MI, MRI, B);
case TargetOpcode::G_FDIV:
return legalizeFDIV(MI, MRI, B);
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ return legalizeAtomicCmpXChg(MI, MRI, B);
default:
return false;
}
@@ -1175,12 +1229,8 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
// private_segment_aperture_base_hi.
uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
- // FIXME: Don't use undef
- Value *V = UndefValue::get(PointerType::get(
- Type::getInt8Ty(MF.getFunction().getContext()),
- AMDGPUAS::CONSTANT_ADDRESS));
-
- MachinePointerInfo PtrInfo(V, StructOffset);
+ // TODO: can we be smarter about machine pointer info?
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo,
MachineMemOperand::MOLoad |
@@ -1192,7 +1242,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
Register LoadResult = MRI.createGenericVirtualRegister(S32);
Register LoadAddr;
- B.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
+ B.materializePtrAdd(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
B.buildLoad(LoadResult, LoadAddr, *MMO);
return LoadResult;
}
@@ -1709,13 +1759,15 @@ bool AMDGPULegalizerInfo::legalizeFMad(
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
assert(Ty.isScalar());
+ MachineFunction &MF = B.getMF();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
// TODO: Always legal with future ftz flag.
- if (Ty == LLT::scalar(32) && !ST.hasFP32Denormals())
+ if (Ty == LLT::scalar(32) && !MFI->getMode().FP32Denormals)
return true;
- if (Ty == LLT::scalar(16) && !ST.hasFP16Denormals())
+ if (Ty == LLT::scalar(16) && !MFI->getMode().FP64FP16Denormals)
return true;
- MachineFunction &MF = B.getMF();
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
@@ -1724,16 +1776,55 @@ bool AMDGPULegalizerInfo::legalizeFMad(
return Helper.lowerFMad(MI) == LegalizerHelper::Legalized;
}
+bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
+ Register CmpVal = MI.getOperand(2).getReg();
+ Register NewVal = MI.getOperand(3).getReg();
+
+ assert(SITargetLowering::isFlatGlobalAddrSpace(
+ MRI.getType(PtrReg).getAddressSpace()) &&
+ "this should not have been custom lowered");
+
+ LLT ValTy = MRI.getType(CmpVal);
+ LLT VecTy = LLT::vector(2, ValTy);
+
+ B.setInstr(MI);
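+ // The target pseudo consumes the new value and compare value packed into a
+ // single vector operand.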
+ Register PackedVal = B.buildBuildVector(VecTy, { NewVal, CmpVal }).getReg(0);
+
+ B.buildInstr(AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG)
+ .addDef(DstReg)
+ .addUse(PtrReg)
+ .addUse(PackedVal)
+ .setMemRefs(MI.memoperands());
+
+ MI.eraseFromParent();
+ return true;
+}
+
// Return the use branch instruction, or null if the usage is invalid.
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
- MachineRegisterInfo &MRI) {
+ MachineRegisterInfo &MRI,
+ MachineInstr *&Br) {
Register CondDef = MI.getOperand(0).getReg();
if (!MRI.hasOneNonDBGUse(CondDef))
return nullptr;
MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
- return UseMI.getParent() == MI.getParent() &&
- UseMI.getOpcode() == AMDGPU::G_BRCOND ? &UseMI : nullptr;
+ if (UseMI.getParent() != MI.getParent() ||
+ UseMI.getOpcode() != AMDGPU::G_BRCOND)
+ return nullptr;
+
+ // Make sure the cond br is followed by a G_BR
+ MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());
+ if (Next != MI.getParent()->end()) {
+ if (Next->getOpcode() != AMDGPU::G_BR)
+ return nullptr;
+ Br = &*Next;
+ }
+
+ return &UseMI;
}
Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
@@ -1823,10 +1914,22 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
B.setInstr(MI);
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT S16 = LLT::scalar(16);
+ LLT S32 = LLT::scalar(32);
+ LLT S64 = LLT::scalar(64);
if (legalizeFastUnsafeFDIV(MI, MRI, B))
return true;
+ if (DstTy == S16)
+ return legalizeFDIV16(MI, MRI, B);
+ if (DstTy == S32)
+ return legalizeFDIV32(MI, MRI, B);
+ if (DstTy == S64)
+ return legalizeFDIV64(MI, MRI, B);
+
return false;
}
@@ -1850,7 +1953,8 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
if (!MF.getTarget().Options.UnsafeFPMath && ResTy == S64)
return false;
- if (!Unsafe && ResTy == S32 && ST.hasFP32Denormals())
+ if (!Unsafe && ResTy == S32 &&
+ MF.getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals)
return false;
if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) {
@@ -1890,6 +1994,219 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
return false;
}
+bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ B.setInstr(MI);
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ uint16_t Flags = MI.getFlags();
+
+ LLT S16 = LLT::scalar(16);
+ LLT S32 = LLT::scalar(32);
+
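+ // Do the division in f32 with the rcp intrinsic, truncate back to f16, and
+ // let div_fixup handle the edge cases.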
+ auto LHSExt = B.buildFPExt(S32, LHS, Flags);
+ auto RHSExt = B.buildFPExt(S32, RHS, Flags);
+
+ auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
+ .addUse(RHSExt.getReg(0))
+ .setMIFlags(Flags);
+
+ auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags);
+ auto RDst = B.buildFPTrunc(S16, QUOT, Flags);
+
+ B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
+ .addUse(RDst.getReg(0))
+ .addUse(RHS)
+ .addUse(LHS)
+ .setMIFlags(Flags);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+// Enable or disable FP32 denorm mode. When 'Enable' is true, emit instructions
+// to enable denorm mode. When 'Enable' is false, disable denorm mode.
+static void toggleSPDenormMode(bool Enable,
+ MachineIRBuilder &B,
+ const GCNSubtarget &ST,
+ AMDGPU::SIModeRegisterDefaults Mode) {
+ // Set SP denorm mode to this value.
+ unsigned SPDenormMode =
+ Enable ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
+ if (ST.hasDenormModeInst()) {
+ // Preserve default FP64FP16 denorm mode while updating FP32 mode.
+ unsigned DPDenormModeDefault = Mode.FP64FP16Denormals
+ ? FP_DENORM_FLUSH_NONE
+ : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
+ unsigned NewDenormModeValue = SPDenormMode | (DPDenormModeDefault << 2);
+ B.buildInstr(AMDGPU::S_DENORM_MODE)
+ .addImm(NewDenormModeValue);
+
+ } else {
+ // Select FP32 bit field in mode register.
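+ // The FP32 denorm control is the 2-bit field at offset 4 of the MODE
+ // register (WIDTH_M1 of 1 encodes a width of 2).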
+ unsigned SPDenormModeBitField = AMDGPU::Hwreg::ID_MODE |
+ (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
+ (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
+
+ B.buildInstr(AMDGPU::S_SETREG_IMM32_B32)
+ .addImm(SPDenormMode)
+ .addImm(SPDenormModeBitField);
+ }
+}
+
+bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ B.setInstr(MI);
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ AMDGPU::SIModeRegisterDefaults Mode = MFI->getMode();
+
+ uint16_t Flags = MI.getFlags();
+
+ LLT S32 = LLT::scalar(32);
+ LLT S1 = LLT::scalar(1);
+
+ auto One = B.buildFConstant(S32, 1.0f);
+
+ auto DenominatorScaled =
+ B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
+ .addUse(RHS)
+ .addUse(LHS)
+ .addImm(1)
+ .setMIFlags(Flags);
+ auto NumeratorScaled =
+ B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
+ .addUse(LHS)
+ .addUse(RHS)
+ .addImm(0)
+ .setMIFlags(Flags);
+
+ auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
+ .addUse(DenominatorScaled.getReg(0))
+ .setMIFlags(Flags);
+ auto NegDivScale0 = B.buildFNeg(S32, DenominatorScaled, Flags);
+
+ // FIXME: Doesn't correctly model the FP mode switch, and the FP operations
+ // aren't modeled as reading it.
+ if (!Mode.FP32Denormals)
+ toggleSPDenormMode(true, B, ST, Mode);
+
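+ // One Newton-Raphson step refines the reciprocal, a second refines the
+ // quotient; Fma4 is the final residual fed to div_fmas.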
+ auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
+ auto Fma1 = B.buildFMA(S32, Fma0, ApproxRcp, ApproxRcp, Flags);
+ auto Mul = B.buildFMul(S32, NumeratorScaled, Fma1, Flags);
+ auto Fma2 = B.buildFMA(S32, NegDivScale0, Mul, NumeratorScaled, Flags);
+ auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
+ auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
+
+ if (!Mode.FP32Denormals)
+ toggleSPDenormMode(false, B, ST, Mode);
+
+ auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)
+ .addUse(Fma4.getReg(0))
+ .addUse(Fma1.getReg(0))
+ .addUse(Fma3.getReg(0))
+ .addUse(NumeratorScaled.getReg(1))
+ .setMIFlags(Flags);
+
+ B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
+ .addUse(Fmas.getReg(0))
+ .addUse(RHS)
+ .addUse(LHS)
+ .setMIFlags(Flags);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ B.setInstr(MI);
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ uint16_t Flags = MI.getFlags();
+
+ LLT S64 = LLT::scalar(64);
+ LLT S1 = LLT::scalar(1);
+
+ auto One = B.buildFConstant(S64, 1.0);
+
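+ // Scale both operands with div_scale, refine the reciprocal and quotient
+ // with FMA steps, then div_fmas/div_fixup produce the final result.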
+ auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
+ .addUse(LHS)
+ .addUse(RHS)
+ .addImm(1)
+ .setMIFlags(Flags);
+
+ auto NegDivScale0 = B.buildFNeg(S64, DivScale0.getReg(0), Flags);
+
+ auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64}, false)
+ .addUse(DivScale0.getReg(0))
+ .setMIFlags(Flags);
+
+ auto Fma0 = B.buildFMA(S64, NegDivScale0, Rcp, One, Flags);
+ auto Fma1 = B.buildFMA(S64, Rcp, Fma0, Rcp, Flags);
+ auto Fma2 = B.buildFMA(S64, NegDivScale0, Fma1, One, Flags);
+
+ auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
+ .addUse(LHS)
+ .addUse(RHS)
+ .addImm(0)
+ .setMIFlags(Flags);
+
+ auto Fma3 = B.buildFMA(S64, Fma1, Fma2, Fma1, Flags);
+ auto Mul = B.buildFMul(S64, DivScale1.getReg(0), Fma3, Flags);
+ auto Fma4 = B.buildFMA(S64, NegDivScale0, Mul, DivScale1.getReg(0), Flags);
+
+ Register Scale;
+ if (!ST.hasUsableDivScaleConditionOutput()) {
+ // Workaround a hardware bug on SI where the condition output from div_scale
+ // is not usable.
+
+ Scale = MRI.createGenericVirtualRegister(S1);
+
+ LLT S32 = LLT::scalar(32);
+
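+ // Compare the high halves of the inputs against the div_scale results and
+ // XOR them to recover the condition bit div_scale should have produced.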
+ auto NumUnmerge = B.buildUnmerge(S32, LHS);
+ auto DenUnmerge = B.buildUnmerge(S32, RHS);
+ auto Scale0Unmerge = B.buildUnmerge(S32, DivScale0);
+ auto Scale1Unmerge = B.buildUnmerge(S32, DivScale1);
+
+ auto CmpNum = B.buildICmp(ICmpInst::ICMP_EQ, S1, NumUnmerge.getReg(1),
+ Scale1Unmerge.getReg(1));
+ auto CmpDen = B.buildICmp(ICmpInst::ICMP_EQ, S1, DenUnmerge.getReg(1),
+ Scale0Unmerge.getReg(1));
+ B.buildXor(Scale, CmpNum, CmpDen);
+ } else {
+ Scale = DivScale1.getReg(1);
+ }
+
+ auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64}, false)
+ .addUse(Fma4.getReg(0))
+ .addUse(Fma3.getReg(0))
+ .addUse(Mul.getReg(0))
+ .addUse(Scale)
+ .setMIFlags(Flags);
+
+ B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, makeArrayRef(Res), false)
+ .addUse(Fmas.getReg(0))
+ .addUse(RHS)
+ .addUse(LHS)
+ .setMIFlags(Flags);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -1955,7 +2272,7 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
if (!loadInputValue(KernargPtrReg, B, Arg))
return false;
- B.buildGEP(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
+ B.buildPtrAdd(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
MI.eraseFromParent();
return true;
}
@@ -2032,19 +2349,38 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
// Replace the use G_BRCOND with the exec manipulate and branch pseudos.
- switch (MI.getIntrinsicID()) {
- case Intrinsic::amdgcn_if: {
- if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
+ auto IntrID = MI.getIntrinsicID();
+ switch (IntrID) {
+ case Intrinsic::amdgcn_if:
+ case Intrinsic::amdgcn_else: {
+ MachineInstr *Br = nullptr;
+ if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
B.setInstr(*BrCond);
Register Def = MI.getOperand(1).getReg();
Register Use = MI.getOperand(3).getReg();
- B.buildInstr(AMDGPU::SI_IF)
- .addDef(Def)
- .addUse(Use)
- .addMBB(BrCond->getOperand(1).getMBB());
+
+ MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB();
+ if (Br)
+ BrTarget = Br->getOperand(0).getMBB();
+
+ if (IntrID == Intrinsic::amdgcn_if) {
+ B.buildInstr(AMDGPU::SI_IF)
+ .addDef(Def)
+ .addUse(Use)
+ .addMBB(BrTarget);
+ } else {
+ B.buildInstr(AMDGPU::SI_ELSE)
+ .addDef(Def)
+ .addUse(Use)
+ .addMBB(BrTarget)
+ .addImm(0);
+ }
+
+ if (Br)
+ Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB());
MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
@@ -2056,11 +2392,14 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
return false;
}
case Intrinsic::amdgcn_loop: {
- if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
+ MachineInstr *Br = nullptr;
+ if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
B.setInstr(*BrCond);
+
+ // FIXME: Need to adjust branch targets based on unconditional branch.
Register Reg = MI.getOperand(2).getReg();
B.buildInstr(AMDGPU::SI_LOOP)
.addUse(Reg)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index d0fba23a8686..4b1405a92787 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -72,6 +72,9 @@ public:
bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeAtomicCmpXChg(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
Register getLiveInRegister(MachineRegisterInfo &MRI,
Register Reg, LLT Ty) const;
@@ -83,6 +86,12 @@ public:
bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeFDIV32(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeFDIV64(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 2c94e0046651..0c56927dea02 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -11,31 +11,32 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "amdgpu-simplifylib"
-
#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
#include "AMDGPUSubtarget.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include <vector>
#include <cmath>
+#include <vector>
+
+#define DEBUG_TYPE "amdgpu-simplifylib"
using namespace llvm;
@@ -1167,8 +1168,6 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
return true;
}
if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
- std::vector<const Type*> ParamsTys;
- ParamsTys.push_back(opr0->getType());
Module *M = CI->getModule();
if (FunctionCallee FPExpr =
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
@@ -1194,8 +1193,6 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
replaceCall(nval);
return true;
} else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
- std::vector<const Type*> ParamsTys;
- ParamsTys.push_back(opr0->getType());
Module *M = CI->getModule();
if (FunctionCallee FPExpr =
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 3760aed87a43..ce7286dabcc8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -344,7 +344,7 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *STI.getInstrInfo(),
*STI.getRegisterInfo());
- InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(), STI);
+ InstPrinter.printInst(&TmpInst, 0, StringRef(), STI, DisasmStream);
// Disassemble instruction/operands to hex representation.
SmallVector<MCFixup, 4> Fixups;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index ba72f71f4322..f61af5a27943 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -861,7 +862,7 @@ void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
OS << "Linearized Region {";
bool IsFirst = true;
- for (const auto &MBB : MBBs) {
+ for (auto MBB : MBBs) {
if (IsFirst) {
IsFirst = false;
} else {
@@ -995,7 +996,7 @@ MachineBasicBlock *LinearizedRegion::getExit() { return Exit; }
void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); }
void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) {
- for (const auto &MBB : InnerRegion->MBBs) {
+ for (auto MBB : InnerRegion->MBBs) {
addMBB(MBB);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 89ca702f577d..940ddff85d73 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -18,6 +18,7 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
LocalMemoryObjects(),
ExplicitKernArgSize(0),
LDSSize(0),
+ Mode(MF.getFunction(), MF.getSubtarget<GCNSubtarget>()),
IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath),
MemoryBound(false),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 9818ab1ef148..1933e41c66f3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -11,6 +11,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "Utils/AMDGPUBaseInfo.h"
namespace llvm {
@@ -28,6 +29,9 @@ protected:
/// Number of bytes in the LDS that are being used.
unsigned LDSSize;
+ // State of MODE register, assumed FP mode.
+ AMDGPU::SIModeRegisterDefaults Mode;
+
// Kernels + shaders. i.e. functions called by the driver and not called
// by other functions.
bool IsEntryFunction;
@@ -53,6 +57,10 @@ public:
return LDSSize;
}
+ AMDGPU::SIModeRegisterDefaults getMode() const {
+ return Mode;
+ }
+
bool isEntryFunction() const {
return IsEntryFunction;
}
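
The new Mode member caches the function's assumed floating-point MODE register state as an AMDGPU::SIModeRegisterDefaults, which is why the header now pulls in Utils/AMDGPUBaseInfo.h. A sketch of how a consumer might query it; MFI and the branch body are illustrative, the IEEE bitfield is part of that struct:

    // Given an AMDGPUMachineFunction *MFI, branch on the assumed FP mode
    // instead of re-deriving it from function attributes each time.
    const AMDGPU::SIModeRegisterDefaults Mode = MFI->getMode();
    if (Mode.IEEE) {
      // IEEE mode: min/max must quiet signaling NaNs, etc.
    }
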
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 5250bf455d71..511de96b5f7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -581,6 +582,15 @@ bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
if (Printfs.empty())
return false;
+ if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) {
+ for (auto &U : HostcallFunction->uses()) {
+ if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
+ M.getContext().emitError(
+ CI, "Cannot use both printf and hostcall in the same module");
+ }
+ }
+ }
+
TD = &M.getDataLayout();
auto DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
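
The new guard above makes printf lowering and hostcall mutually exclusive: it walks every use of __ockl_hostcall_internal and raises a context diagnostic on each offending call site. The same reporting pattern in isolation, with the function name and message taken from the hunk; the helper rejectCallsTo is illustrative:

    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Emit an error diagnostic on every call site of a named function, if
    // the module declares it. Non-call uses are skipped, mirroring the
    // dyn_cast<CallInst> filter in the hunk above.
    static void rejectCallsTo(Module &M, StringRef Name, const Twine &Msg) {
      if (Function *F = M.getFunction(Name)) {
        for (Use &U : F->uses())
          if (auto *CI = dyn_cast<CallInst>(U.getUser()))
            M.getContext().emitError(CI, Msg);
      }
    }
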
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 3e9dcca114a3..14958a180ce3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -37,6 +37,8 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -268,21 +270,21 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
const AMDGPUSubtarget &ST =
AMDGPUSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
- Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;
+ Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
switch (N) {
case 0:
- IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_x
- : Intrinsic::r600_read_tidig_x;
+ IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
+ : (Intrinsic::ID)Intrinsic::r600_read_tidig_x;
break;
case 1:
- IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_y
- : Intrinsic::r600_read_tidig_y;
+ IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
+ : (Intrinsic::ID)Intrinsic::r600_read_tidig_y;
break;
case 2:
- IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_z
- : Intrinsic::r600_read_tidig_z;
+ IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
+ : (Intrinsic::ID)Intrinsic::r600_read_tidig_z;
break;
default:
llvm_unreachable("invalid dimension");
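
The casts above go together with the new IntrinsicsAMDGPU.h and IntrinsicsR600.h includes: the generated intrinsic enumerators now live in per-target enums, so the two arms of the conditional no longer share a type and must be converted back to the common Intrinsic::ID explicitly. Distilled from the hunk:

    // Mixing enumerators from two per-target intrinsic enums in one ternary
    // requires an explicit common type; Intrinsic::ID is that type.
    Intrinsic::ID IntrID = IsAMDGCN
                               ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
                               : (Intrinsic::ID)Intrinsic::r600_read_tidig_x;
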
@@ -648,7 +650,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
// Check how much local memory is being used by global objects
CurrentLocalMemUsage = 0;
for (GlobalVariable &GV : Mod->globals()) {
- if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
continue;
for (const User *U : GV.users()) {
@@ -882,25 +884,25 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
continue;
case Intrinsic::memcpy: {
MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
- Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(),
- MemCpy->getRawSource(), MemCpy->getSourceAlignment(),
+ Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlign(),
+ MemCpy->getRawSource(), MemCpy->getSourceAlign(),
MemCpy->getLength(), MemCpy->isVolatile());
Intr->eraseFromParent();
continue;
}
case Intrinsic::memmove: {
MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
- Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(),
- MemMove->getRawSource(), MemMove->getSourceAlignment(),
+ Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlign(),
+ MemMove->getRawSource(), MemMove->getSourceAlign(),
MemMove->getLength(), MemMove->isVolatile());
Intr->eraseFromParent();
continue;
}
case Intrinsic::memset: {
MemSetInst *MemSet = cast<MemSetInst>(Intr);
- Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
- MemSet->getLength(), MemSet->getDestAlignment(),
- MemSet->isVolatile());
+ Builder.CreateMemSet(
+ MemSet->getRawDest(), MemSet->getValue(), MemSet->getLength(),
+ MaybeAlign(MemSet->getDestAlignment()), MemSet->isVolatile());
Intr->eraseFromParent();
continue;
}
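
The memcpy/memmove rewrites above switch from raw unsigned alignments to the MaybeAlign-based IRBuilder overloads, so an unknown alignment is preserved as "none" rather than encoded as 0. A minimal sketch of re-emitting a memcpy through that API, assuming only the interfaces used in the hunk:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    // Rebuild a memcpy with the MaybeAlign-aware builder overload.
    // getDestAlign()/getSourceAlign() return MaybeAlign, so "no known
    // alignment" survives the round trip instead of degrading to 0.
    static void reemitMemCpy(IRBuilder<> &B, MemCpyInst *MC) {
      B.CreateMemCpy(MC->getRawDest(), MC->getDestAlign(), MC->getRawSource(),
                     MC->getSourceAlign(), MC->getLength(), MC->isVolatile());
      MC->eraseFromParent();
    }
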
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 4d78188b3dc3..1bb01dc8fa11 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
@@ -40,13 +41,15 @@ namespace {
// Observer to apply a register bank to new registers created by LegalizerHelper.
class ApplyRegBankMapping final : public GISelChangeObserver {
private:
+ const AMDGPURegisterBankInfo &RBI;
MachineRegisterInfo &MRI;
const RegisterBank *NewBank;
SmallVector<MachineInstr *, 4> NewInsts;
public:
- ApplyRegBankMapping(MachineRegisterInfo &MRI_, const RegisterBank *RB)
- : MRI(MRI_), NewBank(RB) {}
+ ApplyRegBankMapping(const AMDGPURegisterBankInfo &RBI_,
+ MachineRegisterInfo &MRI_, const RegisterBank *RB)
+ : RBI(RBI_), MRI(MRI_), NewBank(RB) {}
~ApplyRegBankMapping() {
for (MachineInstr *MI : NewInsts)
@@ -55,6 +58,45 @@ public:
/// Set any registers that don't have a set register class or bank to SALU.
void applyBank(MachineInstr &MI) {
+ const unsigned Opc = MI.getOpcode();
+ if (Opc == AMDGPU::G_ANYEXT || Opc == AMDGPU::G_ZEXT ||
+ Opc == AMDGPU::G_SEXT) {
+ // LegalizerHelper wants to use the basic legalization artifacts when
+ // widening etc. We don't handle selection with vcc in artifact sources,
+      // so we need to use a select instead to handle these properly.
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, *RBI.TRI);
+ if (SrcBank == &AMDGPU::VCCRegBank) {
+ const LLT S32 = LLT::scalar(32);
+ assert(MRI.getType(SrcReg) == LLT::scalar(1));
+ assert(MRI.getType(DstReg) == S32);
+ assert(NewBank == &AMDGPU::VGPRRegBank);
+
+ // Replace the extension with a select, which really uses the boolean
+ // source.
+ MachineIRBuilder B(MI);
+ auto True = B.buildConstant(S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
+ auto False = B.buildConstant(S32, 0);
+ B.buildSelect(DstReg, SrcReg, True, False);
+ MRI.setRegBank(True.getReg(0), *NewBank);
+ MRI.setRegBank(False.getReg(0), *NewBank);
+ MI.eraseFromParent();
+ }
+
+ assert(!MRI.getRegClassOrRegBank(DstReg));
+ MRI.setRegBank(DstReg, *NewBank);
+ return;
+ }
+
+#ifndef NDEBUG
+ if (Opc == AMDGPU::G_TRUNC) {
+ Register DstReg = MI.getOperand(0).getReg();
+ const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, *RBI.TRI);
+ assert(DstBank != &AMDGPU::VCCRegBank);
+ }
+#endif
+
for (MachineOperand &Op : MI.operands()) {
if (!Op.isReg())
continue;
@@ -64,10 +106,14 @@ public:
continue;
const RegisterBank *RB = NewBank;
- // FIXME: This might not be enough to detect when SCC should be used.
- if (MRI.getType(Reg) == LLT::scalar(1))
- RB = (NewBank == &AMDGPU::SGPRRegBank ?
- &AMDGPU::SCCRegBank : &AMDGPU::VCCRegBank);
+ if (MRI.getType(Reg) == LLT::scalar(1)) {
+ assert(NewBank == &AMDGPU::VGPRRegBank &&
+ "s1 operands should only be used for vector bools");
+ assert((MI.getOpcode() != AMDGPU::G_TRUNC &&
+ MI.getOpcode() != AMDGPU::G_ANYEXT) &&
+ "not expecting legalization artifacts here");
+ RB = &AMDGPU::VCCRegBank;
+ }
MRI.setRegBank(Reg, *RB);
}
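
The extension handling added above cannot keep a plain G_SEXT/G_ZEXT/G_ANYEXT whose source is a vcc boolean, so it rewrites the extension as a select between constants: sext produces -1/0, zext and anyext produce 1/0. The recipe in isolation; DstReg, SrcReg and IsSExt stand in for the values from the hunk:

    // Build "DstReg = SrcReg ? TrueVal : 0" in place of extending the s1
    // condition; TrueVal is -1 for sext and 1 for zext/anyext.
    MachineIRBuilder B(MI);
    const LLT S32 = LLT::scalar(32);
    auto True = B.buildConstant(S32, IsSExt ? -1 : 1);
    auto False = B.buildConstant(S32, 0);
    B.buildSelect(DstReg, SrcReg, True, False);
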
@@ -98,14 +144,14 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
AlreadyInit = true;
- const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
- (void)RBSGPR;
- assert(&RBSGPR == &AMDGPU::SGPRRegBank);
-
- const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
- (void)RBVGPR;
- assert(&RBVGPR == &AMDGPU::VGPRRegBank);
+ assert(&getRegBank(AMDGPU::SGPRRegBankID) == &AMDGPU::SGPRRegBank &&
+ &getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
+ &getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
+}
+static bool isVectorRegisterBank(const RegisterBank &Bank) {
+ unsigned BankID = Bank.getID();
+ return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
}
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
@@ -113,7 +159,7 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
unsigned Size) const {
// TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
if (Dst.getID() == AMDGPU::SGPRRegBankID &&
- Src.getID() == AMDGPU::VGPRRegBankID) {
+ isVectorRegisterBank(Src)) {
return std::numeric_limits<unsigned>::max();
}
@@ -125,17 +171,20 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
// have been a truncate from an arbitrary value, in which case a copy (lowered
// as a compare with 0) needs to be inserted.
if (Size == 1 &&
- (Dst.getID() == AMDGPU::SCCRegBankID ||
- Dst.getID() == AMDGPU::SGPRRegBankID) &&
- (Src.getID() == AMDGPU::SGPRRegBankID ||
- Src.getID() == AMDGPU::VGPRRegBankID ||
+ (Dst.getID() == AMDGPU::SGPRRegBankID) &&
+ (isVectorRegisterBank(Src) ||
+ Src.getID() == AMDGPU::SGPRRegBankID ||
Src.getID() == AMDGPU::VCCRegBankID))
return std::numeric_limits<unsigned>::max();
- if (Dst.getID() == AMDGPU::SCCRegBankID &&
- Src.getID() == AMDGPU::VCCRegBankID)
+ if (Src.getID() == AMDGPU::VCCRegBankID)
return std::numeric_limits<unsigned>::max();
+ // There is no direct copy between AGPRs.
+ if (Dst.getID() == AMDGPU::AGPRRegBankID &&
+ Src.getID() == AMDGPU::AGPRRegBankID)
+ return 4;
+
return RegisterBankInfo::copyCost(Dst, Src, Size);
}
@@ -164,12 +213,25 @@ unsigned AMDGPURegisterBankInfo::getBreakDownCost(
return 1;
}
-const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
- const TargetRegisterClass &RC) const {
+const RegisterBank &
+AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT Ty) const {
if (&RC == &AMDGPU::SReg_1RegClass)
return AMDGPU::VCCRegBank;
- return TRI->isSGPRClass(&RC) ? AMDGPU::SGPRRegBank : AMDGPU::VGPRRegBank;
+ // We promote real scalar booleans to SReg_32. Any SGPR using s1 is really a
+ // VCC-like use.
+ if (TRI->isSGPRClass(&RC)) {
+ // FIXME: This probably came from a copy from a physical register, which
+    // should be inferable from the type being copied to. We don't have many boolean
+ // physical register constraints so just assume a normal SGPR for now.
+ if (!Ty.isValid())
+ return AMDGPU::SGPRRegBank;
+
+ return Ty == LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
+ }
+
+ return TRI->isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;
}
template <unsigned NumOps>
@@ -323,15 +385,32 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
}
}
+static bool memOpHasNoClobbered(const MachineMemOperand *MMO) {
+ const Instruction *I = dyn_cast_or_null<Instruction>(MMO->getValue());
+ return I && I->getMetadata("amdgpu.noclobber");
+}
+
// FIXME: Returns uniform if there's no source value information. This is
// probably wrong.
-static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI) {
+static bool isScalarLoadLegal(const MachineInstr &MI) {
if (!MI.hasOneMemOperand())
return false;
const MachineMemOperand *MMO = *MI.memoperands_begin();
+ const unsigned AS = MMO->getAddrSpace();
+ const bool IsConst = AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
+
+ // There are no extending SMRD/SMEM loads, and they require 4-byte alignment.
return MMO->getSize() >= 4 && MMO->getAlignment() >= 4 &&
- AMDGPUInstrInfo::isUniformMMO(MMO);
+ // Can't do a scalar atomic load.
+ !MMO->isAtomic() &&
+ // Don't use scalar loads for volatile accesses to non-constant address
+ // spaces.
+ (IsConst || !MMO->isVolatile()) &&
+ // Memory must be known constant, or not written before this load.
+ (IsConst || MMO->isInvariant() || memOpHasNoClobbered(MMO)) &&
+ AMDGPUInstrInfo::isUniformMMO(MMO);
}
RegisterBankInfo::InstructionMappings
@@ -347,11 +426,10 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
case TargetOpcode::G_CONSTANT: {
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
if (Size == 1) {
- static const OpRegBankEntry<1> Table[4] = {
+ static const OpRegBankEntry<1> Table[3] = {
{ { AMDGPU::VGPRRegBankID }, 1 },
{ { AMDGPU::SGPRRegBankID }, 1 },
- { { AMDGPU::VCCRegBankID }, 1 },
- { { AMDGPU::SCCRegBankID }, 1 }
+ { { AMDGPU::VCCRegBankID }, 1 }
};
return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table);
@@ -378,25 +456,17 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
// s_{and|or|xor}_b32 set scc when the result of the 32-bit op is not 0.
const InstructionMapping &SCCMapping = getInstructionMapping(
1, 1, getOperandsMapping(
- {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+ {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),
3); // Num Operands
AltMappings.push_back(&SCCMapping);
- const InstructionMapping &SGPRMapping = getInstructionMapping(
- 1, 1, getOperandsMapping(
- {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
- 3); // Num Operands
- AltMappings.push_back(&SGPRMapping);
-
const InstructionMapping &VCCMapping0 = getInstructionMapping(
- 2, 10, getOperandsMapping(
+ 2, 1, getOperandsMapping(
{AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
3); // Num Operands
AltMappings.push_back(&VCCMapping0);
return AltMappings;
@@ -448,9 +518,10 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
unsigned PtrSize = PtrTy.getSizeInBits();
unsigned AS = PtrTy.getAddressSpace();
LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
+
if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
AS != AMDGPUAS::PRIVATE_ADDRESS) &&
- isInstrUniformNonExtLoadAlign4(MI)) {
+ isScalarLoadLegal(MI)) {
const InstructionMapping &SSMapping = getInstructionMapping(
1, 1, getOperandsMapping(
{AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
@@ -476,9 +547,10 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
}
case TargetOpcode::G_ICMP: {
+ // TODO: Should report 32-bit for scalar output type.
unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
nullptr, // Predicate operand.
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
@@ -515,7 +587,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
4); // Num Operands
@@ -556,10 +628,10 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
getOperandsMapping(
{AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),
5); // Num Operands
AltMappings.push_back(&SSMapping);
@@ -576,9 +648,10 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
case AMDGPU::G_BRCOND: {
assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
+ // TODO: Change type to 32 for scalar
const InstructionMapping &SMapping = getInstructionMapping(
1, 1, getOperandsMapping(
- {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
+ {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1), nullptr}),
2); // Num Operands
AltMappings.push_back(&SMapping);
@@ -910,7 +983,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
Op.setReg(Merge.getReg(0));
}
- MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
+ MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
}
}
}
@@ -948,8 +1021,9 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
.addDef(ExecReg)
.addReg(SaveExecReg);
- // Restore the insert point before the original instruction.
- B.setInsertPt(MBB, MBB.end());
+ // Set the insert point after the original instruction, so any new
+ // instructions will be in the remainder.
+ B.setInsertPt(*RemainderBB, RemainderBB->begin());
return true;
}
@@ -1004,11 +1078,13 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
return;
MachineIRBuilder B(MI);
- Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
B.buildInstr(AMDGPU::V_READFIRSTLANE_B32)
.addDef(SGPR)
.addReg(Reg);
+ MRI.setType(SGPR, MRI.getType(Reg));
+
const TargetRegisterClass *Constrained =
constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI);
(void)Constrained;
@@ -1050,11 +1126,11 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
// If the pointer is an SGPR, we have nothing to do.
if (SrcRegs.empty()) {
- Register PtrReg = MI.getOperand(1).getReg();
- const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
+ const RegisterBank *PtrBank =
+ OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
if (PtrBank == &AMDGPU::SGPRRegBank)
return false;
- SrcRegs.push_back(PtrReg);
+ SrcRegs.push_back(MI.getOperand(1).getReg());
}
assert(LoadSize % MaxNonSmrdLoadSize == 0);
@@ -1075,7 +1151,7 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
unsigned SplitElts =
MaxNonSmrdLoadSize / LoadTy.getScalarType().getSizeInBits();
const LLT LoadSplitTy = LLT::vector(SplitElts, LoadTy.getScalarType());
- ApplyRegBankMapping O(MRI, &AMDGPU::VGPRRegBank);
+ ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank);
GISelObserverWrapper Observer(&O);
B.setChangeObserver(Observer);
LegalizerHelper Helper(B.getMF(), Observer, B);
@@ -1099,11 +1175,11 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
Register IdxReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
B.buildConstant(IdxReg, DefIdx);
- MRI.setRegBank(IdxReg, getRegBank(AMDGPU::VGPRRegBankID));
+ MRI.setRegBank(IdxReg, AMDGPU::VGPRRegBank);
B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
}
- MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
+ MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
return true;
}
@@ -1134,6 +1210,39 @@ bool AMDGPURegisterBankInfo::applyMappingImage(
return true;
}
+// FIXME: Duplicated from LegalizerHelper
+static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_SMIN:
+ return CmpInst::ICMP_SLT;
+ case TargetOpcode::G_SMAX:
+ return CmpInst::ICMP_SGT;
+ case TargetOpcode::G_UMIN:
+ return CmpInst::ICMP_ULT;
+ case TargetOpcode::G_UMAX:
+ return CmpInst::ICMP_UGT;
+ default:
+ llvm_unreachable("not in integer min/max");
+ }
+}
+
+// FIXME: Duplicated from LegalizerHelper, except changing the boolean type.
+void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B,
+ MachineInstr &MI) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+
+ const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
+ LLT CmpType = LLT::scalar(32);
+
+ auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1);
+ B.buildSelect(Dst, Cmp, Src0, Src1);
+
+ B.getMRI()->setRegBank(Cmp.getReg(0), AMDGPU::SGPRRegBank);
+ MI.eraseFromParent();
+}
+
// For cases where only a single copy is inserted for matching register banks.
// Replace the register in the instruction operand
static void substituteSimpleCopyRegs(
@@ -1323,22 +1432,161 @@ AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
return MIB;
}
+bool AMDGPURegisterBankInfo::buildVCopy(MachineIRBuilder &B, Register DstReg,
+ Register SrcReg) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.getSizeInBits() == 32) {
+ // Use a v_mov_b32 here to make the exec dependency explicit.
+ B.buildInstr(AMDGPU::V_MOV_B32_e32)
+ .addDef(DstReg)
+ .addUse(SrcReg);
+ return constrainGenericRegister(DstReg, AMDGPU::VGPR_32RegClass, MRI) &&
+ constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI);
+ }
+
+ Register TmpReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register TmpReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ B.buildInstr(AMDGPU::V_MOV_B32_e32)
+ .addDef(TmpReg0)
+ .addUse(SrcReg, 0, AMDGPU::sub0);
+ B.buildInstr(AMDGPU::V_MOV_B32_e32)
+ .addDef(TmpReg1)
+ .addUse(SrcReg, 0, AMDGPU::sub1);
+ B.buildInstr(AMDGPU::REG_SEQUENCE)
+ .addDef(DstReg)
+ .addUse(TmpReg0)
+ .addImm(AMDGPU::sub0)
+ .addUse(TmpReg1)
+ .addImm(AMDGPU::sub1);
+
+ return constrainGenericRegister(SrcReg, AMDGPU::SReg_64RegClass, MRI) &&
+ constrainGenericRegister(DstReg, AMDGPU::VReg_64RegClass, MRI);
+}
+
void AMDGPURegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
unsigned Opc = MI.getOpcode();
MachineRegisterInfo &MRI = OpdMapper.getMRI();
switch (Opc) {
+ case AMDGPU::G_PHI: {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy != LLT::scalar(1))
+ break;
+
+ const LLT S32 = LLT::scalar(32);
+ const RegisterBank *DstBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+ if (DstBank == &AMDGPU::VCCRegBank) {
+ applyDefaultMapping(OpdMapper);
+ // The standard handling only considers the result register bank for
+ // phis. For VCC, blindly inserting a copy when the phi is lowered will
+ // produce an invalid copy. We can only copy with some kind of compare to
+      // get a vector boolean result. Insert a register bank copy that will be
+ // correctly lowered to a compare.
+ MachineIRBuilder B(*MI.getParent()->getParent());
+
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+ Register SrcReg = MI.getOperand(I).getReg();
+ const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
+
+ if (SrcBank != &AMDGPU::VCCRegBank) {
+ MachineBasicBlock *SrcMBB = MI.getOperand(I + 1).getMBB();
+ B.setInsertPt(*SrcMBB, SrcMBB->getFirstTerminator());
+
+ auto Copy = B.buildCopy(LLT::scalar(1), SrcReg);
+ MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
+ MI.getOperand(I).setReg(Copy.getReg(0));
+ }
+ }
+
+ return;
+ }
+
+ // Phi handling is strange and only considers the bank of the destination.
+ substituteSimpleCopyRegs(OpdMapper, 0);
+
+ // Promote SGPR/VGPR booleans to s32
+ MachineFunction *MF = MI.getParent()->getParent();
+ ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
+ GISelObserverWrapper Observer(&ApplyBank);
+ MachineIRBuilder B(MI);
+ LegalizerHelper Helper(*MF, Observer, B);
+
+ if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
+ llvm_unreachable("widen scalar should have succeeded");
+
+ return;
+ }
+ case AMDGPU::G_ICMP:
+ case AMDGPU::G_UADDO:
+ case AMDGPU::G_USUBO:
+ case AMDGPU::G_UADDE:
+ case AMDGPU::G_SADDE:
+ case AMDGPU::G_USUBE:
+ case AMDGPU::G_SSUBE: {
+ unsigned BoolDstOp = Opc == AMDGPU::G_ICMP ? 0 : 1;
+ Register DstReg = MI.getOperand(BoolDstOp).getReg();
+
+ const RegisterBank *DstBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+ if (DstBank != &AMDGPU::SGPRRegBank)
+ break;
+
+ const bool HasCarryIn = MI.getNumOperands() == 5;
+
+ // If this is a scalar compare, promote the result to s32, as the selection
+ // will end up using a copy to a 32-bit vreg.
+ const LLT S32 = LLT::scalar(32);
+ Register NewDstReg = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
+ MI.getOperand(BoolDstOp).setReg(NewDstReg);
+ MachineIRBuilder B(MI);
+
+ if (HasCarryIn) {
+ Register NewSrcReg = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
+ B.buildZExt(NewSrcReg, MI.getOperand(4).getReg());
+ MI.getOperand(4).setReg(NewSrcReg);
+ }
+
+ MachineBasicBlock *MBB = MI.getParent();
+ B.setInsertPt(*MBB, std::next(MI.getIterator()));
+ B.buildTrunc(DstReg, NewDstReg);
+ return;
+ }
case AMDGPU::G_SELECT: {
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
+
+ SmallVector<Register, 1> CondRegs(OpdMapper.getVRegs(1));
+ if (CondRegs.empty())
+ CondRegs.push_back(MI.getOperand(1).getReg());
+ else {
+ assert(CondRegs.size() == 1);
+ }
+
+ const RegisterBank *CondBank = getRegBank(CondRegs[0], MRI, *TRI);
+ if (CondBank == &AMDGPU::SGPRRegBank) {
+ MachineIRBuilder B(MI);
+ const LLT S32 = LLT::scalar(32);
+ Register NewCondReg = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
+
+ MI.getOperand(1).setReg(NewCondReg);
+ B.buildZExt(NewCondReg, CondRegs[0]);
+ }
+
if (DstTy.getSizeInBits() != 64)
break;
+ MachineIRBuilder B(MI);
LLT HalfTy = getHalfSizedType(DstTy);
SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
- SmallVector<Register, 1> Src0Regs(OpdMapper.getVRegs(1));
SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));
SmallVector<Register, 2> Src2Regs(OpdMapper.getVRegs(3));
@@ -1348,13 +1596,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
break;
}
- MachineIRBuilder B(MI);
- if (Src0Regs.empty())
- Src0Regs.push_back(MI.getOperand(1).getReg());
- else {
- assert(Src0Regs.size() == 1);
- }
-
if (Src1Regs.empty())
split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
else {
@@ -1368,13 +1609,32 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
setRegsToType(MRI, DefRegs, HalfTy);
- B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]);
- B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]);
+ B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0]);
+ B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1]);
- MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
+ MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
MI.eraseFromParent();
return;
}
+ case AMDGPU::G_BRCOND: {
+ Register CondReg = MI.getOperand(0).getReg();
+ // FIXME: Should use legalizer helper, but should change bool ext type.
+ const RegisterBank *CondBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+
+ if (CondBank == &AMDGPU::SGPRRegBank) {
+ MachineIRBuilder B(MI);
+ const LLT S32 = LLT::scalar(32);
+ Register NewCondReg = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
+
+ MI.getOperand(0).setReg(NewCondReg);
+ B.buildZExt(NewCondReg, CondReg);
+ return;
+ }
+
+ break;
+ }
case AMDGPU::G_AND:
case AMDGPU::G_OR:
case AMDGPU::G_XOR: {
@@ -1382,6 +1642,25 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// there is a VGPR input.
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
+
+ if (DstTy.getSizeInBits() == 1) {
+ const RegisterBank *DstBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+ if (DstBank == &AMDGPU::VCCRegBank)
+ break;
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
+ GISelObserverWrapper Observer(&ApplyBank);
+ MachineIRBuilder B(MI);
+ LegalizerHelper Helper(*MF, Observer, B);
+
+ if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
+ LegalizerHelper::Legalized)
+ llvm_unreachable("widen scalar should have succeeded");
+ return;
+ }
+
if (DstTy.getSizeInBits() != 64)
break;
@@ -1427,7 +1706,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
.addUse(Src0Regs[1])
.addUse(Src1Regs[1]);
- MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
+ MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
MI.eraseFromParent();
return;
}
@@ -1439,14 +1718,15 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (DstTy != LLT::scalar(16))
break;
- const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
+ const RegisterBank *DstBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
if (DstBank == &AMDGPU::VGPRRegBank)
break;
// 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
MachineFunction *MF = MI.getParent()->getParent();
MachineIRBuilder B(MI);
- ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
+ ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
GISelObserverWrapper Observer(&ApplySALU);
LegalizerHelper Helper(*MF, Observer, B);
@@ -1460,15 +1740,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_UMIN:
case AMDGPU::G_UMAX: {
Register DstReg = MI.getOperand(0).getReg();
- const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
+ const RegisterBank *DstBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
if (DstBank == &AMDGPU::VGPRRegBank)
break;
MachineFunction *MF = MI.getParent()->getParent();
MachineIRBuilder B(MI);
- ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
- GISelObserverWrapper Observer(&ApplySALU);
- LegalizerHelper Helper(*MF, Observer, B);
// Turn scalar min/max into a compare and select.
LLT Ty = MRI.getType(DstReg);
@@ -1476,17 +1754,18 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
LLT S16 = LLT::scalar(16);
if (Ty == S16) {
+ ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
+ GISelObserverWrapper Observer(&ApplySALU);
+ LegalizerHelper Helper(*MF, Observer, B);
+
// Need to widen to s32, and expand as cmp + select.
if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
llvm_unreachable("widenScalar should have succeeded");
// FIXME: This is relying on widenScalar leaving MI in place.
- if (Helper.lower(MI, 0, S32) != LegalizerHelper::Legalized)
- llvm_unreachable("lower should have succeeded");
- } else {
- if (Helper.lower(MI, 0, Ty) != LegalizerHelper::Legalized)
- llvm_unreachable("lower should have succeeded");
- }
+ lowerScalarMinMax(B, MI);
+ } else
+ lowerScalarMinMax(B, MI);
return;
}
@@ -1497,13 +1776,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
bool Signed = Opc == AMDGPU::G_SEXT;
MachineIRBuilder B(MI);
- const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
+ const RegisterBank *SrcBank =
+ OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isScalar() &&
SrcBank != &AMDGPU::SGPRRegBank &&
- SrcBank != &AMDGPU::SCCRegBank &&
SrcBank != &AMDGPU::VCCRegBank &&
// FIXME: Should handle any type that round to s64 when irregular
// breakdowns supported.
@@ -1534,16 +1813,15 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (SrcTy != LLT::scalar(1))
return;
- if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
+ if (SrcBank == &AMDGPU::VCCRegBank) {
SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
- const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
- &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;
+ const RegisterBank *DstBank = &AMDGPU::VGPRRegBank;
unsigned DstSize = DstTy.getSizeInBits();
// 64-bit select is SGPR only
const bool UseSel64 = DstSize > 32 &&
- SrcBank->getID() == AMDGPU::SCCRegBankID;
+ SrcBank->getID() == AMDGPU::SGPRRegBankID;
// TODO: Should s16 select be legal?
LLT SelType = UseSel64 ? LLT::scalar(64) : LLT::scalar(32);
@@ -1554,7 +1832,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MRI.setRegBank(False.getReg(0), *DstBank);
MRI.setRegBank(DstReg, *DstBank);
- if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
+ if (DstSize > 32) {
B.buildSelect(DefRegs[0], SrcReg, True, False);
B.buildCopy(DefRegs[1], DefRegs[0]);
} else if (DstSize < 32) {
@@ -1599,7 +1877,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
substituteSimpleCopyRegs(OpdMapper, 1);
substituteSimpleCopyRegs(OpdMapper, 2);
- const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
+ const RegisterBank *DstBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
if (DstBank == &AMDGPU::SGPRRegBank)
break; // Can use S_PACK_* instructions.
@@ -1609,8 +1888,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
Register Hi = MI.getOperand(2).getReg();
const LLT S32 = LLT::scalar(32);
- const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI);
- const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI);
+ const RegisterBank *BankLo =
+ OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
+ const RegisterBank *BankHi =
+ OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
Register ZextLo;
Register ShiftHi;
@@ -1653,25 +1934,50 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ MachineIRBuilder B(MI);
+
+ const ValueMapping &DstMapping
+ = OpdMapper.getInstrMapping().getOperandMapping(0);
+ const RegisterBank *DstBank = DstMapping.BreakDown[0].RegBank;
+ const RegisterBank *SrcBank =
+ OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register IdxReg = MI.getOperand(2).getReg();
+
+ // If this is a VGPR result only because the index was a VGPR result, the
+ // actual indexing will be done on the SGPR source vector, which will
+ // produce a scalar result. We need to copy to the VGPR result inside the
+ // waterfall loop.
+ const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
+ SrcBank == &AMDGPU::SGPRRegBank;
if (DstRegs.empty()) {
applyDefaultMapping(OpdMapper);
+
executeInWaterfallLoop(MI, MRI, { 2 });
+
+ if (NeedCopyToVGPR) {
+ // We don't want a phi for this temporary reg.
+ Register TmpReg = MRI.createGenericVirtualRegister(DstTy);
+ MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
+ MI.getOperand(0).setReg(TmpReg);
+ B.setInsertPt(*MI.getParent(), ++MI.getIterator());
+
+ // Use a v_mov_b32 here to make the exec dependency explicit.
+ buildVCopy(B, DstReg, TmpReg);
+ }
+
return;
}
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- Register IdxReg = MI.getOperand(2).getReg();
- LLT DstTy = MRI.getType(DstReg);
- (void)DstTy;
-
assert(DstTy.getSizeInBits() == 64);
LLT SrcTy = MRI.getType(SrcReg);
const LLT S32 = LLT::scalar(32);
LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32);
- MachineIRBuilder B(MI);
auto CastSrc = B.buildBitcast(Vec32, SrcReg);
auto One = B.buildConstant(S32, 1);
@@ -1684,15 +1990,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1).
auto IdxLo = B.buildShl(S32, IdxReg, One);
auto IdxHi = B.buildAdd(S32, IdxLo, One);
- B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
- B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
- const ValueMapping &DstMapping
- = OpdMapper.getInstrMapping().getOperandMapping(0);
+ auto Extract0 = B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
+ auto Extract1 = B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
- // FIXME: Should be getting from mapping or not?
- const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
- MRI.setRegBank(DstReg, *DstMapping.BreakDown[0].RegBank);
+ MRI.setRegBank(DstReg, *DstBank);
MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
@@ -1710,6 +2012,23 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MI.eraseFromParent();
executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
OpsToWaterfall, MRI);
+
+ if (NeedCopyToVGPR) {
+ MachineBasicBlock *LoopBB = Extract1->getParent();
+ Register TmpReg0 = MRI.createGenericVirtualRegister(S32);
+ Register TmpReg1 = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
+ MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);
+
+ Extract0->getOperand(0).setReg(TmpReg0);
+ Extract1->getOperand(0).setReg(TmpReg1);
+
+ B.setInsertPt(*LoopBB, ++Extract1->getIterator());
+
+ buildVCopy(B, DstRegs[0], TmpReg0);
+ buildVCopy(B, DstRegs[1], TmpReg1);
+ }
+
return;
}
case AMDGPU::G_INSERT_VECTOR_ELT: {
@@ -1756,9 +2075,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
auto InsHi = B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
B.buildBitcast(DstReg, InsHi);
- const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
- const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
- const RegisterBank *InsSrcBank = getRegBank(InsReg, MRI, *TRI);
+ const RegisterBank *DstBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+ const RegisterBank *SrcBank =
+ OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
+ const RegisterBank *InsSrcBank =
+ OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
MRI.setRegBank(InsReg, *InsSrcBank);
MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
@@ -1908,11 +2230,8 @@ bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
continue;
Register Reg = MI.getOperand(i).getReg();
if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
- if (Bank->getID() == AMDGPU::VGPRRegBankID)
+ if (Bank->getID() != AMDGPU::SGPRRegBankID)
return false;
-
- assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
- Bank->getID() == AMDGPU::SCCRegBankID);
}
}
return true;
@@ -1926,8 +2245,7 @@ AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
- unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
- OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
+ OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
MI.getNumOperands());
@@ -2048,7 +2366,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
if (PtrBank == &AMDGPU::SGPRRegBank &&
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
AS != AMDGPUAS::PRIVATE_ADDRESS) &&
- isInstrUniformNonExtLoadAlign4(MI)) {
+ isScalarLoadLegal(MI)) {
// We have a uniform instruction so we want to use an SMRD load
ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
@@ -2072,7 +2390,6 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
unsigned Default) const {
-
const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
return Bank ? Bank->getID() : Default;
}
@@ -2083,6 +2400,22 @@ static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
}
+static int regBankBoolUnion(int RB0, int RB1) {
+ if (RB0 == -1)
+ return RB1;
+ if (RB1 == -1)
+ return RB0;
+
+ // vcc, vcc -> vcc
+ // vcc, sgpr -> vcc
+ // vcc, vgpr -> vcc
+ if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
+ return AMDGPU::VCCRegBankID;
+
+  // sgpr, sgpr -> sgpr
+  // sgpr, vgpr -> vgpr
+ return regBankUnion(RB0, RB1);
+}
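
regBankBoolUnion folds the bank of each incoming boolean into a running result, with -1 acting as "no bank seen yet" and any vcc operand being contagious. The intended lattice, written as illustrative assertions (regBankUnion, defined above, resolves sgpr/sgpr to sgpr and everything else to vgpr):

    // -1 is the identity element: the first operand's bank wins.
    assert(regBankBoolUnion(-1, AMDGPU::SGPRRegBankID) ==
           AMDGPU::SGPRRegBankID);
    // Any vcc operand forces a vcc result.
    assert(regBankBoolUnion(AMDGPU::VCCRegBankID, AMDGPU::SGPRRegBankID) ==
           AMDGPU::VCCRegBankID);
    // Otherwise fall through to the plain sgpr/vgpr union.
    assert(regBankBoolUnion(AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID) ==
           AMDGPU::VGPRRegBankID);
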
+
const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg,
const MachineRegisterInfo &MRI,
@@ -2102,6 +2435,14 @@ AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
+const RegisterBankInfo::ValueMapping *
+AMDGPURegisterBankInfo::getAGPROpMapping(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ unsigned Size = getSizeInBits(Reg, MRI, TRI);
+ return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID, Size);
+}
+
///
/// This function must return a legal mapping, because
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
@@ -2142,6 +2483,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
if (MI.getOpcode() == TargetOpcode::G_PHI) {
// TODO: Generate proper invalid bank enum.
int ResultBank = -1;
+ Register DstReg = MI.getOperand(0).getReg();
+
+ // Sometimes the result may have already been assigned a bank.
+ if (const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI))
+ ResultBank = DstBank->getID();
for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
Register Reg = MI.getOperand(I).getReg();
@@ -2153,26 +2499,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
+ // FIXME: Need to promote SGPR case to s32
unsigned OpBank = Bank->getID();
- // scc, scc -> sgpr
- if (OpBank == AMDGPU::SCCRegBankID) {
- // There's only one SCC register, so a phi requires copying to SGPR.
- OpBank = AMDGPU::SGPRRegBankID;
- } else if (OpBank == AMDGPU::VCCRegBankID) {
- // vcc, vcc -> vcc
- // vcc, sgpr -> vgpr
- if (ResultBank != -1 && ResultBank != AMDGPU::VCCRegBankID) {
- ResultBank = AMDGPU::VGPRRegBankID;
- break;
- }
- }
-
- ResultBank = OpBank;
+ ResultBank = regBankBoolUnion(ResultBank, OpBank);
}
assert(ResultBank != -1);
- unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned Size = MRI.getType(DstReg).getSizeInBits();
const ValueMapping &ValMap =
getValueMapping(0, Size, getRegBank(ResultBank));
@@ -2208,10 +2542,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
TargetBankID = AMDGPU::VCCRegBankID;
BankLHS = AMDGPU::VCCRegBankID;
BankRHS = AMDGPU::VCCRegBankID;
- } else if (DstBank == &AMDGPU::SCCRegBank) {
- TargetBankID = AMDGPU::SCCRegBankID;
- BankLHS = AMDGPU::SGPRRegBankID;
- BankRHS = AMDGPU::SGPRRegBankID;
} else {
BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
AMDGPU::SGPRRegBankID);
@@ -2233,13 +2563,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
BankRHS = AMDGPU::VCCRegBankID;
} else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
TargetBankID = AMDGPU::SGPRRegBankID;
- } else if (BankLHS == AMDGPU::SCCRegBankID || BankRHS == AMDGPU::SCCRegBankID) {
- // The operation must be done on a 32-bit register, but it will set
- // scc. The result type could interchangably be SCC or SGPR, since
- // both values will be produced.
- TargetBankID = AMDGPU::SCCRegBankID;
- BankLHS = AMDGPU::SGPRRegBankID;
- BankRHS = AMDGPU::SGPRRegBankID;
}
}
@@ -2268,7 +2591,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
LLVM_FALLTHROUGH;
}
- case AMDGPU::G_GEP:
+ case AMDGPU::G_PTR_ADD:
case AMDGPU::G_ADD:
case AMDGPU::G_SUB:
case AMDGPU::G_MUL:
@@ -2312,7 +2635,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FMAXNUM_IEEE:
case AMDGPU::G_FCANONICALIZE:
case AMDGPU::G_INTRINSIC_TRUNC:
- case AMDGPU::G_INTRINSIC_ROUND:
case AMDGPU::G_AMDGPU_FFBH_U32:
return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
@@ -2329,7 +2651,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FCONSTANT:
case AMDGPU::G_CONSTANT:
case AMDGPU::G_GLOBAL_VALUE:
- case AMDGPU::G_BLOCK_ADDR: {
+ case AMDGPU::G_BLOCK_ADDR:
+ case AMDGPU::G_READCYCLECOUNTER: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
@@ -2416,7 +2739,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned Bank = getRegBankID(Src, MRI, *TRI);
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
- OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
+ OpdsMapping[0] = DstSize == 1 && Bank != AMDGPU::SGPRRegBankID ?
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize) :
+ AMDGPU::getValueMapping(Bank, DstSize);
OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
break;
}
@@ -2432,7 +2757,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
assert(SrcBank);
switch (SrcBank->getID()) {
- case AMDGPU::SCCRegBankID:
case AMDGPU::SGPRRegBankID:
DstBank = AMDGPU::SGPRRegBankID;
break;
@@ -2493,9 +2817,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
Subtarget.hasScalarCompareEq64()));
- unsigned Op0Bank = CanUseSCC ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+ unsigned Op0Bank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
+
+ // TODO: Use 32-bit for scalar output size.
+ // SCC results will need to be copied to a 32-bit SGPR virtual register.
+ const unsigned ResultSize = 1;
- OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
+ OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, ResultSize);
OpdsMapping[1] = nullptr; // Predicate Operand.
OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
@@ -2555,6 +2883,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
default:
return getInvalidInstructionMapping();
case Intrinsic::amdgcn_div_fmas:
+ case Intrinsic::amdgcn_div_fixup:
case Intrinsic::amdgcn_trig_preop:
case Intrinsic::amdgcn_sin:
case Intrinsic::amdgcn_cos:
@@ -2725,6 +3054,38 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
+ case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
+ case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
+ case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
+ case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
+ case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
+ case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
+ case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
+ case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
+ case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
+ case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
+ case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
+ case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
+ case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
+ case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
+ case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
+ case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
+ case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
+ case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
+ case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
+ case Intrinsic::amdgcn_mfma_f32_32x32x4bf16: {
+ // Default for MAI intrinsics.
+ // srcC can also be an immediate which can be folded later.
+ // FIXME: Should we eventually add an alternative mapping with AGPR src
+ // for srcA/srcB?
+ //
+ // vdst, srcA, srcB, srcC
+ OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ break;
+ }
}
break;
}
@@ -2913,19 +3274,20 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
Op3Bank == AMDGPU::SGPRRegBankID;
unsigned CondBankDefault = SGPRSrcs ?
- AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+ AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
CondBankDefault);
if (CondBank == AMDGPU::SGPRRegBankID)
- CondBank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+ CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
else if (CondBank == AMDGPU::VGPRRegBankID)
CondBank = AMDGPU::VCCRegBankID;
- unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SCCRegBankID ?
+ unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
- assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SCCRegBankID);
+ assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);
+ // TODO: Should report 32-bit for scalar condition type.
if (Size == 64) {
OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
@@ -2957,14 +3319,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_ATOMICRMW_UMAX:
case AMDGPU::G_ATOMICRMW_UMIN:
case AMDGPU::G_ATOMICRMW_FADD:
- case AMDGPU::G_ATOMIC_CMPXCHG: {
+ case AMDGPU::G_ATOMIC_CMPXCHG:
+ case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
return getDefaultMappingAllVGPR(MI);
}
case AMDGPU::G_BRCOND: {
unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
AMDGPU::SGPRRegBankID);
assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
- if (Bank != AMDGPU::SCCRegBankID)
+ if (Bank != AMDGPU::SGPRRegBankID)
Bank = AMDGPU::VCCRegBankID;
OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index a14b74961118..1ac7d3652a8b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -40,10 +40,13 @@ protected:
#include "AMDGPUGenRegisterBank.inc"
};
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
+public:
const GCNSubtarget &Subtarget;
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
+ bool buildVCopy(MachineIRBuilder &B, Register DstReg, Register SrcReg) const;
+
bool collectWaterfallOperands(
SmallSet<Register, 4> &SGPROperandRegs,
MachineInstr &MI,
@@ -74,6 +77,8 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI, int RSrcIdx) const;
+ void lowerScalarMinMax(MachineIRBuilder &B, MachineInstr &MI) const;
+
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg) const;
@@ -103,6 +108,11 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;
+  // Return a value mapping for an operand that is required to be an AGPR.
+ const ValueMapping *getAGPROpMapping(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
/// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p
/// Regs. This appropriately sets the regbank of the new registers.
void split64BitValueForMapping(MachineIRBuilder &B,
@@ -131,6 +141,7 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
bool isSALUMapping(const MachineInstr &MI) const;
+
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingAllVGPR(
@@ -149,8 +160,8 @@ public:
unsigned getBreakDownCost(const ValueMapping &ValMapping,
const RegisterBank *CurBank = nullptr) const override;
- const RegisterBank &
- getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+ const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const override;
InstructionMappings
getInstrAlternativeMappings(const MachineInstr &MI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index 00f53b157577..c495316c5bce 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -14,7 +14,9 @@ def VGPRRegBank : RegisterBank<"VGPR",
[VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512, VReg_1024]
>;
-def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>;
-
// It is helpful to distinguish conditions from ordinary SGPRs.
def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
+
+def AGPRRegBank : RegisterBank <"AGPR",
+ [AGPR_32, AReg_64, AReg_128, AReg_512, AReg_1024]
+>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 4f095087a57f..9a1e2fc42ed5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -43,12 +43,12 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -64,6 +64,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 26b8b7840270..8d70536ec21c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -30,6 +30,147 @@ foreach intr = !listconcat(AMDGPUBufferIntrinsics,
def : RsrcIntrinsic<!cast<AMDGPURsrcIntrinsic>(intr)>;
}
+class GcnBufferFormatBase<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> {
+ bits<8> Format = f;
+ bits<8> BitsPerComp = bpc;
+ bits<8> NumComponents = numc;
+ bits<8> NumFormat = nfmt;
+ bits<8> DataFormat = dfmt;
+}
+
+class Gfx9BufferFormat<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase<f, bpc, numc, nfmt, dfmt>;
+class Gfx10PlusBufferFormat<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase<f, bpc, numc, nfmt, dfmt>;
+
+class GcnBufferFormatTable : GenericTable {
+ let CppTypeName = "GcnBufferFormatInfo";
+ let Fields = ["Format", "BitsPerComp", "NumComponents", "NumFormat", "DataFormat"];
+ let PrimaryKey = ["BitsPerComp", "NumComponents", "NumFormat"];
+}
+
+def Gfx9BufferFormat : GcnBufferFormatTable {
+ let FilterClass = "Gfx9BufferFormat";
+ let PrimaryKeyName = "getGfx9BufferFormatInfo";
+}
+def Gfx10PlusBufferFormat : GcnBufferFormatTable {
+ let FilterClass = "Gfx10PlusBufferFormat";
+ let PrimaryKeyName = "getGfx10PlusBufferFormatInfo";
+}
+
+def getGfx9BufferFormatInfo : SearchIndex {
+ let Table = Gfx9BufferFormat;
+ let Key = ["Format"];
+}
+def getGfx10PlusBufferFormatInfo : SearchIndex {
+ let Table = Gfx10PlusBufferFormat;
+ let Key = ["Format"];
+}
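
These GenericTable/SearchIndex definitions make TableGen emit GcnBufferFormatInfo tables plus lookup helpers into AMDGPUGenSearchableTables.inc: one keyed on (BitsPerComp, NumComponents, NumFormat) via PrimaryKey, and one keyed on the packed Format value via the SearchIndex. A hedged usage sketch, with namespace qualification of the generated function omitted; 0x74 is FORMAT_32_FLOAT in the GFX9 table below:

    // Look up a GFX9 buffer format by its packed FORMAT encoding; the
    // generated lookup returns nullptr when the encoding has no entry.
    const GcnBufferFormatInfo *Info = getGfx9BufferFormatInfo(/*Format=*/0x74);
    if (Info) {
      // For 0x74: BitsPerComp == 32, NumComponents == 1,
      // NumFormat == NUM_FORMAT_FLOAT (7), DataFormat == DATA_FORMAT_32 (4).
    }
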
+
+// Buffer formats with equal component sizes (GFX9 and earlier)
+def : Gfx9BufferFormat< /*FORMAT_8_UNORM*/ 0x01, 8, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_8_SNORM*/ 0x11, 8, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_8_USCALED*/ 0x21, 8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_8_SSCALED*/ 0x31, 8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_8_UINT*/ 0x41, 8, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_8_SINT*/ 0x51, 8, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_16_UNORM*/ 0x02, 16, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_SNORM*/ 0x12, 16, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_USCALED*/ 0x22, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_SSCALED*/ 0x32, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_UINT*/ 0x42, 16, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_SINT*/ 0x52, 16, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_FLOAT*/ 0x72, 16, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_UNORM*/ 0x03, 8, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_SNORM*/ 0x13, 8, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_USCALED*/ 0x23, 8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_SSCALED*/ 0x33, 8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_UINT*/ 0x43, 8, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_SINT*/ 0x53, 8, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_32_UINT*/ 0x44, 32, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32*/ 4>;
+def : Gfx9BufferFormat< /*FORMAT_32_SINT*/ 0x54, 32, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32*/ 4>;
+def : Gfx9BufferFormat< /*FORMAT_32_FLOAT*/ 0x74, 32, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32*/ 4>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_UNORM*/ 0x05, 16, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_SNORM*/ 0x15, 16, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_USCALED*/ 0x25, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_SSCALED*/ 0x35, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_UINT*/ 0x45, 16, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_SINT*/ 0x55, 16, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_FLOAT*/ 0x75, 16, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x0A, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x1A, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x2A, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x3A, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x4A, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x5A, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_UINT*/ 0x4B, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_SINT*/ 0x5B, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_FLOAT*/ 0x7B, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x0C, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x1C, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x2C, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x3C, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x4C, 16, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x5C, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x7C, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_UINT*/ 0x4D, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_SINT*/ 0x5D, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x7D, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x4E, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x5E, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x7E, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>;
+
+// Buffer formats with equal component sizes (GFX10 and later)
+def : Gfx10PlusBufferFormat< /*FORMAT_8_UNORM*/ 0x01, 8, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_SNORM*/ 0x02, 8, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_USCALED*/ 0x03, 8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_SSCALED*/ 0x04, 8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_UINT*/ 0x05, 8, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_SINT*/ 0x06, 8, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_UNORM*/ 0x07, 16, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_SNORM*/ 0x08, 16, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_USCALED*/ 0x09, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_SSCALED*/ 0x0A, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_UINT*/ 0x0B, 16, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_SINT*/ 0x0C, 16, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_FLOAT*/ 0x0D, 16, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UNORM*/ 0x0E, 8, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SNORM*/ 0x0F, 8, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_USCALED*/ 0x10, 8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SSCALED*/ 0x11, 8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UINT*/ 0x12, 8, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SINT*/ 0x13, 8, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_UINT*/ 0x14, 32, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32*/ 4>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_SINT*/ 0x15, 32, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32*/ 4>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_FLOAT*/ 0x16, 32, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32*/ 4>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UNORM*/ 0x17, 16, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SNORM*/ 0x18, 16, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_USCALED*/ 0x19, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SSCALED*/ 0x1A, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UINT*/ 0x1B, 16, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SINT*/ 0x1C, 16, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_FLOAT*/ 0x1D, 16, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x38, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x39, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x3A, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x3B, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x3C, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x3D, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_UINT*/ 0x3E, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_SINT*/ 0x3F, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_FLOAT*/ 0x40, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x41, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x42, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x43, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x44, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x45, 16, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x46, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x47, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_UINT*/ 0x48, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_SINT*/ 0x49, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x4A, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x4B, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x4C, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x4D, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>;
+
class SourceOfDivergence<Intrinsic intr> {
Intrinsic Intr = intr;
}
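
The GcnBufferFormatTable definitions above drive TableGen's GenericTable backend: each table emits a constant array of GcnBufferFormatInfo records plus lookup helpers, one keyed on the PrimaryKey triple and one per SearchIndex. A minimal sketch of how the generated helpers could be consumed follows; the exact signatures are assumptions inferred from the PrimaryKey and SearchIndex keys above, not copied from the generated header, and the declarations are left undefined since TableGen supplies the bodies.

#include <cstdint>

struct GcnBufferFormatInfo {
  uint8_t Format;
  uint8_t BitsPerComp;
  uint8_t NumComponents;
  uint8_t NumFormat;
  uint8_t DataFormat;
};

// Assumed overload generated from PrimaryKey ["BitsPerComp", "NumComponents",
// "NumFormat"]; returns nullptr for combinations absent from the table.
const GcnBufferFormatInfo *getGfx9BufferFormatInfo(uint8_t BitsPerComp,
                                                   uint8_t NumComponents,
                                                   uint8_t NumFormat);
// Assumed overload generated from the SearchIndex keyed on ["Format"].
const GcnBufferFormatInfo *getGfx9BufferFormatInfo(uint8_t Format);

// Example: recover the GFX9 format byte for 16-bit x2 float buffer data.
static uint8_t gfx9FloatX2Format() {
  // 16 bits per component, 2 components, NUM_FORMAT_FLOAT (7).
  if (const GcnBufferFormatInfo *Info = getGfx9BufferFormatInfo(16, 2, 7))
    return Info->Format; // 0x75 per the Gfx9BufferFormat rows above.
  return 0; // Unsupported combination.
}
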
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 3bb6dd4571c0..445e91092499 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -45,6 +45,11 @@ static cl::opt<bool> DisablePowerSched(
cl::desc("Disable scheduling to minimize mAI power bursts"),
cl::init(false));
+static cl::opt<bool> EnableVGPRIndexMode(
+ "amdgpu-vgpr-index-mode",
+ cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
+ cl::init(false));
+
GCNSubtarget::~GCNSubtarget() = default;
R600Subtarget &
@@ -343,11 +348,6 @@ AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
switch (CC) {
- case CallingConv::AMDGPU_CS:
- case CallingConv::AMDGPU_KERNEL:
- case CallingConv::SPIR_KERNEL:
- return std::make_pair(getWavefrontSize() * 2,
- std::max(getWavefrontSize() * 4, 256u));
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_LS:
case CallingConv::AMDGPU_HS:
@@ -356,13 +356,12 @@ AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
case CallingConv::AMDGPU_PS:
return std::make_pair(1, getWavefrontSize());
default:
- return std::make_pair(1, 16 * getWavefrontSize());
+ return std::make_pair(1u, getMaxFlatWorkGroupSize());
}
}
std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
const Function &F) const {
- // FIXME: 1024 if function.
// Default minimum/maximum flat work group sizes.
std::pair<unsigned, unsigned> Default =
getDefaultFlatWorkGroupSize(F.getCallingConv());
@@ -567,6 +566,10 @@ bool GCNSubtarget::hasMadF16() const {
return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
}
+bool GCNSubtarget::useVGPRIndexMode() const {
+ return !hasMovrel() || (EnableVGPRIndexMode && hasVGPRIndexMode());
+}
+
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return getMaxWavesPerEU();
@@ -713,6 +716,43 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
return MaxNumVGPRs;
}
+void GCNSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
+ SDep &Dep) const {
+ if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
+ !Src->isInstr() || !Dst->isInstr())
+ return;
+
+ MachineInstr *SrcI = Src->getInstr();
+ MachineInstr *DstI = Dst->getInstr();
+
+ if (SrcI->isBundle()) {
+ const SIRegisterInfo *TRI = getRegisterInfo();
+ auto Reg = Dep.getReg();
+ MachineBasicBlock::const_instr_iterator I(SrcI->getIterator());
+ MachineBasicBlock::const_instr_iterator E(SrcI->getParent()->instr_end());
+ unsigned Lat = 0;
+ for (++I; I != E && I->isBundledWithPred(); ++I) {
+ if (I->modifiesRegister(Reg, TRI))
+ Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
+ else if (Lat)
+ --Lat;
+ }
+ Dep.setLatency(Lat);
+ } else if (DstI->isBundle()) {
+ const SIRegisterInfo *TRI = getRegisterInfo();
+ auto Reg = Dep.getReg();
+ MachineBasicBlock::const_instr_iterator I(DstI->getIterator());
+ MachineBasicBlock::const_instr_iterator E(DstI->getParent()->instr_end());
+ unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI);
+ for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
+ if (I->readsRegister(Reg, TRI))
+ break;
+ --Lat;
+ }
+ Dep.setLatency(Lat);
+ }
+}
+
namespace {
struct MemOpClusterMutation : ScheduleDAGMutation {
const SIInstrInfo *TII;
@@ -773,6 +813,11 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
return MI && TII->isSALU(*MI) && !MI->isTerminator();
}
+ bool isVALU(const SUnit *SU) const {
+ const MachineInstr *MI = SU->getInstr();
+ return MI && TII->isVALU(*MI);
+ }
+
bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {
if (Pred->NodeNum < Succ->NodeNum)
return true;
@@ -821,7 +866,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
for (SDep &SI : From->Succs) {
SUnit *SUv = SI.getSUnit();
- if (SUv != From && TII->isVALU(*SUv->getInstr()) && canAddEdge(SUv, SU))
+ if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU))
SUv->addPred(SDep(SU, SDep::Artificial), false);
}
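
The adjustSchedDependency override above refines data-dependence latencies across instruction bundles: for a bundled producer it takes the latency of the sub-instruction that actually writes the register and discounts one cycle for each bundled instruction issued after it; for a bundled consumer it symmetrically discounts until the first reader. A self-contained sketch of the producer-side discounting, under the simplifying assumption that a bundle is a flat list of (defines-register, latency) records rather than MachineInstrs:

#include <cstdio>
#include <vector>

struct SubInstr {
  bool DefinesReg;  // Does this bundled instruction write the dependent reg?
  unsigned Latency; // Scheduler latency of this instruction.
};

// Latency from a bundle to an external reader of the register: take the
// latency of the last sub-instruction defining it, minus one per bundled
// instruction issued afterwards (those issue slots hide part of the latency).
static unsigned bundleDefLatency(const std::vector<SubInstr> &Bundle) {
  unsigned Lat = 0;
  for (const SubInstr &I : Bundle) {
    if (I.DefinesReg)
      Lat = I.Latency;
    else if (Lat)
      --Lat;
  }
  return Lat;
}

int main() {
  // The def has latency 4; two unrelated instructions follow it in-bundle.
  std::vector<SubInstr> B = {{false, 1}, {true, 4}, {false, 1}, {false, 1}};
  std::printf("adjusted latency = %u\n", bundleDefLatency(B)); // prints 2
}
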
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 936feb00c62b..19a240800ba1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -148,7 +148,12 @@ public:
return HasMadMixInsts;
}
- bool hasFP32Denormals() const {
+ bool hasFP32Denormals(const Function &F) const {
+ // FIXME: This should not be a property of the subtarget. This should be a
+ // property with a default set by the calling convention which can be
+ // overridden by attributes. For now, use the subtarget feature as a
+ // placeholder attribute. The function argument's only purpose is to
+ // discourage use without a function context until this is removed.
return FP32Denormals;
}
@@ -612,11 +617,17 @@ public:
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
const Function &) const;
- bool hasFP16Denormals() const {
+ /// Alias for hasFP64FP16Denormals
+ bool hasFP16Denormals(const Function &F) const {
return FP64FP16Denormals;
}
- bool hasFP64Denormals() const {
+ /// Alias for hasFP64FP16Denormals
+ bool hasFP64Denormals(const Function &F) const {
+ return FP64FP16Denormals;
+ }
+
+ bool hasFP64FP16Denormals(const Function &F) const {
return FP64FP16Denormals;
}
@@ -930,9 +941,7 @@ public:
return HasVGPRIndexMode;
}
- bool useVGPRIndexMode(bool UserEnable) const {
- return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
- }
+ bool useVGPRIndexMode() const;
bool hasScalarCompareEq64() const {
return getGeneration() >= VOLCANIC_ISLANDS;
@@ -1203,6 +1212,8 @@ public:
unsigned getMinWavesPerEU() const override {
return AMDGPU::IsaInfo::getMinWavesPerEU(this);
}
+
+ void adjustSchedDependency(SUnit *Src, SUnit *Dst, SDep &Dep) const override;
};
class R600Subtarget final : public R600GenSubtargetInfo,
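
The useVGPRIndexMode change replaces the old UserEnable parameter with the amdgpu-vgpr-index-mode cl::opt shown in AMDGPUSubtarget.cpp, moving the policy decision from each caller into the subtarget itself. A one-line restatement of the resulting policy, with hypothetical parameter names standing in for the subtarget queries:

// VGPR index mode is forced when movrel is unavailable, and otherwise only
// used when both the command-line flag and the hardware feature allow it.
static bool useVGPRIndexModePolicy(bool HasMovrel, bool HasVGPRIndexMode,
                                   bool FlagEnabled) {
  return !HasMovrel || (FlagEnabled && HasVGPRIndexMode);
}
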
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e8cf77161a14..eb30d659bf0b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -182,7 +183,7 @@ static cl::opt<bool> EnableScalarIRPasses(
cl::init(true),
cl::Hidden);
-extern "C" void LLVMInitializeAMDGPUTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
@@ -227,6 +228,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
+ initializeSIRemoveShortExecBranchesPass(*PR);
initializeSIInsertSkipsPass(*PR);
initializeSIMemoryLegalizerPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
@@ -947,7 +949,7 @@ void GCNPassConfig::addOptimizedRegAlloc() {
insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
if (EnableDCEInRA)
- insertPass(&RenameIndependentSubregsID, &DeadMachineInstructionElimID);
+ insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);
TargetPassConfig::addOptimizedRegAlloc();
}
@@ -992,6 +994,7 @@ void GCNPassConfig::addPreEmitPass() {
// be better for it to emit S_NOP <N> when possible.
addPass(&PostRAHazardRecognizerID);
+ addPass(&SIRemoveShortExecBranchesID);
addPass(&SIInsertSkipsPassID);
addPass(&BranchRelaxationPassID);
}
@@ -1151,6 +1154,8 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
+ MFI->Mode.FP32Denormals = YamlMFI.Mode.FP32Denormals;
+ MFI->Mode.FP64FP16Denormals = YamlMFI.Mode.FP64FP16Denormals;
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 616196ad5ba3..c4eeb81c5133 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -57,7 +57,7 @@ using namespace llvm;
static cl::opt<unsigned> UnrollThresholdPrivate(
"amdgpu-unroll-threshold-private",
cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
- cl::init(2000), cl::Hidden);
+ cl::init(2700), cl::Hidden);
static cl::opt<unsigned> UnrollThresholdLocal(
"amdgpu-unroll-threshold-local",
@@ -90,7 +90,8 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
- UP.Threshold = 300; // Twice the default.
+ const Function &F = *L->getHeader()->getParent();
+ UP.Threshold = AMDGPU::getIntegerAttribute(F, "amdgpu-unroll-threshold", 300);
UP.MaxCount = std::numeric_limits<unsigned>::max();
UP.Partial = true;
@@ -337,10 +338,13 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
}
}
-int GCNTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
- TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) {
+int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info,
+ TTI::OperandValueKind Opd2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
EVT OrigTy = TLI->getValueType(DL, Ty);
if (!OrigTy.isSimple()) {
return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
@@ -365,6 +369,9 @@ int GCNTTIImpl::getArithmeticInstrCost(
if (SLT == MVT::i64)
return get64BitInstrCost() * LT.first * NElts;
+ if (ST->has16BitInsts() && SLT == MVT::i16)
+ NElts = (NElts + 1) / 2;
+
// i32
return getFullRateInstrCost() * LT.first * NElts;
case ISD::ADD:
@@ -372,11 +379,14 @@ int GCNTTIImpl::getArithmeticInstrCost(
case ISD::AND:
case ISD::OR:
case ISD::XOR:
- if (SLT == MVT::i64){
+ if (SLT == MVT::i64) {
// and, or and xor are typically split into 2 VALU instructions.
return 2 * getFullRateInstrCost() * LT.first * NElts;
}
+ if (ST->has16BitInsts() && SLT == MVT::i16)
+ NElts = (NElts + 1) / 2;
+
return LT.first * NElts * getFullRateInstrCost();
case ISD::MUL: {
const int QuarterRateCost = getQuarterRateInstrCost();
@@ -385,6 +395,9 @@ int GCNTTIImpl::getArithmeticInstrCost(
return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
}
+ if (ST->has16BitInsts() && SLT == MVT::i16)
+ NElts = (NElts + 1) / 2;
+
// i32
return QuarterRateCost * NElts * LT.first;
}
@@ -394,6 +407,9 @@ int GCNTTIImpl::getArithmeticInstrCost(
if (SLT == MVT::f64)
return LT.first * NElts * get64BitInstrCost();
+ if (ST->has16BitInsts() && SLT == MVT::f16)
+ NElts = (NElts + 1) / 2;
+
if (SLT == MVT::f32 || SLT == MVT::f16)
return LT.first * NElts * getFullRateInstrCost();
break;
@@ -412,7 +428,7 @@ int GCNTTIImpl::getArithmeticInstrCost(
if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) {
// TODO: This is more complicated, unsafe flags etc.
- if ((SLT == MVT::f32 && !ST->hasFP32Denormals()) ||
+ if ((SLT == MVT::f32 && !HasFP32Denormals) ||
(SLT == MVT::f16 && ST->has16BitInsts())) {
return LT.first * getQuarterRateInstrCost() * NElts;
}
@@ -431,7 +447,7 @@ int GCNTTIImpl::getArithmeticInstrCost(
if (SLT == MVT::f32 || SLT == MVT::f16) {
int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
- if (!ST->hasFP32Denormals()) {
+ if (!HasFP32Denormals) {
// FP mode switches.
Cost += 2 * getFullRateInstrCost();
}
@@ -447,6 +463,49 @@ int GCNTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo);
}
+template <typename T>
+int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<T *> Args,
+ FastMathFlags FMF, unsigned VF) {
+ if (ID != Intrinsic::fma)
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+
+ EVT OrigTy = TLI->getValueType(DL, RetTy);
+ if (!OrigTy.isSimple()) {
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+ }
+
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+
+ unsigned NElts = LT.second.isVector() ?
+ LT.second.getVectorNumElements() : 1;
+
+ MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
+
+ if (SLT == MVT::f64)
+ return LT.first * NElts * get64BitInstrCost();
+
+ if (ST->has16BitInsts() && SLT == MVT::f16)
+ NElts = (NElts + 1) / 2;
+
+ return LT.first * NElts * (ST->hasFastFMAF32() ? getHalfRateInstrCost()
+ : getQuarterRateInstrCost());
+}
+
+int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value*> Args, FastMathFlags FMF,
+ unsigned VF) {
+ return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF);
+}
+
+int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
+ return getIntrinsicInstrCost<Type>(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
+}
+
unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
// XXX - For some reason this isn't called for switch.
switch (Opcode) {
@@ -671,10 +730,13 @@ unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
- const FeatureBitset &CallerBits =
- TM.getSubtargetImpl(*Caller)->getFeatureBits();
- const FeatureBitset &CalleeBits =
- TM.getSubtargetImpl(*Callee)->getFeatureBits();
+ const GCNSubtarget *CallerST
+ = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Caller));
+ const GCNSubtarget *CalleeST
+ = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Callee));
+
+ const FeatureBitset &CallerBits = CallerST->getFeatureBits();
+ const FeatureBitset &CalleeBits = CalleeST->getFeatureBits();
FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
@@ -683,8 +745,8 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
// FIXME: dx10_clamp can just take the caller setting, but there seems to be
// no way to support merge for backend defined attributes.
- AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
- AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
+ AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
+ AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
return CallerMode.isInlineCompatible(CalleeMode);
}
@@ -695,34 +757,114 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned GCNTTIImpl::getUserCost(const User *U,
ArrayRef<const Value *> Operands) {
- // Estimate extractelement elimination
- if (const ExtractElementInst *EE = dyn_cast<ExtractElementInst>(U)) {
- ConstantInt *CI = dyn_cast<ConstantInt>(EE->getOperand(1));
+ const Instruction *I = dyn_cast<Instruction>(U);
+ if (!I)
+ return BaseT::getUserCost(U, Operands);
+
+ // Estimate different operations to be optimized out
+ switch (I->getOpcode()) {
+ case Instruction::ExtractElement: {
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
unsigned Idx = -1;
if (CI)
Idx = CI->getZExtValue();
- return getVectorInstrCost(EE->getOpcode(), EE->getOperand(0)->getType(),
- Idx);
+ return getVectorInstrCost(I->getOpcode(), I->getOperand(0)->getType(), Idx);
}
-
- // Estimate insertelement elimination
- if (const InsertElementInst *IE = dyn_cast<InsertElementInst>(U)) {
- ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
+ case Instruction::InsertElement: {
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
unsigned Idx = -1;
if (CI)
Idx = CI->getZExtValue();
- return getVectorInstrCost(IE->getOpcode(), IE->getType(), Idx);
+ return getVectorInstrCost(I->getOpcode(), I->getType(), Idx);
}
+ case Instruction::Call: {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ SmallVector<Value *, 4> Args(II->arg_operands());
+ FastMathFlags FMF;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+ FMF = FPMO->getFastMathFlags();
+ return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
+ FMF);
+ } else {
+ return BaseT::getUserCost(U, Operands);
+ }
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ Type *Ty = Shuffle->getType();
+ Type *SrcTy = Shuffle->getOperand(0)->getType();
+
+ // TODO: Identify and add costs for insert subvector, etc.
+ int SubIndex;
+ if (Shuffle->isExtractSubvectorMask(SubIndex))
+ return getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty);
+
+ if (Shuffle->changesLength())
+ return BaseT::getUserCost(U, Operands);
+
+ if (Shuffle->isIdentity())
+ return 0;
- // Estimate different intrinsics, e.g. llvm.fabs
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- SmallVector<Value *, 4> Args(II->arg_operands());
- FastMathFlags FMF;
- if (auto *FPMO = dyn_cast<FPMathOperator>(II))
- FMF = FPMO->getFastMathFlags();
- return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
- FMF);
+ if (Shuffle->isReverse())
+ return getShuffleCost(TTI::SK_Reverse, Ty, 0, nullptr);
+
+ if (Shuffle->isSelect())
+ return getShuffleCost(TTI::SK_Select, Ty, 0, nullptr);
+
+ if (Shuffle->isTranspose())
+ return getShuffleCost(TTI::SK_Transpose, Ty, 0, nullptr);
+
+ if (Shuffle->isZeroEltSplat())
+ return getShuffleCost(TTI::SK_Broadcast, Ty, 0, nullptr);
+
+ if (Shuffle->isSingleSource())
+ return getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, nullptr);
+
+ return getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, 0, nullptr);
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast: {
+ return getCastInstrCost(I->getOpcode(), I->getType(),
+ I->getOperand(0)->getType(), I);
}
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::FNeg: {
+ return getArithmeticInstrCost(I->getOpcode(), I->getType(),
+ TTI::OK_AnyValue, TTI::OK_AnyValue,
+ TTI::OP_None, TTI::OP_None, Operands, I);
+ }
+ default:
+ break;
+ }
+
return BaseT::getUserCost(U, Operands);
}
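
Several of the getArithmeticInstrCost cases above apply the same adjustment: on subtargets with 16-bit instructions, two 16-bit lanes can share one packed 32-bit VALU operation, so the legalized element count is halved, rounding up. A minimal model of that adjustment, with illustrative names in place of the real TTI plumbing:

#include <cstdio>

static unsigned vectorOpCost(unsigned NElts, unsigned ScalarBits,
                             bool Has16BitInsts, unsigned FullRateCost,
                             unsigned LegalizationFactor) {
  // Mirrors "NElts = (NElts + 1) / 2" from the hunks above: pairs of 16-bit
  // lanes map onto one 32-bit operation, with an odd lane costing a full op.
  if (Has16BitInsts && ScalarBits == 16)
    NElts = (NElts + 1) / 2;
  return LegalizationFactor * NElts * FullRateCost;
}

int main() {
  // A v5i16 add on a GFX9-style target: 5 lanes pack into 3 VALU ops.
  std::printf("cost = %u\n", vectorOpCost(5, 16, true, 1, 1)); // prints 3
}
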
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 67f7f9074f10..0b48f9f602b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -46,7 +46,7 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
Triple TargetTriple;
- const TargetSubtargetInfo *ST;
+ const GCNSubtarget *ST;
const TargetLoweringBase *TLI;
const TargetSubtargetInfo *getST() const { return ST; }
@@ -73,6 +73,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
const AMDGPUTargetLowering *TLI;
AMDGPUTTIImpl CommonTTI;
bool IsGraphicsShader;
+ bool HasFP32Denormals;
const FeatureBitset InlineFeatureIgnoreList = {
// Codegen control options which don't matter.
@@ -131,7 +132,8 @@ public:
ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
TLI(ST->getTargetLowering()),
CommonTTI(TM, F),
- IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
+ IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())),
+ HasFP32Denormals(ST->hasFP32Denormals(F)) { }
bool hasBranchDivergence() { return true; }
@@ -170,12 +172,13 @@ public:
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
int getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
- TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
- TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr);
unsigned getCFInstrCost(unsigned Opcode);
@@ -204,13 +207,23 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
- unsigned getInliningThresholdMultiplier() { return 9; }
+ unsigned getInliningThresholdMultiplier() { return 11; }
int getInlinerVectorBonusPercent() { return 0; }
int getArithmeticReductionCost(unsigned Opcode,
Type *Ty,
bool IsPairwise);
+ template <typename T>
+ int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<T *> Args, FastMathFlags FMF,
+ unsigned VF);
+ int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
+ int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1);
int getMinMaxReductionCost(Type *Ty, Type *CondTy,
bool IsPairwiseForm,
bool IsUnsigned);
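
The three getIntrinsicInstrCost declarations above follow a forwarding pattern: the two public overloads (over Value* arguments and over Type* arguments) both delegate to a single private template so the fma costing logic is written once. A stripped-down sketch of the same pattern, using stand-in Value and Type structs rather than the real TTI interface:

#include <vector>

struct Value {};
struct Type {};

class CostModel {
  // The shared costing logic lives in one template...
  template <typename T>
  int intrinsicCostImpl(const std::vector<T *> &Args) {
    return static_cast<int>(Args.size()); // placeholder cost computation
  }

public:
  // ...and each public overload merely forwards to it.
  int intrinsicCost(const std::vector<Value *> &Args) {
    return intrinsicCostImpl<Value>(Args);
  }
  int intrinsicCost(const std::vector<Type *> &Tys) {
    return intrinsicCostImpl<Type>(Tys);
  }
};
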
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 396e0ed2e76c..191f603a66d6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -36,10 +35,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
index d4401a22a1ad..281ae6d646e9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
@@ -61,7 +61,7 @@ namespace {
return false;
MDNode *MaxMD = nullptr;
auto MaxVer = 0U;
- for (const auto &VersionMD : NamedMD->operands()) {
+ for (auto VersionMD : NamedMD->operands()) {
assert(VersionMD->getNumOperands() == 2);
auto CMajor = mdconst::extract<ConstantInt>(VersionMD->getOperand(0));
auto VersionMajor = CMajor->getZExtValue();
@@ -94,7 +94,7 @@ namespace {
return false;
SmallVector<Metadata *, 4> All;
- for (const auto &MD : NamedMD->operands())
+ for (auto MD : NamedMD->operands())
for (const auto &Op : MD->operands())
if (std::find(All.begin(), All.end(), Op.get()) == All.end())
All.push_back(Op.get());
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 101ecfc0c87c..1f28688a7296 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -8,9 +8,9 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 9dd511fab57c..f3aa1a582368 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -45,7 +45,7 @@
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
@@ -1320,6 +1320,7 @@ private:
bool validateIntClampSupported(const MCInst &Inst);
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
+ bool validateMovrels(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);
bool validateMIMGAddrSize(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
@@ -2362,7 +2363,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
APFloat RealVal(APFloat::IEEEdouble());
auto roundMode = APFloat::rmNearestTiesToEven;
- if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
+ if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
return MatchOperand_ParseFail;
}
if (Negate)
@@ -3049,6 +3050,41 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
+static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
+{
+ switch (Opcode) {
+ case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
+ case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
+ case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// movrels* opcodes should only allow VGPRs as src0.
+// This is specified in the .td description for vop1/vop3,
+// but SDWA is handled differently. See isSDWAOperand.
+bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
+
+ const unsigned Opc = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opc);
+
+ if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
+ return true;
+
+ const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ assert(Src0Idx != -1);
+
+ const MCOperand &Src0 = Inst.getOperand(Src0Idx);
+ if (!Src0.isReg())
+ return false;
+
+ auto Reg = Src0.getReg();
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+ return !isSGPR(mc2PseudoReg(Reg), TRI);
+}
+
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -3469,6 +3505,10 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"invalid image_gather dmask: only one bit must be set");
return false;
}
+ if (!validateMovrels(Inst)) {
+ Error(IDLoc, "source operand must be a VGPR");
+ return false;
+ }
if (!validateFlatOffset(Inst, Operands)) {
return false;
}
@@ -6940,7 +6980,7 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
}
/// Force static initialization.
-extern "C" void LLVMInitializeAMDGPUAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
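
The parseImm hunk above reflects an API change in this merge: APFloat::convertFromString now returns Expected<opStatus> rather than a bare opStatus, so a parse failure surfaces as an llvm::Error that must be consumed; errorToBool does exactly that and yields true on failure. A small sketch of the new idiom, assuming an LLVM build environment for the headers:

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"

using namespace llvm;

// Returns true and fills Val on success, false on a malformed literal.
static bool parseFPLiteral(StringRef Num, APFloat &Val) {
  Expected<APFloat::opStatus> St =
      Val.convertFromString(Num, APFloat::rmNearestTiesToEven);
  // errorToBool consumes the Error, so the Expected is safely discarded.
  if (errorToBool(St.takeError()))
    return false; // Invalid literal.
  return true;
}
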
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 1b12550aed88..691aff4ecbb8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1621,8 +1621,8 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In
>;
}
let SubtargetPredicate = isGFX6GFX7 in {
-defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, store_atomic_global>;
-defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, store_atomic_global>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, atomic_store_global_32>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, atomic_store_global_64>;
} // End Predicates = isGFX6GFX7
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
index 816ec14a0e98..fe7faca8b157 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -58,6 +58,8 @@ class DS_Real <DS_Pseudo ds> :
let isPseudo = 0;
let isCodeGenOnly = 0;
+ let DS = 1;
+ let UseNamedOperandTable = 1;
// copy relevant pseudo op flags
let SubtargetPredicate = ds.SubtargetPredicate;
@@ -617,7 +619,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
def : GCNPat <
(int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
- (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
+ (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16timm $offset16), (i1 0))
>;
class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
@@ -731,8 +733,8 @@ defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local_32">;
defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local_64">;
let OtherPredicates = [D16PreservesUnusedBits] in {
-def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_local_hi16>;
-def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_local_hi16>;
+def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
+def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_hi16_local>;
}
@@ -1075,7 +1077,7 @@ defm DS_MAX_SRC2_F64 : DS_Real_gfx6_gfx7_gfx10<0x0d3>;
class DS_Real_vi <bits<8> op, DS_Pseudo ds> :
DS_Real <ds>,
SIMCInstr <ds.Mnemonic, SIEncodingFamily.VI> {
- let AssemblerPredicates = [isGFX8GFX9];
+ let AssemblerPredicate = isGFX8GFX9;
let DecoderNamespace = "GFX8";
// encoding
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index ec2e2c4e8b71..419513bdc248 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -73,7 +73,7 @@ addOperand(MCInst &Inst, const MCOperand& Opnd) {
Inst.addOperand(Opnd);
return Opnd.isValid() ?
MCDisassembler::Success :
- MCDisassembler::SoftFail;
+ MCDisassembler::Fail;
}
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
@@ -268,7 +268,6 @@ static bool isValidDPP8(const MCInst &MI) {
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes_,
uint64_t Address,
- raw_ostream &WS,
raw_ostream &CS) const {
CommentStream = &CS;
bool IsSDWA = false;
@@ -304,15 +303,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
if (Res) { IsSDWA = true; break; }
- // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
- // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
- // table first so we print the correct name.
-
- if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
- Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
- if (Res) break;
- }
-
if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
if (Res)
@@ -1262,7 +1252,7 @@ static MCDisassembler *createAMDGPUDisassembler(const Target &T,
return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}
-extern "C" void LLVMInitializeAMDGPUDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
createAMDGPUDisassembler);
TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index c5eaba615c2a..f975af409a09 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -53,7 +53,7 @@ public:
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &WS, raw_ostream &CS) const override;
+ raw_ostream &CS) const override;
const char* getRegClassName(unsigned RegClassID) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 80ee17eba141..2057cac346d4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -364,32 +364,12 @@ multiclass FLAT_Global_Atomic_Pseudo<
string opName,
RegisterClass vdst_rc,
ValueType vt,
- SDPatternOperator atomic = null_frag,
+ SDPatternOperator atomic_rtn = null_frag,
+ SDPatternOperator atomic_no_rtn = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc> :
- FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>,
- FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>;
-
-class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
->;
-
-def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
-def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
-def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
-def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
-def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
-def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
-def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
-def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
-def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
-def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
-def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
-def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
-def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;
-
+ FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>,
+ FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
//===----------------------------------------------------------------------===//
@@ -425,84 +405,84 @@ def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR
}
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
- VGPR_32, i32, atomic_cmp_swap_flat,
+ VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
- VReg_64, i64, atomic_cmp_swap_flat,
+ VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
v2i64, VReg_128>;
defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
- VGPR_32, i32, atomic_swap_flat>;
+ VGPR_32, i32, atomic_swap_flat_32>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
- VReg_64, i64, atomic_swap_flat>;
+ VReg_64, i64, atomic_swap_flat_64>;
defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
- VGPR_32, i32, atomic_add_flat>;
+ VGPR_32, i32, atomic_load_add_flat_32>;
defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
- VGPR_32, i32, atomic_sub_flat>;
+ VGPR_32, i32, atomic_load_sub_flat_32>;
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
- VGPR_32, i32, atomic_min_flat>;
+ VGPR_32, i32, atomic_load_min_flat_32>;
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
- VGPR_32, i32, atomic_umin_flat>;
+ VGPR_32, i32, atomic_load_umin_flat_32>;
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
- VGPR_32, i32, atomic_max_flat>;
+ VGPR_32, i32, atomic_load_max_flat_32>;
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
- VGPR_32, i32, atomic_umax_flat>;
+ VGPR_32, i32, atomic_load_umax_flat_32>;
defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
- VGPR_32, i32, atomic_and_flat>;
+ VGPR_32, i32, atomic_load_and_flat_32>;
defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
- VGPR_32, i32, atomic_or_flat>;
+ VGPR_32, i32, atomic_load_or_flat_32>;
defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
- VGPR_32, i32, atomic_xor_flat>;
+ VGPR_32, i32, atomic_load_xor_flat_32>;
defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
- VGPR_32, i32, atomic_inc_flat>;
+ VGPR_32, i32, atomic_inc_flat_32>;
defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
- VGPR_32, i32, atomic_dec_flat>;
+ VGPR_32, i32, atomic_dec_flat_32>;
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
- VReg_64, i64, atomic_add_flat>;
+ VReg_64, i64, atomic_load_add_flat_64>;
defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
- VReg_64, i64, atomic_sub_flat>;
+ VReg_64, i64, atomic_load_sub_flat_64>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
- VReg_64, i64, atomic_min_flat>;
+ VReg_64, i64, atomic_load_min_flat_64>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
- VReg_64, i64, atomic_umin_flat>;
+ VReg_64, i64, atomic_load_umin_flat_64>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
- VReg_64, i64, atomic_max_flat>;
+ VReg_64, i64, atomic_load_max_flat_64>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
- VReg_64, i64, atomic_umax_flat>;
+ VReg_64, i64, atomic_load_umax_flat_64>;
defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
- VReg_64, i64, atomic_and_flat>;
+ VReg_64, i64, atomic_load_and_flat_64>;
defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
- VReg_64, i64, atomic_or_flat>;
+ VReg_64, i64, atomic_load_or_flat_64>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
- VReg_64, i64, atomic_xor_flat>;
+ VReg_64, i64, atomic_load_xor_flat_64>;
defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
- VReg_64, i64, atomic_inc_flat>;
+ VReg_64, i64, atomic_inc_flat_64>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
- VReg_64, i64, atomic_dec_flat>;
+ VReg_64, i64, atomic_dec_flat_64>;
// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {
@@ -556,11 +536,12 @@ defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
- VGPR_32, i32, AMDGPUatomic_cmp_swap_global,
+ VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32, null_frag,
v2i32, VReg_64>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
- VReg_64, i64, AMDGPUatomic_cmp_swap_global,
+ VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
+ null_frag,
v2i64, VReg_128>;
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
@@ -778,7 +759,6 @@ def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, v4i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
@@ -797,7 +777,11 @@ def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
}
def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
-def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>;
+
+foreach vt = VReg_128.RegTypes in {
+def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt, VReg_128>;
+}
def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;
@@ -813,7 +797,7 @@ def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
@@ -827,7 +811,7 @@ def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
@@ -878,7 +862,11 @@ def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
}
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;
+
+foreach vt = VReg_128.RegTypes in {
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, vt>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, vt, VReg_128>;
+}
def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
@@ -888,7 +876,6 @@ def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32, VReg_128>;
let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
@@ -909,8 +896,8 @@ def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
}
-def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>;
-def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64, VReg_64>;
+def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>;
+def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64, VReg_64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
@@ -923,7 +910,7 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i3
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
@@ -937,7 +924,7 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64,
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 98678873e37c..10e2c3a263f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -104,6 +104,9 @@ public:
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+private:
+ int getDPPOp(unsigned Op) const;
};
} // end anonymous namespace
@@ -118,13 +121,13 @@ FunctionPass *llvm::createGCNDPPCombinePass() {
return new GCNDPPCombine();
}
-static int getDPPOp(unsigned Op) {
+int GCNDPPCombine::getDPPOp(unsigned Op) const {
auto DPP32 = AMDGPU::getDPPOp32(Op);
- if (DPP32 != -1)
- return DPP32;
-
- auto E32 = AMDGPU::getVOPe32(Op);
- return E32 != -1 ? AMDGPU::getDPPOp32(E32) : -1;
+ if (DPP32 == -1) {
+ auto E32 = AMDGPU::getVOPe32(Op);
+ DPP32 = (E32 == -1)? -1 : AMDGPU::getDPPOp32(E32);
+ }
+ return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
}
// tracks the register operand definition and returns:
@@ -235,7 +238,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
}
if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
- if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
+ if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
+ !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
Fail = true;
break;
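Note on the GCNDPPCombine hunks above: getDPPOp() became a member so it can consult TII, and it now refuses opcodes that have no real MC encoding on the current subtarget. A minimal standalone sketch of the same lookup (the helper name lookupDPPOp is hypothetical; AMDGPU::getDPPOp32, AMDGPU::getVOPe32 and SIInstrInfo::pseudoToMCOpcode are the in-tree tables and hooks the patch uses):

    // Map a VOP opcode to its DPP form, or -1 if no encodable form exists.
    static int lookupDPPOp(const SIInstrInfo *TII, unsigned Op) {
      int DPP32 = AMDGPU::getDPPOp32(Op);      // direct VOP -> DPP mapping
      if (DPP32 == -1) {
        int E32 = AMDGPU::getVOPe32(Op);       // retry via the 32-bit encoding
        DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
      }
      // New in this change: reject pseudos with no MC encoding on the target.
      return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
    }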
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 9528aee4c50e..3ef5a77af45e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -185,7 +185,7 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
return NoopHazard;
- if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
+ if (MI->mayLoadOrStore() && checkMAILdStHazards(MI) > 0)
return NoopHazard;
if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
@@ -296,7 +296,7 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (SIInstrInfo::isMAI(*MI))
return std::max(WaitStates, checkMAIHazards(MI));
- if (MI->mayLoad() || MI->mayStore())
+ if (MI->mayLoadOrStore())
return std::max(WaitStates, checkMAILdStHazards(MI));
return WaitStates;
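The two hazard-recognizer hunks above are a pure simplification: MachineInstr::mayLoadOrStore() already exists in the CodeGen API and is equivalent to the expression it replaces, roughly:

    // Sketch of the predicate the patch switches to (not new behavior):
    static bool mayLoadOrStore(const MachineInstr &MI) {
      return MI.mayLoad() || MI.mayStore();
    }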
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 36a8f74150f5..f6023f3a40a2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
index 2927d4eb745a..76593bc0e5ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
@@ -32,9 +32,9 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
@@ -43,6 +43,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 973491a70d3c..e109eed5f607 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -16,6 +16,7 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/MathExtras.h"
@@ -389,8 +390,16 @@ void GCNScheduleDAGMILive::schedule() {
}
if (WavesAfter >= MinOccupancy) {
- Pressure[RegionIdx] = PressureAfter;
- return;
+ unsigned TotalVGPRs = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
+ unsigned TotalSGPRs = AMDGPU::IsaInfo::getAddressableNumSGPRs(&ST);
+ if (WavesAfter > MFI.getMinWavesPerEU() ||
+ PressureAfter.less(ST, PressureBefore) ||
+ (TotalVGPRs >= PressureAfter.getVGPRNum() &&
+ TotalSGPRs >= PressureAfter.getSGPRNum())) {
+ Pressure[RegionIdx] = PressureAfter;
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
}
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
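The scheduling hunk above tightens when a new schedule is accepted: it is kept only if it cannot force spilling. Restated as a self-contained predicate (parameter names mirror the hunk; this is a paraphrase, not code from the patch):

    // Accept the new schedule if occupancy stays above the minimum waves/EU,
    // pressure strictly improved, or the result still fits the addressable
    // VGPR/SGPR budget (i.e. no spilling is introduced).
    static bool keepNewSchedule(unsigned WavesAfter, unsigned MinWavesPerEU,
                                bool PressureImproved, unsigned TotalVGPRs,
                                unsigned TotalSGPRs, unsigned VGPRNum,
                                unsigned SGPRNum) {
      return WavesAfter > MinWavesPerEU || PressureImproved ||
             (TotalVGPRs >= VGPRNum && TotalSGPRs >= SGPRNum);
    }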
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index a9888e6ed924..f65dc25d7eec 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -26,10 +26,11 @@
using namespace llvm;
using namespace llvm::AMDGPU;
-void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &OS) {
OS.flush();
- printInstruction(MI, STI, OS);
+ printInstruction(MI, Address, STI, OS);
printAnnotation(OS, Annot);
}
@@ -1342,10 +1343,11 @@ void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo,
#include "AMDGPUGenAsmWriter.inc"
-void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void R600InstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
O.flush();
- printInstruction(MI, O);
+ printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 66b70831ff9e..ba53003e9041 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -23,12 +23,12 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
// Autogenerated by tblgen
- void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
static void printRegOperand(unsigned RegNo, raw_ostream &O,
const MCRegisterInfo &MRI);
@@ -240,9 +240,9 @@ public:
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
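The printInst/printInstruction signature churn above threads the instruction's address through the MC printing path so printers can render PC-relative operands. A caller under the new interface would look roughly like this (the Address value is illustrative; Inst, STI and OS are assumed to be in scope):

    uint64_t Address = 0x1000;  // VMA of the instruction being printed
    InstPrinter->printInst(&Inst, Address, /*Annot=*/"", STI, OS);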
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 9e04ab9bae93..9644e66fda4e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -14,7 +14,9 @@
using namespace llvm;
-AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT,
+ const MCTargetOptions &Options)
+ : MCAsmInfoELF() {
CodePointerSize = (TT.getArch() == Triple::amdgcn) ? 8 : 4;
StackGrowsUp = true;
HasSingleParameterDotFile = false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
index 71e63ec27a8f..65c9b1917bf8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -25,7 +25,7 @@ class Triple;
// with 'L' as a local symbol.
class AMDGPUMCAsmInfo : public MCAsmInfoELF {
public:
- explicit AMDGPUMCAsmInfo(const Triple &TT);
+ explicit AMDGPUMCAsmInfo(const Triple &TT, const MCTargetOptions &Options);
bool shouldOmitSectionDirective(StringRef SectionName) const override;
unsigned getMaxInstLength(const MCSubtargetInfo *STI) const override;
};
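MCAsmInfo constructors now take the MCTargetOptions so command-line assembler options can shape per-target asm info. The matching factory registered with TargetRegistry presumably has this shape (not shown in this diff; sketched from the updated callback type):

    static MCAsmInfo *createAMDGPUMCAsmInfo(const MCRegisterInfo &MRI,
                                            const Triple &TT,
                                            const MCTargetOptions &Options) {
      return new AMDGPUMCAsmInfo(TT, Options);
    }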
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 88df64d18cc5..9507836c64c2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -134,7 +134,7 @@ static MCInstrAnalysis *createAMDGPUMCInstrAnalysis(const MCInstrInfo *Info) {
return new AMDGPUMCInstrAnalysis(Info);
}
-extern "C" void LLVMInitializeAMDGPUTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMC() {
TargetRegistry::RegisterMCInstrInfo(getTheGCNTarget(), createAMDGPUMCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(getTheAMDGPUTarget(), createR600MCInstrInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index c15da8075a34..fef665c2900e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -60,6 +60,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
AMDGPU::GPUKind AK;
switch (ElfMach) {
+ default: llvm_unreachable("Unhandled ELF::EF_AMDGPU type");
case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index f33ad950d5d9..4006a6205fb8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -183,7 +183,7 @@ class MIMGNSAHelper<int num_addrs> {
class MIMG_gfx6789<bits<8> op, dag outs, string dns = "">
: MIMG<outs, dns>, MIMGe_gfx6789<op> {
let SubtargetPredicate = isGFX6GFX7GFX8GFX9;
- let AssemblerPredicates = [isGFX6GFX7GFX8GFX9];
+ let AssemblerPredicate = isGFX6GFX7GFX8GFX9;
let MIMGEncoding = MIMGEncGfx6;
@@ -194,7 +194,7 @@ class MIMG_gfx6789<bits<8> op, dag outs, string dns = "">
class MIMG_gfx10<int op, dag outs, string dns = "">
: MIMG<outs, dns>, MIMGe_gfx10<op> {
let SubtargetPredicate = isGFX10Plus;
- let AssemblerPredicates = [isGFX10Plus];
+ let AssemblerPredicate = isGFX10Plus;
let MIMGEncoding = MIMGEncGfx10Default;
@@ -207,7 +207,7 @@ class MIMG_gfx10<int op, dag outs, string dns = "">
class MIMG_nsa_gfx10<int op, dag outs, int num_addrs, string dns="">
: MIMG<outs, dns>, MIMGe_gfx10<op> {
let SubtargetPredicate = isGFX10Plus;
- let AssemblerPredicates = [isGFX10Plus];
+ let AssemblerPredicate = isGFX10Plus;
let MIMGEncoding = MIMGEncGfx10NSA;
@@ -416,13 +416,13 @@ class MIMG_Atomic_si<mimg op, string asm, RegisterClass data_rc,
RegisterClass addr_rc, bit enableDasm = 0>
: MIMG_Atomic_gfx6789_base<op.SI_GFX10, asm, data_rc, addr_rc,
!if(enableDasm, "GFX6GFX7", "")> {
- let AssemblerPredicates = [isGFX6GFX7];
+ let AssemblerPredicate = isGFX6GFX7;
}
class MIMG_Atomic_vi<mimg op, string asm, RegisterClass data_rc,
RegisterClass addr_rc, bit enableDasm = 0>
: MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX8", "")> {
- let AssemblerPredicates = [isGFX8GFX9];
+ let AssemblerPredicate = isGFX8GFX9;
let MIMGEncoding = MIMGEncGfx8;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
index b29cd75f75cf..ed23c8ea814b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- R600AsmPrinter.cpp - R600 Assebly printer ------------------------===//
+//===-- R600AsmPrinter.cpp - R600 Assembly printer ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 659458b0b752..1b1f5f9a404a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -62,6 +63,9 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
computeRegisterProperties(Subtarget->getRegisterInfo());
// Legalize loads and stores to the private address space.
@@ -223,10 +227,8 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::f64, Expand);
}
- // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
- // need it for R600.
- if (!Subtarget->hasFP32Denormals())
- setOperationAction(ISD::FMAD, MVT::f32, Legal);
+  // FIXME: May need a no-denormals check
+ setOperationAction(ISD::FMAD, MVT::f32, Legal);
if (!Subtarget->hasBFI()) {
// fcopysign can be done in a single instruction with BFI.
@@ -970,10 +972,9 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
//
// Move hardware True/False values to the correct operand.
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- ISD::CondCode InverseCC =
- ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
if (isHWTrueValue(False) && isHWFalseValue(True)) {
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
std::swap(False, True);
CC = DAG.getCondCode(InverseCC);
@@ -1013,7 +1014,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
CC = DAG.getCondCode(CCSwapped);
} else {
// Try inverting the condition and then swapping the operands
- ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
+ ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
std::swap(True, False);
@@ -1039,7 +1040,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
case ISD::SETONE:
case ISD::SETUNE:
case ISD::SETNE:
- CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
+ CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
Temp = True;
True = False;
False = Temp;
@@ -1174,8 +1175,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
// Load dword
// TODO: can we be smarter about machine pointer info?
- MachinePointerInfo PtrInfo(UndefValue::get(
- Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
+ MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
Chain = Dst.getValue(1);
@@ -1405,8 +1405,7 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
// Load dword
// TODO: can we be smarter about machine pointer info?
- MachinePointerInfo PtrInfo(UndefValue::get(
- Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
+ MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
// Get offset within the register.
@@ -1457,7 +1456,9 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
- return scalarizeVectorLoad(LoadNode, DAG);
+ SDValue Ops[2];
+ std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
+ return DAG.getMergeValues(Ops, DL);
}
// This is still used for explicit load from addrspace(8)
@@ -1500,7 +1501,6 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// buffer. However SEXT loads from other address spaces are not supported, so
// we need to expand them here.
if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
- EVT MemVT = LoadNode->getMemoryVT();
assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
SDValue NewLoad = DAG.getExtLoad(
ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
@@ -1608,9 +1608,6 @@ SDValue R600TargetLowering::LowerFormalArguments(
continue;
}
- PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::PARAM_I_ADDRESS);
-
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
// being invalid. Somehow this seems to work with i64 arguments, but breaks
@@ -1631,11 +1628,10 @@ SDValue R600TargetLowering::LowerFormalArguments(
// XXX - I think PartOffset should give you this, but it seems to give the
// size of the register which isn't useful.
- unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
+ MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS);
SDValue Arg = DAG.getLoad(
ISD::UNINDEXED, Ext, VT, DL, Chain,
DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
@@ -1715,12 +1711,7 @@ static SDValue CompactSwizzlableVector(
if (NewBldVec[i].isUndef())
continue;
- // Fix spurious warning with gcc 7.3 -O3
- // warning: array subscript is above array bounds [-Warray-bounds]
- // if (NewBldVec[i] == NewBldVec[j]) {
- // ~~~~~~~~~~~^
- if (i >= 4)
- continue;
+
for (unsigned j = 0; j < i; j++) {
if (NewBldVec[i] == NewBldVec[j]) {
NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
@@ -1889,8 +1880,6 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
DAG.getConstant(-1, DL, MVT::i32), // True
DAG.getConstant(0, DL, MVT::i32), // False
SelectCC.getOperand(4)); // CC
-
- break;
}
// insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
@@ -1999,8 +1988,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SETNE: return LHS;
case ISD::SETEQ: {
ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
- LHSCC = ISD::getSetCCInverse(LHSCC,
- LHS.getOperand(0).getValueType().isInteger());
+ LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
return DAG.getSelectCC(DL,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 04a5e93f6213..346296c77377 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -60,8 +60,8 @@ bool R600InstrInfo::isVector(const MachineInstr &MI) const {
void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
unsigned VectorComponents = 0;
if ((R600::R600_Reg128RegClass.contains(DestReg) ||
R600::R600_Reg128VerticalRegClass.contains(DestReg)) &&
@@ -541,7 +541,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
std::vector<std::vector<std::pair<int, unsigned>>> IGSrcs;
ValidSwizzle.clear();
- unsigned ConstCount;
+ unsigned ConstCount = 0;
BankSwizzle TransBS = ALU_VEC_012_SCL_210;
for (unsigned i = 0, e = IG.size(); i < e; ++i) {
IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index 00d96c9676aa..873ee08470cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -73,7 +73,7 @@ public:
}
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
index f40eece859ee..cbdf0de44f87 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -295,9 +295,23 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
let VTXInst = 1;
}
-// FIXME: Deprecated.
-class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
+// Legacy.
+def atomic_cmp_swap_global_noret : PatFrag<
+ (ops node:$ptr, node:$cmp, node:$value),
+ (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+
+def atomic_cmp_swap_global_ret : PatFrag<
+ (ops node:$ptr, node:$cmp, node:$value),
+ (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+
+def mskor_global : PatFrag<(ops node:$val, node:$ptr),
+ (AMDGPUstore_mskor node:$val, node:$ptr), [{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+}]>;
+// FIXME: These are deprecated
class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
(ld_node node:$ptr), [{
LoadSDNode *L = cast<LoadSDNode>(N);
@@ -319,9 +333,10 @@ def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
-// FIXME: These are deprecated
-def az_extloadi8_local : LocalLoad <az_extloadi8>;
-def az_extloadi16_local : LocalLoad <az_extloadi16>;
+let AddressSpaces = LoadAddress_local.AddrSpaces in {
+def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr)>;
+def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr)>;
+}
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index b764ca7d7061..27320472cacb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 65286751c12d..914d2a5ef148 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -66,9 +66,9 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -82,6 +82,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4eac03168760..2ff8baf29394 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -14,7 +14,7 @@
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -312,6 +312,19 @@ static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
return false;
}
+static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
+ MachineInstr *MI, unsigned OpNo,
+ MachineOperand *FoldOp, bool Commuted = false,
+ int ShrinkOp = -1) {
+ // Skip additional folding on the same operand.
+ for (FoldCandidate &Fold : FoldList)
+ if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)
+ return;
+ LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
+ << " operand " << OpNo << "\n " << *MI << '\n');
+ FoldList.push_back(FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
+}
+
static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold,
@@ -344,7 +357,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
// Special case for s_setreg_b32
if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
- FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+ appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
return true;
}
@@ -403,8 +416,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
unsigned MaybeCommutedOpc = MI->getOpcode();
int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
- FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
- Op32));
+ appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
return true;
}
@@ -412,11 +424,34 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
return false;
}
- FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
+ appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true);
return true;
}
- FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+ // Check the case where we might introduce a second constant operand to a
+ // scalar instruction
+ if (TII->isSALU(MI->getOpcode())) {
+ const MCInstrDesc &InstDesc = MI->getDesc();
+ const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo];
+ const SIRegisterInfo &SRI = TII->getRegisterInfo();
+
+ // Fine if the operand can be encoded as an inline constant
+ if (OpToFold->isImm()) {
+ if (!SRI.opCanUseInlineConstant(OpInfo.OperandType) ||
+ !TII->isInlineConstant(*OpToFold, OpInfo)) {
+ // Otherwise check for another constant
+ for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
+ auto &Op = MI->getOperand(i);
+ if (OpNo != i &&
+ TII->isLiteralConstantLike(Op, OpInfo)) {
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
return true;
}
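The new SALU block above encodes the hardware rule that a scalar ALU instruction can carry at most one literal constant. Its essence, as a self-contained sketch (names follow SIFoldOperands.cpp; simplified relative to the hunk, which first checks opCanUseInlineConstant and isInlineConstant before scanning):

    // Returns false when folding a non-inline immediate into operand OpNo
    // would give the SALU instruction a second literal operand.
    static bool canUseLiteralSlot(const SIInstrInfo *TII,
                                  const MachineInstr &MI, unsigned OpNo,
                                  const MCOperandInfo &OpInfo) {
      for (unsigned I = 0, E = MI.getDesc().getNumOperands(); I != E; ++I)
        if (I != OpNo && TII->isLiteralConstantLike(MI.getOperand(I), OpInfo))
          return false; // another operand already occupies the literal slot
      return true;
    }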
@@ -429,6 +464,42 @@ static bool isUseSafeToFold(const SIInstrInfo *TII,
//return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
}
+// Find a def of UseReg, check if it is a reg_sequence, and find initializers
+// for each subreg, tracking each back to a foldable inline immediate if possible.
+// Returns true on success.
+static bool getRegSeqInit(
+ SmallVectorImpl<std::pair<MachineOperand*, unsigned>> &Defs,
+ Register UseReg, uint8_t OpTy,
+ const SIInstrInfo *TII, const MachineRegisterInfo &MRI) {
+ MachineInstr *Def = MRI.getUniqueVRegDef(UseReg);
+ if (!Def || !Def->isRegSequence())
+ return false;
+
+ for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
+ MachineOperand *Sub = &Def->getOperand(I);
+ assert (Sub->isReg());
+
+ for (MachineInstr *SubDef = MRI.getUniqueVRegDef(Sub->getReg());
+ SubDef && Sub->isReg() && !Sub->getSubReg() &&
+ TII->isFoldableCopy(*SubDef);
+ SubDef = MRI.getUniqueVRegDef(Sub->getReg())) {
+ MachineOperand *Op = &SubDef->getOperand(1);
+ if (Op->isImm()) {
+ if (TII->isInlineConstant(*Op, OpTy))
+ Sub = Op;
+ break;
+ }
+ if (!Op->isReg())
+ break;
+ Sub = Op;
+ }
+
+ Defs.push_back(std::make_pair(Sub, Def->getOperand(I + 1).getImm()));
+ }
+
+ return true;
+}
+
static bool tryToFoldACImm(const SIInstrInfo *TII,
const MachineOperand &OpToFold,
MachineInstr *UseMI,
@@ -462,39 +533,30 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
return false;
MachineRegisterInfo &MRI = UseMI->getParent()->getParent()->getRegInfo();
- const MachineInstr *Def = MRI.getUniqueVRegDef(UseReg);
- if (!Def || !Def->isRegSequence())
+ SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
+ if (!getRegSeqInit(Defs, UseReg, OpTy, TII, MRI))
return false;
- int64_t Imm;
- MachineOperand *Op;
- for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
- const MachineOperand &Sub = Def->getOperand(I);
- if (!Sub.isReg() || Sub.getSubReg())
- return false;
- MachineInstr *SubDef = MRI.getUniqueVRegDef(Sub.getReg());
- while (SubDef && !SubDef->isMoveImmediate() &&
- !SubDef->getOperand(1).isImm() && TII->isFoldableCopy(*SubDef))
- SubDef = MRI.getUniqueVRegDef(SubDef->getOperand(1).getReg());
- if (!SubDef || !SubDef->isMoveImmediate() || !SubDef->getOperand(1).isImm())
+ int32_t Imm;
+ for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
+ const MachineOperand *Op = Defs[I].first;
+ if (!Op->isImm())
return false;
- Op = &SubDef->getOperand(1);
+
auto SubImm = Op->getImm();
- if (I == 1) {
- if (!TII->isInlineConstant(SubDef->getOperand(1), OpTy))
+ if (!I) {
+ Imm = SubImm;
+ if (!TII->isInlineConstant(*Op, OpTy) ||
+ !TII->isOperandLegal(*UseMI, UseOpIdx, Op))
return false;
- Imm = SubImm;
continue;
}
if (Imm != SubImm)
return false; // Can only fold splat constants
}
- if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op))
- return false;
-
- FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
+ appendFoldCandidate(FoldList, UseMI, UseOpIdx, Defs[0].first);
return true;
}
@@ -513,18 +575,6 @@ void SIFoldOperands::foldOperand(
if (UseOp.isReg() && OpToFold.isReg()) {
if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
return;
-
- // Don't fold subregister extracts into tied operands, only if it is a full
- // copy since a subregister use tied to a full register def doesn't really
- // make sense. e.g. don't fold:
- //
- // %1 = COPY %0:sub1
- // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
- //
- // into
- // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
- if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
- return;
}
// Special case for REG_SEQUENCE: We can't fold literals into
@@ -639,16 +689,97 @@ void SIFoldOperands::foldOperand(
CopiesToReplace.push_back(UseMI);
} else {
if (UseMI->isCopy() && OpToFold.isReg() &&
- Register::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
- TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) &&
- TRI->isVectorRegister(*MRI, UseMI->getOperand(1).getReg()) &&
+ UseMI->getOperand(0).getReg().isVirtual() &&
!UseMI->getOperand(1).getSubReg()) {
+ LLVM_DEBUG(dbgs() << "Folding " << OpToFold
+ << "\n into " << *UseMI << '\n');
unsigned Size = TII->getOpSize(*UseMI, 1);
- UseMI->getOperand(1).setReg(OpToFold.getReg());
+ Register UseReg = OpToFold.getReg();
+ UseMI->getOperand(1).setReg(UseReg);
UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
UseMI->getOperand(1).setIsKill(false);
CopiesToReplace.push_back(UseMI);
OpToFold.setIsKill(false);
+
+      // It is very tricky to store a value into an AGPR. v_accvgpr_write_b32
+ // can only accept VGPR or inline immediate. Recreate a reg_sequence with
+ // its initializers right here, so we will rematerialize immediates and
+ // avoid copies via different reg classes.
+ SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
+ if (Size > 4 && TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
+ getRegSeqInit(Defs, UseReg, AMDGPU::OPERAND_REG_INLINE_C_INT32, TII,
+ *MRI)) {
+ const DebugLoc &DL = UseMI->getDebugLoc();
+ MachineBasicBlock &MBB = *UseMI->getParent();
+
+ UseMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE));
+ for (unsigned I = UseMI->getNumOperands() - 1; I > 0; --I)
+ UseMI->RemoveOperand(I);
+
+ MachineInstrBuilder B(*MBB.getParent(), UseMI);
+ DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;
+ SmallSetVector<TargetInstrInfo::RegSubRegPair, 32> SeenAGPRs;
+ for (unsigned I = 0; I < Size / 4; ++I) {
+ MachineOperand *Def = Defs[I].first;
+ TargetInstrInfo::RegSubRegPair CopyToVGPR;
+ if (Def->isImm() &&
+ TII->isInlineConstant(*Def, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
+ int64_t Imm = Def->getImm();
+
+ auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
+ BuildMI(MBB, UseMI, DL,
+ TII->get(AMDGPU::V_ACCVGPR_WRITE_B32), Tmp).addImm(Imm);
+ B.addReg(Tmp);
+ } else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
+ auto Src = getRegSubRegPair(*Def);
+ Def->setIsKill(false);
+ if (!SeenAGPRs.insert(Src)) {
+ // We cannot build a reg_sequence out of the same registers, they
+              // must be copied. Better to do it here, before copyPhysReg() creates
+              // several reads to do the AGPR->VGPR->AGPR copy.
+ CopyToVGPR = Src;
+ } else {
+ B.addReg(Src.Reg, Def->isUndef() ? RegState::Undef : 0,
+ Src.SubReg);
+ }
+ } else {
+ assert(Def->isReg());
+ Def->setIsKill(false);
+ auto Src = getRegSubRegPair(*Def);
+
+ // Direct copy from SGPR to AGPR is not possible. To avoid creation
+ // of exploded copies SGPR->VGPR->AGPR in the copyPhysReg() later,
+ // create a copy here and track if we already have such a copy.
+ if (TRI->isSGPRReg(*MRI, Src.Reg)) {
+ CopyToVGPR = Src;
+ } else {
+ auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
+ BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Tmp).add(*Def);
+ B.addReg(Tmp);
+ }
+ }
+
+ if (CopyToVGPR.Reg) {
+ Register Vgpr;
+ if (VGPRCopies.count(CopyToVGPR)) {
+ Vgpr = VGPRCopies[CopyToVGPR];
+ } else {
+ Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def);
+ VGPRCopies[CopyToVGPR] = Vgpr;
+ }
+ auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
+ BuildMI(MBB, UseMI, DL,
+ TII->get(AMDGPU::V_ACCVGPR_WRITE_B32), Tmp).addReg(Vgpr);
+ B.addReg(Tmp);
+ }
+
+ B.addImm(Defs[I].second);
+ }
+ LLVM_DEBUG(dbgs() << "Folded " << *UseMI << '\n');
+ return;
+ }
+
if (Size != 4)
return;
if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
@@ -1099,6 +1230,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
Copy->addImplicitDefUseOperands(*MF);
for (FoldCandidate &Fold : FoldList) {
+ assert(!Fold.isReg() || Fold.OpToFold);
if (Fold.isReg() && Register::isVirtualRegister(Fold.OpToFold->getReg())) {
Register Reg = Fold.OpToFold->getReg();
MachineInstr *DefMI = Fold.OpToFold->getParent();
@@ -1249,8 +1381,8 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
case AMDGPU::V_MUL_F32_e64:
case AMDGPU::V_MUL_F16_e64: {
// If output denormals are enabled, omod is ignored.
- if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
- (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
+ if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32Denormals) ||
+ (Op == AMDGPU::V_MUL_F16_e64 && MFI->getMode().FP64FP16Denormals))
return std::make_pair(nullptr, SIOutMods::NONE);
const MachineOperand *RegOp = nullptr;
@@ -1279,8 +1411,8 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
case AMDGPU::V_ADD_F32_e64:
case AMDGPU::V_ADD_F16_e64: {
// If output denormals are enabled, omod is ignored.
- if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
- (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
+ if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32Denormals) ||
+ (Op == AMDGPU::V_ADD_F16_e64 && MFI->getMode().FP64FP16Denormals))
return std::make_pair(nullptr, SIOutMods::NONE);
// Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
@@ -1358,15 +1490,16 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
tryFoldInst(TII, &MI);
if (!TII->isFoldableCopy(MI)) {
+ // Saw an unknown clobber of m0, so we no longer know what it is.
+ if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
+ CurrentKnownM0Val = nullptr;
+
// TODO: Omod might be OK if there is NSZ only on the source
// instruction, and not the omod multiply.
if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
!tryFoldOMod(MI))
tryFoldClamp(MI);
- // Saw an unknown clobber of m0, so we no longer know what it is.
- if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
- CurrentKnownM0Val = nullptr;
continue;
}
@@ -1409,5 +1542,5 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
foldInstOperand(MI, OpToFold);
}
}
- return false;
+ return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index 26bae5734df7..8ef02e73865d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -16,13 +16,14 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "GCNRegPressure.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index ed07ed100a19..8364665dda04 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -578,10 +578,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
// We now have the GIT ptr - now get the scratch descriptor from the entry
// at offset 0 (or offset 16 for a compute shader).
- PointerType *PtrTy =
- PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
- AMDGPUAS::CONSTANT_ADDRESS);
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
auto MMO = MF.getMachineMemOperand(PtrInfo,
MachineMemOperand::MOLoad |
@@ -623,10 +620,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
} else {
const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
- PointerType *PtrTy =
- PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
- AMDGPUAS::CONSTANT_ADDRESS);
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
auto MMO = MF.getMachineMemOperand(PtrInfo,
MachineMemOperand::MOLoad |
MachineMemOperand::MOInvariant |
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 56ebf9c06741..e73d87cd66af 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -90,16 +90,21 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
-static cl::opt<bool> EnableVGPRIndexMode(
- "amdgpu-vgpr-index-mode",
- cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
- cl::init(false));
-
static cl::opt<bool> DisableLoopAlignment(
"amdgpu-disable-loop-alignment",
cl::desc("Do not align and prefetch loops"),
cl::init(false));
+static bool hasFP32Denormals(const MachineFunction &MF) {
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ return Info->getMode().FP32Denormals;
+}
+
+static bool hasFP64FP16Denormals(const MachineFunction &MF) {
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ return Info->getMode().FP64FP16Denormals;
+}
+
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
@@ -160,6 +165,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
computeRegisterProperties(Subtarget->getRegisterInfo());
+ // The boolean content concept here is too inflexible. Compares only ever
+ // really produce a 1-bit result. Any copy/extend from these will turn into a
+ // select, and zext/1 or sext/-1 are equally cheap. Arbitrarily choose 0/1, as
+ // it's what most targets use.
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent);
+
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v3i32, Custom);
@@ -358,14 +370,16 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom);
if (Subtarget->has16BitInsts()) {
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::f16, Custom);
setOperationAction(ISD::FEXP, MVT::f16, Custom);
setOperationAction(ISD::FLOG10, MVT::f16, Custom);
}
- // v_mad_f32 does not support denormals according to some sources.
- if (!Subtarget->hasFP32Denormals())
- setOperationAction(ISD::FMAD, MVT::f32, Legal);
+ // v_mad_f32 does not support denormals. We report it as unconditionally
+ // legal, and the context where it is formed will disallow it when fp32
+ // denormals are enabled.
+ setOperationAction(ISD::FMAD, MVT::f32, Legal);
if (!Subtarget->hasBFI()) {
// fcopysign can be done in a single instruction with BFI.
@@ -473,8 +487,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
// F16 - Constant Actions.
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
@@ -489,6 +501,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Custom);
+
setOperationAction(ISD::FP_TO_SINT, MVT::f16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::f16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::f16, Promote);
@@ -503,7 +519,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// F16 - VOP3 Actions.
setOperationAction(ISD::FMA, MVT::f16, Legal);
- if (!Subtarget->hasFP16Denormals() && STI.hasMadF16())
+ if (STI.hasMadF16())
setOperationAction(ISD::FMAD, MVT::f16, Legal);
for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16}) {
@@ -761,12 +777,13 @@ const GCNSubtarget *SITargetLowering::getSubtarget() const {
//
// There is only one special case, when denormals are enabled, where this is
// OK to use; we don't currently handle it.
-bool SITargetLowering::isFPExtFoldable(unsigned Opcode,
- EVT DestVT, EVT SrcVT) const {
+bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
+ EVT DestVT, EVT SrcVT) const {
return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) ||
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
- DestVT.getScalarType() == MVT::f32 && !Subtarget->hasFP32Denormals() &&
- SrcVT.getScalarType() == MVT::f16;
+ DestVT.getScalarType() == MVT::f32 &&
+ SrcVT.getScalarType() == MVT::f16 &&
+ !hasFP32Denormals(DAG.getMachineFunction());
}
bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
@@ -1069,23 +1086,18 @@ bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
return AM.BaseOffs == 0 && AM.Scale == 0;
}
- // GFX9 added a 13-bit signed offset. When using regular flat instructions,
- // the sign bit is ignored and is treated as a 12-bit unsigned offset.
-
- // GFX10 shrinked signed offset to 12 bits. When using regular flat
- // instructions, the sign bit is also ignored and is treated as 11-bit
- // unsigned offset.
-
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
- return isUInt<11>(AM.BaseOffs) && AM.Scale == 0;
-
- // Just r + i
- return isUInt<12>(AM.BaseOffs) && AM.Scale == 0;
+ return AM.Scale == 0 &&
+ (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
+ AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS,
+ /*Signed=*/false));
}
bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const {
if (Subtarget->hasFlatGlobalInsts())
- return isInt<13>(AM.BaseOffs) && AM.Scale == 0;
+ return AM.Scale == 0 &&
+ (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
+ AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS,
+ /*Signed=*/true));
if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) {
// Assume the we will use FLAT for all global memory accesses
@@ -1326,13 +1338,6 @@ EVT SITargetLowering::getOptimalMemOpType(
return MVT::Other;
}
-static bool isFlatGlobalAddrSpace(unsigned AS) {
- return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS ||
- AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
-}
-
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
@@ -1466,9 +1471,7 @@ SDValue SITargetLowering::lowerKernargMemParameter(
const SDLoc &SL, SDValue Chain,
uint64_t Offset, unsigned Align, bool Signed,
const ISD::InputArg *Arg) const {
- Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
// Try to avoid using an extload by loading earlier than the argument address,
// and extracting the relevant bits. The load should hopefully be merged with
@@ -2666,9 +2669,7 @@ bool SITargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
const Function *ParentFn = CI->getParent()->getParent();
if (AMDGPU::isEntryFunctionCC(ParentFn->getCallingConv()))
return false;
-
- auto Attr = ParentFn->getFnAttribute("disable-tail-calls");
- return (Attr.getValueAsString() != "true");
+ return true;
}
// The wave scratch offset register is used as the global base pointer.
@@ -2787,10 +2788,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
MVT PtrVT = MVT::i32;
// Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
- ++i, ++realArgIdx) {
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = OutVals[realArgIdx];
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -2830,7 +2830,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
MaybeAlign Alignment;
if (IsTailCall) {
- ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
unsigned OpSize = Flags.isByVal() ?
Flags.getByValSize() : VA.getValVT().getStoreSize();
@@ -2868,8 +2868,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
/*isVol = */ false, /*AlwaysInline = */ true,
/*isTailCall = */ false, DstInfo,
- MachinePointerInfo(UndefValue::get(Type::getInt8PtrTy(
- *DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS))));
+ MachinePointerInfo(AMDGPUAS::PRIVATE_ADDRESS));
MemOpChains.push_back(Cpy);
} else {
@@ -2986,7 +2985,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
IsThisReturn ? OutVals[0] : SDValue());
}
-Register SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
+Register SITargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = StringSwitch<Register>(RegName)
.Case("m0", AMDGPU::M0)
@@ -3411,7 +3410,7 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
std::tie(SubReg, Offset)
= computeIndirectRegAndOffset(TRI, VecRC, SrcReg, Offset);
- bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
+ const bool UseGPRIdxMode = ST.useVGPRIndexMode();
if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, true)) {
MachineBasicBlock::iterator I(&MI);
@@ -3506,7 +3505,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC,
SrcVec->getReg(),
Offset);
- bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
+ const bool UseGPRIdxMode = ST.useVGPRIndexMode();
if (Idx->getReg() == AMDGPU::NoRegister) {
MachineBasicBlock::iterator I(&MI);
@@ -3920,7 +3919,8 @@ MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT VT) const {
// however does not support denormals, so we do report fma as faster if we have
// a fast fma device and require denormals.
//
-bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const {
VT = VT.getScalarType();
switch (VT.getSimpleVT().SimpleTy) {
@@ -3929,7 +3929,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
// mad available which returns the same result as the separate operations
// which we should prefer over fma. We can't use this if we want to support
// denormals, so only report this in these cases.
- if (Subtarget->hasFP32Denormals())
+ if (hasFP32Denormals(MF))
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
// If the subtarget has v_fmac_f32, that's just as good as v_mac_f32.
@@ -3938,7 +3938,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
case MVT::f64:
return true;
case MVT::f16:
- return Subtarget->has16BitInsts() && Subtarget->hasFP16Denormals();
+ return Subtarget->has16BitInsts() && hasFP64FP16Denormals(MF);
default:
break;
}
@@ -3946,6 +3946,21 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
+bool SITargetLowering::isFMADLegalForFAddFSub(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ // TODO: Check future ftz flag
+ // v_mad_f32/v_mac_f32 do not support denormals.
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::f32)
+ return !hasFP32Denormals(DAG.getMachineFunction());
+ if (VT == MVT::f16) {
+ return Subtarget->hasMadF16() &&
+ !hasFP64FP16Denormals(DAG.getMachineFunction());
+ }
+
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
@@ -4416,8 +4431,8 @@ unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
- return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+ return (GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
AMDGPU::shouldEmitConstantsToTextSection(TT);
}
@@ -4425,9 +4440,9 @@ bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
// FIXME: Either avoid relying on address space here or change the default
// address space for functions to avoid the explicit check.
return (GV->getValueType()->isFunctionTy() ||
- GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+ GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
@@ -4694,10 +4709,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
// TODO: Use custom target PseudoSourceValue.
// TODO: We should use the value from the IR intrinsic call, but it might not
// be available and how do we get it?
- Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()),
- AMDGPUAS::CONSTANT_ADDRESS));
-
- MachinePointerInfo PtrInfo(V, StructOffset);
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
MinAlign(64, StructOffset),
MachineMemOperand::MODereferenceable |
@@ -5646,11 +5658,16 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
SDValue Offset, SDValue GLC, SDValue DLC,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+
+ const DataLayout &DataLayout = DAG.getDataLayout();
+ unsigned Align =
+ DataLayout.getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
- VT.getStoreSize(), VT.getStoreSize());
+ VT.getStoreSize(), Align);
if (!Offset->isDivergent()) {
SDValue Ops[] = {
@@ -5659,6 +5676,20 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
GLC,
DLC,
};
+
+ // Widen vec3 load to vec4.
+ if (VT.isVector() && VT.getVectorNumElements() == 3) {
+ EVT WidenedVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
+ auto WidenedOp = DAG.getMemIntrinsicNode(
+ AMDGPUISD::SBUFFER_LOAD, DL, DAG.getVTList(WidenedVT), Ops, WidenedVT,
+ MF.getMachineMemOperand(MMO, 0, WidenedVT.getStoreSize()));
+ auto Subvector = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VT, WidenedOp,
+ DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
+ return Subvector;
+ }
+
return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_LOAD, DL,
DAG.getVTList(VT), Ops, VT, MMO);
}
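The widening above handles the non-power-of-two vec3 case by loading the covering vec4 and taking the leading subvector; the relaxed assert later in this diff is what lets the 3-element type reach this point. Restated in isolation (Ops, MMO, VT and DL as in lowerSBuffer; note the fresh MachineMemOperand, since the widened load has a larger store size):

    // <3 x T> s-buffer load ==> extract_subvector(<4 x T> s-buffer load, 0)
    EVT WidenedVT =
        EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
    SDValue Wide = DAG.getMemIntrinsicNode(
        AMDGPUISD::SBUFFER_LOAD, DL, DAG.getVTList(WidenedVT), Ops, WidenedVT,
        MF.getMachineMemOperand(MMO, 0, WidenedVT.getStoreSize()));
    SDValue Result = DAG.getNode(
        ISD::EXTRACT_SUBVECTOR, DL, VT, Wide,
        DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));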
@@ -5670,11 +5701,10 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
MVT LoadVT = VT.getSimpleVT();
unsigned NumElts = LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
assert((LoadVT.getScalarType() == MVT::i32 ||
- LoadVT.getScalarType() == MVT::f32) &&
- isPowerOf2_32(NumElts));
+ LoadVT.getScalarType() == MVT::f32));
if (NumElts == 8 || NumElts == 16) {
- NumLoads = NumElts == 16 ? 4 : 2;
+ NumLoads = NumElts / 4;
LoadVT = MVT::v4i32;
}
@@ -5698,8 +5728,8 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
for (unsigned i = 0; i < NumLoads; ++i) {
Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32);
- Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
- Ops, LoadVT, MMO));
+ Loads.push_back(getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList, Ops,
+ LoadVT, MMO, DAG));
}
if (VT == MVT::v8i32 || VT == MVT::v16i32)
@@ -5918,22 +5948,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
}
}
- case Intrinsic::amdgcn_interp_p2_f16: {
- SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
- Op.getOperand(6), SDValue());
- SDValue Ops[] = {
- Op.getOperand(2), // Src0
- Op.getOperand(3), // Attrchan
- Op.getOperand(4), // Attr
- DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
- Op.getOperand(1), // Src2
- DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
- Op.getOperand(5), // high
- DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
- ToM0.getValue(1)
- };
- return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
- }
case Intrinsic::amdgcn_sin:
return DAG.getNode(AMDGPUISD::SIN_HW, DL, VT, Op.getOperand(1));
@@ -7088,13 +7102,16 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
EVT VT = Op.getOperand(3).getValueType();
auto *M = cast<MemSDNode>(Op);
- unsigned Opcode = VT.isVector() ? AMDGPUISD::ATOMIC_PK_FADD
- : AMDGPUISD::ATOMIC_FADD;
+ if (VT.isVector()) {
+ return DAG.getMemIntrinsicNode(
+ AMDGPUISD::ATOMIC_PK_FADD, DL, Op->getVTList(), Ops, VT,
+ M->getMemOperand());
+ }
- return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
- M->getMemOperand());
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_FADD, DL, VT,
+ DAG.getVTList(VT, MVT::Other), Ops,
+ M->getMemOperand()).getValue(1);
}
-
case Intrinsic::amdgcn_end_cf:
return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
Op->getOperand(2), Chain), 0);
@@ -7451,8 +7468,11 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// resource descriptor, we can only make private accesses up to a certain
// size.
switch (Subtarget->getMaxPrivateElementSize()) {
- case 4:
- return scalarizeVectorLoad(Load, DAG);
+ case 4: {
+ SDValue Ops[2];
+ std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG);
+ return DAG.getMergeValues(Ops, DL);
+ }
case 8:
if (NumElements > 2)
return SplitVectorLoad(Op, DAG);
@@ -7529,7 +7549,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
const SDNodeFlags Flags = Op->getFlags();
bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || Flags.hasAllowReciprocal();
- if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals())
+ if (!Unsafe && VT == MVT::f32 && hasFP32Denormals(DAG.getMachineFunction()))
return SDValue();
if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
@@ -7672,7 +7692,7 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
static const SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG,
const SDLoc &SL, const GCNSubtarget *ST) {
assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE");
- int DPDenormModeDefault = ST->hasFP64Denormals()
+ int DPDenormModeDefault = hasFP64FP16Denormals(DAG.getMachineFunction())
? FP_DENORM_FLUSH_NONE
: FP_DENORM_FLUSH_IN_FLUSH_OUT;
@@ -7708,7 +7728,9 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
- if (!Subtarget->hasFP32Denormals()) {
+ const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
+
+ if (!HasFP32Denormals) {
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue EnableDenorm;
@@ -7752,8 +7774,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
NumeratorScaled, Fma3);
- if (!Subtarget->hasFP32Denormals()) {
-
+ if (!HasFP32Denormals) {
SDValue DisableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue DisableDenormValue =
@@ -8727,7 +8748,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
auto F = CFP->getValueAPF();
if (F.isNaN() && F.isSignaling())
return false;
- return !F.isDenormal() || denormalsEnabledForType(Op.getValueType());
+ return !F.isDenormal() || denormalsEnabledForType(DAG, Op.getValueType());
}
// If source is a result of another standard FP operation it is already in
@@ -8796,7 +8817,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
// snans will be quieted, so we only need to worry about denormals.
if (Subtarget->supportsMinMaxDenormModes() ||
- denormalsEnabledForType(Op.getValueType()))
+ denormalsEnabledForType(DAG, Op.getValueType()))
return true;
// Flushing may be required.
@@ -8868,7 +8889,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
LLVM_FALLTHROUGH;
}
default:
- return denormalsEnabledForType(Op.getValueType()) &&
+ return denormalsEnabledForType(DAG, Op.getValueType()) &&
DAG.isKnownNeverSNaN(Op);
}
@@ -8879,7 +8900,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
SDValue SITargetLowering::getCanonicalConstantFP(
SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const {
// Flush denormals to 0 if not enabled.
- if (C.isDenormal() && !denormalsEnabledForType(VT))
+ if (C.isDenormal() && !denormalsEnabledForType(DAG, VT))
return DAG.getConstantFP(0.0, SL, VT);
if (C.isNaN()) {
@@ -9417,8 +9438,8 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
// Only do this if we are not trying to support denormals. v_mad_f32 does not
// support denormals ever.
- if (((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
- (VT == MVT::f16 && !Subtarget->hasFP16Denormals() &&
+ if (((VT == MVT::f32 && !hasFP32Denormals(DAG.getMachineFunction())) ||
+ (VT == MVT::f16 && !hasFP64FP16Denormals(DAG.getMachineFunction()) &&
getSubtarget()->hasMadF16())) &&
isOperationLegal(ISD::FMAD, VT))
return ISD::FMAD;
@@ -9427,7 +9448,7 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
(N0->getFlags().hasAllowContract() &&
N1->getFlags().hasAllowContract())) &&
- isFMAFasterThanFMulAndFAdd(VT)) {
+ isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
return ISD::FMA;
}
@@ -9543,6 +9564,8 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
auto Cond = RHS.getOperand(0);
+ // If this won't be a real VOPC output, we would still need to insert an
+ // extra instruction anyway.
if (!isBoolSGPR(Cond))
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
@@ -9573,6 +9596,26 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ // sub x, zext (setcc) => subcarry x, 0, setcc
+ // sub x, sext (setcc) => addcarry x, 0, setcc
+ unsigned Opc = RHS.getOpcode();
+ switch (Opc) {
+ default: break;
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ANY_EXTEND: {
+ auto Cond = RHS.getOperand(0);
+ // If this won't be a real VOPC output, we would still need to insert an
+ // extra instruction anyway.
+ if (!isBoolSGPR(Cond))
+ break;
+ SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
+ SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
+ Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
+ return DAG.getNode(Opc, SL, VTList, Args);
+ }
+ }
+
if (LHS.getOpcode() == ISD::SUBCARRY) {
// sub (subcarry x, 0, cc), y => subcarry x, y, cc
auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
@@ -10884,14 +10927,14 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
return false;
}
-bool SITargetLowering::denormalsEnabledForType(EVT VT) const {
+bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG,
+ EVT VT) const {
switch (VT.getScalarType().getSimpleVT().SimpleTy) {
case MVT::f32:
- return Subtarget->hasFP32Denormals();
+ return hasFP32Denormals(DAG.getMachineFunction());
case MVT::f64:
- return Subtarget->hasFP64Denormals();
case MVT::f16:
- return Subtarget->hasFP16Denormals();
+ return hasFP64FP16Denormals(DAG.getMachineFunction());
default:
return false;
}
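
The rewritten query above keys denormal support off the current MachineFunction (via hasFP32Denormals / hasFP64FP16Denormals) instead of a fixed subtarget feature, with f64 and f16 sharing one control. A rough standalone model of that dispatch follows; the per-function environment struct is purely illustrative, not the LLVM interface.

enum class FPType { F32, F64, F16, Other };

// Assumed stand-in: in the patch this information comes from
// hasFP32Denormals(MF) / hasFP64FP16Denormals(MF), not from a struct.
struct FunctionFPEnv {
  bool FP32Denormals;
  bool FP64FP16Denormals; // f64 and f16 share one hardware control
};

bool denormalsEnabledForType(const FunctionFPEnv &Env, FPType Ty) {
  switch (Ty) {
  case FPType::F32:
    return Env.FP32Denormals;
  case FPType::F64:
  case FPType::F16:
    return Env.FP64FP16Denormals;
  default:
    return false;
  }
}
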
@@ -10930,6 +10973,12 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// TODO: Do have these for flat. Older targets also had them for buffers.
unsigned AS = RMW->getPointerAddressSpace();
+
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS && Subtarget->hasAtomicFaddInsts()) {
+ return RMW->use_empty() ? AtomicExpansionKind::None :
+ AtomicExpansionKind::CmpXChg;
+ }
+
return (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomics()) ?
AtomicExpansionKind::None : AtomicExpansionKind::CmpXChg;
}
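
The new early-out above only uses the global fadd instructions when the atomicrmw result is dead, since those instructions return nothing; otherwise the operation still expands to a compare-and-swap loop. A simplified sketch of that decision, with stand-in types rather than the LLVM ones:

enum class AtomicExpansionKind { None, CmpXChg };

struct RMWInfo {
  unsigned AddrSpace;
  bool ResultUsed; // does anything read the returned value?
};

AtomicExpansionKind expandFAdd(const RMWInfo &RMW, bool HasAtomicFaddInsts,
                               bool HasLDSFPAtomics, unsigned GlobalAS,
                               unsigned LocalAS) {
  // The global fadd instructions return nothing, so they are only usable
  // when the atomicrmw result is dead.
  if (RMW.AddrSpace == GlobalAS && HasAtomicFaddInsts)
    return RMW.ResultUsed ? AtomicExpansionKind::CmpXChg
                          : AtomicExpansionKind::None;
  return (RMW.AddrSpace == LocalAS && HasLDSFPAtomics)
             ? AtomicExpansionKind::None
             : AtomicExpansionKind::CmpXChg;
}
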
@@ -10956,6 +11005,8 @@ SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
}
static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited) {
+ if (!isa<Instruction>(V))
+ return false;
if (!Visited.insert(V).second)
return false;
bool Result = false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
index f0102feb65c4..d59495b052a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -222,7 +222,8 @@ public:
const GCNSubtarget *getSubtarget() const;
- bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const override;
+ bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT,
+ EVT SrcVT) const override;
bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;
@@ -260,6 +261,14 @@ public:
bool isMemOpUniform(const SDNode *N) const;
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
+
+ static bool isFlatGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
+ }
+
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
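
The new static helper groups every address space that shares the flat 64-bit pointer representation, including all target-specific spaces numbered above MAX_AMDGPU_ADDRESS. A minimal sketch of how such a predicate could pair with the no-op cast query declared just above it; the enum values and the isNoopAddrSpaceCast body are illustrative assumptions, since this hunk only shows declarations.

#include <cassert>

namespace AMDGPUAS {
enum : unsigned {
  FLAT_ADDRESS = 0, // illustrative values, not taken from the patch
  GLOBAL_ADDRESS = 1,
  CONSTANT_ADDRESS = 4,
  MAX_AMDGPU_ADDRESS = 7,
};
}

static bool isFlatGlobalAddrSpace(unsigned AS) {
  return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS || AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
}

// Assumed pairing: a cast between two flat-compatible spaces needs no code.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) {
  return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
}

int main() {
  assert(isNoopAddrSpaceCast(AMDGPUAS::FLAT_ADDRESS, AMDGPUAS::GLOBAL_ADDRESS));
}
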
@@ -321,7 +330,7 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
- Register getRegisterByName(const char* RegName, EVT VT,
+ Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
MachineBasicBlock *splitKillBlock(MachineInstr &MI,
@@ -340,7 +349,11 @@ public:
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
- bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
+ bool isFMADLegalForFAddFSub(const SelectionDAG &DAG,
+ const SDNode *N) const override;
+
SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
@@ -380,7 +393,7 @@ public:
bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
unsigned MaxDepth = 5) const;
- bool denormalsEnabledForType(EVT VT) const;
+ bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
bool isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 87e63fcc4a04..80c044ec00cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -41,7 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "si-insert-skips"
static cl::opt<unsigned> SkipThresholdFlag(
- "amdgpu-skip-threshold",
+ "amdgpu-skip-threshold-legacy",
cl::desc("Number of instructions before jumping over divergent control flow"),
cl::init(12), cl::Hidden);
@@ -466,6 +466,9 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
+ case AMDGPU::S_CBRANCH_EXECZ:
+ ExecBranchStack.push_back(MI.getOperand(0).getMBB());
+ break;
case AMDGPU::SI_MASK_BRANCH:
ExecBranchStack.push_back(MI.getOperand(0).getMBB());
MadeChange |= skipMaskBranch(MI, MBB);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index dcb04e426584..ef662d55cb0a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -42,7 +42,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -372,7 +374,8 @@ private:
AMDGPU::IsaVersion IV;
DenseSet<MachineInstr *> TrackedWaitcntSet;
- DenseSet<MachineInstr *> VCCZBugHandledSet;
+ DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses;
+ MachinePostDominatorTree *PDT;
struct BlockInfo {
MachineBasicBlock *MBB;
@@ -407,6 +410,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<MachinePostDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -752,7 +756,10 @@ void WaitcntBrackets::determineWait(InstCounterType T, uint32_t ScoreToWait,
// with a conservative value of 0 for the counter.
addWait(Wait, T, 0);
} else {
- addWait(Wait, T, UB - ScoreToWait);
+ // If a counter has been maxed out, avoid overflow by waiting for
+ // MAX(CounterType) - 1 instead.
+ uint32_t NeededWait = std::min(UB - ScoreToWait, getWaitCountMax(T) - 1);
+ addWait(Wait, T, NeededWait);
}
}
}
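
The clamp above keeps the computed wait from overflowing the s_waitcnt immediate when a counter's score bracket has run far ahead of the event being waited on. The arithmetic in isolation, with made-up numbers (WaitCountMax stands in for getWaitCountMax(T)):

#include <algorithm>
#include <cassert>
#include <cstdint>

uint32_t neededWait(uint32_t UB, uint32_t ScoreToWait, uint32_t WaitCountMax) {
  // Clamp to the largest representable non-trivial count.
  return std::min(UB - ScoreToWait, WaitCountMax - 1);
}

int main() {
  // Without the clamp the wait would be 998, which cannot be encoded.
  assert(neededWait(1000, 2, 16) == 15);
}
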
@@ -790,6 +797,7 @@ bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const {
INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
false)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
false)
@@ -939,19 +947,33 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
}
if (MI.isCall() && callWaitsOnFunctionEntry(MI)) {
- // Don't bother waiting on anything except the call address. The function
- // is going to insert a wait on everything in its prolog. This still needs
- // to be careful if the call target is a load (e.g. a GOT load).
+ // The function is going to insert a wait on everything in its prolog.
+ // This still needs to be careful if the call target is a load (e.g. a GOT
+ // load). We also need to check WAW dependency with the saved PC.
Wait = AMDGPU::Waitcnt();
int CallAddrOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
- RegInterval Interval = ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI,
- CallAddrOpIdx, false);
- for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ RegInterval CallAddrOpInterval = ScoreBrackets.getRegInterval(
+ &MI, TII, MRI, TRI, CallAddrOpIdx, false);
+
+ for (signed RegNo = CallAddrOpInterval.first;
+ RegNo < CallAddrOpInterval.second; ++RegNo)
ScoreBrackets.determineWait(
LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+
+ int RtnAddrOpIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+ if (RtnAddrOpIdx != -1) {
+ RegInterval RtnAddrOpInterval = ScoreBrackets.getRegInterval(
+ &MI, TII, MRI, TRI, RtnAddrOpIdx, false);
+
+ for (signed RegNo = RtnAddrOpInterval.first;
+ RegNo < RtnAddrOpInterval.second; ++RegNo)
+ ScoreBrackets.determineWait(
+ LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
}
+
} else {
// FIXME: Should not be relying on memoperands.
// Look at the source operands of every instruction to see if
@@ -996,6 +1018,13 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
if (MI.mayStore()) {
// FIXME: Should not be relying on memoperands.
for (const MachineMemOperand *Memop : MI.memoperands()) {
+ const Value *Ptr = Memop->getValue();
+ if (SLoadAddresses.count(Ptr)) {
+ addWait(Wait, LGKM_CNT, 0);
+ if (PDT->dominates(MI.getParent(),
+ SLoadAddresses.find(Ptr)->second))
+ SLoadAddresses.erase(Ptr);
+ }
unsigned AS = Memop->getAddrSpace();
if (AS != AMDGPUAS::LOCAL_ADDRESS)
continue;
@@ -1065,7 +1094,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
assert(II->getOpcode() == AMDGPU::S_WAITCNT_VSCNT);
assert(II->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
ScoreBrackets.applyWaitcnt(
- AMDGPU::Waitcnt(0, 0, 0, II->getOperand(1).getImm()));
+ AMDGPU::Waitcnt(~0u, ~0u, ~0u, II->getOperand(1).getImm()));
}
}
}
@@ -1124,7 +1153,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
Wait.VsCnt = ~0u;
}
- LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
+ LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
<< "Old Instr: " << MI << '\n'
<< "New Instr: " << *II << '\n');
@@ -1141,7 +1170,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
TrackedWaitcntSet.insert(SWaitInst);
Modified = true;
- LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
+ LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
<< "Old Instr: " << MI << '\n'
<< "New Instr: " << *SWaitInst << '\n');
}
@@ -1157,7 +1186,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
TrackedWaitcntSet.insert(SWaitInst);
Modified = true;
- LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
+ LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
<< "Old Instr: " << MI << '\n'
<< "New Instr: " << *SWaitInst << '\n');
}
@@ -1195,7 +1224,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
}
} else if (TII->isFLAT(Inst)) {
- assert(Inst.mayLoad() || Inst.mayStore());
+ assert(Inst.mayLoadOrStore());
if (TII->usesVM_CNT(Inst)) {
if (!ST->hasVscnt())
@@ -1374,16 +1403,22 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
}
bool VCCZBugWorkAround = false;
- if (readsVCCZ(Inst) &&
- (!VCCZBugHandledSet.count(&Inst))) {
+ if (readsVCCZ(Inst)) {
if (ScoreBrackets.getScoreLB(LGKM_CNT) <
ScoreBrackets.getScoreUB(LGKM_CNT) &&
ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
- if (ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ if (ST->hasReadVCCZBug())
VCCZBugWorkAround = true;
}
}
+ if (TII->isSMRD(Inst)) {
+ for (const MachineMemOperand *Memop : Inst.memoperands()) {
+ const Value *Ptr = Memop->getValue();
+ SLoadAddresses.insert(std::make_pair(Ptr, Inst.getParent()));
+ }
+ }
+
// Generate an s_waitcnt instruction to be placed before
// cur_Inst, if needed.
Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr);
@@ -1417,7 +1452,6 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
TII->get(ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
TRI->getVCC())
.addReg(TRI->getVCC());
- VCCZBugHandledSet.insert(&Inst);
Modified = true;
}
@@ -1434,6 +1468,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
IV = AMDGPU::getIsaVersion(ST->getCPU());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ PDT = &getAnalysis<MachinePostDominatorTree>();
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
for (auto T : inst_counter_types())
@@ -1457,7 +1492,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
RegisterEncoding.SGPR0 + HardwareLimits.NumSGPRsMax - 1;
TrackedWaitcntSet.clear();
- VCCZBugHandledSet.clear();
RpotIdxMap.clear();
BlockInfos.clear();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d97e6a62971b..d53950ca4465 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -85,7 +85,9 @@ BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
: AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
- RI(ST), ST(ST) {}
+ RI(ST), ST(ST) {
+ SchedModel.init(&ST);
+}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -260,6 +262,9 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
int64_t &Offset,
const TargetRegisterInfo *TRI) const {
+ if (!LdSt.mayLoadOrStore())
+ return false;
+
unsigned Opc = LdSt.getOpcode();
if (isDS(LdSt)) {
@@ -270,12 +275,11 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
// TODO: ds_consume/ds_append use M0 for the base address. Is it safe to
// report that here?
- if (!BaseOp)
+ if (!BaseOp || !BaseOp->isReg())
return false;
Offset = OffsetImm->getImm();
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+
return true;
}
@@ -307,9 +311,11 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
EltSize *= 64;
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
+ if (!BaseOp->isReg())
+ return false;
+
Offset = EltSize * Offset0;
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+
return true;
}
@@ -346,12 +352,12 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
getNamedOperand(LdSt, AMDGPU::OpName::offset);
BaseOp = AddrReg;
Offset = OffsetImm->getImm();
-
if (SOffset) // soffset can be an inline immediate.
Offset += SOffset->getImm();
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+ if (!BaseOp->isReg())
+ return false;
+
return true;
}
@@ -364,8 +370,9 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
const MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
BaseOp = SBaseReg;
Offset = OffsetImm->getImm();
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+ if (!BaseOp->isReg())
+ return false;
+
return true;
}
@@ -383,8 +390,8 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
}
Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+ if (!BaseOp->isReg())
+ return false;
return true;
}
@@ -418,7 +425,7 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
const MachineFunction &MF = *MI1.getParent()->getParent();
const DataLayout &DL = MF.getFunction().getParent()->getDataLayout();
Base1 = GetUnderlyingObject(Base1, DL);
- Base2 = GetUnderlyingObject(Base1, DL);
+ Base2 = GetUnderlyingObject(Base2, DL);
if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
return false;
@@ -508,8 +515,8 @@ bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) {
MachineFunction *MF = MBB.getParent();
DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
"illegal SGPR to VGPR copy",
@@ -523,8 +530,8 @@ static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
if (RC == &AMDGPU::VGPR_32RegClass) {
@@ -542,7 +549,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
RC == &AMDGPU::SReg_32RegClass) {
if (SrcReg == AMDGPU::SCC) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
- .addImm(-1)
+ .addImm(1)
.addImm(0);
return;
}
@@ -840,7 +847,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
- .addImm(-1)
+ .addImm(1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
@@ -855,7 +862,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
- .addImm(-1);
+ .addImm(1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(FalseReg)
@@ -900,7 +907,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
.addImm(0);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
- .addImm(-1)
+ .addImm(1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
@@ -919,7 +926,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
- .addImm(-1);
+ .addImm(1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(FalseReg)
@@ -1062,6 +1069,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
+ assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
// We are only allowed to create one new instruction when spilling
// registers, so we need to use pseudo instruction for spilling SGPRs.
@@ -1190,6 +1198,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
+ assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
@@ -2542,9 +2551,9 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
const MachineInstr &MIb) const {
- assert((MIa.mayLoad() || MIa.mayStore()) &&
+ assert(MIa.mayLoadOrStore() &&
"MIa must load from or modify a memory location");
- assert((MIb.mayLoad() || MIb.mayStore()) &&
+ assert(MIb.mayLoadOrStore() &&
"MIb must load from or modify a memory location");
if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects())
@@ -3921,20 +3930,18 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
? MRI.getRegClass(Reg)
: RI.getPhysRegClass(Reg);
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
- RC = TRI->getSubRegClass(RC, MO.getSubReg());
-
- // In order to be legal, the common sub-class must be equal to the
- // class of the current operand. For example:
- //
- // v_mov_b32 s0 ; Operand defined as vsrc_b32
- // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
- //
- // s_sendmsg 0, s0 ; Operand defined as m0reg
- // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+ const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass);
+ if (MO.getSubReg()) {
+ const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+ const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF);
+ if (!SuperRC)
+ return false;
- return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+ DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg());
+ if (!DRC)
+ return false;
+ }
+ return RC->hasSuperClassEq(DRC);
}
bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
@@ -4451,12 +4458,12 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
// Update dominators. We know that MBB immediately dominates LoopBB, that
// LoopBB immediately dominates RemainderBB, and that RemainderBB immediately
// dominates all of the successors transferred to it from MBB that MBB used
- // to dominate.
+ // to properly dominate.
if (MDT) {
MDT->addNewBlock(LoopBB, &MBB);
MDT->addNewBlock(RemainderBB, LoopBB);
for (auto &Succ : RemainderBB->successors()) {
- if (MDT->dominates(&MBB, Succ)) {
+ if (MDT->properlyDominates(&MBB, Succ)) {
MDT->changeImmediateDominator(Succ, RemainderBB);
}
}
@@ -6211,7 +6218,11 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
if (ST.hasAddNoCarry())
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg);
- Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+ // If available, prefer to use vcc.
+ Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
+ ? Register(RI.getVCC())
+ : RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+
// TODO: Users need to deal with this.
if (!UnusedCarry.isValid())
return MachineInstrBuilder();
@@ -6329,6 +6340,26 @@ static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
llvm_unreachable("Unknown subtarget generation!");
}
+bool SIInstrInfo::isAsmOnlyOpcode(int MCOp) const {
+ switch (MCOp) {
+ // These opcodes use indirect register addressing, so they need special
+ // handling by codegen (currently missing). It is therefore too risky to
+ // let the DPP combiner or the SDWA peephole select them.
+ case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
+ case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
+ case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
+ case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
+ case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
+ case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
+ case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
+ case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
+ return true;
+ default:
+ return false;
+ }
+}
+
int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
SIEncodingFamily Gen = subtargetEncodingFamily(ST);
@@ -6367,6 +6398,9 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
if (MCOp == (uint16_t)-1)
return -1;
+ if (isAsmOnlyOpcode(MCOp))
+ return -1;
+
return MCOp;
}
@@ -6541,14 +6575,14 @@ MachineInstr *SIInstrInfo::createPHIDestinationCopy(
MachineInstr *SIInstrInfo::createPHISourceCopy(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
- const DebugLoc &DL, Register Src, Register SrcSubReg, Register Dst) const {
+ const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const {
if (InsPt != MBB.end() &&
(InsPt->getOpcode() == AMDGPU::SI_IF ||
InsPt->getOpcode() == AMDGPU::SI_ELSE ||
InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
InsPt->definesRegister(Src)) {
InsPt++;
- return BuildMI(MBB, InsPt, InsPt->getDebugLoc(),
+ return BuildMI(MBB, InsPt, DL,
get(ST.isWave32() ? AMDGPU::S_MOV_B32_term
: AMDGPU::S_MOV_B64_term),
Dst)
@@ -6560,3 +6594,53 @@ MachineInstr *SIInstrInfo::createPHISourceCopy(
}
bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); }
+
+MachineInstr *SIInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
+ VirtRegMap *VRM) const {
+ // This is a bit of a hack (copied from AArch64). Consider this instruction:
+ //
+ // %0:sreg_32 = COPY $m0
+ //
+ // We explicitly chose SReg_32 for the virtual register so such a copy might
+ // be eliminated by RegisterCoalescer. However, that may not be possible, and
+ // %0 may even spill. We can't spill $m0 normally (it would require copying to
+ // a numbered SGPR anyway), and since it is in the SReg_32 register class,
+ // TargetInstrInfo::foldMemoryOperand() is going to try.
+ //
+ // To prevent that, constrain the %0 register class here.
+ if (MI.isFullCopy()) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ if (DstReg == AMDGPU::M0 && SrcReg.isVirtual()) {
+ MF.getRegInfo().constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
+ return nullptr;
+ }
+
+ if (SrcReg == AMDGPU::M0 && DstReg.isVirtual()) {
+ MF.getRegInfo().constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
+ return nullptr;
+ }
+ }
+
+ return nullptr;
+}
+
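
The hook above never folds anything; its only job is to tighten the register class of a virtual register copied to or from m0, so later spilling cannot involve m0 directly. A toy model of "constraining the class", using plain string sets instead of LLVM register classes:

#include <cassert>
#include <set>
#include <string>

struct VirtReg {
  std::set<std::string> Allowed; // physical registers this vreg may occupy
};

void constrainAwayFromM0(VirtReg &VR) {
  // Conceptually SReg_32 -> SReg_32_XM0: m0 can never be assigned to it.
  VR.Allowed.erase("m0");
}

int main() {
  VirtReg VR{{"s0", "s1", "m0"}};
  constrainAwayFromM0(VR);
  assert(!VR.Allowed.count("m0"));
}
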
+unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &MI,
+ unsigned *PredCost) const {
+ if (MI.isBundle()) {
+ MachineBasicBlock::const_instr_iterator I(MI.getIterator());
+ MachineBasicBlock::const_instr_iterator E(MI.getParent()->instr_end());
+ unsigned Lat = 0, Count = 0;
+ for (++I; I != E && I->isBundledWithPred(); ++I) {
+ ++Count;
+ Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
+ }
+ return Lat + Count - 1;
+ }
+
+ return SchedModel.computeInstrLatency(&MI);
+}
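
The new getInstrLatency models a bundle as its slowest member plus one issue cycle for each additional instruction. The same rule on plain integers, with arbitrary latencies standing in for real scheduling-model values:

#include <algorithm>
#include <cassert>
#include <vector>

unsigned bundleLatency(const std::vector<unsigned> &MemberLatencies) {
  unsigned Lat = 0, Count = 0;
  for (unsigned L : MemberLatencies) {
    ++Count;
    Lat = std::max(Lat, L); // latency of the slowest member...
  }
  return Count ? Lat + Count - 1 : 0; // ...plus one cycle per extra member
}

int main() {
  assert(bundleLatency({4, 1, 1}) == 6); // max 4, three members -> 4 + 2
}
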
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index be463442c888..b151a94b0d11 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
@@ -46,6 +47,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
const SIRegisterInfo RI;
const GCNSubtarget &ST;
+ TargetSchedModel SchedModel;
// The inverse predicate should have the negative value.
enum BranchPredicate {
@@ -192,7 +194,7 @@ public:
int64_t Offset1, unsigned NumLoads) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI,
@@ -692,6 +694,10 @@ public:
bool isInlineConstant(const APInt &Imm) const;
+ bool isInlineConstant(const APFloat &Imm) const {
+ return isInlineConstant(Imm.bitcastToAPInt());
+ }
+
bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
bool isInlineConstant(const MachineOperand &MO,
@@ -977,7 +983,7 @@ public:
MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsPt,
const DebugLoc &DL, Register Src,
- Register SrcSubReg,
+ unsigned SrcSubReg,
Register Dst) const override;
bool isWave32() const;
@@ -1017,6 +1023,10 @@ public:
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
+ /// \brief Check if this opcode should only be used by the assembler.
+ /// Return true if this opcode should not be used by codegen.
+ bool isAsmOnlyOpcode(int MCOp) const;
+
const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
const TargetRegisterInfo *TRI,
const MachineFunction &MF)
@@ -1027,6 +1037,17 @@ public:
}
void fixImplicitOperands(MachineInstr &MI) const;
+
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
+ ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
+ int FrameIndex,
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const override;
+
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &MI,
+ unsigned *PredCost = nullptr) const override;
};
/// \brief Returns true if a reg:subreg pair P has a TRC class
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 1eecbf555613..85e8d0582dcd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -225,7 +225,6 @@ class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
-def SIglobal_atomic_fadd : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_FADD", f32>;
def SIglobal_atomic_pk_fadd : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_PK_FADD", v2f16>;
def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
@@ -324,7 +323,7 @@ defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;
def atomic_fadd_global_noret : PatFrag<
(ops node:$ptr, node:$value),
- (SIglobal_atomic_fadd node:$ptr, node:$value)> {
+ (atomic_load_fadd node:$ptr, node:$value)> {
// FIXME: Move this
let MemoryVT = f32;
let IsAtomic = 1;
@@ -580,46 +579,37 @@ def si_setcc_uniform : PatFrag <
// SDNodes PatFrags for d16 loads
//===----------------------------------------------------------------------===//
-class LoadD16Frag <SDPatternOperator op> : PatFrag<(ops node:$ptr, node:$tied_in), (op node:$ptr, node:$tied_in)>;
-class LocalLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, LocalAddress;
-class GlobalLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, GlobalLoadAddress;
-class PrivateLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, PrivateAddress;
-class FlatLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, FlatLoadAddress;
-
-def load_d16_hi_local : LocalLoadD16 <SIload_d16_hi>;
-def az_extloadi8_d16_hi_local : LocalLoadD16 <SIload_d16_hi_u8>;
-def sextloadi8_d16_hi_local : LocalLoadD16 <SIload_d16_hi_i8>;
-
-def load_d16_hi_global : GlobalLoadD16 <SIload_d16_hi>;
-def az_extloadi8_d16_hi_global : GlobalLoadD16 <SIload_d16_hi_u8>;
-def sextloadi8_d16_hi_global : GlobalLoadD16 <SIload_d16_hi_i8>;
-
-def load_d16_hi_private : PrivateLoadD16 <SIload_d16_hi>;
-def az_extloadi8_d16_hi_private : PrivateLoadD16 <SIload_d16_hi_u8>;
-def sextloadi8_d16_hi_private : PrivateLoadD16 <SIload_d16_hi_i8>;
+class LoadD16Frag <SDPatternOperator op> : PatFrag<
+ (ops node:$ptr, node:$tied_in),
+ (op node:$ptr, node:$tied_in)> {
+ let IsLoad = 1;
+}
-def load_d16_hi_flat : FlatLoadD16 <SIload_d16_hi>;
-def az_extloadi8_d16_hi_flat : FlatLoadD16 <SIload_d16_hi_u8>;
-def sextloadi8_d16_hi_flat : FlatLoadD16 <SIload_d16_hi_i8>;
+foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
+let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
+def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;
-def load_d16_lo_local : LocalLoadD16 <SIload_d16_lo>;
-def az_extloadi8_d16_lo_local : LocalLoadD16 <SIload_d16_lo_u8>;
-def sextloadi8_d16_lo_local : LocalLoadD16 <SIload_d16_lo_i8>;
+def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
+ let MemoryVT = i8;
+}
-def load_d16_lo_global : GlobalLoadD16 <SIload_d16_lo>;
-def az_extloadi8_d16_lo_global : GlobalLoadD16 <SIload_d16_lo_u8>;
-def sextloadi8_d16_lo_global : GlobalLoadD16 <SIload_d16_lo_i8>;
+def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
+ let MemoryVT = i8;
+}
-def load_d16_lo_private : PrivateLoadD16 <SIload_d16_lo>;
-def az_extloadi8_d16_lo_private : PrivateLoadD16 <SIload_d16_lo_u8>;
-def sextloadi8_d16_lo_private : PrivateLoadD16 <SIload_d16_lo_i8>;
+def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;
-def load_d16_lo_flat : FlatLoadD16 <SIload_d16_lo>;
-def az_extloadi8_d16_lo_flat : FlatLoadD16 <SIload_d16_lo_u8>;
-def sextloadi8_d16_lo_flat : FlatLoadD16 <SIload_d16_lo_i8>;
+def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
+ let MemoryVT = i8;
+}
+def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
+ let MemoryVT = i8;
+}
+} // End let AddressSpaces = ...
+} // End foreach AddrSpace
def lshr_rev : PatFrag <
(ops node:$src1, node:$src0),
@@ -687,6 +677,10 @@ def as_i16imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;
+def as_i16timm : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
+}]>;
+
def as_i32imm: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
@@ -738,14 +732,27 @@ def i64imm_32bit : ImmLeaf<i64, [{
return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;
-class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
- return isInlineImmediate(N);
+def InlineImm16 : ImmLeaf<i16, [{
+ return isInlineImmediate16(Imm);
+}]>;
+
+def InlineImm32 : ImmLeaf<i32, [{
+ return isInlineImmediate32(Imm);
+}]>;
+
+def InlineImm64 : ImmLeaf<i64, [{
+ return isInlineImmediate64(Imm);
}]>;
-class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
- return isInlineImmediate(N);
+def InlineImmFP32 : FPImmLeaf<f32, [{
+ return isInlineImmediate(Imm);
}]>;
+def InlineImmFP64 : FPImmLeaf<f64, [{
+ return isInlineImmediate(Imm);
+}]>;
+
+
class VGPRImm <dag frag> : PatLeaf<frag, [{
return isVGPRImm(N);
}]>;
@@ -763,8 +770,8 @@ def NegSubInlineConst16 : ImmLeaf<i16, [{
return Imm < -16 && Imm >= -64;
}], NegateImm>;
-def ShiftAmt32Imm : PatLeaf <(imm), [{
- return N->getZExtValue() < 32;
+def ShiftAmt32Imm : ImmLeaf <i32, [{
+ return Imm < 32;
}]>;
def getNegV2I16Imm : SDNodeXForm<build_vector, [{
@@ -996,6 +1003,12 @@ class NamedOperandBit<string Name, AsmOperandClass MatchClass> : Operand<i1> {
let ParserMatchClass = MatchClass;
}
+class NamedOperandBit_0<string Name, AsmOperandClass MatchClass> :
+ OperandWithDefaultOps<i1, (ops (i1 0))> {
+ let PrintMethod = "print"#Name;
+ let ParserMatchClass = MatchClass;
+}
+
class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
let PrintMethod = "print"#Name;
let ParserMatchClass = MatchClass;
@@ -1011,6 +1024,12 @@ class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
let ParserMatchClass = MatchClass;
}
+class NamedOperandU32_0<string Name, AsmOperandClass MatchClass> :
+ OperandWithDefaultOps<i32, (ops (i32 0))> {
+ let PrintMethod = "print"#Name;
+ let ParserMatchClass = MatchClass;
+}
+
class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
OperandWithDefaultOps<i32, (ops (i32 0))> {
let PrintMethod = "print"#Name;
@@ -1031,7 +1050,13 @@ def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
def gds : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>;
def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
+def omod0 : NamedOperandU32_0<"OModSI", NamedMatchClass<"OModSI">>;
+
+// We need to make the cases with a default value of 0 distinct from those
+// with no default, to handle cases where an optional operand appears
+// before a mandatory operand.
def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
+def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
@@ -1245,7 +1270,6 @@ def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
-def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
@@ -1381,7 +1405,7 @@ multiclass EXP_m<bit done, SDPatternOperator node> {
def _si : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
EXPe {
- let AssemblerPredicates = [isGFX6GFX7];
+ let AssemblerPredicate = isGFX6GFX7;
let DecoderNamespace = "GFX6GFX7";
let DisableDecoder = DisableSIDecoder;
}
@@ -1389,7 +1413,7 @@ multiclass EXP_m<bit done, SDPatternOperator node> {
def _vi : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
EXPe_vi {
- let AssemblerPredicates = [isGFX8GFX9];
+ let AssemblerPredicate = isGFX8GFX9;
let DecoderNamespace = "GFX8";
let DisableDecoder = DisableVIDecoder;
}
@@ -1397,7 +1421,7 @@ multiclass EXP_m<bit done, SDPatternOperator node> {
def _gfx10 : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.GFX10>,
EXPe {
- let AssemblerPredicates = [isGFX10Plus];
+ let AssemblerPredicate = isGFX10Plus;
let DecoderNamespace = "GFX10";
let DisableDecoder = DisableSIDecoder;
}
@@ -1587,11 +1611,11 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if (!eq(HasModifiers, 1),
// VOP1 with modifiers
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- clampmod:$clamp, omod:$omod)
+ clampmod0:$clamp, omod0:$omod)
/* else */,
// VOP1 without modifiers
!if (!eq(HasIntClamp, 1),
- (ins Src0RC:$src0, clampmod:$clamp),
+ (ins Src0RC:$src0, clampmod0:$clamp),
(ins Src0RC:$src0))
/* endif */ ),
!if (!eq(NumSrcArgs, 2),
@@ -1600,14 +1624,14 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if( !eq(HasOMod, 1),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp, omod:$omod),
+ clampmod0:$clamp, omod0:$omod),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp))
+ clampmod0:$clamp))
/* else */,
// VOP2 without modifiers
!if (!eq(HasIntClamp, 1),
- (ins Src0RC:$src0, Src1RC:$src1, clampmod:$clamp),
+ (ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp),
(ins Src0RC:$src0, Src1RC:$src1))
/* endif */ )
@@ -1619,12 +1643,12 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp, omod:$omod),
+ clampmod0:$clamp, omod0:$omod),
!if (!eq(HasIntClamp, 1),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp),
+ clampmod0:$clamp),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2))),
@@ -1632,18 +1656,18 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if (!eq(HasOMod, 1),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2RC:$src2, clampmod:$clamp, omod:$omod),
+ Src2RC:$src2, clampmod0:$clamp, omod0:$omod),
!if (!eq(HasIntClamp, 1),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2RC:$src2, clampmod:$clamp),
+ Src2RC:$src2, clampmod0:$clamp),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2RC:$src2))))
/* else */,
// VOP3 without modifiers
!if (!eq(HasIntClamp, 1),
- (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod:$clamp),
+ (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp),
(ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
/* endif */ ))));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
index 21984c6ad910..d84720f820ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -86,7 +86,7 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
//===----------------------------------------------------------------------===//
def ATOMIC_FENCE : SPseudoInstSI<
(outs), (ins i32imm:$ordering, i32imm:$scope),
- [(atomic_fence (i32 imm:$ordering), (i32 imm:$scope))],
+ [(atomic_fence (i32 timm:$ordering), (i32 timm:$scope))],
"ATOMIC_FENCE $ordering, $scope"> {
let hasSideEffects = 1;
let maybeAtomic = 1;
@@ -646,22 +646,22 @@ def : Pat <
def : Pat <
(int_amdgcn_kill i1:$src),
- (SI_KILL_I1_PSEUDO $src, 0)
+ (SI_KILL_I1_PSEUDO SCSrc_i1:$src, 0)
>;
def : Pat <
(int_amdgcn_kill (i1 (not i1:$src))),
- (SI_KILL_I1_PSEUDO $src, -1)
+ (SI_KILL_I1_PSEUDO SCSrc_i1:$src, -1)
>;
def : Pat <
(AMDGPUkill i32:$src),
- (SI_KILL_F32_COND_IMM_PSEUDO $src, 0, 3) // 3 means SETOGE
+ (SI_KILL_F32_COND_IMM_PSEUDO VSrc_b32:$src, 0, 3) // 3 means SETOGE
>;
def : Pat <
- (int_amdgcn_kill (i1 (setcc f32:$src, InlineFPImm<f32>:$imm, cond:$cond))),
- (SI_KILL_F32_COND_IMM_PSEUDO $src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond))
+ (int_amdgcn_kill (i1 (setcc f32:$src, InlineImmFP32:$imm, cond:$cond))),
+ (SI_KILL_F32_COND_IMM_PSEUDO VSrc_b32:$src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond))
>;
// TODO: we could add more variants for other types of conditionals
@@ -782,13 +782,14 @@ defm : FMADPat <f32, V_MAC_F32_e64>;
class FMADModsPat<Instruction inst, SDPatternOperator mad_opr, ValueType Ty>
: GCNPat<
- (Ty (mad_opr (VOP3Mods Ty:$src0, i32:$src0_mod),
- (VOP3Mods Ty:$src1, i32:$src1_mod),
- (VOP3Mods Ty:$src2, i32:$src2_mod))),
+ (Ty (mad_opr (Ty (VOP3Mods Ty:$src0, i32:$src0_mod)),
+ (Ty (VOP3Mods Ty:$src1, i32:$src1_mod)),
+ (Ty (VOP3Mods Ty:$src2, i32:$src2_mod)))),
(inst $src0_mod, $src0, $src1_mod, $src1,
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
+// FIXME: This should select to V_MAC_F32
def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz, f32>;
def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
let SubtargetPredicate = Has16BitInsts;
@@ -1323,8 +1324,8 @@ def : GCNPat <
>;
def : GCNPat <
- (i64 InlineImm<i64>:$imm),
- (S_MOV_B64 InlineImm<i64>:$imm)
+ (i64 InlineImm64:$imm),
+ (S_MOV_B64 InlineImm64:$imm)
>;
// XXX - Should this use a s_cmp to set SCC?
@@ -1345,14 +1346,15 @@ def : GCNPat <
}
def : GCNPat <
- (f64 InlineFPImm<f64>:$imm),
- (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm<f64>:$imm)))
+ (f64 InlineImmFP64:$imm),
+ (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineImmFP64:$imm)))
>;
/********** ================== **********/
/********** Intrinsic Patterns **********/
/********** ================== **********/
+// FIXME: Should use _e64 and select source modifiers.
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
def : GCNPat <
@@ -1792,54 +1794,59 @@ def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
// The COPY is a workaround for a tablegen bug with multiple outputs:
// S_LSHL_B32 has multiple outputs because of its implicit scc def.
def : GCNPat <
- (v2i16 (build_vector (i16 0), i16:$src1)),
- (v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16))))
+ (v2i16 (build_vector (i16 0), (i16 SReg_32:$src1))),
+ (S_LSHL_B32 SReg_32:$src1, (i16 16))
>;
def : GCNPat <
- (v2i16 (build_vector i16:$src0, (i16 undef))),
- (v2i16 (COPY $src0))
+ (v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
+ (COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
+>;
+
+def : GCNPat <
+ (v2i16 (build_vector (i16 VGPR_32:$src0), (i16 undef))),
+ (COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
>;
def : GCNPat <
(v2f16 (build_vector f16:$src0, (f16 undef))),
- (v2f16 (COPY $src0))
+ (COPY $src0)
>;
def : GCNPat <
- (v2i16 (build_vector (i16 undef), i16:$src1)),
- (v2i16 (COPY (S_LSHL_B32 $src1, (i32 16))))
+ (v2i16 (build_vector (i16 undef), (i16 SReg_32:$src1))),
+ (S_LSHL_B32 SReg_32:$src1, (i32 16))
>;
def : GCNPat <
- (v2f16 (build_vector (f16 undef), f16:$src1)),
- (v2f16 (COPY (S_LSHL_B32 $src1, (i32 16))))
+ (v2f16 (build_vector (f16 undef), (f16 SReg_32:$src1))),
+ (S_LSHL_B32 SReg_32:$src1, (i32 16))
>;
let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat <
- (v2i16 (build_vector i16:$src0, i16:$src1)),
- (v2i16 (S_PACK_LL_B32_B16 $src0, $src1))
+ (v2i16 (build_vector (i16 SReg_32:$src0), (i16 SReg_32:$src1))),
+ (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
// With multiple uses of the shift, this will duplicate the shift and
// increase register pressure.
def : GCNPat <
- (v2i16 (build_vector i16:$src0, (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
- (v2i16 (S_PACK_LH_B32_B16 i16:$src0, i32:$src1))
+ (v2i16 (build_vector (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
+ (v2i16 (S_PACK_LH_B32_B16 SReg_32:$src0, SReg_32:$src1))
>;
def : GCNPat <
- (v2i16 (build_vector (i16 (trunc (srl_oneuse i32:$src0, (i32 16)))),
- (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
- (v2i16 (S_PACK_HH_B32_B16 $src0, $src1))
+ (v2i16 (build_vector (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))),
+ (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
+ (S_PACK_HH_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
// TODO: Should source modifiers be matched to v_pack_b32_f16?
def : GCNPat <
- (v2f16 (build_vector f16:$src0, f16:$src1)),
- (v2f16 (S_PACK_LL_B32_B16 $src0, $src1))
+ (v2f16 (build_vector (f16 SReg_32:$src0), (f16 SReg_32:$src1))),
+ (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
} // End SubtargetPredicate = HasVOP3PInsts
@@ -1923,15 +1930,28 @@ def : GCNPat <
// TODO: Also do for 64-bit.
def : GCNPat<
(add i32:$src0, (i32 NegSubInlineConst32:$src1)),
- (S_SUB_I32 $src0, NegSubInlineConst32:$src1)
+ (S_SUB_I32 SReg_32:$src0, NegSubInlineConst32:$src1)
>;
+def : GCNPat<
+ (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+ (V_SUB_U32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> {
+ let SubtargetPredicate = HasAddNoCarryInsts;
+}
+
+def : GCNPat<
+ (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+ (V_SUB_I32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> {
+ let SubtargetPredicate = NotHasAddNoCarryInsts;
+}
+
+
// Avoid pointlessly materializing a constant in VGPR.
// FIXME: Should also do this for readlane, but tablegen crashes on
// the ignored src1.
def : GCNPat<
(int_amdgcn_readfirstlane (i32 imm:$src)),
- (S_MOV_B32 $src)
+ (S_MOV_B32 SReg_32:$src)
>;
multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
@@ -1952,6 +1972,29 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;
+multiclass IntMed3Pat<Instruction med3Inst,
+ SDPatternOperator min,
+ SDPatternOperator max,
+ SDPatternOperator min_oneuse,
+ SDPatternOperator max_oneuse> {
+
+ // This matches 16 permutations of
+ // min(max(a, b), max(min(a, b), c))
+ def : AMDGPUPat <
+ (min (max_oneuse i32:$src0, i32:$src1),
+ (max_oneuse (min_oneuse i32:$src0, i32:$src1), i32:$src2)),
+ (med3Inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
+>;
+
+ // This matches 16 permutations of
+ // max(min(x, y), min(max(x, y), z))
+ def : AMDGPUPat <
+ (max (min_oneuse i32:$src0, i32:$src1),
+ (min_oneuse (max_oneuse i32:$src0, i32:$src1), i32:$src2)),
+ (med3Inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
+>;
+}
+
defm : IntMed3Pat<V_MED3_I32, smin, smax, smin_oneuse, smax_oneuse>;
defm : IntMed3Pat<V_MED3_U32, umin, umax, umin_oneuse, umax_oneuse>;
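
Both patterns in the multiclass rely on the identity that min(max(a, b), max(min(a, b), c)), like its dual, computes the median of three values, which is what lets every commuted form map onto a single v_med3 instruction. A brute-force standalone check of the identity:

#include <algorithm>
#include <cassert>

int med3ViaMinMax(int A, int B, int C) {
  // The expression the first pattern matches (16 permutations of it).
  return std::min(std::max(A, B), std::max(std::min(A, B), C));
}

int med3ViaSort(int A, int B, int C) {
  int V[3] = {A, B, C};
  std::sort(V, V + 3);
  return V[1]; // the median
}

int main() {
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B)
      for (int C = -2; C <= 2; ++C)
        assert(med3ViaMinMax(A, B, C) == med3ViaSort(A, B, C));
}
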
@@ -1982,22 +2025,21 @@ multiclass Int16Med3Pat<Instruction med3Inst,
SDPatternOperator min,
SDPatternOperator max,
SDPatternOperator max_oneuse,
- SDPatternOperator min_oneuse,
- ValueType vt = i16> {
+ SDPatternOperator min_oneuse> {
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
def : GCNPat <
- (max (min_oneuse vt:$src0, vt:$src1),
- (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
- (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
+ (max (min_oneuse i16:$src0, i16:$src1),
+ (min_oneuse (max_oneuse i16:$src0, i16:$src1), i16:$src2)),
+ (med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE)
>;
// This matches 16 permutations of
// min(max(a, b), max(min(a, b), c))
def : GCNPat <
- (min (max_oneuse vt:$src0, vt:$src1),
- (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
- (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
+ (min (max_oneuse i16:$src0, i16:$src1),
+ (max_oneuse (min_oneuse i16:$src0, i16:$src1), i16:$src2)),
+ (med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE)
>;
}
@@ -2018,3 +2060,14 @@ def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
let InOperandList = (ins type1:$src);
let hasSideEffects = 0;
}
+
+// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
+// operand. Expects a MachineMemOperand in addition to the explicit
+// operands.
+def G_AMDGPU_ATOMIC_CMPXCHG : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$oldval);
+ let InOperandList = (ins ptype1:$addr, type0:$cmpval_newval);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 20db1c37f354..d2b1abc8a9fb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -75,6 +75,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
@@ -98,6 +99,8 @@ enum InstClassEnum {
BUFFER_LOAD,
BUFFER_STORE,
MIMG,
+ TBUFFER_LOAD,
+ TBUFFER_STORE,
};
enum RegisterEnum {
@@ -112,22 +115,16 @@ enum RegisterEnum {
class SILoadStoreOptimizer : public MachineFunctionPass {
struct CombineInfo {
MachineBasicBlock::iterator I;
- MachineBasicBlock::iterator Paired;
unsigned EltSize;
- unsigned Offset0;
- unsigned Offset1;
- unsigned Width0;
- unsigned Width1;
+ unsigned Offset;
+ unsigned Width;
+ unsigned Format;
unsigned BaseOff;
- unsigned DMask0;
- unsigned DMask1;
+ unsigned DMask;
InstClassEnum InstClass;
- bool GLC0;
- bool GLC1;
- bool SLC0;
- bool SLC1;
- bool DLC0;
- bool DLC1;
+ bool GLC;
+ bool SLC;
+ bool DLC;
bool UseST64;
SmallVector<MachineInstr *, 8> InstsToMove;
int AddrIdx[5];
@@ -183,7 +180,6 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
void setMI(MachineBasicBlock::iterator MI, const SIInstrInfo &TII,
const GCNSubtarget &STM);
- void setPaired(MachineBasicBlock::iterator MI, const SIInstrInfo &TII);
};
struct BaseRegisters {
@@ -205,30 +201,39 @@ private:
const GCNSubtarget *STM = nullptr;
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
+ const MCSubtargetInfo *STI = nullptr;
MachineRegisterInfo *MRI = nullptr;
AliasAnalysis *AA = nullptr;
bool OptimizeAgain;
- static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII);
- static bool offsetsCanBeCombined(CombineInfo &CI);
- static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI);
- static unsigned getNewOpcode(const CombineInfo &CI);
- static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI);
- const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI);
-
- bool findMatchingInst(CombineInfo &CI);
+ static bool dmasksCanBeCombined(const CombineInfo &CI,
+ const SIInstrInfo &TII,
+ const CombineInfo &Paired);
+ static bool offsetsCanBeCombined(CombineInfo &CI, const MCSubtargetInfo &STI,
+ CombineInfo &Paired);
+ static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI,
+ const CombineInfo &Paired);
+ static unsigned getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired);
+ static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI,
+ const CombineInfo &Paired);
+ const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI,
+ const CombineInfo &Paired);
+
+ bool findMatchingInst(CombineInfo &CI, CombineInfo &Paired);
unsigned read2Opcode(unsigned EltSize) const;
unsigned read2ST64Opcode(unsigned EltSize) const;
- MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI);
+ MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired);
unsigned write2Opcode(unsigned EltSize) const;
unsigned write2ST64Opcode(unsigned EltSize) const;
- MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI);
- MachineBasicBlock::iterator mergeImagePair(CombineInfo &CI);
- MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI);
- MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI);
- MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI);
+ MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired);
+ MachineBasicBlock::iterator mergeImagePair(CombineInfo &CI, CombineInfo &Paired);
+ MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI, CombineInfo &Paired);
+ MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired);
+ MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired);
+ MachineBasicBlock::iterator mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired);
+ MachineBasicBlock::iterator mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired);
void updateBaseAndOffset(MachineInstr &I, unsigned NewBase,
int32_t NewOffset) const;
@@ -284,6 +289,9 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
TII.getNamedOperand(MI, AMDGPU::OpName::dmask)->getImm();
return countPopulation(DMaskImm);
}
+ if (TII.isMTBUF(Opc)) {
+ return AMDGPU::getMTBUFElements(Opc);
+ }
switch (Opc) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
@@ -322,10 +330,27 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1)
return UNKNOWN;
// TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD.
- if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() || TII.isGather4(Opc))
+ if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() ||
+ TII.isGather4(Opc))
return UNKNOWN;
return MIMG;
}
+ if (TII.isMTBUF(Opc)) {
+ switch (AMDGPU::getMTBUFBaseOpcode(Opc)) {
+ default:
+ return UNKNOWN;
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFEN:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFEN_exact:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFSET:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFSET_exact:
+ return TBUFFER_LOAD;
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFEN:
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFEN_exact:
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFSET:
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFSET_exact:
+ return TBUFFER_STORE;
+ }
+ }
return UNKNOWN;
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
@@ -356,6 +381,8 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
assert(Info);
return Info->BaseOpcode;
}
+ if (TII.isMTBUF(Opc))
+ return AMDGPU::getMTBUFBaseOpcode(Opc);
return -1;
case AMDGPU::DS_READ_B32:
case AMDGPU::DS_READ_B32_gfx9:
@@ -397,6 +424,24 @@ static unsigned getRegs(unsigned Opc, const SIInstrInfo &TII) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
if (Info && AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler)
result |= SSAMP;
+
+ return result;
+ }
+ if (TII.isMTBUF(Opc)) {
+ unsigned result = 0;
+
+ if (AMDGPU::getMTBUFHasVAddr(Opc)) {
+ result |= VADDR;
+ }
+
+ if (AMDGPU::getMTBUFHasSrsrc(Opc)) {
+ result |= SRSRC;
+ }
+
+ if (AMDGPU::getMTBUFHasSoffset(Opc)) {
+ result |= SOFFSET;
+ }
+
return result;
}
@@ -419,7 +464,6 @@ static unsigned getRegs(unsigned Opc, const SIInstrInfo &TII) {
}
}
-
void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
const SIInstrInfo &TII,
const GCNSubtarget &STM) {
@@ -450,22 +494,25 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
}
if (InstClass == MIMG) {
- DMask0 = TII.getNamedOperand(*I, AMDGPU::OpName::dmask)->getImm();
+ DMask = TII.getNamedOperand(*I, AMDGPU::OpName::dmask)->getImm();
} else {
int OffsetIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset);
- Offset0 = I->getOperand(OffsetIdx).getImm();
+ Offset = I->getOperand(OffsetIdx).getImm();
}
- Width0 = getOpcodeWidth(*I, TII);
+ if (InstClass == TBUFFER_LOAD || InstClass == TBUFFER_STORE)
+ Format = TII.getNamedOperand(*I, AMDGPU::OpName::format)->getImm();
+
+ Width = getOpcodeWidth(*I, TII);
if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
- Offset0 &= 0xffff;
+ Offset &= 0xffff;
} else if (InstClass != MIMG) {
- GLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::glc)->getImm();
+ GLC = TII.getNamedOperand(*I, AMDGPU::OpName::glc)->getImm();
if (InstClass != S_BUFFER_LOAD_IMM) {
- SLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::slc)->getImm();
+ SLC = TII.getNamedOperand(*I, AMDGPU::OpName::slc)->getImm();
}
- DLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::dlc)->getImm();
+ DLC = TII.getNamedOperand(*I, AMDGPU::OpName::dlc)->getImm();
}
unsigned AddrOpName[5] = {0};
@@ -504,32 +551,6 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
InstsToMove.clear();
}
-void SILoadStoreOptimizer::CombineInfo::setPaired(MachineBasicBlock::iterator MI,
- const SIInstrInfo &TII) {
- Paired = MI;
- assert(InstClass == getInstClass(Paired->getOpcode(), TII));
-
- if (InstClass == MIMG) {
- DMask1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::dmask)->getImm();
- } else {
- int OffsetIdx =
- AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::offset);
- Offset1 = Paired->getOperand(OffsetIdx).getImm();
- }
-
- Width1 = getOpcodeWidth(*Paired, TII);
- if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
- Offset1 &= 0xffff;
- } else if (InstClass != MIMG) {
- GLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::glc)->getImm();
- if (InstClass != S_BUFFER_LOAD_IMM) {
- SLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::slc)->getImm();
- }
- DLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::dlc)->getImm();
- }
-}
-
-
} // end anonymous namespace.
INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
@@ -635,7 +656,9 @@ static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF,
return MMO;
}
-bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII) {
+bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
+ const SIInstrInfo &TII,
+ const CombineInfo &Paired) {
assert(CI.InstClass == MIMG);
// Ignore instructions with tfe/lwe set.
@@ -652,16 +675,16 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, const SIIn
for (auto op : OperandsToMatch) {
int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
- if (AMDGPU::getNamedOperandIdx(CI.Paired->getOpcode(), op) != Idx)
+ if (AMDGPU::getNamedOperandIdx(Paired.I->getOpcode(), op) != Idx)
return false;
if (Idx != -1 &&
- CI.I->getOperand(Idx).getImm() != CI.Paired->getOperand(Idx).getImm())
+ CI.I->getOperand(Idx).getImm() != Paired.I->getOperand(Idx).getImm())
return false;
}
// Check DMask for overlaps.
- unsigned MaxMask = std::max(CI.DMask0, CI.DMask1);
- unsigned MinMask = std::min(CI.DMask0, CI.DMask1);
+ unsigned MaxMask = std::max(CI.DMask, Paired.DMask);
+ unsigned MinMask = std::min(CI.DMask, Paired.DMask);
unsigned AllowedBitsForMin = llvm::countTrailingZeros(MaxMask);
if ((1u << AllowedBitsForMin) <= MinMask)
@@ -670,62 +693,113 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, const SIIn
return true;
}
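The test accepts two dmasks only when the smaller one lies entirely below the lowest set bit of the larger one, i.e. the instructions read disjoint, adjacent component ranges. A standalone restatement with worked values (plain C++; __builtin_ctz stands in for countTrailingZeros and assumes a non-zero mask, which the pass guarantees):

  #include <algorithm>
  #include <cassert>

  static bool dmasksDisjoint(unsigned A, unsigned B) {
    unsigned MaxMask = std::max(A, B);
    unsigned MinMask = std::min(A, B);
    unsigned AllowedBitsForMin = __builtin_ctz(MaxMask);
    return (1u << AllowedBitsForMin) > MinMask;
  }

  int main() {
    assert(dmasksDisjoint(0x3, 0xC));  // xy + zw: disjoint, mergeable
    assert(!dmasksDisjoint(0x6, 0xC)); // both enable component z: reject
    return 0;
  }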
-bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
+static unsigned getBufferFormatWithCompCount(unsigned OldFormat,
+ unsigned ComponentCount,
+ const MCSubtargetInfo &STI) {
+ if (ComponentCount > 4)
+ return 0;
+
+ const llvm::AMDGPU::GcnBufferFormatInfo *OldFormatInfo =
+ llvm::AMDGPU::getGcnBufferFormatInfo(OldFormat, STI);
+ if (!OldFormatInfo)
+ return 0;
+
+ const llvm::AMDGPU::GcnBufferFormatInfo *NewFormatInfo =
+ llvm::AMDGPU::getGcnBufferFormatInfo(OldFormatInfo->BitsPerComp,
+ ComponentCount,
+ OldFormatInfo->NumFormat, STI);
+
+ if (!NewFormatInfo)
+ return 0;
+
+ assert(NewFormatInfo->NumFormat == OldFormatInfo->NumFormat &&
+ NewFormatInfo->BitsPerComp == OldFormatInfo->BitsPerComp);
+
+ return NewFormatInfo->Format;
+}
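A worked instance (numbers assumed for illustration): merging two one-component accesses that both use a 32-bit format asks the gfx buffer-format tables for the two-component entry with the same bits-per-component and numeric format; on a subtarget whose tables lack such an entry, the function returns 0 and the caller rejects the merge.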
+
+bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
+ const MCSubtargetInfo &STI,
+ CombineInfo &Paired) {
assert(CI.InstClass != MIMG);
// XXX - Would the same offset be OK? Is there any reason this would happen or
// be useful?
- if (CI.Offset0 == CI.Offset1)
+ if (CI.Offset == Paired.Offset)
return false;
// This won't be valid if the offset isn't aligned.
- if ((CI.Offset0 % CI.EltSize != 0) || (CI.Offset1 % CI.EltSize != 0))
+ if ((CI.Offset % CI.EltSize != 0) || (Paired.Offset % CI.EltSize != 0))
return false;
- unsigned EltOffset0 = CI.Offset0 / CI.EltSize;
- unsigned EltOffset1 = CI.Offset1 / CI.EltSize;
+ if (CI.InstClass == TBUFFER_LOAD || CI.InstClass == TBUFFER_STORE) {
+
+ const llvm::AMDGPU::GcnBufferFormatInfo *Info0 =
+ llvm::AMDGPU::getGcnBufferFormatInfo(CI.Format, STI);
+ if (!Info0)
+ return false;
+ const llvm::AMDGPU::GcnBufferFormatInfo *Info1 =
+ llvm::AMDGPU::getGcnBufferFormatInfo(Paired.Format, STI);
+ if (!Info1)
+ return false;
+
+ if (Info0->BitsPerComp != Info1->BitsPerComp ||
+ Info0->NumFormat != Info1->NumFormat)
+ return false;
+
+ // TODO: Should be possible to support more formats, but if format loads
+ // are not dword-aligned, the merged load might not be valid.
+ if (Info0->BitsPerComp != 32)
+ return false;
+
+ if (getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, STI) == 0)
+ return false;
+ }
+
+ unsigned EltOffset0 = CI.Offset / CI.EltSize;
+ unsigned EltOffset1 = Paired.Offset / CI.EltSize;
CI.UseST64 = false;
CI.BaseOff = 0;
// Handle SMEM and VMEM instructions.
if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
- return (EltOffset0 + CI.Width0 == EltOffset1 ||
- EltOffset1 + CI.Width1 == EltOffset0) &&
- CI.GLC0 == CI.GLC1 && CI.DLC0 == CI.DLC1 &&
- (CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
+ return (EltOffset0 + CI.Width == EltOffset1 ||
+ EltOffset1 + Paired.Width == EltOffset0) &&
+ CI.GLC == Paired.GLC && CI.DLC == Paired.DLC &&
+ (CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC == Paired.SLC);
}
// If the offset in elements doesn't fit in 8-bits, we might be able to use
// the stride 64 versions.
if ((EltOffset0 % 64 == 0) && (EltOffset1 % 64) == 0 &&
isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64)) {
- CI.Offset0 = EltOffset0 / 64;
- CI.Offset1 = EltOffset1 / 64;
+ CI.Offset = EltOffset0 / 64;
+ Paired.Offset = EltOffset1 / 64;
CI.UseST64 = true;
return true;
}
// Check if the new offsets fit in the reduced 8-bit range.
if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1)) {
- CI.Offset0 = EltOffset0;
- CI.Offset1 = EltOffset1;
+ CI.Offset = EltOffset0;
+ Paired.Offset = EltOffset1;
return true;
}
// Try to shift base address to decrease offsets.
unsigned OffsetDiff = std::abs((int)EltOffset1 - (int)EltOffset0);
- CI.BaseOff = std::min(CI.Offset0, CI.Offset1);
+ CI.BaseOff = std::min(CI.Offset, Paired.Offset);
if ((OffsetDiff % 64 == 0) && isUInt<8>(OffsetDiff / 64)) {
- CI.Offset0 = (EltOffset0 - CI.BaseOff / CI.EltSize) / 64;
- CI.Offset1 = (EltOffset1 - CI.BaseOff / CI.EltSize) / 64;
+ CI.Offset = (EltOffset0 - CI.BaseOff / CI.EltSize) / 64;
+ Paired.Offset = (EltOffset1 - CI.BaseOff / CI.EltSize) / 64;
CI.UseST64 = true;
return true;
}
if (isUInt<8>(OffsetDiff)) {
- CI.Offset0 = EltOffset0 - CI.BaseOff / CI.EltSize;
- CI.Offset1 = EltOffset1 - CI.BaseOff / CI.EltSize;
+ CI.Offset = EltOffset0 - CI.BaseOff / CI.EltSize;
+ Paired.Offset = EltOffset1 - CI.BaseOff / CI.EltSize;
return true;
}
@@ -733,8 +807,9 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
}
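The three fallbacks above mirror the ds_read2/ds_write2 encodings: two 8-bit offset fields in units of the element size, with the _ST64 variants scaling by a further 64. A worked trace in plain C++ (byte offsets chosen for illustration):

  #include <cassert>
  #include <cstdint>

  // Byte offsets 1024 and 1028 with 4-byte elements: element offsets 256
  // and 257 overflow 8 bits, and 257 is not a multiple of 64, so both
  // direct encodings fail. Their difference (1) fits in 8 bits, so the
  // pass shifts the base to min(1024, 1028) and encodes offsets 0 and 1.
  int main() {
    const unsigned EltSize = 4;
    unsigned Off0 = 1024, Off1 = 1028;
    unsigned Elt0 = Off0 / EltSize, Elt1 = Off1 / EltSize; // 256, 257
    assert(Elt0 > UINT8_MAX && Elt1 > UINT8_MAX); // plain form fails
    assert(Elt1 % 64 != 0);                       // ST64 form fails
    unsigned BaseOff = Off0 < Off1 ? Off0 : Off1; // new base: 1024
    assert(Elt0 - BaseOff / EltSize == 0);        // offset0
    assert(Elt1 - BaseOff / EltSize == 1);        // offset1
    return 0;
  }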
bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
- const CombineInfo &CI) {
- const unsigned Width = (CI.Width0 + CI.Width1);
+ const CombineInfo &CI,
+ const CombineInfo &Paired) {
+ const unsigned Width = (CI.Width + Paired.Width);
switch (CI.InstClass) {
default:
return (Width <= 4) && (STM.hasDwordx3LoadStores() || (Width != 3));
@@ -749,7 +824,8 @@ bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
}
}
-bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
+bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI,
+ CombineInfo &Paired) {
MachineBasicBlock *MBB = CI.I->getParent();
MachineBasicBlock::iterator E = MBB->end();
MachineBasicBlock::iterator MBBI = CI.I;
@@ -813,6 +889,11 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
if (MBBI->hasOrderedMemoryRef())
return false;
+ int Swizzled =
+ AMDGPU::getNamedOperandIdx(MBBI->getOpcode(), AMDGPU::OpName::swz);
+ if (Swizzled != -1 && MBBI->getOperand(Swizzled).getImm())
+ return false;
+
// Handle a case like
// DS_WRITE_B32 addr, v, idx0
// w = DS_READ_B32 addr, idx0
@@ -826,14 +907,14 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
bool Match = CI.hasSameBaseAddress(*MBBI);
if (Match) {
- CI.setPaired(MBBI, *TII);
+ Paired.setMI(MBBI, *TII, *STM);
// Check both offsets (or masks for MIMG) can be combined and fit in the
// reduced range.
bool canBeCombined =
CI.InstClass == MIMG
- ? dmasksCanBeCombined(CI, *TII)
- : widthsFit(*STM, CI) && offsetsCanBeCombined(CI);
+ ? dmasksCanBeCombined(CI, *TII, Paired)
+ : widthsFit(*STM, CI, Paired) && offsetsCanBeCombined(CI, *STI, Paired);
// We also need to go through the list of instructions that we plan to
// move and make sure they are all safe to move down past the merged
@@ -869,7 +950,7 @@ unsigned SILoadStoreOptimizer::read2ST64Opcode(unsigned EltSize) const {
}
MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
+SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be careful, since the addresses could be subregisters themselves in weird
@@ -877,10 +958,10 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
const auto *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdst);
- const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdst);
+ const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdst);
- unsigned NewOffset0 = CI.Offset0;
- unsigned NewOffset1 = CI.Offset1;
+ unsigned NewOffset0 = CI.Offset;
+ unsigned NewOffset1 = Paired.Offset;
unsigned Opc =
CI.UseST64 ? read2ST64Opcode(CI.EltSize) : read2Opcode(CI.EltSize);
@@ -909,13 +990,13 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
+ BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
.addImm(CI.BaseOff);
BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BaseRegFlags = RegState::Kill;
- TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
+ TII->getAddNoCarry(*MBB, Paired.I, DL, BaseReg)
.addReg(ImmReg)
.addReg(AddrReg->getReg(), 0, BaseSubReg)
.addImm(0); // clamp bit
@@ -923,29 +1004,29 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
}
MachineInstrBuilder Read2 =
- BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
+ BuildMI(*MBB, Paired.I, DL, Read2Desc, DestReg)
.addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
.addImm(0) // gds
- .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
+ .cloneMergedMemRefs({&*CI.I, &*Paired.I});
(void)Read2;
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
// Copy to the old destination registers.
- BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ BuildMI(*MBB, Paired.I, DL, CopyDesc)
.add(*Dest0) // Copy to same destination including flags and sub reg.
.addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc)
.add(*Dest1)
.addReg(DestReg, RegState::Kill, SubRegIdx1);
moveInstsAfter(Copy1, CI.InstsToMove);
CI.I->eraseFromParent();
- CI.Paired->eraseFromParent();
+ Paired.I->eraseFromParent();
LLVM_DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
return Read2;
@@ -968,7 +1049,7 @@ unsigned SILoadStoreOptimizer::write2ST64Opcode(unsigned EltSize) const {
}
MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
+SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be sure to use .addOperand(), and not .addReg() with these. We want to be
@@ -978,10 +1059,10 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
const MachineOperand *Data0 =
TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0);
const MachineOperand *Data1 =
- TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0);
+ TII->getNamedOperand(*Paired.I, AMDGPU::OpName::data0);
- unsigned NewOffset0 = CI.Offset0;
- unsigned NewOffset1 = CI.Offset1;
+ unsigned NewOffset0 = CI.Offset;
+ unsigned NewOffset1 = Paired.Offset;
unsigned Opc =
CI.UseST64 ? write2ST64Opcode(CI.EltSize) : write2Opcode(CI.EltSize);
@@ -1002,13 +1083,13 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
+ BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
.addImm(CI.BaseOff);
BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BaseRegFlags = RegState::Kill;
- TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
+ TII->getAddNoCarry(*MBB, Paired.I, DL, BaseReg)
.addReg(ImmReg)
.addReg(AddrReg->getReg(), 0, BaseSubReg)
.addImm(0); // clamp bit
@@ -1016,38 +1097,38 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
}
MachineInstrBuilder Write2 =
- BuildMI(*MBB, CI.Paired, DL, Write2Desc)
+ BuildMI(*MBB, Paired.I, DL, Write2Desc)
.addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
.add(*Data0) // data0
.add(*Data1) // data1
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
.addImm(0) // gds
- .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
+ .cloneMergedMemRefs({&*CI.I, &*Paired.I});
moveInstsAfter(Write2, CI.InstsToMove);
CI.I->eraseFromParent();
- CI.Paired->eraseFromParent();
+ Paired.I->eraseFromParent();
LLVM_DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
return Write2;
}
MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI) {
+SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- const unsigned Opcode = getNewOpcode(CI);
+ const unsigned Opcode = getNewOpcode(CI, Paired);
- const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
Register DestReg = MRI->createVirtualRegister(SuperRC);
- unsigned MergedDMask = CI.DMask0 | CI.DMask1;
+ unsigned MergedDMask = CI.DMask | Paired.DMask;
unsigned DMaskIdx =
AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::dmask);
- auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg);
+ auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg);
for (unsigned I = 1, E = (*CI.I).getNumOperands(); I != E; ++I) {
if (I == DMaskIdx)
MIB.addImm(MergedDMask);
@@ -1058,100 +1139,99 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI) {
// It shouldn't be possible to get this far if the two instructions
// don't have a single memoperand, because MachineInstr::mayAlias()
// will return true if this is the case.
- assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand());
+ assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
+ const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
- std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
- const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
- const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
+ unsigned SubRegIdx0, SubRegIdx1;
+ std::tie(SubRegIdx0, SubRegIdx1) = getSubRegIdxs(CI, Paired);
// Copy to the old destination registers.
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
- const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata);
+ const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
- BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ BuildMI(*MBB, Paired.I, DL, CopyDesc)
.add(*Dest0) // Copy to same destination including flags and sub reg.
.addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc)
.add(*Dest1)
.addReg(DestReg, RegState::Kill, SubRegIdx1);
moveInstsAfter(Copy1, CI.InstsToMove);
CI.I->eraseFromParent();
- CI.Paired->eraseFromParent();
+ Paired.I->eraseFromParent();
return New;
}
MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
+SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI, CombineInfo &Paired) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- const unsigned Opcode = getNewOpcode(CI);
+ const unsigned Opcode = getNewOpcode(CI, Paired);
- const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
Register DestReg = MRI->createVirtualRegister(SuperRC);
- unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);
+ unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
// It shouldn't be possible to get this far if the two instructions
// don't have a single memoperand, because MachineInstr::mayAlias()
// will return true if this is the case.
- assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand());
+ assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
+ const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
MachineInstr *New =
- BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg)
+ BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg)
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.DLC0) // dlc
+ .addImm(CI.GLC) // glc
+ .addImm(CI.DLC) // dlc
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
- std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the old destination registers.
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::sdst);
- const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::sdst);
+ const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::sdst);
- BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ BuildMI(*MBB, Paired.I, DL, CopyDesc)
.add(*Dest0) // Copy to same destination including flags and sub reg.
.addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc)
.add(*Dest1)
.addReg(DestReg, RegState::Kill, SubRegIdx1);
moveInstsAfter(Copy1, CI.InstsToMove);
CI.I->eraseFromParent();
- CI.Paired->eraseFromParent();
+ Paired.I->eraseFromParent();
return New;
}
MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
+SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- const unsigned Opcode = getNewOpcode(CI);
+ const unsigned Opcode = getNewOpcode(CI, Paired);
- const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
// Copy to the new source register.
Register DestReg = MRI->createVirtualRegister(SuperRC);
- unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);
+ unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
- auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg);
+ auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg);
const unsigned Regs = getRegs(Opcode, *TII);
@@ -1161,47 +1241,178 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
// It shouldn't be possible to get this far if the two instructions
// don't have a single memoperand, because MachineInstr::mayAlias()
// will return true if this is the case.
- assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand());
+ assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
+ const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(MergedOffset) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.SLC0) // slc
+ .addImm(CI.GLC) // glc
+ .addImm(CI.SLC) // slc
.addImm(0) // tfe
- .addImm(CI.DLC0) // dlc
+ .addImm(CI.DLC) // dlc
.addImm(0) // swz
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
- std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the old destination registers.
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
- const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata);
+ const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
- BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ BuildMI(*MBB, Paired.I, DL, CopyDesc)
.add(*Dest0) // Copy to same destination including flags and sub reg.
.addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc)
.add(*Dest1)
.addReg(DestReg, RegState::Kill, SubRegIdx1);
moveInstsAfter(Copy1, CI.InstsToMove);
CI.I->eraseFromParent();
- CI.Paired->eraseFromParent();
+ Paired.I->eraseFromParent();
return New;
}
-unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
- const unsigned Width = CI.Width0 + CI.Width1;
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired) {
+ MachineBasicBlock *MBB = CI.I->getParent();
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ const unsigned Opcode = getNewOpcode(CI, Paired);
+
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
+
+ // Copy to the new source register.
+ Register DestReg = MRI->createVirtualRegister(SuperRC);
+ unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
+
+ auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg);
+
+ const unsigned Regs = getRegs(Opcode, *TII);
+
+ if (Regs & VADDR)
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
+
+ unsigned JoinedFormat =
+ getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STI);
+
+ // It shouldn't be possible to get this far if the two instructions
+ // don't have a single memoperand, because MachineInstr::mayAlias()
+ // will return true if this is the case.
+ assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
+
+ const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+ const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
+
+ MachineInstr *New =
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
+ .addImm(MergedOffset) // offset
+ .addImm(JoinedFormat) // format
+ .addImm(CI.GLC) // glc
+ .addImm(CI.SLC) // slc
+ .addImm(0) // tfe
+ .addImm(CI.DLC) // dlc
+ .addImm(0) // swz
+ .addMemOperand(
+ combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
+
+ // Copy to the old destination registers.
+ const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+ const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
+ const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
+
+ BuildMI(*MBB, Paired.I, DL, CopyDesc)
+ .add(*Dest0) // Copy to same destination including flags and sub reg.
+ .addReg(DestReg, 0, SubRegIdx0);
+ MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
+
+ moveInstsAfter(Copy1, CI.InstsToMove);
+
+ CI.I->eraseFromParent();
+ Paired.I->eraseFromParent();
+ return New;
+}
+
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired) {
+ MachineBasicBlock *MBB = CI.I->getParent();
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ const unsigned Opcode = getNewOpcode(CI, Paired);
+
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
+
+ // Copy to the new source register.
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
+ Register SrcReg = MRI->createVirtualRegister(SuperRC);
+
+ const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
+ const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
+
+ BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
+ .add(*Src0)
+ .addImm(SubRegIdx0)
+ .add(*Src1)
+ .addImm(SubRegIdx1);
+
+ auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode))
+ .addReg(SrcReg, RegState::Kill);
+
+ const unsigned Regs = getRegs(Opcode, *TII);
+
+ if (Regs & VADDR)
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
+
+ unsigned JoinedFormat =
+ getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STI);
+
+ // It shouldn't be possible to get this far if the two instructions
+ // don't have a single memoperand, because MachineInstr::mayAlias()
+ // will return true if this is the case.
+ assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
+
+ const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+ const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
+
+ MachineInstr *New =
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
+ .addImm(std::min(CI.Offset, Paired.Offset)) // offset
+ .addImm(JoinedFormat) // format
+ .addImm(CI.GLC) // glc
+ .addImm(CI.SLC) // slc
+ .addImm(0) // tfe
+ .addImm(CI.DLC) // dlc
+ .addImm(0) // swz
+ .addMemOperand(
+ combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+
+ moveInstsAfter(MIB, CI.InstsToMove);
+
+ CI.I->eraseFromParent();
+ Paired.I->eraseFromParent();
+ return New;
+}
+
+unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
+ const CombineInfo &Paired) {
+ const unsigned Width = CI.Width + Paired.Width;
switch (CI.InstClass) {
default:
@@ -1209,6 +1420,11 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
// FIXME: Handle d16 correctly
return AMDGPU::getMUBUFOpcode(AMDGPU::getMUBUFBaseOpcode(CI.I->getOpcode()),
Width);
+ case TBUFFER_LOAD:
+ case TBUFFER_STORE:
+ return AMDGPU::getMTBUFOpcode(AMDGPU::getMTBUFBaseOpcode(CI.I->getOpcode()),
+ Width);
+
case UNKNOWN:
llvm_unreachable("Unknown instruction class");
case S_BUFFER_LOAD_IMM:
@@ -1221,24 +1437,24 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
}
case MIMG:
- assert("No overlaps" && (countPopulation(CI.DMask0 | CI.DMask1) == Width));
+ assert("No overlaps" && (countPopulation(CI.DMask | Paired.DMask) == Width));
return AMDGPU::getMaskedMIMGOp(CI.I->getOpcode(), Width);
}
}
std::pair<unsigned, unsigned>
-SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI) {
+SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI, const CombineInfo &Paired) {
- if (CI.Width0 == 0 || CI.Width0 == 0 || CI.Width0 + CI.Width1 > 4)
+ if (CI.Width == 0 || Paired.Width == 0 || CI.Width + Paired.Width > 4)
return std::make_pair(0, 0);
bool ReverseOrder;
if (CI.InstClass == MIMG) {
- assert((countPopulation(CI.DMask0 | CI.DMask1) == CI.Width0 + CI.Width1) &&
+ assert((countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) &&
"No overlaps");
- ReverseOrder = CI.DMask0 > CI.DMask1;
+ ReverseOrder = CI.DMask > Paired.DMask;
} else
- ReverseOrder = CI.Offset0 > CI.Offset1;
+ ReverseOrder = CI.Offset > Paired.Offset;
static const unsigned Idxs[4][4] = {
{AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
@@ -1249,24 +1465,25 @@ SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI) {
unsigned Idx0;
unsigned Idx1;
- assert(CI.Width0 >= 1 && CI.Width0 <= 3);
- assert(CI.Width1 >= 1 && CI.Width1 <= 3);
+ assert(CI.Width >= 1 && CI.Width <= 3);
+ assert(Paired.Width >= 1 && Paired.Width <= 3);
if (ReverseOrder) {
- Idx1 = Idxs[0][CI.Width1 - 1];
- Idx0 = Idxs[CI.Width1][CI.Width0 - 1];
+ Idx1 = Idxs[0][Paired.Width - 1];
+ Idx0 = Idxs[Paired.Width][CI.Width - 1];
} else {
- Idx0 = Idxs[0][CI.Width0 - 1];
- Idx1 = Idxs[CI.Width0][CI.Width1 - 1];
+ Idx0 = Idxs[0][CI.Width - 1];
+ Idx1 = Idxs[CI.Width][Paired.Width - 1];
}
return std::make_pair(Idx0, Idx1);
}
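As a worked example: a two-dword access merged ahead of a one-dword access (ReverseOrder false) takes sub0_sub1 and sub2 of the combined register. A standalone restatement of the lookup (plain C++; rows after the first are assumed to continue the pattern of the row shown above):

  #include <cassert>
  #include <cstring>

  static const char *Idxs[4][4] = {
      {"sub0", "sub0_sub1", "sub0_sub1_sub2", "sub0_sub1_sub2_sub3"},
      {"sub1", "sub1_sub2", "sub1_sub2_sub3", nullptr},
      {"sub2", "sub2_sub3", nullptr, nullptr},
      {"sub3", nullptr, nullptr, nullptr},
  };

  int main() {
    unsigned Width0 = 2, Width1 = 1; // ReverseOrder == false
    const char *Idx0 = Idxs[0][Width0 - 1];      // first covers sub0_sub1
    const char *Idx1 = Idxs[Width0][Width1 - 1]; // second starts at sub2
    assert(std::strcmp(Idx0, "sub0_sub1") == 0);
    assert(std::strcmp(Idx1, "sub2") == 0);
    return 0;
  }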
const TargetRegisterClass *
-SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI) {
+SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
+ const CombineInfo &Paired) {
if (CI.InstClass == S_BUFFER_LOAD_IMM) {
- switch (CI.Width0 + CI.Width1) {
+ switch (CI.Width + Paired.Width) {
default:
return nullptr;
case 2:
@@ -1279,7 +1496,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI) {
return &AMDGPU::SReg_512RegClass;
}
} else {
- switch (CI.Width0 + CI.Width1) {
+ switch (CI.Width + Paired.Width) {
default:
return nullptr;
case 2:
@@ -1293,30 +1510,30 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI) {
}
MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
+SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- const unsigned Opcode = getNewOpcode(CI);
+ const unsigned Opcode = getNewOpcode(CI, Paired);
- std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the new source register.
- const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
Register SrcReg = MRI->createVirtualRegister(SuperRC);
const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
- const auto *Src1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata);
+ const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
- BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
+ BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
.add(*Src0)
.addImm(SubRegIdx0)
.add(*Src1)
.addImm(SubRegIdx1);
- auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode))
+ auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
const unsigned Regs = getRegs(Opcode, *TII);
@@ -1328,26 +1545,26 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
// It shouldn't be possible to get this far if the two instructions
// don't have a single memoperand, because MachineInstr::mayAlias()
// will return true if this is the case.
- assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand());
+ assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
+ const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
- .addImm(std::min(CI.Offset0, CI.Offset1)) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.SLC0) // slc
+ .addImm(std::min(CI.Offset, Paired.Offset)) // offset
+ .addImm(CI.GLC) // glc
+ .addImm(CI.SLC) // slc
.addImm(0) // tfe
- .addImm(CI.DLC0) // dlc
+ .addImm(CI.DLC) // dlc
.addImm(0) // swz
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
moveInstsAfter(MIB, CI.InstsToMove);
CI.I->eraseFromParent();
- CI.Paired->eraseFromParent();
+ Paired.I->eraseFromParent();
return New;
}
@@ -1429,7 +1646,9 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
void SILoadStoreOptimizer::updateBaseAndOffset(MachineInstr &MI,
unsigned NewBase,
int32_t NewOffset) const {
- TII->getNamedOperand(MI, AMDGPU::OpName::vaddr)->setReg(NewBase);
+ auto Base = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
+ Base->setReg(NewBase);
+ Base->setIsKill(false);
TII->getNamedOperand(MI, AMDGPU::OpName::offset)->setImm(NewOffset);
}
@@ -1664,8 +1883,8 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
void SILoadStoreOptimizer::addInstToMergeableList(const CombineInfo &CI,
std::list<std::list<CombineInfo> > &MergeableInsts) const {
for (std::list<CombineInfo> &AddrList : MergeableInsts) {
- if (AddrList.front().hasSameBaseAddress(*CI.I) &&
- AddrList.front().InstClass == CI.InstClass) {
+ if (AddrList.front().InstClass == CI.InstClass &&
+ AddrList.front().hasSameBaseAddress(*CI.I)) {
AddrList.emplace_back(CI);
return;
}
@@ -1760,63 +1979,70 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
bool Modified = false;
for (auto I = MergeList.begin(); I != MergeList.end(); ++I) {
CombineInfo &CI = *I;
+ CombineInfo Paired;
+
+ if (CI.InstClass == UNKNOWN)
+ continue;
+
+ if (!findMatchingInst(CI, Paired))
+ goto done;
+
+ Modified = true;
+ removeCombinedInst(MergeList, *Paired.I);
switch (CI.InstClass) {
default:
+ llvm_unreachable("unknown InstClass");
break;
- case DS_READ:
- if (findMatchingInst(CI)) {
- Modified = true;
- removeCombinedInst(MergeList, *CI.Paired);
- MachineBasicBlock::iterator NewMI = mergeRead2Pair(CI);
- CI.setMI(NewMI, *TII, *STM);
- }
+ case DS_READ: {
+ MachineBasicBlock::iterator NewMI = mergeRead2Pair(CI, Paired);
+ CI.setMI(NewMI, *TII, *STM);
break;
- case DS_WRITE:
- if (findMatchingInst(CI)) {
- Modified = true;
- removeCombinedInst(MergeList, *CI.Paired);
- MachineBasicBlock::iterator NewMI = mergeWrite2Pair(CI);
- CI.setMI(NewMI, *TII, *STM);
- }
+ }
+ case DS_WRITE: {
+ MachineBasicBlock::iterator NewMI = mergeWrite2Pair(CI, Paired);
+ CI.setMI(NewMI, *TII, *STM);
break;
- case S_BUFFER_LOAD_IMM:
- if (findMatchingInst(CI)) {
- Modified = true;
- removeCombinedInst(MergeList, *CI.Paired);
- MachineBasicBlock::iterator NewMI = mergeSBufferLoadImmPair(CI);
- CI.setMI(NewMI, *TII, *STM);
- OptimizeListAgain |= (CI.Width0 + CI.Width1) < 16;
- }
+ }
+ case S_BUFFER_LOAD_IMM: {
+ MachineBasicBlock::iterator NewMI = mergeSBufferLoadImmPair(CI, Paired);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width + Paired.Width) < 16;
break;
- case BUFFER_LOAD:
- if (findMatchingInst(CI)) {
- Modified = true;
- removeCombinedInst(MergeList, *CI.Paired);
- MachineBasicBlock::iterator NewMI = mergeBufferLoadPair(CI);
- CI.setMI(NewMI, *TII, *STM);
- OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
- }
+ }
+ case BUFFER_LOAD: {
+ MachineBasicBlock::iterator NewMI = mergeBufferLoadPair(CI, Paired);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
- case BUFFER_STORE:
- if (findMatchingInst(CI)) {
- Modified = true;
- removeCombinedInst(MergeList, *CI.Paired);
- MachineBasicBlock::iterator NewMI = mergeBufferStorePair(CI);
- CI.setMI(NewMI, *TII, *STM);
- OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
- }
+ }
+ case BUFFER_STORE: {
+ MachineBasicBlock::iterator NewMI = mergeBufferStorePair(CI, Paired);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
- case MIMG:
- if (findMatchingInst(CI)) {
- Modified = true;
- removeCombinedInst(MergeList, *CI.Paired);
- MachineBasicBlock::iterator NewMI = mergeImagePair(CI);
- CI.setMI(NewMI, *TII, *STM);
- OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
- }
+ }
+ case MIMG: {
+ MachineBasicBlock::iterator NewMI = mergeImagePair(CI, Paired);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
+ break;
+ }
+ case TBUFFER_LOAD: {
+ MachineBasicBlock::iterator NewMI = mergeTBufferLoadPair(CI, Paired);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
+ break;
+ }
+ case TBUFFER_STORE: {
+ MachineBasicBlock::iterator NewMI = mergeTBufferStorePair(CI, Paired);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
}
+ }
+
+done:
// Clear the InstsToMove after we have finished searching so we don't have
// stale values left over if we search for this CI again in another pass
// over the block.
@@ -1836,6 +2062,7 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
TII = STM->getInstrInfo();
TRI = &TII->getRegisterInfo();
+ STI = &MF.getSubtarget<MCSubtargetInfo>();
MRI = &MF.getRegInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 6f9abd3a8d9b..61d2719a3aad 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -244,9 +244,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
.addReg(Tmp, RegState::Kill);
- // Insert a pseudo terminator to help keep the verifier happy. This will also
- // be used later when inserting skips.
- MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+ // Insert the S_CBRANCH_EXECZ instruction which will be optimized later
+ // during SIRemoveShortExecBranches.
+ MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
.add(MI.getOperand(2));
if (!LIS) {
@@ -323,8 +323,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
.addReg(DstReg);
MachineInstr *Branch =
- BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
- .addMBB(DestBB);
+ BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ .addMBB(DestBB);
if (!LIS) {
MI.eraseFromParent();
@@ -372,12 +372,15 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
// exit" mask.
MachineInstr *And = nullptr, *Or = nullptr;
if (!SkipAnding) {
- And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), Dst)
+ Register AndReg = MRI->createVirtualRegister(BoolRC);
+ And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
.addReg(Exec)
.add(MI.getOperand(1));
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
- .addReg(Dst)
+ .addReg(AndReg)
.add(MI.getOperand(2));
+ if (LIS)
+ LIS->createAndComputeVirtRegInterval(AndReg);
} else
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.add(MI.getOperand(1))
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index b45412536356..1d45e6241d22 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
@@ -541,7 +542,7 @@ void SILowerI1Copies::lowerPhis() {
MachineSSAUpdater SSAUpdater(*MF);
LoopFinder LF(*DT, *PDT);
PhiIncomingAnalysis PIA(*PDT);
- SmallVector<MachineInstr *, 4> DeadPhis;
+ SmallVector<MachineInstr *, 4> Vreg1Phis;
SmallVector<MachineBasicBlock *, 4> IncomingBlocks;
SmallVector<unsigned, 4> IncomingRegs;
SmallVector<unsigned, 4> IncomingUpdated;
@@ -550,118 +551,117 @@ void SILowerI1Copies::lowerPhis() {
#endif
for (MachineBasicBlock &MBB : *MF) {
- LF.initialize(MBB);
-
for (MachineInstr &MI : MBB.phis()) {
- Register DstReg = MI.getOperand(0).getReg();
- if (!isVreg1(DstReg))
- continue;
+ if (isVreg1(MI.getOperand(0).getReg()))
+ Vreg1Phis.push_back(&MI);
+ }
+ }
- LLVM_DEBUG(dbgs() << "Lower PHI: " << MI);
+ MachineBasicBlock *PrevMBB = nullptr;
+ for (MachineInstr *MI : Vreg1Phis) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ if (&MBB != PrevMBB) {
+ LF.initialize(MBB);
+ PrevMBB = &MBB;
+ }
- MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
- : &AMDGPU::SReg_64RegClass);
+ LLVM_DEBUG(dbgs() << "Lower PHI: " << *MI);
- // Collect incoming values.
- for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
- assert(i + 1 < MI.getNumOperands());
- Register IncomingReg = MI.getOperand(i).getReg();
- MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB();
- MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
-
- if (IncomingDef->getOpcode() == AMDGPU::COPY) {
- IncomingReg = IncomingDef->getOperand(1).getReg();
- assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
- assert(!IncomingDef->getOperand(1).getSubReg());
- } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
- continue;
- } else {
- assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
- }
+ Register DstReg = MI->getOperand(0).getReg();
+ MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::SReg_64RegClass);
+
+ // Collect incoming values.
+ for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
+ assert(i + 1 < MI->getNumOperands());
+ Register IncomingReg = MI->getOperand(i).getReg();
+ MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB();
+ MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
- IncomingBlocks.push_back(IncomingMBB);
- IncomingRegs.push_back(IncomingReg);
+ if (IncomingDef->getOpcode() == AMDGPU::COPY) {
+ IncomingReg = IncomingDef->getOperand(1).getReg();
+ assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
+ assert(!IncomingDef->getOperand(1).getSubReg());
+ } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
+ continue;
+ } else {
+ assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
}
+ IncomingBlocks.push_back(IncomingMBB);
+ IncomingRegs.push_back(IncomingReg);
+ }
+
#ifndef NDEBUG
- PhiRegisters.insert(DstReg);
+ PhiRegisters.insert(DstReg);
#endif
- // Phis in a loop that are observed outside the loop receive a simple but
- // conservatively correct treatment.
- std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
- for (MachineInstr &Use : MRI->use_instructions(DstReg))
- DomBlocks.push_back(Use.getParent());
+ // Phis in a loop that are observed outside the loop receive a simple but
+ // conservatively correct treatment.
+ std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
+ for (MachineInstr &Use : MRI->use_instructions(DstReg))
+ DomBlocks.push_back(Use.getParent());
- MachineBasicBlock *PostDomBound =
- PDT->findNearestCommonDominator(DomBlocks);
- unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
-
- SSAUpdater.Initialize(DstReg);
-
- if (FoundLoopLevel) {
- LF.addLoopEntries(FoundLoopLevel, SSAUpdater, IncomingBlocks);
+ MachineBasicBlock *PostDomBound =
+ PDT->findNearestCommonDominator(DomBlocks);
+ unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
- for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
- IncomingUpdated.push_back(createLaneMaskReg(*MF));
- SSAUpdater.AddAvailableValue(IncomingBlocks[i],
- IncomingUpdated.back());
- }
+ SSAUpdater.Initialize(DstReg);
- for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
- MachineBasicBlock &IMBB = *IncomingBlocks[i];
- buildMergeLaneMasks(
- IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
- SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
- }
- } else {
- // The phi is not observed from outside a loop. Use a more accurate
- // lowering.
- PIA.analyze(MBB, IncomingBlocks);
-
- for (MachineBasicBlock *MBB : PIA.predecessors())
- SSAUpdater.AddAvailableValue(MBB, insertUndefLaneMask(*MBB));
-
- for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
- MachineBasicBlock &IMBB = *IncomingBlocks[i];
- if (PIA.isSource(IMBB)) {
- IncomingUpdated.push_back(0);
- SSAUpdater.AddAvailableValue(&IMBB, IncomingRegs[i]);
- } else {
- IncomingUpdated.push_back(createLaneMaskReg(*MF));
- SSAUpdater.AddAvailableValue(&IMBB, IncomingUpdated.back());
- }
- }
+ if (FoundLoopLevel) {
+ LF.addLoopEntries(FoundLoopLevel, SSAUpdater, IncomingBlocks);
- for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
- if (!IncomingUpdated[i])
- continue;
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ IncomingUpdated.push_back(createLaneMaskReg(*MF));
+ SSAUpdater.AddAvailableValue(IncomingBlocks[i],
+ IncomingUpdated.back());
+ }
- MachineBasicBlock &IMBB = *IncomingBlocks[i];
- buildMergeLaneMasks(
- IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
- SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ buildMergeLaneMasks(
+ IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
+ SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
+ }
+ } else {
+ // The phi is not observed from outside a loop. Use a more accurate
+ // lowering.
+ PIA.analyze(MBB, IncomingBlocks);
+
+ for (MachineBasicBlock *MBB : PIA.predecessors())
+ SSAUpdater.AddAvailableValue(MBB, insertUndefLaneMask(*MBB));
+
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ if (PIA.isSource(IMBB)) {
+ IncomingUpdated.push_back(0);
+ SSAUpdater.AddAvailableValue(&IMBB, IncomingRegs[i]);
+ } else {
+ IncomingUpdated.push_back(createLaneMaskReg(*MF));
+ SSAUpdater.AddAvailableValue(&IMBB, IncomingUpdated.back());
}
}
- unsigned NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
- if (NewReg != DstReg) {
- MRI->replaceRegWith(NewReg, DstReg);
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ if (!IncomingUpdated[i])
+ continue;
- // Ensure that DstReg has a single def and mark the old PHI node for
- // deletion.
- MI.getOperand(0).setReg(NewReg);
- DeadPhis.push_back(&MI);
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ buildMergeLaneMasks(
+ IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
+ SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
}
-
- IncomingBlocks.clear();
- IncomingRegs.clear();
- IncomingUpdated.clear();
}
- for (MachineInstr *MI : DeadPhis)
+ unsigned NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
+ if (NewReg != DstReg) {
+ MRI->replaceRegWith(NewReg, DstReg);
MI->eraseFromParent();
- DeadPhis.clear();
+ }
+
+ IncomingBlocks.clear();
+ IncomingRegs.clear();
+ IncomingUpdated.clear();
}
}
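The rewritten loop follows the usual collect-then-mutate discipline: gather the vreg1 PHIs up front so that erasing a PHI later cannot invalidate iteration over MBB.phis(). The same pattern in miniature (plain C++, illustrative):

  #include <cassert>
  #include <list>
  #include <vector>

  int main() {
    std::list<int> block = {1, -2, 3, -4}; // stand-in for a basic block
    // Pass 1: collect the elements to rewrite (the vreg1 PHIs).
    std::vector<std::list<int>::iterator> worklist;
    for (auto it = block.begin(); it != block.end(); ++it)
      if (*it < 0)
        worklist.push_back(it);
    // Pass 2: mutate freely; no live iterator walks the list anymore.
    for (auto it : worklist)
      block.erase(it);
    assert(block.size() == 2);
    return 0;
  }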
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 714d403a3e8f..57ccf7641666 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 7dd0f11c95de..0c67b1467a5d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -28,7 +28,6 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
- Mode(MF.getFunction()),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7d70c786b594..ef0186f7d57f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -236,17 +236,23 @@ template <> struct MappingTraits<SIArgumentInfo> {
struct SIMode {
bool IEEE = true;
bool DX10Clamp = true;
+ bool FP32Denormals = true;
+ bool FP64FP16Denormals = true;
SIMode() = default;
-
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
IEEE = Mode.IEEE;
DX10Clamp = Mode.DX10Clamp;
+ FP32Denormals = Mode.FP32Denormals;
+ FP64FP16Denormals = Mode.FP64FP16Denormals;
}
bool operator ==(const SIMode Other) const {
- return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+ return IEEE == Other.IEEE &&
+ DX10Clamp == Other.DX10Clamp &&
+ FP32Denormals == Other.FP32Denormals &&
+ FP64FP16Denormals == Other.FP64FP16Denormals;
}
};
@@ -254,6 +260,8 @@ template <> struct MappingTraits<SIMode> {
static void mapping(IO &YamlIO, SIMode &Mode) {
YamlIO.mapOptional("ieee", Mode.IEEE, true);
YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
+ YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true);
+ YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true);
}
};
@@ -332,9 +340,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
AMDGPUFunctionArgInfo ArgInfo;
- // State of MODE register, assumed FP mode.
- AMDGPU::SIModeRegisterDefaults Mode;
-
// Graphics info.
unsigned PSInputAddr = 0;
unsigned PSInputEnable = 0;
@@ -507,10 +512,6 @@ public:
: I->second.Lanes[Lane];
}
- AMDGPU::SIModeRegisterDefaults getMode() const {
- return Mode;
- }
-
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
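
Editor's note: the mapOptional defaults added above matter for MIR round-tripping — LLVM's YAML I/O omits a field on output while it still equals the supplied default, and fills it back in on input. A minimal sketch of the same pattern, using a hypothetical cut-down struct:

#include "llvm/Support/YAMLTraits.h"

// Hypothetical struct; only the mapping pattern is the point here.
struct ExampleMode {
  bool IEEE = true;
  bool FP32Denormals = true;
};

namespace llvm {
namespace yaml {
template <> struct MappingTraits<ExampleMode> {
  static void mapping(IO &YamlIO, ExampleMode &Mode) {
    // The third argument is the default: a missing key reads back as
    // the default, and a field still at its default is not written out.
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true);
  }
};
} // end namespace yaml
} // end namespace llvm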
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index c072ba6b2d1c..004a3cb185d6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -609,13 +609,8 @@ void SIScheduleBlock::printDebug(bool full) {
}
dbgs() << "\nInstructions:\n";
- if (!Scheduled) {
- for (const SUnit* SU : SUnits)
+ for (const SUnit* SU : SUnits)
DAG->dumpNode(*SU);
- } else {
- for (const SUnit* SU : SUnits)
- DAG->dumpNode(*SU);
- }
dbgs() << "///////////////////////\n";
}
@@ -623,11 +618,8 @@ void SIScheduleBlock::printDebug(bool full) {
// SIScheduleBlockCreator //
-SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG) :
-DAG(DAG) {
-}
-
-SIScheduleBlockCreator::~SIScheduleBlockCreator() = default;
+SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
+ : DAG(DAG) {}
SIScheduleBlocks
SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
index c28a7be4d03a..ec450a316467 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -248,7 +248,6 @@ class SIScheduleBlockCreator {
public:
SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
- ~SIScheduleBlockCreator();
SIScheduleBlocks
getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index cc9b46a75582..a9717c6ffb70 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -8,12 +8,13 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -56,7 +57,7 @@ char SIOptimizeExecMasking::ID = 0;
char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
/// If \p MI is a copy from exec, return the register copied to.
-static unsigned isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) {
+static Register isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) {
switch (MI.getOpcode()) {
case AMDGPU::COPY:
case AMDGPU::S_MOV_B64:
@@ -74,7 +75,7 @@ static unsigned isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) {
}
/// If \p MI is a copy to exec, return the register copied from.
-static unsigned isCopyToExec(const MachineInstr &MI, const GCNSubtarget &ST) {
+static Register isCopyToExec(const MachineInstr &MI, const GCNSubtarget &ST) {
switch (MI.getOpcode()) {
case AMDGPU::COPY:
case AMDGPU::S_MOV_B64:
@@ -91,12 +92,12 @@ static unsigned isCopyToExec(const MachineInstr &MI, const GCNSubtarget &ST) {
llvm_unreachable("should have been replaced");
}
- return AMDGPU::NoRegister;
+ return Register();
}
/// If \p MI is a logical operation on an exec value,
/// return the register copied to.
-static unsigned isLogicalOpOnExec(const MachineInstr &MI) {
+static Register isLogicalOpOnExec(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::S_AND_B64:
case AMDGPU::S_OR_B64:
@@ -244,8 +245,8 @@ static MachineBasicBlock::reverse_iterator findExecCopy(
auto E = MBB.rend();
for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) {
- unsigned CopyFromExec = isCopyFromExec(*I, ST);
- if (CopyFromExec != AMDGPU::NoRegister)
+ Register CopyFromExec = isCopyFromExec(*I, ST);
+ if (CopyFromExec.isValid())
return I;
}
@@ -271,7 +272,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
- unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
// Optimize sequences emitted for control flow lowering. They are originally
// emitted as the separate operations because spill code may need to be
@@ -290,8 +291,8 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
if (I == E)
continue;
- unsigned CopyToExec = isCopyToExec(*I, ST);
- if (CopyToExec == AMDGPU::NoRegister)
+ Register CopyToExec = isCopyToExec(*I, ST);
+ if (!CopyToExec.isValid())
continue;
// Scan backwards to find the def.
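
Editor's note: the unsigned-to-Register migration in this file trades the AMDGPU::NoRegister sentinel for a default-constructed Register plus an isValid() query. The following self-contained sketch uses a simplified stand-in type — not LLVM's actual class — just to show the shape of the idiom:

#include <cstdio>

// Simplified stand-in for llvm::Register: value 0 means "no register".
class Register {
  unsigned Reg = 0;
public:
  Register() = default;                     // invalid register
  explicit Register(unsigned R) : Reg(R) {}
  bool isValid() const { return Reg != 0; }
  unsigned id() const { return Reg; }
};

// Mirrors isCopyFromExec: return Register() on failure instead of
// comparing against a magic constant, and let callers test isValid().
Register findCopySource(bool IsCopy, unsigned SrcReg) {
  if (!IsCopy)
    return Register();
  return Register(SrcReg);
}

int main() {
  Register R = findCopySource(true, 42);
  if (R.isValid())
    std::printf("copied from reg %u\n", R.id());
  return 0;
}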
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index fdd30db6a7cb..34199d3e425c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -21,10 +21,11 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 9b3b2436475c..05c81feb23ec 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -26,6 +26,7 @@
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
@@ -73,8 +74,8 @@ private:
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
- std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
- std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
+ MapVector<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
+ MapVector<MachineInstr *, SDWAOperandsVector> PotentialMatches;
SmallVector<MachineInstr *, 8> ConvertedInstructions;
Optional<int64_t> foldToImm(const MachineOperand &Op) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index 6cdd12d0e7bd..09dfe8753792 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -13,18 +13,19 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index f58bc3060c42..fbadad3c84ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -771,8 +771,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
- unsigned M0CopyReg = AMDGPU::NoRegister;
-
unsigned EltSize = 4;
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
@@ -850,11 +848,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
}
}
- if (M0CopyReg != AMDGPU::NoRegister) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
- .addReg(M0CopyReg, RegState::Kill);
- }
-
MI->eraseFromParent();
MFI->addToSpilledSGPRs(NumSubRegs);
return true;
@@ -882,8 +875,6 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
- unsigned M0CopyReg = AMDGPU::NoRegister;
-
unsigned EltSize = 4;
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
@@ -940,11 +931,6 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
}
}
- if (M0CopyReg != AMDGPU::NoRegister) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
- .addReg(M0CopyReg, RegState::Kill);
- }
-
MI->eraseFromParent();
return true;
}
@@ -1137,11 +1123,15 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (!IsVOP2)
MIB.addImm(0); // clamp bit
} else {
- Register ConstOffsetReg =
- RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MIB, 0, false);
+ assert(MIB->getOpcode() == AMDGPU::V_ADD_I32_e64 &&
+ "Need to reuse carry out register");
- // This should always be able to use the unused carry out.
- assert(ConstOffsetReg && "this scavenge should not be able to fail");
+ // Use scavenged unused carry out as offset register.
+ Register ConstOffsetReg;
+ if (!isWave32)
+ ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
+ else
+ ConstOffsetReg = MIB.getReg(1);
BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
.addImm(Offset);
@@ -1150,10 +1140,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MIB.addImm(0); // clamp bit
}
} else {
- // We have to produce a carry out, and we there isn't a free SGPR
- // pair for it. We can keep the whole computation on the SALU to
- // avoid clobbering an additional register at the cost of an extra
- // mov.
+ // We have to produce a carry out, and there isn't a free SGPR pair
+ // for it. We can keep the whole computation on the SALU to avoid
+ // clobbering an additional register at the cost of an extra mov.
// We may have 1 free scratch SGPR even though a carry out is
// unavailable. Only one additional mov is needed.
@@ -1175,9 +1164,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg)
.addReg(ScaledReg, RegState::Kill)
.addImm(Offset);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
- .addReg(DiffReg, RegState::Kill)
- .addImm(ST.getWavefrontSizeLog2());
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
+ .addReg(DiffReg, RegState::Kill)
+ .addImm(ST.getWavefrontSizeLog2());
}
}
}
@@ -1786,14 +1775,6 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
&AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
case AMDGPU::SGPRRegBankID:
return &AMDGPU::SReg_32RegClass;
- case AMDGPU::SCCRegBankID:
- // This needs to return an allocatable class, so don't bother returning
- // the dummy SCC class.
- //
- // FIXME: This is a grotesque hack. We use SGPR_32 as an indication this
- // was not an VCC bank value since we use the larger class SReg_32 for
- // other values. These should all use SReg_32.
- return &AMDGPU::SGPR_32RegClass;
default:
llvm_unreachable("unknown register bank");
}
@@ -1803,7 +1784,7 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
&AMDGPU::SReg_32RegClass;
case 64:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
- &AMDGPU::SReg_64_XEXECRegClass;
+ &AMDGPU::SReg_64RegClass;
case 96:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
&AMDGPU::SReg_96RegClass;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index ac3dea1a1a28..ac8c56fa3a03 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -144,6 +144,11 @@ public:
return isSGPRClass(RC);
}
+ /// \returns true if this class contains only AGPR registers
+ bool isAGPRClass(const TargetRegisterClass *RC) const {
+ return hasAGPRs(RC) && !hasVGPRs(RC);
+ }
+
/// \returns true if this class contains VGPR registers.
bool hasVGPRs(const TargetRegisterClass *RC) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 82219cbdf3b2..6ea6ec00e742 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -682,7 +682,21 @@ def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
let AllocationPriority = 8;
}
-def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
+
+// This is not a real register. It exists only so VReg_1 has a member
+// that does not alias any real register, which would otherwise
+// introduce inferred register classes.
+def ARTIFICIAL_VGPR : SIReg <"invalid vgpr", 0> {
+ let isArtificial = 1;
+}
+
+// FIXME: Should specify an empty set for this. No register should
+// ever be allocated using VReg_1. This is a hack for SelectionDAG
+// that should always be lowered by SILowerI1Copies. TableGen crashes
+// on an empty register set, but also sorts register classes based on
+// the number of registers in them. Add only one register so this is
+// sorted to the end and not preferred over VGPR_32.
+def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> {
let Size = 1;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
new file mode 100644
index 000000000000..51779e97ac62
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
@@ -0,0 +1,158 @@
+//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass optimizes the s_cbranch_execz instructions.
+/// It removes the skip instruction for short branches when there is
+/// no unwanted side effect in the fall-through code sequence.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-remove-short-exec-branches"
+
+static unsigned SkipThreshold;
+
+static cl::opt<unsigned, true> SkipThresholdFlag(
+ "amdgpu-skip-threshold", cl::Hidden,
+ cl::desc(
+ "Number of instructions before jumping over divergent control flow"),
+ cl::location(SkipThreshold), cl::init(12));
+
+namespace {
+
+class SIRemoveShortExecBranches : public MachineFunctionPass {
+private:
+ const SIInstrInfo *TII = nullptr;
+ bool getBlockDestinations(MachineBasicBlock &SrcMBB,
+ MachineBasicBlock *&TrueMBB,
+ MachineBasicBlock *&FalseMBB,
+ SmallVectorImpl<MachineOperand> &Cond);
+ bool mustRetainExeczBranch(const MachineBasicBlock &From,
+ const MachineBasicBlock &To) const;
+ bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
+
+public:
+ static char ID;
+
+ SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
+ initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
+ "SI remove short exec branches", false, false)
+
+char SIRemoveShortExecBranches::ID = 0;
+
+char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;
+
+bool SIRemoveShortExecBranches::getBlockDestinations(
+ MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
+ MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
+ if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
+ return false;
+
+ if (!FalseMBB)
+ FalseMBB = SrcMBB.getNextNode();
+
+ return true;
+}
+
+bool SIRemoveShortExecBranches::mustRetainExeczBranch(
+ const MachineBasicBlock &From, const MachineBasicBlock &To) const {
+ unsigned NumInstr = 0;
+ const MachineFunction *MF = From.getParent();
+
+ for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
+ MBBI != End && MBBI != ToI; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ // When a uniform loop is inside non-uniform control flow, the branch
+ // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
+ // when EXEC = 0. We should skip the loop lest it become infinite.
+ if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
+ I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
+ return true;
+
+ if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
+ return true;
+
+ // These instructions are potentially expensive even if EXEC = 0.
+ if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
+ I->getOpcode() == AMDGPU::S_WAITCNT)
+ return true;
+
+ ++NumInstr;
+ if (NumInstr >= SkipThreshold)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Returns true if the skip branch instruction is removed.
+bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
+ MachineBasicBlock &SrcMBB) {
+ MachineBasicBlock *TrueMBB = nullptr;
+ MachineBasicBlock *FalseMBB = nullptr;
+ SmallVector<MachineOperand, 1> Cond;
+
+ if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
+ return false;
+
+ // Consider only the forward branches.
+ if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
+ mustRetainExeczBranch(*FalseMBB, *TrueMBB))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
+ MI.eraseFromParent();
+ SrcMBB.removeSuccessor(TrueMBB);
+
+ return true;
+}
+
+bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ TII = ST.getInstrInfo();
+ MF.RenumberBlocks();
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ if (MBBI == MBB.end())
+ continue;
+
+ MachineInstr &MI = *MBBI;
+ switch (MI.getOpcode()) {
+ case AMDGPU::S_CBRANCH_EXECZ:
+ Changed = removeExeczBranch(MI, MBB);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
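
Editor's note: the SkipThreshold option in the new pass uses cl::opt's external-storage form — the `true` template argument together with cl::location makes the parser write into an existing variable rather than into the cl::opt object itself. A minimal sketch of the same pattern, with a hypothetical option name:

#include "llvm/Support/CommandLine.h"

using namespace llvm;

static unsigned Threshold; // external storage, written by the parser

static cl::opt<unsigned, true> ThresholdFlag(
    "example-threshold", cl::Hidden,
    cl::desc("Illustrative threshold option"),
    cl::location(Threshold), cl::init(12));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  // Threshold now holds either the default (12) or the parsed value.
  return Threshold > 100;
}

External storage is useful exactly as it is used above: other code in the file can read a plain `static unsigned` without touching the cl::opt machinery.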
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 8afca2cdc325..3986ca6dfa81 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -603,8 +603,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// =>
// s_nop (N + M)
if (MI.getOpcode() == AMDGPU::S_NOP &&
+ MI.getNumOperands() == 1 && // Don't merge with implicit operands
Next != MBB.end() &&
- (*Next).getOpcode() == AMDGPU::S_NOP) {
+ (*Next).getOpcode() == AMDGPU::S_NOP &&
+ (*Next).getNumOperands() == 1) {
MachineInstr &NextMI = *Next;
// The instruction encodes the amount to wait with an offset of 1,
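
Editor's note: the hunk above tightens the s_nop merge, and the truncated comment is about the immediate encoding — the operand encodes one less than the number of cycles waited, so folding two adjacent nops must convert both to cycles, add, and re-encode. A hedged arithmetic sketch; the 8-cycle cap here is an illustrative assumption, not taken from the source:

#include <cstdint>
#include <optional>

// s_nop's immediate encodes (cycles - 1): imm 0 waits 1 cycle. Merging
// two nops therefore adds their cycle counts and re-encodes the sum.
std::optional<uint8_t> mergeNopImmediates(uint8_t Imm0, uint8_t Imm1) {
  unsigned Cycles = (Imm0 + 1u) + (Imm1 + 1u);
  if (Cycles > 8) // assumed encodable maximum, for illustration only
    return std::nullopt; // would not fit; keep the two nops separate
  return static_cast<uint8_t>(Cycles - 1);
}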
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index cb4cf68d709a..39f5df767977 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -57,9 +57,9 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -77,6 +77,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -155,6 +156,7 @@ private:
DenseMap<const MachineInstr *, InstrInfo> Instructions;
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
+ SmallVector<MachineInstr *, 4> LowerToMovInstrs;
SmallVector<MachineInstr *, 4> LowerToCopyInstrs;
void printInfo();
@@ -351,7 +353,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
// inactive lanes.
markInstructionUses(MI, StateWWM, Worklist);
GlobalFlags |= StateWWM;
- LowerToCopyInstrs.push_back(&MI);
+ LowerToMovInstrs.push_back(&MI);
continue;
} else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
Opcode == AMDGPU::V_SET_INACTIVE_B64) {
@@ -851,9 +853,8 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
}
void SIWholeQuadMode::lowerCopyInstrs() {
- for (MachineInstr *MI : LowerToCopyInstrs) {
- for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
- MI->RemoveOperand(i);
+ for (MachineInstr *MI : LowerToMovInstrs) {
+ assert(MI->getNumExplicitOperands() == 2);
const Register Reg = MI->getOperand(0).getReg();
@@ -871,6 +872,22 @@ void SIWholeQuadMode::lowerCopyInstrs() {
MI->setDesc(TII->get(AMDGPU::COPY));
}
}
+ for (MachineInstr *MI : LowerToCopyInstrs) {
+ if (MI->getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
+ MI->getOpcode() == AMDGPU::V_SET_INACTIVE_B64) {
+ assert(MI->getNumExplicitOperands() == 3);
+ // The only reason we should be here is that V_SET_INACTIVE has
+ // an undef input, so it is being replaced by a simple copy.
+ // There should be a second undef source that we should remove.
+ assert(MI->getOperand(2).isUndef());
+ MI->RemoveOperand(2);
+ MI->untieRegOperand(1);
+ } else {
+ assert(MI->getNumExplicitOperands() == 2);
+ }
+
+ MI->setDesc(TII->get(AMDGPU::COPY));
+ }
}
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
@@ -878,6 +895,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
Blocks.clear();
LiveMaskQueries.clear();
LowerToCopyInstrs.clear();
+ LowerToMovInstrs.clear();
CallingConv = MF.getFunction().getCallingConv();
ST = &MF.getSubtarget<GCNSubtarget>();
@@ -892,7 +910,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
if (!(GlobalFlags & StateWQM)) {
lowerLiveMaskQueries(Exec);
- if (!(GlobalFlags & StateWWM) && LowerToCopyInstrs.empty())
+ if (!(GlobalFlags & StateWWM) && LowerToCopyInstrs.empty() &&
+     LowerToMovInstrs.empty())
return !LiveMaskQueries.empty();
} else {
// Store a copy of the original live mask when required
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
index 1a74ebbf8165..79982d96c2c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -417,7 +417,7 @@ class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
, Enc32 {
- let AssemblerPredicates = [isGFX6GFX7];
+ let AssemblerPredicate = isGFX6GFX7;
let DecoderNamespace = "GFX6GFX7";
let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
@@ -471,7 +471,7 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
, Enc64 {
bit glc;
- let AssemblerPredicates = [isGFX8GFX9];
+ let AssemblerPredicate = isGFX8GFX9;
let DecoderNamespace = "GFX8";
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
@@ -660,7 +660,7 @@ class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
SM_Real<ps>,
Enc64 {
- let AssemblerPredicates = [isGFX7Only];
+ let AssemblerPredicate = isGFX7Only;
let DecoderNamespace = "GFX7";
let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
@@ -697,7 +697,7 @@ class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
, Enc32 {
- let AssemblerPredicates = [isGFX7Only];
+ let AssemblerPredicate = isGFX7Only;
let DecoderNamespace = "GFX7";
let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
@@ -835,7 +835,7 @@ class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
bit glc;
bit dlc;
- let AssemblerPredicates = [isGFX10Plus];
+ let AssemblerPredicate = isGFX10Plus;
let DecoderNamespace = "GFX10";
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
index d31a49f428ee..73ba2ae367f7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -85,6 +85,11 @@ class SOP1_32 <string opName, list<dag> pattern=[], bit tied_in = 0> : SOP1_Pseu
let Constraints = !if(tied_in, "$sdst = $sdst_in", "");
}
+// Only register input allowed.
+class SOP1_32R <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs SReg_32:$sdst), (ins SReg_32:$src0),
+ "$sdst, $src0", pattern>;
+
// 32-bit input, no output.
class SOP1_0_32 <string opName, list<dag> pattern = []> : SOP1_Pseudo <
opName, (outs), (ins SSrc_b32:$src0),
@@ -103,6 +108,12 @@ class SOP1_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
"$sdst, $src0", pattern
>;
+// Only register input allowed.
+class SOP1_64R <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs SReg_64:$sdst), (ins SReg_64:$src0),
+ "$sdst, $src0", pattern
+>;
+
// 64-bit input, 32-bit output.
class SOP1_32_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
opName, (outs SReg_32:$sdst), (ins SSrc_b64:$src0),
@@ -159,14 +170,14 @@ let Defs = [SCC] in {
let WaveSizePredicate = isWave32 in {
def : GCNPat <
(int_amdgcn_wqm_vote i1:$src0),
- (S_WQM_B32 $src0)
+ (S_WQM_B32 SSrc_b32:$src0)
>;
}
let WaveSizePredicate = isWave64 in {
def : GCNPat <
(int_amdgcn_wqm_vote i1:$src0),
- (S_WQM_B64 $src0)
+ (S_WQM_B64 SSrc_b64:$src0)
>;
}
@@ -254,8 +265,8 @@ def S_QUADMASK_B32 : SOP1_32 <"s_quadmask_b32">;
def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64">;
let Uses = [M0] in {
-def S_MOVRELS_B32 : SOP1_32 <"s_movrels_b32">;
-def S_MOVRELS_B64 : SOP1_64 <"s_movrels_b64">;
+def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">;
+def S_MOVRELS_B64 : SOP1_64R <"s_movrels_b64">;
def S_MOVRELD_B32 : SOP1_32 <"s_movreld_b32">;
def S_MOVRELD_B64 : SOP1_64 <"s_movreld_b64">;
} // End Uses = [M0]
@@ -1202,7 +1213,7 @@ def : GCNPat <
def : GCNPat <
(i32 (smax i32:$x, (i32 (ineg i32:$x)))),
- (S_ABS_I32 $x)
+ (S_ABS_I32 SReg_32:$x)
>;
def : GCNPat <
@@ -1503,7 +1514,7 @@ defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx6_gfx7_gfx10<0x015>;
class Select_vi<string opName> :
SIMCInstr<opName, SIEncodingFamily.VI> {
- list<Predicate> AssemblerPredicates = [isGFX8GFX9];
+ Predicate AssemblerPredicate = isGFX8GFX9;
string DecoderNamespace = "GFX8";
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
index 30cf12337c6e..9ec437760c0a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
@@ -28,7 +28,7 @@ Target &llvm::getTheGCNTarget() {
}
/// Extern function to initialize the targets for the AMDGPU backend
-extern "C" void LLVMInitializeAMDGPUTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetInfo() {
RegisterTarget<Triple::r600, false> R600(getTheAMDGPUTarget(), "r600",
"AMD GPUs HD2XXX-HD6XXX", "AMDGPU");
RegisterTarget<Triple::amdgcn, false> GCN(getTheGCNTarget(), "amdgcn",
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index afb2fd987afd..5271bc3aacc6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -7,10 +7,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUBaseInfo.h"
-#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
-#include "SIDefines.h"
#include "AMDGPUAsmUtils.h"
+#include "AMDGPUTargetTransformInfo.h"
+#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -20,6 +20,8 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
@@ -312,7 +314,8 @@ unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
}
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
- return 2048;
+ // Some subtargets allow encoding 2048, but this isn't tested or supported.
+ return 1024;
}
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
@@ -542,16 +545,17 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
}
bool isGroupSegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}
bool isGlobalSegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}
bool isReadOnlySegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
+ unsigned AS = GV->getAddressSpace();
+ return AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}
bool shouldEmitConstantsToTextSection(const Triple &TT) {
@@ -1301,7 +1305,8 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
return true;
}
-SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
+ const GCNSubtarget &ST) {
*this = getDefaultForCallingConv(F.getCallingConv());
StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
@@ -1312,6 +1317,9 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
= F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
if (!DX10ClampAttr.empty())
DX10Clamp = DX10ClampAttr == "true";
+
+ FP32Denormals = ST.hasFP32Denormals(F);
+ FP64FP16Denormals = ST.hasFP64FP16Denormals(F);
}
namespace {
@@ -1322,6 +1330,8 @@ struct SourceOfDivergence {
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
#define GET_SourcesOfDivergence_IMPL
+#define GET_Gfx9BufferFormat_IMPL
+#define GET_Gfx10PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"
} // end anonymous namespace
@@ -1330,5 +1340,21 @@ bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
return lookupSourceOfDivergence(IntrID);
}
+const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
+ uint8_t NumComponents,
+ uint8_t NumFormat,
+ const MCSubtargetInfo &STI) {
+ return isGFX10(STI)
+ ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
+ NumFormat)
+ : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
+}
+
+const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
+ const MCSubtargetInfo &STI) {
+ return isGFX10(STI) ? getGfx10PlusBufferFormatInfo(Format)
+ : getGfx9BufferFormatInfo(Format);
+}
+
} // namespace AMDGPU
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f78dadd447ff..a5bada2890d2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -41,6 +41,14 @@ class Triple;
namespace AMDGPU {
+struct GcnBufferFormatInfo {
+ unsigned Format;
+ unsigned BitsPerComp;
+ unsigned NumComponents;
+ unsigned NumFormat;
+ unsigned DataFormat;
+};
+
#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
@@ -300,6 +308,15 @@ LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);
LLVM_READONLY
+const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
+ uint8_t NumComponents,
+ uint8_t NumFormat,
+ const MCSubtargetInfo &STI);
+LLVM_READONLY
+const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
+ const MCSubtargetInfo &STI);
+
+LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
@@ -646,7 +663,6 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
-
// Track defaults for fields in the MODE registser.
struct SIModeRegisterDefaults {
/// Floating point opcodes that support exception flag gathering quiet and
@@ -659,29 +675,60 @@ struct SIModeRegisterDefaults {
/// clamp NaN to zero; otherwise, pass NaN through.
bool DX10Clamp : 1;
- // TODO: FP mode fields
+ /// If this is set, neither input nor output denormals are flushed for
+ /// most f32 instructions.
+ ///
+ /// TODO: Split into separate input and output fields if necessary, to
+ /// match the separate control bits the mode register actually provides?
+ bool FP32Denormals : 1;
+
+ /// If this is set, neither input nor output denormals are flushed for
+ /// both f64 and f16/v2f16 instructions.
+ bool FP64FP16Denormals : 1;
SIModeRegisterDefaults() :
IEEE(true),
- DX10Clamp(true) {}
+ DX10Clamp(true),
+ FP32Denormals(true),
+ FP64FP16Denormals(true) {}
- SIModeRegisterDefaults(const Function &F);
+ // FIXME: Should not depend on the subtarget
+ SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
+ const bool IsCompute = AMDGPU::isCompute(CC);
+
SIModeRegisterDefaults Mode;
Mode.DX10Clamp = true;
- Mode.IEEE = AMDGPU::isCompute(CC);
+ Mode.IEEE = IsCompute;
+ Mode.FP32Denormals = false; // FIXME: Should be on by default.
+ Mode.FP64FP16Denormals = true;
return Mode;
}
bool operator ==(const SIModeRegisterDefaults Other) const {
- return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+ return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
+ FP32Denormals == Other.FP32Denormals &&
+ FP64FP16Denormals == Other.FP64FP16Denormals;
+ }
+
+ /// Returns true if a mode flag is compatible: either both modes match,
+ /// or the flag is enabled in the caller but disabled in the callee.
+ static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
+ return CallerMode == CalleeMode || (CallerMode && !CalleeMode);
}
// FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
// be able to override.
bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
- return *this == CalleeMode;
+ if (DX10Clamp != CalleeMode.DX10Clamp)
+ return false;
+ if (IEEE != CalleeMode.IEEE)
+ return false;
+
+ // Allow inlining a denormals-flushed callee into a denormals-enabled caller.
+ return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
+ oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
}
};
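
Editor's note: since the direction of oneWayCompatible is easy to misread, here is a self-contained restatement of the rule with its full truth table. This is only a sketch of the predicate above, with nothing AMDGPU-specific:

#include <cassert>

// Same rule as oneWayCompatible above, restated standalone.
static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
  return CallerMode == CalleeMode || (CallerMode && !CalleeMode);
}

int main() {
  assert(oneWayCompatible(true, true));   // modes agree: compatible
  assert(oneWayCompatible(false, false)); // modes agree: compatible
  assert(oneWayCompatible(true, false));  // caller enabled, callee
                                          // disabled: compatible
  assert(!oneWayCompatible(false, true)); // callee relies on a mode the
                                          // caller does not set: reject
  return 0;
}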
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index f1cdc3097dc0..c7aed0985540 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -156,7 +156,7 @@ def V_READFIRSTLANE_B32 :
InstSI <(outs SReg_32:$vdst),
(ins VRegOrLds_32:$src0),
"v_readfirstlane_b32 $vdst, $src0",
- [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>,
+ [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLds_32:$src0)))]>,
Enc32 {
let isCodeGenOnly = 0;
@@ -260,62 +260,58 @@ defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}
// Restrict src0 to be VGPR
-def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
+def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
let Src0RC32 = VRegSrc_32;
let Src0RC64 = VRegSrc_32;
-
- let HasExt = 0;
- let HasExtDPP = 0;
- let HasExtSDWA = 0;
- let HasExtSDWA9 = 0;
}
// Special case because there are no true output operands. Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
-def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
+class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
let Src0RC32 = VOPDstOperand<VGPR_32>;
let Src0RC64 = VOPDstOperand<VGPR_32>;
let Outs = (outs);
- let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
- let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
- let InsDPP = (ins DstRC:$vdst, DstRC:$old, Src0RC32:$src0,
- dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
- let InsDPP16 = !con(InsDPP, (ins FI:$fi));
-
- let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
- clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel);
-
+ let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
+ let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
let Asm32 = getAsm32<1, 1>.ret;
let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;
- let AsmDPP = getAsmDPP<1, 1, 0>.ret;
- let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
- let AsmSDWA = getAsmSDWA<1, 1>.ret;
+
+ let OutsSDWA = (outs Src0RC32:$vdst);
+ let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+ clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel);
let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
- let HasExt = 0;
- let HasExtDPP = 0;
- let HasExtSDWA = 0;
- let HasExtSDWA9 = 0;
+ let OutsDPP = (outs Src0RC32:$vdst);
+ let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
+ let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
+
+ let OutsDPP8 = (outs Src0RC32:$vdst);
+ let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
+ let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;
let HasDst = 0;
let EmitDst = 1; // force vdst emission
}
+def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
+def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;
+
let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
-// v_movreld_b32 is a special case because the destination output
+ // v_movreld_b32 is a special case because the destination output
// register is really a source. It isn't actually read (but may be
// written), and is only to provide the base register to start
// indexing from. Tablegen seems to not let you define an implicit
// virtual register output for the super register being written into,
// so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
-defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
-defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
+defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
+defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End Uses = [M0, EXEC]
defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
@@ -430,9 +426,8 @@ let SubtargetPredicate = isGFX10Plus in {
defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NONE>;
let Uses = [M0] in {
- // FIXME-GFX10: Should V_MOVRELSD_2_B32 be VOP_NO_EXT?
defm V_MOVRELSD_2_B32 :
- VOP1Inst<"v_movrelsd_2_b32", VOP_NO_EXT<VOP_I32_I32>>;
+ VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>;
def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
let Constraints = "$vdst = $src1, $vdst1 = $src0";
@@ -526,16 +521,10 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
-multiclass VOP1_Real_gfx10_no_dpp<bits<9> op> :
- VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
- VOP1_Real_sdwa_gfx10<op>;
-
-multiclass VOP1_Real_gfx10_no_dpp8<bits<9> op> :
- VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
- VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>;
-
multiclass VOP1_Real_gfx10<bits<9> op> :
- VOP1_Real_gfx10_no_dpp8<op>, VOP1_Real_dpp8_gfx10<op>;
+ VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
+ VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
+ VOP1_Real_dpp8_gfx10<op>;
defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>;
defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>;
@@ -618,12 +607,6 @@ multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;
-multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<bits<9> op> :
- VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp8<op>;
-
-multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp<bits<9> op> :
- VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp<op>;
-
defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>;
defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
@@ -681,9 +664,9 @@ defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>;
defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>;
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
-defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x042>;
-defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x043>;
-defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x044>;
+defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x042>;
+defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x043>;
+defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x044>;
//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
@@ -699,7 +682,7 @@ class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
}
multiclass VOP1Only_Real_vi <bits<10> op> {
- let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
+ let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
def _vi :
VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
@@ -707,7 +690,7 @@ multiclass VOP1Only_Real_vi <bits<10> op> {
}
multiclass VOP1_Real_e32e64_vi <bits<10> op> {
- let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
+ let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
def _e32_vi :
VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
@@ -899,7 +882,7 @@ def : GCNPat <
//===----------------------------------------------------------------------===//
multiclass VOP1_Real_gfx9 <bits<10> op> {
- let AssemblerPredicates = [isGFX9Only], DecoderNamespace = "GFX9" in {
+ let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
defm NAME : VOP1_Real_e32e64_vi <op>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 1ab0fc1ab58d..aaadc3dbc721 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -467,9 +467,9 @@ defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
-defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_i24>;
+defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
-defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_u24>;
+defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
@@ -729,7 +729,7 @@ multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {
def : GCNPat<
(i32 (zext (op i16:$src0, i16:$src1))),
- (inst $src0, $src1)
+ (inst VSrc_b16:$src0, VSrc_b16:$src1)
>;
def : GCNPat<
@@ -766,7 +766,22 @@ def : GCNPat <
let Predicates = [Has16BitInsts] in {
+// Undo sub x, c -> add x, -c canonicalization since c is more likely
+// an inline immediate than -c.
+// TODO: Also do for 64-bit.
+def : GCNPat<
+ (add i16:$src0, (i16 NegSubInlineConst16:$src1)),
+ (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1)
+>;
+
+
let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
+
+def : GCNPat<
+ (i32 (zext (add i16:$src0, (i16 NegSubInlineConst16:$src1)))),
+ (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1)
+>;
+
defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>;
@@ -777,7 +792,7 @@ defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<lshl_rev, V_LSHLREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<lshr_rev, V_LSHRREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<ashr_rev, V_ASHRREV_I16_e64>;
-}
+} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9]
def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;
@@ -788,15 +803,7 @@ def : GCNPat <
/*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;
-// Undo sub x, c -> add x, -c canonicalization since c is more likely
-// an inline immediate than -c.
-// TODO: Also do for 64-bit.
-def : GCNPat<
- (add i16:$src0, (i16 NegSubInlineConst16:$src1)),
- (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1)
->;
-
-} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9]
+} // End Predicates = [Has16BitInsts]
//===----------------------------------------------------------------------===//
@@ -1282,7 +1289,7 @@ defm V_MADAK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//
-let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
+let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
multiclass VOP2_Real_MADK_vi <bits<6> op> {
def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -1316,7 +1323,7 @@ multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
VOP2_Real_e32_vi<op>,
VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
-} // End AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8"
+} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"
multiclass VOP2_SDWA_Real <bits<6> op> {
foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
@@ -1332,7 +1339,7 @@ multiclass VOP2_SDWA9_Real <bits<6> op> {
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
-let AssemblerPredicates = [isGFX8Only] in {
+let AssemblerPredicate = isGFX8Only in {
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
def _e32_vi :
@@ -1366,7 +1373,7 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
}
}
-let AssemblerPredicates = [isGFX9Only] in {
+let AssemblerPredicate = isGFX9Only in {
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
def _e32_gfx9 :
@@ -1424,7 +1431,7 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
}
}
-} // AssemblerPredicates = [isGFX9Only]
+} // AssemblerPredicate = isGFX9Only
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 605425972b1c..67c8b926302d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -258,15 +258,15 @@ class getInterp16Ins <bit HasSrc2, bit HasOMod,
(ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
Attr:$attr, AttrChan:$attrchan,
Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
- highmod:$high, clampmod:$clamp, omod:$omod),
+ highmod:$high, clampmod0:$clamp, omod0:$omod),
(ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
Attr:$attr, AttrChan:$attrchan,
Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
- highmod:$high, clampmod:$clamp)
+ highmod:$high, clampmod0:$clamp)
),
(ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
Attr:$attr, AttrChan:$attrchan,
- highmod:$high, clampmod:$clamp, omod:$omod)
+ highmod:$high, clampmod0:$clamp, omod0:$omod)
);
}
@@ -452,14 +452,16 @@ def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CL
let FPDPRounding = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
let Uses = [M0, EXEC] in {
+// For some reason the intrinsic operands are in a different order
+// from the instruction operands.
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
- [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 timm:$attrchan),
- (i32 timm:$attr),
- (i32 timm:$src0_modifiers),
- (f32 VRegSrc_32:$src2),
- (i32 timm:$src2_modifiers),
- (i1 timm:$high),
- (i1 timm:$clamp)))]>;
+ [(set f16:$vdst,
+ (int_amdgcn_interp_p2_f16 (VOP3Mods f32:$src2, i32:$src2_modifiers),
+ (VOP3Mods f32:$src0, i32:$src0_modifiers),
+ (i32 timm:$attrchan),
+ (i32 timm:$attr),
+ (i1 timm:$high),
+ M0))]>;
} // End Uses = [M0, EXEC]
} // End FPDPRounding = 1
} // End renamedInGFX9 = 1
@@ -497,11 +499,11 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
-let SubtargetPredicate = isGFX8GFX9 in {
+let SubtargetPredicate = isGFX8Plus, Uses = [M0, EXEC] in {
def V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>;
def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;
def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
-} // End SubtargetPredicate = isGFX8GFX9
+} // End SubtargetPredicate = isGFX8Plus, Uses = [M0, EXEC]
let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {
@@ -770,6 +772,10 @@ defm V_SUB_NC_I32 :
defm V_ADD_NC_I32 :
VOP3_Real_gfx10_with_name<0x37f, "V_ADD_I32_gfx9", "v_add_nc_i32">;
+defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_gfx10<0x200>;
+defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_gfx10<0x201>;
+defm V_INTERP_MOV_F32_e64 : VOP3Interp_Real_gfx10<0x202>;
+
defm V_INTERP_P1LL_F16 : VOP3Interp_Real_gfx10<0x342>;
defm V_INTERP_P1LV_F16 : VOP3Interp_Real_gfx10<0x343>;
defm V_INTERP_P2_F16 : VOP3Interp_Real_gfx10<0x35a>;
@@ -923,7 +929,7 @@ defm V_DIV_SCALE_F64 : VOP3be_Real_gfx6_gfx7_gfx10<0x16e>;
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//
-let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
+let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
multiclass VOP3_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -945,9 +951,9 @@ multiclass VOP3Interp_Real_vi<bits<10> op> {
VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
-} // End AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8"
+} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"
-let AssemblerPredicates = [isGFX8Only], DecoderNamespace = "GFX8" in {
+let AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8" in {
multiclass VOP3_F16_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -959,9 +965,9 @@ multiclass VOP3Interp_F16_Real_vi<bits<10> op> {
VOP3Interp_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
-} // End AssemblerPredicates = [isGFX8Only], DecoderNamespace = "GFX8"
+} // End AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8"
-let AssemblerPredicates = [isGFX9Only], DecoderNamespace = "GFX9" in {
+let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
multiclass VOP3_F16_Real_gfx9<bits<10> op, string OpName, string AsmName> {
def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
@@ -995,7 +1001,7 @@ multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
}
}
-} // End AssemblerPredicates = [isGFX9Only], DecoderNamespace = "GFX9"
+} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"
defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
defm V_MAD_I64_I32 : VOP3be_Real_vi <0x1E9>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 0c13f39fec02..933acc2278fd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -404,7 +404,7 @@ def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;
multiclass VOP3P_Real_vi<bits<10> op> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
- let AssemblerPredicates = [HasVOP3PInsts];
+ let AssemblerPredicate = HasVOP3PInsts;
let DecoderNamespace = "GFX8";
}
}
@@ -412,7 +412,7 @@ multiclass VOP3P_Real_vi<bits<10> op> {
multiclass VOP3P_Real_MAI<bits<10> op> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
- let AssemblerPredicates = [HasMAIInsts];
+ let AssemblerPredicate = HasMAIInsts;
let DecoderNamespace = "GFX8";
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 8ef0ec7b71f4..39d18794f947 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -650,7 +650,7 @@ class getVOPCClassPat64 <VOPProfile P> {
list<dag> ret =
[(set i1:$sdst,
(AMDGPUfp_class
- (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)),
+ (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers)),
P.Src1VT:$src1))];
}
@@ -1218,7 +1218,7 @@ defm V_CMPX_T_U64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0f7>;
//===----------------------------------------------------------------------===//
multiclass VOPC_Real_vi <bits<10> op> {
- let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
+ let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
def _e32_vi :
VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
VOPCe<op{7-0}>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
index 5c3e2c9e773c..7915ca003316 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
@@ -62,6 +62,6 @@ void ARCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// Force static initialization.
-extern "C" void LLVMInitializeARCAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARCAsmPrinter() {
RegisterAsmPrinter<ARCAsmPrinter> X(getTheARCTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCBranchFinalize.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCBranchFinalize.cpp
index 633c081b3137..2f05ac4ceeed 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCBranchFinalize.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCBranchFinalize.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include <vector>
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
index 751fd567bae8..8df2b5d2b6a7 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
@@ -119,7 +119,7 @@ ARCTargetLowering::ARCTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
- // Have psuedo instruction for frame addresses.
+  // Have a pseudo instruction for frame addresses.
setOperationAction(ISD::FRAMEADDR, MVT::i32, Legal);
// Custom lower global addresses.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.cpp
index 2a660e3c4dd1..2bf2c1f6bbc5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.cpp
@@ -280,8 +280,8 @@ unsigned ARCInstrInfo::removeBranch(MachineBasicBlock &MBB,
void ARCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &dl, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &dl, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
assert(ARC::GPR32RegClass.contains(SrcReg) &&
"Only GPR32 src copy supported.");
assert(ARC::GPR32RegClass.contains(DestReg) &&
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.h
index 1289b37c37b3..6f894478d3d3 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.h
@@ -64,7 +64,7 @@ public:
int *BytesRemoved = nullptr) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &dl, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &dl, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCOptAddrMode.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
index 22a3b9111c8e..232a7be2a9f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -185,7 +186,7 @@ bool ARCOptAddrMode::noUseOfAddBeforeLoadOrStore(const MachineInstr *Add,
}
MachineInstr *ARCOptAddrMode::tryToCombine(MachineInstr &Ldst) {
- assert((Ldst.mayLoad() || Ldst.mayStore()) && "LD/ST instruction expected");
+ assert(Ldst.mayLoadOrStore() && "LD/ST instruction expected");
unsigned BasePos, OffsetPos;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
index a7f89b385ffe..490f08930091 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
@@ -128,7 +128,7 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
ARCRegisterInfo::ARCRegisterInfo() : ARCGenRegisterInfo(ARC::BLINK) {}
bool ARCRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
- return MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry();
+ return MF.needsFrameMoves();
}
const MCPhysReg *
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
index 34700dc22c54..ab74fecb7804 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
@@ -81,7 +81,7 @@ void ARCPassConfig::addPreRegAlloc() {
}
// Force static initialization.
-extern "C" void LLVMInitializeARCTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARCTarget() {
RegisterTargetMachine<ARCTargetMachine> X(getTheARCTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
index 82da18617b91..611fd0e3e78d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
@@ -42,7 +42,6 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
};
@@ -297,7 +296,6 @@ static DecodeStatus DecodeMoveHRegInstruction(MCInst &Inst, uint64_t Insn,
DecodeStatus ARCDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &vStream,
raw_ostream &cStream) const {
MCDisassembler::DecodeStatus Result;
if (Bytes.size() < 2) {
@@ -365,7 +363,7 @@ static MCDisassembler *createARCDisassembler(const Target &T,
return new ARCDisassembler(STI, Ctx, T.createMCInstrInfo());
}
-extern "C" void LLVMInitializeARCDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARCDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheARCTarget(),
createARCDisassembler);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp
index e3e0ea489957..8eefae5ee491 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp
@@ -97,9 +97,10 @@ void ARCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << StringRef(getRegisterName(RegNo)).lower();
}
-void ARCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
- printInstruction(MI, O);
+void ARCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
index 5ea58407f9ed..53ca4066c02d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
@@ -26,12 +26,12 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
private:
void printMemOperandRI(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
index aa4818cd57ac..997e95e1a35f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
@@ -52,7 +52,8 @@ static MCSubtargetInfo *createARCMCSubtargetInfo(const Triple &TT,
}
static MCAsmInfo *createARCMCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TT) {
+ const Triple &TT,
+ const MCTargetOptions &Options) {
MCAsmInfo *MAI = new ARCMCAsmInfo(TT);
// Initial state of the frame pointer is SP.
@@ -81,7 +82,7 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
}
// Force static initialization.
-extern "C" void LLVMInitializeARCTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARCTargetMC() {
// Register the MC asm info.
Target &TheARCTarget = getTheARCTarget();
RegisterMCAsmInfoFn X(TheARCTarget, createARCMCAsmInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
index 59b9f806d590..d4a74e1c4174 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
@@ -16,6 +16,6 @@ Target &llvm::getTheARCTarget() {
return TheARCTarget;
}
-extern "C" void LLVMInitializeARCTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARCTargetInfo() {
RegisterTarget<Triple::arc> X(getTheARCTarget(), "arc", "ARC", "ARC");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index 30b9c8071ba2..f8a86a70c077 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -157,9 +157,8 @@ unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
MachineInstr *MI = MRI->getVRegDef(SReg);
if (!MI) return ARM::ssub_0;
MachineOperand *MO = MI->findRegisterDefOperand(SReg);
-
- assert(MO->isReg() && "Non-register operand found!");
if (!MO) return ARM::ssub_0;
+ assert(MO->isReg() && "Non-register operand found!");
if (MI->isCopy() && usesRegClass(MI->getOperand(1),
&ARM::SPRRegClass)) {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
index 2e6f756d522c..3412813a3ef2 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
@@ -43,7 +43,6 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
-FunctionPass *createARMCodeGenPreparePass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
@@ -54,6 +53,7 @@ FunctionPass *createThumb2SizeReductionPass(
InstructionSelector *
createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI,
const ARMRegisterBankInfo &RBI);
+Pass *createMVEGatherScatterLoweringPass();
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
@@ -61,7 +61,6 @@ void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
void initializeARMParallelDSPPass(PassRegistry &);
void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
-void initializeARMCodeGenPreparePass(PassRegistry &);
void initializeARMConstantIslandsPass(PassRegistry &);
void initializeARMExpandPseudoPass(PassRegistry &);
void initializeThumb2SizeReducePass(PassRegistry &);
@@ -69,6 +68,7 @@ void initializeThumb2ITBlockPass(PassRegistry &);
void initializeMVEVPTBlockPass(PassRegistry &);
void initializeARMLowOverheadLoopsPass(PassRegistry &);
void initializeMVETailPredicationPass(PassRegistry &);
+void initializeMVEGatherScatterLoweringPass(PassRegistry &);
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
index fed4cb2b9316..380eaa863689 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
@@ -303,6 +303,10 @@ def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
"Disable VFP / NEON MAC instructions">;
+// VFPv4 added VFMA instructions that can similarly be fast or slow.
+def FeatureHasSlowFPVFMx : SubtargetFeature<"slowfpvfmx", "SlowFPVFMx", "true",
+ "Disable VFP / NEON FMA instructions">;
+
// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
"HasVMLxForwarding", "true",
@@ -415,10 +419,6 @@ def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
"DisablePostRAScheduler", "true",
"Don't schedule again after register allocation">;
-// Enable use of alias analysis during code generation
-def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
- "Use alias analysis during codegen">;
-
// Armv8.5-A extensions
def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
@@ -584,7 +584,6 @@ def ProcExynos : SubtargetFeature<"exynos", "ARMProcFamily", "Exynos",
"Samsung Exynos processors",
[FeatureZCZeroing,
FeatureUseWideStrideVFP,
- FeatureUseAA,
FeatureSplatVFPToNeon,
FeatureSlowVGETLNi32,
FeatureSlowVDUP32,
@@ -593,6 +592,7 @@ def ProcExynos : SubtargetFeature<"exynos", "ARMProcFamily", "Exynos",
FeatureHWDivThumb,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureHasRetAddrStack,
FeatureFuseLiterals,
FeatureFuseAES,
@@ -923,6 +923,7 @@ def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
FeatureTrustZone,
FeatureSlowFPBrcc,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureVMLxForwarding,
FeatureMP,
FeatureVFP4]>;
@@ -933,6 +934,7 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
FeatureSlowFPBrcc,
FeatureHasVMLxHazards,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureVMLxForwarding,
FeatureMP,
FeatureVFP4,
@@ -945,6 +947,7 @@ def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
FeatureSlowFPBrcc,
FeatureHasVMLxHazards,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureVMLxForwarding]>;
def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
@@ -1014,6 +1017,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureHasVMLxHazards,
FeatureProfUnpredicate,
FeaturePrefISHSTBarrier,
@@ -1032,6 +1036,7 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
FeatureHasRetAddrStack,
FeatureSlowFPBrcc,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureVFP3_D16,
FeatureAvoidPartialCPSR]>;
@@ -1041,6 +1046,7 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
FeatureSlowFPBrcc,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureAvoidPartialCPSR]>;
def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
@@ -1051,6 +1057,7 @@ def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
FeatureSlowFPBrcc,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureAvoidPartialCPSR]>;
def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
@@ -1061,27 +1068,26 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureSlowFPBrcc,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureAvoidPartialCPSR]>;
def : ProcessorModel<"cortex-m3", CortexM4Model, [ARMv7m,
ProcM3,
FeaturePrefLoopAlign32,
FeatureUseMISched,
- FeatureUseAA,
FeatureHasNoBranchPredictor]>;
def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m,
ProcM3,
FeatureUseMISched,
- FeatureUseAA,
FeatureHasNoBranchPredictor]>;
def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em,
FeatureVFP4_D16_SP,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureUseMISched,
- FeatureUseAA,
FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-m7", [ARMv7em,
@@ -1095,8 +1101,8 @@ def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline,
FeatureFPARMv8_D16_SP,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureUseMISched,
- FeatureUseAA,
FeatureHasNoBranchPredictor]>;
def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline,
@@ -1104,8 +1110,8 @@ def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline,
FeatureFPARMv8_D16_SP,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureUseMISched,
- FeatureUseAA,
FeatureHasNoBranchPredictor]>;
@@ -1192,13 +1198,12 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
FeatureHasSlowFPVMLx,
+ FeatureHasSlowFPVFMx,
FeatureCrypto,
FeatureUseMISched,
FeatureZCZeroing,
FeatureNoPostRASched]>;
-def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynos]>;
-def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynos]>;
def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynos]>;
def : ProcNoItin<"exynos-m4", [ARMv82a, ProcExynos,
FeatureFullFP16,
@@ -1215,8 +1220,7 @@ def : ProcNoItin<"kryo", [ARMv8a, ProcKryo,
def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
FeatureUseMISched,
- FeatureFPAO,
- FeatureUseAA]>;
+ FeatureFPAO]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index c8c91e53c44e..6f26ca127f94 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -54,8 +54,8 @@ using namespace llvm;
ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr),
- InConstantPool(false), OptimizationGoals(-1) {}
+ : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr), AFI(nullptr),
+ MCP(nullptr), InConstantPool(false), OptimizationGoals(-1) {}
void ARMAsmPrinter::EmitFunctionBodyEnd() {
// Make sure to terminate any constant pools that were at the end
@@ -1170,10 +1170,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
break;
case ARM::ADDri:
case ARM::t2ADDri:
+ case ARM::t2ADDri12:
+ case ARM::t2ADDspImm:
+ case ARM::t2ADDspImm12:
Offset = -MI->getOperand(2).getImm();
break;
case ARM::SUBri:
case ARM::t2SUBri:
+ case ARM::t2SUBri12:
+ case ARM::t2SUBspImm:
+ case ARM::t2SUBspImm12:
Offset = MI->getOperand(2).getImm();
break;
case ARM::tSUBspi:
@@ -2142,7 +2148,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
//===----------------------------------------------------------------------===//
// Force static initialization.
-extern "C" void LLVMInitializeARMAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMAsmPrinter() {
RegisterAsmPrinter<ARMAsmPrinter> X(getTheARMLETarget());
RegisterAsmPrinter<ARMAsmPrinter> Y(getTheARMBETarget());
RegisterAsmPrinter<ARMAsmPrinter> A(getTheThumbLETarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 684cd1def977..48f781510254 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -134,7 +134,7 @@ ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
- return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
+ return new ARMHazardRecognizer(II, DAG);
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
@@ -829,8 +829,8 @@ void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
bool GPRDest = ARM::GPRRegClass.contains(DestReg);
bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
@@ -993,9 +993,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Mov->addRegisterKilled(SrcReg, TRI);
}
-bool ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+Optional<DestSourcePair>
+ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  // VMOVRRD is also a copy instruction but it requires
  // a special way of handling. It is a more complex copy version
  // and so we are not considering it here. For recognition
@@ -1006,10 +1005,8 @@ bool ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
if (!MI.isMoveReg() ||
(MI.getOpcode() == ARM::VORRq &&
MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
- return false;
- Dest = &MI.getOperand(0);
- Src = &MI.getOperand(1);
- return true;
+ return None;
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
}
const MachineInstrBuilder &
@@ -2726,25 +2723,6 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
return false;
}
-/// getSwappedCondition - assume the flags are set by MI(a,b), return
-/// the condition code if we modify the instructions such that flags are
-/// set by MI(b,a).
-inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
- switch (CC) {
- default: return ARMCC::AL;
- case ARMCC::EQ: return ARMCC::EQ;
- case ARMCC::NE: return ARMCC::NE;
- case ARMCC::HS: return ARMCC::LS;
- case ARMCC::LO: return ARMCC::HI;
- case ARMCC::HI: return ARMCC::LO;
- case ARMCC::LS: return ARMCC::HS;
- case ARMCC::GE: return ARMCC::LE;
- case ARMCC::LT: return ARMCC::GT;
- case ARMCC::GT: return ARMCC::LT;
- case ARMCC::LE: return ARMCC::GE;
- }
-}
-
/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
/// the condition code if we modify the instructions such that flags are
/// set by ADD(a,b,X).
@@ -3279,22 +3257,26 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
break;
case ARM::t2ADDrr:
- case ARM::t2SUBrr:
+ case ARM::t2SUBrr: {
if (UseOpc == ARM::t2SUBrr && Commute)
return false;
// ADD/SUB are special because they're essentially the same operation, so
// we can handle a larger range of immediates.
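+  // If the result is written to SP, the dedicated SP-relative immediate
+  // opcodes must be used instead of t2ADDri/t2SUBri.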
+ const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
+ const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
+ const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
- NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
+ NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
ImmVal = -ImmVal;
- NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
+ NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
} else
return false;
SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
break;
+ }
case ARM::t2ORRrr:
case ARM::t2EORrr:
if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
@@ -3314,7 +3296,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned OpIdx = Commute ? 2 : 1;
Register Reg1 = UseMI.getOperand(OpIdx).getReg();
bool isKill = UseMI.getOperand(OpIdx).isKill();
- Register NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+ const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
+ Register NewReg = MRI->createVirtualRegister(TRC);
BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
NewReg)
.addReg(Reg1, getKillRegState(isKill))
@@ -3326,6 +3309,18 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
UseMI.getOperand(1).setIsKill();
UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
DefMI.eraseFromParent();
+  // FIXME: t2ADDrr should be split, as different rules apply when writing to
+  // SP, just as t2ADDri was split into [t2ADDri, t2ADDspImm]. Then the code
+  // below will not be needed, as the input/output register classes will be
+  // rgpr or gprSP.
+  // For now, we fix the UseMI operand explicitly here:
+  switch (NewUseOpc) {
+ case ARM::t2ADDspImm:
+ case ARM::t2SUBspImm:
+ case ARM::t2ADDri:
+ case ARM::t2SUBri:
+ MRI->setRegClass(UseMI.getOperand(0).getReg(), TRC);
+ }
return true;
}
@@ -5350,6 +5345,34 @@ ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
+Optional<RegImmPair> ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI,
+ Register Reg) const {
+ int Sign = 1;
+ unsigned Opcode = MI.getOpcode();
+ int64_t Offset = 0;
+
+ // TODO: Handle cases where Reg is a super- or sub-register of the
+ // destination register.
+ if (Reg != MI.getOperand(0).getReg())
+ return None;
+
+ // We describe SUBri or ADDri instructions.
+ if (Opcode == ARM::SUBri)
+ Sign = -1;
+ else if (Opcode != ARM::ADDri)
+ return None;
+
+ // TODO: Third operand can be global address (usually some string). Since
+ // strings can be relocated we cannot calculate their offsets for
+ // now.
+ if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
+ !MI.getOperand(2).isImm())
+ return None;
+
+ Offset = MI.getOperand(2).getImm() * Sign;
+ return RegImmPair{MI.getOperand(1).getReg(), Offset};
+}
+
bool llvm::registerDefinedBetween(unsigned Reg,
MachineBasicBlock::iterator From,
MachineBasicBlock::iterator To,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index c232b6f0b45d..f6d4ebe3a090 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -99,12 +99,11 @@ protected:
MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned OpIdx1,
unsigned OpIdx2) const override;
-
- /// If the specific machine instruction is a instruction that moves/copies
- /// value from one register to another register return true along with
- /// @Source machine operand and @Destination machine operand.
- bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
- const MachineOperand *&Destination) const override;
+  /// If the specific machine instruction is an instruction that moves/copies
+  /// a value from one register to another, return the destination and source
+  /// registers as machine operands.
+ Optional<DestSourcePair>
+ isCopyInstrImpl(const MachineInstr &MI) const override;
public:
// Return whether the target has an explicit NOP encoding.
@@ -203,7 +202,7 @@ public:
const ARMSubtarget &Subtarget) const;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -455,6 +454,9 @@ public:
// 3 - predicate reg
return MI.getOperand(3).getReg();
}
+
+ Optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
+ Register Reg) const override;
};
/// Get the operands corresponding to the given \p Pred value. By default, the
@@ -486,6 +488,27 @@ bool isUncondBranchOpcode(int Opc) {
return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
}
+// This table shows the VPT instruction variants, i.e. the different
+// mask field encodings, see also B5.6. Predication/conditional execution in
+// the ArmARM.
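+// In the encoding, the lowest set bit terminates the mask and thereby fixes
+// the size of the block; each bit above it, read from bit 3 downwards, says
+// whether the predication of the next instruction is inverted (1) or kept
+// the same (0) relative to the previous one.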
+enum VPTMaskValue {
+ T = 8, // 0b1000
+ TT = 4, // 0b0100
+ TE = 12, // 0b1100
+ TTT = 2, // 0b0010
+ TTE = 6, // 0b0110
+ TEE = 10, // 0b1010
+ TET = 14, // 0b1110
+ TTTT = 1, // 0b0001
+ TTTE = 3, // 0b0011
+ TTEE = 5, // 0b0101
+ TTET = 7, // 0b0111
+ TEEE = 9, // 0b1001
+ TEET = 11, // 0b1011
+ TETT = 13, // 0b1101
+ TETE = 15 // 0b1111
+};
+
static inline bool isVPTOpcode(int Opc) {
return Opc == ARM::MVE_VPTv16i8 || Opc == ARM::MVE_VPTv16u8 ||
Opc == ARM::MVE_VPTv16s8 || Opc == ARM::MVE_VPTv8i16 ||
@@ -502,6 +525,97 @@ static inline bool isVPTOpcode(int Opc) {
}
static inline
+unsigned VCMPOpcodeToVPT(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return 0;
+ case ARM::MVE_VCMPf32:
+ return ARM::MVE_VPTv4f32;
+ case ARM::MVE_VCMPf16:
+ return ARM::MVE_VPTv8f16;
+ case ARM::MVE_VCMPi8:
+ return ARM::MVE_VPTv16i8;
+ case ARM::MVE_VCMPi16:
+ return ARM::MVE_VPTv8i16;
+ case ARM::MVE_VCMPi32:
+ return ARM::MVE_VPTv4i32;
+ case ARM::MVE_VCMPu8:
+ return ARM::MVE_VPTv16u8;
+ case ARM::MVE_VCMPu16:
+ return ARM::MVE_VPTv8u16;
+ case ARM::MVE_VCMPu32:
+ return ARM::MVE_VPTv4u32;
+ case ARM::MVE_VCMPs8:
+ return ARM::MVE_VPTv16s8;
+ case ARM::MVE_VCMPs16:
+ return ARM::MVE_VPTv8s16;
+ case ARM::MVE_VCMPs32:
+ return ARM::MVE_VPTv4s32;
+
+ case ARM::MVE_VCMPf32r:
+ return ARM::MVE_VPTv4f32r;
+ case ARM::MVE_VCMPf16r:
+ return ARM::MVE_VPTv8f16r;
+ case ARM::MVE_VCMPi8r:
+ return ARM::MVE_VPTv16i8r;
+ case ARM::MVE_VCMPi16r:
+ return ARM::MVE_VPTv8i16r;
+ case ARM::MVE_VCMPi32r:
+ return ARM::MVE_VPTv4i32r;
+ case ARM::MVE_VCMPu8r:
+ return ARM::MVE_VPTv16u8r;
+ case ARM::MVE_VCMPu16r:
+ return ARM::MVE_VPTv8u16r;
+ case ARM::MVE_VCMPu32r:
+ return ARM::MVE_VPTv4u32r;
+ case ARM::MVE_VCMPs8r:
+ return ARM::MVE_VPTv16s8r;
+ case ARM::MVE_VCMPs16r:
+ return ARM::MVE_VPTv8s16r;
+ case ARM::MVE_VCMPs32r:
+ return ARM::MVE_VPTv4s32r;
+ }
+}
+
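+// Map a VCTP opcode to the matching tail-predicated loop-start opcode:
+// DLSTP for do-loops and WLSTP for while-loops.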
+static inline
+unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("unhandled vctp opcode");
+ break;
+ case ARM::MVE_VCTP8:
+ return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
+ case ARM::MVE_VCTP16:
+ return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
+ case ARM::MVE_VCTP32:
+ return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
+ case ARM::MVE_VCTP64:
+ return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
+ }
+ return 0;
+}
+
+static inline
+bool isVCTP(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case ARM::MVE_VCTP8:
+ case ARM::MVE_VCTP16:
+ case ARM::MVE_VCTP32:
+ case ARM::MVE_VCTP64:
+ return true;
+ }
+ return false;
+}
+
+static inline
+bool isLoopStart(MachineInstr &MI) {
+ return MI.getOpcode() == ARM::t2DoLoopStart ||
+ MI.getOpcode() == ARM::t2WhileLoopStart;
+}
+
+static inline
bool isCondBranchOpcode(int Opc) {
return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 1eaf871867e0..52e6d05c3155 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -75,6 +75,8 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_NoRegs_SaveList;
+ } else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
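+    // CFGuard checks preserve the AAPCS callee-saved registers and, in
+    // addition, all of the floating-point registers.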
+ return CSR_Win_AAPCS_CFGuard_Check_SaveList;
} else if (F.hasFnAttribute("interrupt")) {
if (STI.isMClass()) {
// M-class CPUs have hardware which saves the registers needed to allow a
@@ -123,7 +125,8 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::GHC)
// This is academic because all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
-
+ if (CC == CallingConv::CFGuard_Check)
+ return CSR_Win_AAPCS_CFGuard_Check_RegMask;
if (STI.getTargetLowering()->supportSwiftError() &&
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
return STI.isTargetDarwin() ? CSR_iOS_SwiftError_RegMask
@@ -191,7 +194,7 @@ getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, ARM::PC);
markSuperRegs(Reserved, ARM::FPSCR);
markSuperRegs(Reserved, ARM::APSR_NZCV);
- if (TFI->hasFP(MF) || STI.isTargetDarwin())
+ if (TFI->hasFP(MF))
markSuperRegs(Reserved, getFramePointerReg(STI));
if (hasBasePointer(MF))
markSuperRegs(Reserved, BasePtr);
@@ -385,7 +388,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return true;
// Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
- // negative range for ldr/str (255), and thumb1 is positive offsets only.
+ // negative range for ldr/str (255), and Thumb1 is positive offsets only.
//
// It's going to be better to use the SP or Base Pointer instead. When there
// are variable sized objects, we can't reference off of the SP, so we
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
index d3b595ce8323..ce260a9ba145 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -106,7 +106,7 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
MIRBuilder.buildConstant(OffsetReg, Offset);
Register AddrReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
+ MIRBuilder.buildPtrAdd(AddrReg, SPReg, OffsetReg);
MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
return AddrReg;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.cpp
index 92ebc542b423..a47c59512592 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.cpp
@@ -18,8 +18,8 @@
using namespace llvm;
// APCS f64 is in register pairs, possibly split to stack
-static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
+static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
CCState &State, bool CanFail) {
static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
@@ -48,9 +48,9 @@ static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
+static bool CC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
CCState &State) {
if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
return false;
@@ -61,8 +61,8 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
}
// AAPCS f64 is in aligned register pairs
-static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
+static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
CCState &State, bool CanFail) {
static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
@@ -102,9 +102,9 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
+static bool CC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
CCState &State) {
if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
return false;
@@ -114,8 +114,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true; // we handled it
}
-static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo, CCState &State) {
+static bool f64RetAssign(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, CCState &State) {
static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
@@ -134,9 +134,9 @@ static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
+static bool RetCC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
CCState &State) {
if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
return false;
@@ -145,9 +145,9 @@ static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true; // we handled it
}
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
CCState &State) {
return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
State);
@@ -169,10 +169,10 @@ static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
// InConsecutiveRegsLast set. We must process all members of the HA before
// we can allocate it, as we need to know the total number of registers that
// will be needed in order to (attempt to) allocate a contiguous block.
-static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
+static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
+ MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
CCState &State) {
SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
@@ -181,7 +181,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
assert(PendingMembers[0].getLocVT() == LocVT);
// Add the argument to the list to be allocated once we know the size of the
- // aggregate. Store the type's required alignmnent as extra info for later: in
+ // aggregate. Store the type's required alignment as extra info for later: in
// the [N x i64] case all trace has been removed by the time we actually get
// to do allocation.
PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.h
index 615634551d90..7c692f03b440 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -32,6 +32,9 @@ bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State);
+bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.td
index 61d2d83ddc40..5df5b56f5afa 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -20,7 +20,7 @@ def CC_ARM_APCS : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<4, 4>>,
-
+
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// Pass SwiftSelf in a callee saved register.
@@ -80,7 +80,7 @@ def FastCC_ARM_APCS : CallingConv<[
S9, S10, S11, S12, S13, S14, S15]>>,
// CPRCs may be allocated to co-processor registers or the stack - they
- // may never be allocated to core registers.
+ // may never be allocated to core registers.
CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToStackWithShadow<8, 4, [Q0, Q1, Q2, Q3]>>,
CCIfType<[v2f64], CCAssignToStackWithShadow<16, 4, [Q0, Q1, Q2, Q3]>>,
@@ -165,8 +165,8 @@ def CC_ARM_AAPCS : CallingConv<[
CCIfNest<CCAssignToReg<[R12]>>,
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -182,8 +182,8 @@ def CC_ARM_AAPCS : CallingConv<[
let Entry = 1 in
def RetCC_ARM_AAPCS : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16,v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -208,8 +208,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -230,8 +230,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
let Entry = 1 in
def RetCC_ARM_AAPCS_VFP : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -246,6 +246,16 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
+
+// Windows Control Flow Guard checks take a single argument (the target function
+// address) and have no return value.
+let Entry = 1 in
+def CC_ARM_Win32_CFGuard_Check : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R0]>>
+]>;
+
+
+
//===----------------------------------------------------------------------===//
// Callee-saved register lists.
//===----------------------------------------------------------------------===//
@@ -256,6 +266,11 @@ def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>;
def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
(sequence "D%u", 15, 8))>;
+// The Windows Control Flow Guard Check function preserves the same registers as
+// AAPCS, and also preserves all floating point registers.
+def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7,
+ R6, R5, R4, (sequence "D%u", 15, 0))>;
+
// R8 is used to pass swifterror, remove it from CSR.
def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 24ca25f73e96..634fb89b8e89 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1917,6 +1917,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(),
TII->get(ARM::t2LE));
+ // Swapped a t2Bcc for a t2LE, so no need to update the size of the block.
MIB.add(Br.MI->getOperand(0));
Br.MI->eraseFromParent();
Br.MI = MIB;
@@ -1975,21 +1976,20 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
.addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
Cmp.MI->eraseFromParent();
- BBInfoVector &BBInfo = BBUtils->getBBInfo();
- BBInfo[MBB->getNumber()].Size -= 2;
if (Br.MI->getOpcode() == ARM::tBcc) {
Br.MI->eraseFromParent();
Br.MI = NewBR;
- } else if (&MBB->back() != Br.MI) {
- // We've generated an LE and already erased the original conditional
- // branch. The CBN?Z is now used to branch to the other successor, so an
- // unconditional branch terminator is now redundant.
+ BBUtils->adjustBBSize(MBB, -2);
+ } else if (MBB->back().getOpcode() != ARM::t2LE) {
+      // An LE has been generated, but it is not the terminator - that is an
+      // unconditional branch. The logic has now been reversed, with the CBN?Z
+      // as the conditional branch and the LE as the unconditional one, so the
+      // redundant unconditional branch at the end of the block can be removed.
MachineInstr *LastMI = &MBB->back();
- if (LastMI != Br.MI) {
- BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize();
- LastMI->eraseFromParent();
- }
+ BBUtils->adjustBBSize(MBB, -LastMI->getDesc().getSize());
+ LastMI->eraseFromParent();
}
BBUtils->adjustBBOffsetsAfter(MBB);
++NumCBZ;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 563fdda56104..2c3ac816219f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1213,9 +1213,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MBBI = NewMI;
return true;
}
+ case ARM::VMOVHcc:
case ARM::VMOVScc:
case ARM::VMOVDcc: {
- unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
+ unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
MI.getOperand(1).getReg())
.add(MI.getOperand(2))
@@ -1951,6 +1952,24 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
+ case ARM::LOADDUAL:
+ case ARM::STOREDUAL: {
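+    // Expand the pseudo into a real LDRD/STRD: the GPRPair operand is split
+    // into its two 32-bit sub-registers, the remaining operands are copied
+    // across, and an always-execute predicate is appended.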
+ Register PairReg = MI.getOperand(0).getReg();
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
+ .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
+ Opcode == ARM::LOADDUAL ? RegState::Define : 0)
+ .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
+ Opcode == ARM::LOADDUAL ? RegState::Define : 0);
+ for (unsigned i = 1; i < MI.getNumOperands(); i++)
+ MIB.add(MI.getOperand(i));
+ MIB.add(predOps(ARMCC::AL));
+ MIB.cloneMemRefs(MI);
+ MI.eraseFromParent();
+ return true;
+ }
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
index 1fc5ff6921c6..6e19db3c7e22 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -1879,6 +1879,8 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
report_fatal_error("Can't return in GHC call convention");
else
return CC_ARM_APCS_GHC;
+ case CallingConv::CFGuard_Check:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
}
}
@@ -2564,8 +2566,12 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
return SelectCall(&I, "memset");
}
case Intrinsic::trap: {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(
- Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
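+    // The ARM-mode trap encodings cannot be used in Thumb functions, so pick
+    // tTRAP there.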
+ unsigned Opcode;
+ if (Subtarget->isThumb())
+ Opcode = ARM::tTRAP;
+ else
+ Opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode));
return true;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 01ae93086dcb..cb98b2b34efd 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2128,10 +2128,16 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
AFI->setLRIsSpilledForFarJump(true);
}
AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
+}
+
+void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
+ BitVector &SavedRegs) const {
+ TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
// If we have the "returned" parameter attribute which guarantees that we
// return the value which was passed in r0 unmodified (e.g. C++ 'structors),
// record that fact for IPRA.
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (AFI->getPreservesR0())
SavedRegs.set(ARM::R0);
}
@@ -2418,7 +2424,8 @@ void ARMFrameLowering::adjustForSegmentedStacks(
} else {
// Get TLS base address from the coprocessor
// mrc p15, #0, SR0, c13, c0, #3
- BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
+ BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
+ ScratchReg0)
.addImm(15)
.addImm(0)
.addImm(13)
@@ -2432,7 +2439,8 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Get the stack limit from the right offset
// ldr SR0, [sr0, #4 * TlsOffset]
- BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
+ BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
+ ScratchReg0)
.addReg(ScratchReg0)
.addImm(4 * TlsOffset)
.add(predOps(ARMCC::AL));
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
index 6d8aee597945..0462b01af707 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -53,6 +53,8 @@ public:
int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg, int SPAdj) const;
+ void getCalleeSaves(const MachineFunction &MF,
+ BitVector &SavedRegs) const override;
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h
index b5ac694e01f7..ca02cc739e11 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -27,14 +27,13 @@ class MachineInstr;
/// ARM preRA scheduler uses an unspecialized instance of the
/// ScoreboardHazardRecognizer.
class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
- MachineInstr *LastMI;
- unsigned FpMLxStalls;
+ MachineInstr *LastMI = nullptr;
+ unsigned FpMLxStalls = 0;
public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
const ScheduleDAG *DAG)
- : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"),
- LastMI(nullptr) {}
+ : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched") {}
HazardType getHazardType(SUnit *SU, int Stalls) override;
void Reset() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 8f6515c423eb..76a9ac12062d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -144,6 +145,8 @@ public:
// Thumb 2 Addressing Modes:
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ template <unsigned Shift>
+ bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
@@ -158,6 +161,9 @@ public:
SDValue &OffReg, SDValue &ShImm);
bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
+ template<int Min, int Max>
+ bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
+
inline bool is_so_imm(unsigned Imm) const {
return ARM_AM::getSOImmVal(Imm) != -1;
}
@@ -209,6 +215,59 @@ private:
unsigned NumVecs, const uint16_t *DOpcodes,
const uint16_t *QOpcodes);
+ /// Helper functions for setting up clusters of MVE predication operands.
+ template <typename SDValueVector>
+ void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
+ SDValue PredicateMask);
+ template <typename SDValueVector>
+ void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
+ SDValue PredicateMask, SDValue Inactive);
+
+ template <typename SDValueVector>
+ void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
+ template <typename SDValueVector>
+ void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
+
+ /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
+ void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
+
+ /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
+ void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
+ bool HasSaturationOperand);
+
+ /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
+ void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
+ uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
+
+ /// Select long MVE vector reductions with two vector operands
+ /// Stride is the number of vector element widths the instruction can operate
+ /// on:
+ /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
+ /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
+  /// Stride is used when addressing the OpcodesS array, which contains
+  /// multiple opcodes for each element width.
+  /// TySize is the index into the list of element types listed above.
+ void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
+ const uint16_t *OpcodesS, const uint16_t *OpcodesU,
+ size_t Stride, size_t TySize);
+
+ /// Select a 64-bit MVE vector reduction with two vector operands
+ /// arm_mve_vmlldava_[predicated]
+ void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
+ const uint16_t *OpcodesU);
+ /// Select a 72-bit MVE vector rounding reduction with two vector operands
+ /// int_arm_mve_vrmlldavha[_predicated]
+ void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
+ const uint16_t *OpcodesU);
+
+ /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
+ /// should be 2 or 4. The opcode array specifies the instructions
+ /// used for 8, 16 and 32-bit lane sizes respectively, and each
+ /// pointer points to a set of NumVecs sub-opcodes used for the
+ /// different stages (e.g. VLD20 versus VLD21) of each load family.
+ void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
+ const uint16_t *const *Opcodes);
+
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
/// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
/// for loading D registers.
@@ -1237,6 +1296,33 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
return true;
}
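+// Match an address of the form [base, #imm8 * (1 << Shift)], with the scaled
+// immediate in the range [-255, 255]; frame indices become target frame
+// indices, and anything else falls back to the base with a zero offset.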
+template <unsigned Shift>
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+ OffImm =
+ CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+}
+
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R - imm8 operands.
@@ -1319,11 +1405,27 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
SDValue &OffImm,
unsigned Shift) {
unsigned Opcode = Op->getOpcode();
- ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
- ? cast<LoadSDNode>(Op)->getAddressingMode()
- : cast<StoreSDNode>(Op)->getAddressingMode();
+ ISD::MemIndexedMode AM;
+ switch (Opcode) {
+ case ISD::LOAD:
+ AM = cast<LoadSDNode>(Op)->getAddressingMode();
+ break;
+ case ISD::STORE:
+ AM = cast<StoreSDNode>(Op)->getAddressingMode();
+ break;
+ case ISD::MLOAD:
+ AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
+ break;
+ case ISD::MSTORE:
+ AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode for Imm7Offset");
+ }
+
int RHSC;
- if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits.
+ // 7 bit constant, shifted by Shift.
+ if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
OffImm =
((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
@@ -1334,6 +1436,16 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
return false;
}
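+// Match a plain constant immediate lying within the given range and return it
+// as a target constant.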
+template <int Min, int Max>
+bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
+ int Val;
+ if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
+ OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
+ return true;
+ }
+ return false;
+}
+
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
@@ -1593,58 +1705,93 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
}
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
- LoadSDNode *LD = cast<LoadSDNode>(N);
- ISD::MemIndexedMode AM = LD->getAddressingMode();
- if (AM == ISD::UNINDEXED)
- return false;
- EVT LoadedVT = LD->getMemoryVT();
- if (!LoadedVT.isVector())
- return false;
- bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
- SDValue Offset;
- bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ EVT LoadedVT;
unsigned Opcode = 0;
- unsigned Align = LD->getAlignment();
- bool IsLE = Subtarget->isLittle();
+ bool isSExtLd, isPre;
+ unsigned Align;
+ ARMVCC::VPTCodes Pred;
+ SDValue PredReg;
+ SDValue Chain, Base, Offset;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return false;
+ LoadedVT = LD->getMemoryVT();
+ if (!LoadedVT.isVector())
+ return false;
+
+ Chain = LD->getChain();
+ Base = LD->getBasePtr();
+ Offset = LD->getOffset();
+ Align = LD->getAlignment();
+ isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+ isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ Pred = ARMVCC::None;
+ PredReg = CurDAG->getRegister(0, MVT::i32);
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return false;
+ LoadedVT = LD->getMemoryVT();
+ if (!LoadedVT.isVector())
+ return false;
+ Chain = LD->getChain();
+ Base = LD->getBasePtr();
+ Offset = LD->getOffset();
+ Align = LD->getAlignment();
+ isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+ isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ Pred = ARMVCC::Then;
+ PredReg = LD->getMask();
+ } else
+ llvm_unreachable("Expected a Load or a Masked Load!");
+
+ // We allow LE non-masked loads to change the type (for example, using a
+ // vldrb.8 as opposed to a vldrw.32). This can allow extra addressing modes
+ // or alignments for what is otherwise an equivalent instruction.
+ bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
+
+ SDValue NewOffset;
if (Align >= 2 && LoadedVT == MVT::v4i16 &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
if (isSExtLd)
Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
else
Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
} else if (LoadedVT == MVT::v8i8 &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
if (isSExtLd)
Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
else
Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
} else if (LoadedVT == MVT::v4i8 &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
if (isSExtLd)
Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
else
Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
} else if (Align >= 4 &&
- (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
+ (CanChangeType || LoadedVT == MVT::v4i32 ||
+ LoadedVT == MVT::v4f32) &&
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
else if (Align >= 2 &&
- (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
+ (CanChangeType || LoadedVT == MVT::v8i16 ||
+ LoadedVT == MVT::v8f16) &&
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
- else if ((IsLE || LoadedVT == MVT::v16i8) &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
+ else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
else
return false;
- SDValue Chain = LD->getChain();
- SDValue Base = LD->getBasePtr();
- SDValue Ops[] = {Base, Offset,
- CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
- CurDAG->getRegister(0, MVT::i32), Chain};
- SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0),
+ SDValue Ops[] = {Base, NewOffset,
+ CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
+ Chain};
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0),
MVT::i32, MVT::Other, Ops);
transferMemOperands(N, New);
ReplaceUses(SDValue(N, 0), SDValue(New, 1));
@@ -2304,6 +2451,268 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
CurDAG->RemoveDeadNode(N);
}
+template <typename SDValueVector>
+void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
+ SDValue PredicateMask) {
+ Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
+ Ops.push_back(PredicateMask);
+}
+
+template <typename SDValueVector>
+void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
+ SDValue PredicateMask,
+ SDValue Inactive) {
+ Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
+ Ops.push_back(PredicateMask);
+ Ops.push_back(Inactive);
+}
+
+template <typename SDValueVector>
+void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
+ Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+}
+
+template <typename SDValueVector>
+void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
+ EVT InactiveTy) {
+ Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(SDValue(
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
+}
+
+void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
+ bool Predicated) {
+ SDLoc Loc(N);
+ SmallVector<SDValue, 8> Ops;
+
+ uint16_t Opcode;
+ switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
+ case 32:
+ Opcode = Opcodes[0];
+ break;
+ case 64:
+ Opcode = Opcodes[1];
+ break;
+ default:
+ llvm_unreachable("bad vector element size in SelectMVE_WB");
+ }
+
+ Ops.push_back(N->getOperand(2)); // vector of base addresses
+
+ int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
+
+ if (Predicated)
+ AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
+ else
+ AddEmptyMVEPredicateToOps(Ops, Loc);
+
+ Ops.push_back(N->getOperand(0)); // chain
+
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+}
+
+void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
+ bool Immediate,
+ bool HasSaturationOperand) {
+ SDLoc Loc(N);
+ SmallVector<SDValue, 8> Ops;
+
+ // Two 32-bit halves of the value to be shifted
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(N->getOperand(2));
+
+ // The shift count
+ if (Immediate) {
+ int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
+ } else {
+ Ops.push_back(N->getOperand(3));
+ }
+
+ // The immediate saturation operand, if any
+ if (HasSaturationOperand) {
+ int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
+ int SatBit = (SatOp == 64 ? 0 : 1);
+ Ops.push_back(getI32Imm(SatBit, Loc));
+ }
+
+ // MVE scalar shifts are IT-predicable, so include the standard
+ // predicate arguments.
+ Ops.push_back(getAL(CurDAG, Loc));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+}
+
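The value being shifted here is a 64-bit quantity carried in two 32-bit GPR halves, which is why the operand list starts with two halves rather than one register. A quick standalone model of the data flow for the unpredicated, non-saturating case (a sketch only, not the instruction semantics of the saturating forms):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Two 32-bit halves of the value to be shifted, as in the operands above.
      uint32_t Lo = 0x80000001u, Hi = 0x00000001u;
      uint64_t V = ((uint64_t)Hi << 32) | Lo;
      V <<= 4; // the shift count operand
      printf("lo=%#x hi=%#x\n", (unsigned)V, (unsigned)(V >> 32));
      return 0;
    }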
+void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
+ uint16_t OpcodeWithNoCarry,
+ bool Add, bool Predicated) {
+ SDLoc Loc(N);
+ SmallVector<SDValue, 8> Ops;
+ uint16_t Opcode;
+
+ unsigned FirstInputOp = Predicated ? 2 : 1;
+
+ // Two input vectors and the input carry flag
+ Ops.push_back(N->getOperand(FirstInputOp));
+ Ops.push_back(N->getOperand(FirstInputOp + 1));
+ SDValue CarryIn = N->getOperand(FirstInputOp + 2);
+ ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
+ uint32_t CarryMask = 1 << 29;
+ uint32_t CarryExpected = Add ? 0 : CarryMask;
+ if (CarryInConstant &&
+ (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
+ Opcode = OpcodeWithNoCarry;
+ } else {
+ Ops.push_back(CarryIn);
+ Opcode = OpcodeWithCarry;
+ }
+
+ if (Predicated)
+ AddMVEPredicateToOps(Ops, Loc,
+ N->getOperand(FirstInputOp + 3), // predicate
+ N->getOperand(FirstInputOp - 1)); // inactive
+ else
+ AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
+
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+}
+
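The constant-carry test above keys on bit 29 because the intrinsic's carry operand travels in FPSCR layout, where bit 29 is the carry flag. A standalone sketch of the same opcode choice (the opcode values are hypothetical stand-ins for the VADC/VADCI-style pairs):

    #include <cstdint>
    #include <cstdio>

    // Mirrors the decision above: if the incoming carry is a constant whose
    // bit 29 already matches what the carry-initialising form assumes (clear
    // for the add, set for the subtract), the carry operand can be dropped.
    static uint16_t pickOpcode(bool Add, bool HaveConstCarry, uint32_t CarryIn) {
      const uint32_t CarryMask = 1u << 29;
      const uint32_t CarryExpected = Add ? 0 : CarryMask;
      const uint16_t WithCarry = 1, NoCarry = 2; // stand-ins, not real opcodes
      if (HaveConstCarry && (CarryIn & CarryMask) == CarryExpected)
        return NoCarry;
      return WithCarry;
    }

    int main() {
      printf("%u\n", pickOpcode(true, true, 0));        // 2: carry-free form
      printf("%u\n", pickOpcode(true, true, 1u << 29)); // 1: must pass carry
      return 0;
    }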
+static bool SDValueToConstBool(SDValue SDVal) {
+ assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
+ ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
+ uint64_t Value = SDValConstant->getZExtValue();
+ assert((Value == 0 || Value == 1) && "expected value 0 or 1");
+ return Value;
+}
+
+void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
+ const uint16_t *OpcodesS,
+ const uint16_t *OpcodesU,
+ size_t Stride, size_t TySize) {
+ assert(TySize < Stride && "Invalid TySize");
+ bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
+ bool IsSub = SDValueToConstBool(N->getOperand(2));
+ bool IsExchange = SDValueToConstBool(N->getOperand(3));
+ if (IsUnsigned) {
+ assert(!IsSub &&
+ "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
+ assert(!IsExchange &&
+ "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
+ }
+
+ auto OpIsZero = [N](size_t OpNo) {
+ if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
+ if (OpConst->getZExtValue() == 0)
+ return true;
+ return false;
+ };
+
+ // If the input accumulator value is not zero, select an instruction with an
+ // accumulator; otherwise select an instruction without one.
+ bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
+
+ const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
+ if (IsSub)
+ Opcodes += 4 * Stride;
+ if (IsExchange)
+ Opcodes += 2 * Stride;
+ if (IsAccum)
+ Opcodes += Stride;
+ uint16_t Opcode = Opcodes[TySize];
+
+ SDLoc Loc(N);
+ SmallVector<SDValue, 8> Ops;
+ // Push the accumulator operands, if they are used
+ if (IsAccum) {
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(N->getOperand(5));
+ }
+ // Push the two vector operands
+ Ops.push_back(N->getOperand(6));
+ Ops.push_back(N->getOperand(7));
+
+ if (Predicated)
+ AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
+ else
+ AddEmptyMVEPredicateToOps(Ops, Loc);
+
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+}
+
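The pointer bumps above amount to indexing a flat opcode table at (4*IsSub + 2*IsExchange + IsAccum) * Stride + TySize. A tiny sketch of that arithmetic:

    #include <cstddef>
    #include <cstdio>

    // Flat-index equivalent of the Opcodes += ... sequence above.
    static size_t vmlldavIndex(bool IsSub, bool IsExchange, bool IsAccum,
                               size_t Stride, size_t TySize) {
      return (4 * IsSub + 2 * IsExchange + IsAccum) * Stride + TySize;
    }

    int main() {
      // Subtracting, exchanging, accumulating, 16-bit lanes (Stride == 2):
      // index 14, i.e. the MVE_VMLSLDAVaxs16 slot of the signed table used
      // by the vmlldava intrinsics further down.
      printf("%zu\n", vmlldavIndex(true, true, true, 2, 0));
      return 0;
    }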
+void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
+ const uint16_t *OpcodesS,
+ const uint16_t *OpcodesU) {
+ EVT VecTy = N->getOperand(6).getValueType();
+ size_t SizeIndex;
+ switch (VecTy.getVectorElementType().getSizeInBits()) {
+ case 16:
+ SizeIndex = 0;
+ break;
+ case 32:
+ SizeIndex = 1;
+ break;
+ default:
+ llvm_unreachable("bad vector element size");
+ }
+
+ SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
+}
+
+void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
+ const uint16_t *OpcodesS,
+ const uint16_t *OpcodesU) {
+ assert(
+ N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
+ 32 &&
+ "bad vector element size");
+ SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
+}
+
+void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
+ const uint16_t *const *Opcodes) {
+ EVT VT = N->getValueType(0);
+ SDLoc Loc(N);
+
+ const uint16_t *OurOpcodes;
+ switch (VT.getVectorElementType().getSizeInBits()) {
+ case 8:
+ OurOpcodes = Opcodes[0];
+ break;
+ case 16:
+ OurOpcodes = Opcodes[1];
+ break;
+ case 32:
+ OurOpcodes = Opcodes[2];
+ break;
+ default:
+ llvm_unreachable("bad vector element size in SelectMVE_VLD");
+ }
+
+ EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
+ EVT ResultTys[] = {DataTy, MVT::Other};
+
+ auto Data = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
+ SDValue Chain = N->getOperand(0);
+ for (unsigned Stage = 0; Stage < NumVecs; ++Stage) {
+ SDValue Ops[] = {Data, N->getOperand(2), Chain};
+ auto LoadInst =
+ CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
+ Data = SDValue(LoadInst, 0);
+ Chain = SDValue(LoadInst, 1);
+ }
+
+ for (unsigned i = 0; i < NumVecs; i++)
+ ReplaceUses(SDValue(N, i),
+ CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, Data));
+ ReplaceUses(SDValue(N, NumVecs), Chain);
+ CurDAG->RemoveDeadNode(N);
+}
+
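As used for arm_mve_vld2q/vld4q further down, the opcode table is two-level: the outer index picks the lane size, the inner index the stage. A self-contained sketch of the lookup and stage loop (the opcode values are hypothetical stand-ins for the ARM::MVE_VLD2x_y enumerators):

    #include <cstdint>
    #include <cstdio>

    static const uint16_t Op8[]  = {0x80, 0x81}; // e.g. VLD20_8, VLD21_8
    static const uint16_t Op16[] = {0x90, 0x91};
    static const uint16_t Op32[] = {0xA0, 0xA1};
    static const uint16_t *const Opcodes[] = {Op8, Op16, Op32};

    int main() {
      unsigned LaneBits = 16, NumVecs = 2;
      const uint16_t *Ours =
          Opcodes[LaneBits == 8 ? 0 : LaneBits == 16 ? 1 : 2];
      // Each stage consumes the tuple value produced by the previous stage.
      for (unsigned Stage = 0; Stage < NumVecs; ++Stage)
        printf("stage %u -> opcode %#x\n", Stage, Ours[Stage]);
      return 0;
    }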
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
@@ -3089,6 +3498,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
+ case ISD::MLOAD:
+ if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
+ return;
+ // Other cases are autogenerated.
+ break;
case ARMISD::WLS:
case ARMISD::LE: {
SDValue Ops[] = { N->getOperand(1),
@@ -3101,6 +3515,26 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->RemoveDeadNode(N);
return;
}
+ case ARMISD::LDRD: {
+ if (Subtarget->isThumb2())
+ break; // TableGen handles isel in this case.
+ SDValue Base, RegOffset, ImmOffset;
+ const SDValue &Chain = N->getOperand(0);
+ const SDValue &Addr = N->getOperand(1);
+ SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
+ SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
+ SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
+ {MVT::Untyped, MVT::Other}, Ops);
+ SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
+ SDValue(New, 0));
+ SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
+ SDValue(New, 0));
+ ReplaceUses(SDValue(N, 0), Lo);
+ ReplaceUses(SDValue(N, 1), Hi);
+ ReplaceUses(SDValue(N, 2), SDValue(New, 1));
+ CurDAG->RemoveDeadNode(N);
+ return;
+ }
case ARMISD::LOOP_DEC: {
SDValue Ops[] = { N->getOperand(1),
N->getOperand(2),
@@ -4028,6 +4462,117 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
return;
}
+
+ case Intrinsic::arm_mve_vldr_gather_base_wb:
+ case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
+ static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
+ ARM::MVE_VLDRDU64_qi_pre};
+ SelectMVE_WB(N, Opcodes,
+ IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
+ return;
+ }
+
+ case Intrinsic::arm_mve_vld2q: {
+ static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
+ static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
+ ARM::MVE_VLD21_16};
+ static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
+ ARM::MVE_VLD21_32};
+ static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
+ SelectMVE_VLD(N, 2, Opcodes);
+ return;
+ }
+
+ case Intrinsic::arm_mve_vld4q: {
+ static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
+ ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
+ static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
+ ARM::MVE_VLD42_16,
+ ARM::MVE_VLD43_16};
+ static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
+ ARM::MVE_VLD42_32,
+ ARM::MVE_VLD43_32};
+ static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
+ SelectMVE_VLD(N, 4, Opcodes);
+ return;
+ }
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_mve_urshrl:
+ SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
+ return;
+ case Intrinsic::arm_mve_uqshll:
+ SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
+ return;
+ case Intrinsic::arm_mve_srshrl:
+ SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
+ return;
+ case Intrinsic::arm_mve_sqshll:
+ SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
+ return;
+ case Intrinsic::arm_mve_uqrshll:
+ SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
+ return;
+ case Intrinsic::arm_mve_sqrshrl:
+ SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
+ return;
+ case Intrinsic::arm_mve_lsll:
+ SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false);
+ return;
+ case Intrinsic::arm_mve_asrl:
+ SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false);
+ return;
+
+ case Intrinsic::arm_mve_vadc:
+ case Intrinsic::arm_mve_vadc_predicated:
+ SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
+ IntNo == Intrinsic::arm_mve_vadc_predicated);
+ return;
+
+ case Intrinsic::arm_mve_vmlldava:
+ case Intrinsic::arm_mve_vmlldava_predicated: {
+ static const uint16_t OpcodesU[] = {
+ ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
+ ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
+ };
+ static const uint16_t OpcodesS[] = {
+ ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
+ ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
+ ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
+ ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
+ ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
+ ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
+ ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
+ ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
+ };
+ SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
+ OpcodesS, OpcodesU);
+ return;
+ }
+
+ case Intrinsic::arm_mve_vrmlldavha:
+ case Intrinsic::arm_mve_vrmlldavha_predicated: {
+ static const uint16_t OpcodesU[] = {
+ ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
+ };
+ static const uint16_t OpcodesS[] = {
+ ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
+ ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
+ ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
+ ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
+ };
+ SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
+ OpcodesS, OpcodesU);
+ return;
+ }
}
break;
}
@@ -4551,10 +5096,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
switch(ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_i:
- // FIXME: It seems strange that 'i' is needed here since it's supposed to
- // be an immediate and not a memory constraint.
- LLVM_FALLTHROUGH;
case InlineAsm::Constraint_m:
case InlineAsm::Constraint_o:
case InlineAsm::Constraint_Q:
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
index db26feb57010..cf738cd66434 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -78,6 +78,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
@@ -142,6 +143,11 @@ static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
cl::desc("Maximum size of ALL constants to promote into a constant pool"),
cl::init(128));
+static cl::opt<unsigned>
+MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
+ cl::desc("Maximum interleave factor for MVE VLDn to generate."),
+ cl::init(2));
+
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -209,6 +215,9 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
VT != MVT::v2i64 && VT != MVT::v1i64)
for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
+ if (!VT.isFloatingPoint())
+ for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
+ setOperationAction(Opcode, VT, Legal);
}
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
@@ -296,6 +305,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
+ setIndexedMaskedLoadAction(im, VT, Legal);
+ setIndexedMaskedStoreAction(im, VT, Legal);
}
}
@@ -322,6 +333,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
+ setIndexedMaskedLoadAction(im, VT, Legal);
+ setIndexedMaskedStoreAction(im, VT, Legal);
}
if (HasMVEFP) {
@@ -366,6 +379,13 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
+ // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
+
// Some truncating stores are legal too.
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
@@ -374,12 +394,12 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
// Pre and Post inc on these are legal, given the correct extends
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
- setIndexedLoadAction(im, MVT::v8i8, Legal);
- setIndexedStoreAction(im, MVT::v8i8, Legal);
- setIndexedLoadAction(im, MVT::v4i8, Legal);
- setIndexedStoreAction(im, MVT::v4i8, Legal);
- setIndexedLoadAction(im, MVT::v4i16, Legal);
- setIndexedStoreAction(im, MVT::v4i16, Legal);
+ for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
+ setIndexedLoadAction(im, VT, Legal);
+ setIndexedStoreAction(im, VT, Legal);
+ setIndexedMaskedLoadAction(im, VT, Legal);
+ setIndexedMaskedStoreAction(im, VT, Legal);
+ }
}
// Predicate types
@@ -446,7 +466,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
{ RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
{ RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
- { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
// Double-precision comparisons.
{ RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
@@ -456,7 +475,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
{ RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
{ RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
- { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
// Floating-point to integer conversions.
// i64 conversions are done via library routines even when generating VFP
@@ -520,7 +538,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
// Single-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 4
@@ -538,7 +555,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
// Floating-point to integer conversions.
// RTABI chapter 4.1.2, Table 6
@@ -964,19 +980,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
}
if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
- if (Subtarget->hasFullFP16())
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
+ if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ }
}
- if (!Subtarget->hasFP16())
+ if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
-
- if (!Subtarget->hasFP64())
- setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
+ }
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -1050,6 +1073,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
// MVE lowers 64 bit shifts to lsll and lsrl
// assuming that ISD::SRL and SRA of i64 are already marked custom
@@ -1170,9 +1195,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
}
- if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
- for (auto &VT : {MVT::f32, MVT::f64})
- setOperationAction(ISD::FPOWI, VT, Custom);
+ if (Subtarget->getTargetTriple().isOSMSVCRT()) {
+ // MSVCRT doesn't have powi; fall back to pow
+ setLibcallName(RTLIB::POWI_F32, nullptr);
+ setLibcallName(RTLIB::POWI_F64, nullptr);
+ }
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
@@ -1571,6 +1598,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+ case ARMISD::LDRD: return "ARMISD::LDRD";
+ case ARMISD::STRD: return "ARMISD::STRD";
+
case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
@@ -1855,6 +1885,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
case CallingConv::ARM_AAPCS:
case CallingConv::ARM_APCS:
case CallingConv::GHC:
+ case CallingConv::CFGuard_Check:
return CC;
case CallingConv::PreserveMost:
return CallingConv::PreserveMost;
@@ -1914,6 +1945,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
case CallingConv::PreserveMost:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::CFGuard_Check:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
}
}
@@ -2062,11 +2095,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction::CallSiteInfo CSInfo;
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
- auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
bool PreferIndirect = false;
// Disable tail calls if they're not supported.
- if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
+ if (!Subtarget->supportsTailCall())
isTailCall = false;
if (isa<GlobalAddressSDNode>(Callee)) {
@@ -2331,12 +2363,14 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
- unsigned TargetFlags = GV->hasDLLImportStorageClass()
- ? ARMII::MO_DLLIMPORT
- : ARMII::MO_NO_FLAG;
+ unsigned TargetFlags = ARMII::MO_NO_FLAG;
+ if (GV->hasDLLImportStorageClass())
+ TargetFlags = ARMII::MO_DLLIMPORT;
+ else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ TargetFlags = ARMII::MO_COFFSTUB;
Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
TargetFlags);
- if (GV->hasDLLImportStorageClass())
+ if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
Callee =
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
@@ -2941,9 +2975,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!Subtarget->supportsTailCall())
return false;
- auto Attr =
- CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
- if (!CI->isTailCall() || Attr.getValueAsString() == "true")
+ if (!CI->isTailCall())
return false;
return true;
@@ -3629,6 +3661,49 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
}
+ case Intrinsic::arm_cls: {
+ const SDValue &Operand = Op.getOperand(1);
+ const EVT VTy = Op.getValueType();
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
+ SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
+ SDValue SHL =
+ DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
+ SDValue OR =
+ DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
+ SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
+ return Result;
+ }
+ case Intrinsic::arm_cls64: {
+ // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
+ // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
+ const SDValue &Operand = Op.getOperand(1);
+ const EVT VTy = Op.getValueType();
+
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
+ DAG.getConstant(1, dl, VTy));
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
+ DAG.getConstant(0, dl, VTy));
+ SDValue Constant0 = DAG.getConstant(0, dl, VTy);
+ SDValue Constant1 = DAG.getConstant(1, dl, VTy);
+ SDValue Constant31 = DAG.getConstant(31, dl, VTy);
+ SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
+ SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
+ SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
+ SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
+ SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
+ SDValue CheckLo =
+ DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
+ SDValue HiIsZero =
+ DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
+ SDValue AdjustedLo =
+ DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
+ SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
+ SDValue Result =
+ DAG.getSelect(dl, VTy, CheckLo,
+ DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
+ return Result;
+ }
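The node chain built above for the 32-bit case is the branch-free identity cls(x) = clz(((x ^ (x >> 31)) << 1) | 1), with >> an arithmetic shift. A self-contained check of that identity (using the GCC/Clang clz builtin):

    #include <cstdint>
    #include <cstdio>

    // Matches the SRA/XOR/SHL/OR/CTLZ sequence above; the OR with 1 makes the
    // clz argument nonzero, so the result is always well defined.
    static unsigned cls32(int32_t X) {
      uint32_t Y = ((uint32_t)(X ^ (X >> 31)) << 1) | 1u;
      return (unsigned)__builtin_clz(Y);
    }

    int main() {
      printf("%u %u %u\n", cls32(0), cls32(-1), cls32(1)); // 31 31 30
      return 0;
    }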
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -3698,6 +3773,10 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
case Intrinsic::arm_neon_vtbl2:
return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::arm_mve_pred_i2v:
+ case Intrinsic::arm_mve_pred_v2i:
+ return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
}
}
@@ -4887,7 +4966,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Opcode = ARMISD::CSINC;
std::swap(TrueVal, FalseVal);
std::swap(TVal, FVal);
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
if (Opcode) {
@@ -4897,7 +4976,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
std::swap(TrueVal, FalseVal);
std::swap(TVal, FVal);
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
// Attempt to use ZR checking TVal is 0, possibly inverting the condition
@@ -4906,7 +4985,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
if (FVal == 0 && Opcode != ARMISD::CSINC) {
std::swap(TrueVal, FalseVal);
std::swap(TVal, FVal);
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
if (TVal == 0)
TrueVal = DAG.getRegister(ARM::ZR, MVT::i32);
@@ -4950,7 +5029,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, LHS.getValueType());
std::swap(TrueVal, FalseVal);
}
}
@@ -5310,17 +5389,31 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
- if (isUnsupportedFloatingType(Op.getOperand(0).getValueType())) {
+
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
+
+ if (isUnsupportedFloatingType(SrcVal.getValueType())) {
RTLIB::Libcall LC;
- if (Op.getOpcode() == ISD::FP_TO_SINT)
- LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
+ if (Op.getOpcode() == ISD::FP_TO_SINT ||
+ Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
Op.getValueType());
else
- LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
+ LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
Op.getValueType());
+ SDLoc Loc(Op);
MakeLibCallOptions CallOptions;
- return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
- CallOptions, SDLoc(Op)).first;
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+ SDValue Result;
+ std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
+ CallOptions, Loc, Chain);
+ return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
+ }
+
+ // FIXME: Remove this when we have strict fp instruction selection patterns
+ if (IsStrict) {
+ DAG.mutateStrictFPToFP(Op.getNode());
}
return Op;
@@ -5517,7 +5610,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = StringSwitch<unsigned>(RegName)
.Case("sp", ARM::SP)
@@ -7745,6 +7838,92 @@ static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(ARMCC::NE, dl, MVT::i32));
}
+static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
+ ArrayRef<int> ShuffleMask,
+ SelectionDAG &DAG) {
+ // Attempt to lower the vector shuffle using as many whole-register movs as
+ // possible. This is useful for types smaller than 32 bits, which would
+ // often otherwise become a series of GPR movs.
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ if (VT.getScalarSizeInBits() >= 32)
+ return SDValue();
+
+ assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
+ "Unexpected vector type");
+ int NumElts = VT.getVectorNumElements();
+ int QuarterSize = NumElts / 4;
+ // The four final parts of the vector, as i32's
+ SDValue Parts[4];
+
+ // Look for full-lane vmovs like <0,1,2,3> or <u,5,6,7> etc. (but not
+ // <u,u,u,u>), returning the vmov lane index.
+ auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
+ // Detect which mov lane this would be from the first non-undef element.
+ int MovIdx = -1;
+ for (int i = 0; i < Length; i++) {
+ if (ShuffleMask[Start + i] >= 0) {
+ if (ShuffleMask[Start + i] % Length != i)
+ return -1;
+ MovIdx = ShuffleMask[Start + i] / Length;
+ break;
+ }
+ }
+ // If all items are undef, leave this for other combines
+ if (MovIdx == -1)
+ return -1;
+ // Check the remaining values are the correct part of the same mov
+ for (int i = 1; i < Length; i++) {
+ if (ShuffleMask[Start + i] >= 0 &&
+ (ShuffleMask[Start + i] / Length != MovIdx ||
+ ShuffleMask[Start + i] % Length != i))
+ return -1;
+ }
+ return MovIdx;
+ };
+
+ for (int Part = 0; Part < 4; ++Part) {
+ // Does this part look like a whole-lane mov?
+ int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
+ if (Elt != -1) {
+ SDValue Input = Op->getOperand(0);
+ if (Elt >= 4) {
+ Input = Op->getOperand(1);
+ Elt -= 4;
+ }
+ SDValue BitCast = DAG.getBitcast(MVT::v4i32, Input);
+ Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, BitCast,
+ DAG.getConstant(Elt, dl, MVT::i32));
+ }
+ }
+
+ // Nothing interesting found, just return
+ if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
+ return SDValue();
+
+ // The other parts need to be built with the old shuffle vector, bitcast to
+ // v4i32, using extract_vector_elts.
+ if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
+ SmallVector<int, 16> NewShuffleMask;
+ for (int Part = 0; Part < 4; ++Part)
+ for (int i = 0; i < QuarterSize; i++)
+ NewShuffleMask.push_back(
+ Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
+ SDValue NewShuffle = DAG.getVectorShuffle(
+ VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
+ SDValue BitCast = DAG.getBitcast(MVT::v4i32, NewShuffle);
+
+ for (int Part = 0; Part < 4; ++Part)
+ if (!Parts[Part])
+ Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ BitCast, DAG.getConstant(Part, dl, MVT::i32));
+ }
+ // Build a vector out of the various parts and bitcast it back to the original
+ // type.
+ SDValue NewVec = DAG.getBuildVector(MVT::v4i32, dl, Parts);
+ return DAG.getBitcast(VT, NewVec);
+}
+
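As a worked example of the detector above: on v8i16 each quarter covers two lanes, so the mask <2,3,u,u,4,5,6,7> yields lane indices 1, -1, 2, 3 for the four parts. A standalone port of the getMovIdx lambda that reproduces this:

    #include <cstdio>

    // Returns the 32-bit lane a quarter of the shuffle mask copies from, or
    // -1 if it is not a whole-lane move (lanes >= 4 name the second input).
    static int getMovIdx(const int *Mask, int Start, int Length) {
      int MovIdx = -1;
      for (int i = 0; i < Length; i++) {
        if (Mask[Start + i] >= 0) {
          if (Mask[Start + i] % Length != i)
            return -1;
          MovIdx = Mask[Start + i] / Length;
          break;
        }
      }
      if (MovIdx == -1) // all undef: leave for other combines
        return -1;
      for (int i = 1; i < Length; i++)
        if (Mask[Start + i] >= 0 && (Mask[Start + i] / Length != MovIdx ||
                                     Mask[Start + i] % Length != i))
          return -1;
      return MovIdx;
    }

    int main() {
      int Mask[8] = {2, 3, -1, -1, 4, 5, 6, 7}; // v8i16: quarters of length 2
      for (int Part = 0; Part < 4; ++Part)
        printf("part %d -> lane %d\n", Part, getMovIdx(Mask, Part * 2, 2));
      return 0; // prints lanes 1, -1, 2, 3
    }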
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) {
SDValue V1 = Op.getOperand(0);
@@ -7939,6 +8118,10 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
return NewOp;
+ if (ST->hasMVEIntegerOps())
+ if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
+ return NewOp;
+
return SDValue();
}
@@ -8905,6 +9088,24 @@ static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
}
+void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
+
+ if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+ !Subtarget->isThumb1Only() && LD->isVolatile()) {
+ SDLoc dl(N);
+ SDValue Result = DAG.getMemIntrinsicNode(
+ ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
+ {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
+ Result.getValue(0), Result.getValue(1));
+ Results.append({Pair, Result.getValue(2)});
+ }
+}
+
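The BUILD_PAIR above reassembles the i64 result from the two i32 values LDRD produces, with value 0 as the low half. Numerically, that is the usual lo | (hi << 32) recombination, as this small sketch shows:

    #include <cstdint>
    #include <cstdio>

    // Reassembly performed by the BUILD_PAIR above: operand 0 is the low
    // half, operand 1 the high half.
    static uint64_t buildPair(uint32_t Lo, uint32_t Hi) {
      return ((uint64_t)Hi << 32) | Lo;
    }

    int main() {
      // Prints 0x11223344ddccbbaa.
      printf("%#llx\n",
             (unsigned long long)buildPair(0xDDCCBBAAu, 0x11223344u));
      return 0;
    }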
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT MemVT = ST->getMemoryVT();
@@ -8934,6 +9135,40 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
ST->getMemOperand());
}
+static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT MemVT = ST->getMemoryVT();
+ assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
+
+ if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+ !Subtarget->isThumb1Only() && ST->isVolatile()) {
+ SDNode *N = Op.getNode();
+ SDLoc dl(N);
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+ DAG.getTargetConstant(1, dl, MVT::i32));
+
+ return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
+ {ST->getChain(), Lo, Hi, ST->getBasePtr()},
+ MemVT, ST->getMemOperand());
+ } else if (Subtarget->hasMVEIntegerOps() &&
+ ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+ MemVT == MVT::v16i1))) {
+ return LowerPredicateStore(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+static bool isZeroVector(SDValue N) {
+ return (ISD::isBuildVectorAllZeros(N.getNode()) ||
+ (N->getOpcode() == ARMISD::VMOVIMM &&
+ isNullConstant(N->getOperand(0))));
+}
+
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
MVT VT = Op.getSimpleValueType();
@@ -8941,13 +9176,7 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
SDValue PassThru = N->getPassThru();
SDLoc dl(Op);
- auto IsZero = [](SDValue PassThru) {
- return (ISD::isBuildVectorAllZeros(PassThru.getNode()) ||
- (PassThru->getOpcode() == ARMISD::VMOVIMM &&
- isNullConstant(PassThru->getOperand(0))));
- };
-
- if (IsZero(PassThru))
+ if (isZeroVector(PassThru))
return Op;
// MVE Masked loads use zero as the passthru value. Here we convert undef to
@@ -8955,12 +9184,13 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
DAG.getTargetConstant(0, dl, MVT::i32));
SDValue NewLoad = DAG.getMaskedLoad(
- VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(),
- N->getMemOperand(), N->getExtensionType(), N->isExpandingLoad());
+ VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
+ N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
+ N->getExtensionType(), N->isExpandingLoad());
SDValue Combo = NewLoad;
if (!PassThru.isUndef() &&
(PassThru.getOpcode() != ISD::BITCAST ||
- !IsZero(PassThru->getOperand(0))))
+ !isZeroVector(PassThru->getOperand(0))))
Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
}
@@ -9043,58 +9273,6 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
Results.push_back(SDValue(CmpSwap, 2));
}
-static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
- SelectionDAG &DAG) {
- const auto &TLI = DAG.getTargetLoweringInfo();
-
- assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
- "Custom lowering is MSVCRT specific!");
-
- SDLoc dl(Op);
- SDValue Val = Op.getOperand(0);
- MVT Ty = Val->getSimpleValueType(0);
- SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
- SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
- TLI.getPointerTy(DAG.getDataLayout()));
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
-
- Entry.Node = Val;
- Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.IsZExt = true;
- Args.push_back(Entry);
-
- Entry.Node = Exponent;
- Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.IsZExt = true;
- Args.push_back(Entry);
-
- Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
-
- // In the in-chain to the call is the entry node If we are emitting a
- // tailcall, the chain will be mutated if the node has a non-entry input
- // chain.
- SDValue InChain = DAG.getEntryNode();
- SDValue TCChain = InChain;
-
- const Function &F = DAG.getMachineFunction().getFunction();
- bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
- F.getReturnType() == LCRTy;
- if (IsTC)
- InChain = TCChain;
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(InChain)
- .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
- .setTailCall(IsTC);
- std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
-
- // Return the chain (the DAG root) if it is a tail call
- return !CI.second.getNode() ? DAG.getRoot() : CI.first;
-}
-
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
@@ -9114,6 +9292,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -9170,7 +9350,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::LOAD:
return LowerPredicateLoad(Op, DAG);
case ISD::STORE:
- return LowerPredicateStore(Op, DAG);
+ return LowerSTORE(Op, DAG, Subtarget);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
case ISD::ATOMIC_LOAD:
@@ -9182,9 +9362,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->isTargetWindows())
return LowerDYNAMIC_STACKALLOC(Op, DAG);
llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
- case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
}
}
@@ -9271,7 +9452,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ABS:
lowerABS(N, Results, DAG);
return ;
-
+ case ISD::LOAD:
+ LowerLOAD(N, Results, DAG);
+ break;
}
if (Res.getNode())
Results.push_back(Res);
@@ -11711,7 +11894,8 @@ static SDValue PerformADDCombine(SDNode *N,
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
///
static SDValue PerformSUBCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -11720,7 +11904,28 @@ static SDValue PerformSUBCombine(SDNode *N,
if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
return Result;
- return SDValue();
+ if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
+ return SDValue();
+
+ // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
+ // so that we can readily pattern-match more MVE instructions that can use
+ // a scalar operand.
+ SDValue VDup = N->getOperand(1);
+ if (VDup->getOpcode() != ARMISD::VDUP)
+ return SDValue();
+
+ SDValue VMov = N->getOperand(0);
+ if (VMov->getOpcode() == ISD::BITCAST)
+ VMov = VMov->getOperand(0);
+
+ if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
+ return SDValue();
+
+ SDLoc dl(N);
+ SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DCI.DAG.getConstant(0, dl, MVT::i32),
+ VDup->getOperand(0));
+ return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
}
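A scalar model of why the fold is sound: negating a splat lane-for-lane equals splatting the negation, so the vector subtract can become a scalar one feeding the vdup.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t X = 42;
      int32_t LaneBefore = 0 - X; // each lane of (sub (vmovImm 0), (vdup x))
      int32_t LaneAfter = -X;     // each lane of (vdup (sub 0, x))
      printf("%d %d\n", LaneBefore, LaneAfter); // identical
      return 0;
    }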
/// PerformVMULCombine
@@ -12736,6 +12941,39 @@ PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return SDValue();
}
+static SDValue PerformVCMPCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasMVEIntegerOps())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ ARMCC::CondCodes Cond =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ SDLoc dl(N);
+
+ // vcmp X, 0, cc -> vcmpz X, cc
+ if (isZeroVector(Op1))
+ return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0,
+ N->getOperand(2));
+
+ unsigned SwappedCond = getSwappedCondition(Cond);
+ if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
+ // vcmp 0, X, cc -> vcmpz X, reversed(cc)
+ if (isZeroVector(Op0))
+ return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
+ DCI.DAG.getConstant(SwappedCond, dl, MVT::i32));
+ // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
+ if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
+ return DCI.DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
+ DCI.DAG.getConstant(SwappedCond, dl, MVT::i32));
+ }
+
+ return SDValue();
+}
+
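The swapped-condition cases rely on the usual comparison identity cmp(a, b, cc) == cmp(b, a, swapped(cc)), e.g. LT swaps to GT. A tiny two-condition model of the fact getSwappedCondition encodes for the full MVE condition set:

    #include <cstdio>

    enum Cond { LT, GT };
    // cmp(a, b, LT) holds exactly when cmp(b, a, GT) holds.
    static bool cmp(int A, int B, Cond C) { return C == LT ? A < B : A > B; }

    int main() {
      int A = 3, B = 7;
      printf("%d %d\n", (int)cmp(A, B, LT), (int)cmp(B, A, GT)); // both 1
      return 0;
    }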
/// PerformInsertEltCombine - Target-specific dag combine xforms for
/// ISD::INSERT_VECTOR_ELT.
static SDValue PerformInsertEltCombine(SDNode *N,
@@ -13844,11 +14082,12 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
SDValue N0 = N->getOperand(0);
- // Check for sign- and zero-extensions of vector extract operations of 8-
- // and 16-bit vector elements. NEON supports these directly. They are
+ // Check for sign- and zero-extensions of vector extract operations of 8- and
+ // 16-bit vector elements. NEON and MVE support these directly. They are
// handled during DAG combining because type legalization will promote them
// to 32-bit types and it is messy to recognize the operations after that.
- if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
+ N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue Vec = N0.getOperand(0);
SDValue Lane = N0.getOperand(1);
EVT VT = N->getValueType(0);
@@ -14067,7 +14306,7 @@ static SDValue PerformHWLoopCombine(SDNode *N,
return SDValue();
if (Negate)
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
return (CC == ISD::SETEQ && Imm == 0) ||
@@ -14371,7 +14610,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
- case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
@@ -14415,6 +14654,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformARMBUILD_VECTORCombine(N, DCI);
case ARMISD::PREDICATE_CAST:
return PerformPREDICATE_CASTCombine(N, DCI);
+ case ARMISD::VCMP:
+ return PerformVCMPCombine(N, DCI, Subtarget);
case ARMISD::SMULWB: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
@@ -14523,7 +14764,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
if (!VT.isSimple())
return false;
- // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus
+ // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
auto Ty = VT.getSimpleVT().SimpleTy;
@@ -14725,8 +14966,12 @@ bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Mul:
+ case Instruction::ICmp:
return true;
case Instruction::Sub:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
return Operand == 1;
default:
return false;
@@ -14808,6 +15053,40 @@ int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
return -1;
}
+/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+/// expanded to FMAs when this method returns true, otherwise fmuladd is
+/// expanded to fmul + fadd.
+///
+/// ARM supports both fused and unfused multiply-add operations; we already
+/// lower a pair of fmul and fadd to the latter so it's not clear that there
+/// would be a gain or that the gain would be worthwhile enough to risk
+/// correctness bugs.
+///
+/// For MVE, we set this to true, as it removes the need for some patterns
+/// (and we don't have a non-fused floating-point instruction).
+bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const {
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::v4f32:
+ case MVT::v8f16:
+ return Subtarget->hasMVEFloatOps();
+ case MVT::f16:
+ return Subtarget->useFPVFMx16();
+ case MVT::f32:
+ return Subtarget->useFPVFMx();
+ case MVT::f64:
+ return Subtarget->useFPVFMx64();
+ default:
+ break;
+ }
+
+ return false;
+}
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
@@ -14850,7 +15129,7 @@ static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
V = -V;
}
- unsigned NumBytes = std::max(VT.getSizeInBits() / 8, 1U);
+ unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
// MVE: size * imm7
if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
@@ -15155,14 +15434,19 @@ static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
}
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align,
- bool isSEXTLoad, bool isLE, SDValue &Base,
- SDValue &Offset, bool &isInc,
- SelectionDAG &DAG) {
+ bool isSEXTLoad, bool IsMasked, bool isLE,
+ SDValue &Base, SDValue &Offset,
+ bool &isInc, SelectionDAG &DAG) {
if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
return false;
if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
return false;
+ // We allow LE non-masked loads to change the type (for example, using a
+ // vldrb.8 as opposed to a vldrw.32). This can allow extra addressing modes
+ // or alignments for what is otherwise an equivalent instruction.
+ bool CanChangeType = isLE && !IsMasked;
+
ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
int RHSC = (int)RHS->getZExtValue();
@@ -15181,7 +15465,7 @@ static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align,
};
// Try to find a matching instruction based on s/zext, Alignment, Offset and
- // (in BE) type.
+ // (in BE/masked) type.
Base = Ptr->getOperand(0);
if (VT == MVT::v4i16) {
if (Align >= 2 && IsInRange(RHSC, 0x80, 2))
@@ -15189,13 +15473,15 @@ static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align,
} else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
if (IsInRange(RHSC, 0x80, 1))
return true;
- } else if (Align >= 4 && (isLE || VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ } else if (Align >= 4 &&
+ (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
IsInRange(RHSC, 0x80, 4))
return true;
- else if (Align >= 2 && (isLE || VT == MVT::v8i16 || VT == MVT::v8f16) &&
+ else if (Align >= 2 &&
+ (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
IsInRange(RHSC, 0x80, 2))
return true;
- else if ((isLE || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
+ else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
return true;
return false;
}
@@ -15215,6 +15501,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue Ptr;
unsigned Align;
bool isSEXTLoad = false;
+ bool IsMasked = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
@@ -15224,6 +15511,17 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
Align = ST->getAlignment();
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ Align = LD->getAlignment();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ IsMasked = true;
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ Align = ST->getAlignment();
+ IsMasked = true;
} else
return false;
@@ -15232,8 +15530,8 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (VT.isVector())
isLegal = Subtarget->hasMVEIntegerOps() &&
getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad,
- Subtarget->isLittle(), Base, Offset,
- isInc, DAG);
+ IsMasked, Subtarget->isLittle(), Base,
+ Offset, isInc, DAG);
else {
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
@@ -15261,6 +15559,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue Ptr;
unsigned Align;
bool isSEXTLoad = false, isNonExt;
+ bool IsMasked = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
@@ -15272,6 +15571,19 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
Ptr = ST->getBasePtr();
Align = ST->getAlignment();
isNonExt = !ST->isTruncatingStore();
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ Align = LD->getAlignment();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
+ IsMasked = true;
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ Align = ST->getAlignment();
+ isNonExt = !ST->isTruncatingStore();
+ IsMasked = true;
} else
return false;
@@ -15295,7 +15607,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isLegal = false;
if (VT.isVector())
isLegal = Subtarget->hasMVEIntegerOps() &&
- getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad,
+ getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, IsMasked,
Subtarget->isLittle(), Base, Offset,
isInc, DAG);
else {
@@ -16048,7 +16360,8 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
}
SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
- SDValue SrcVal = Op.getOperand(0);
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
const unsigned DstSz = Op.getValueType().getSizeInBits();
const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
@@ -16068,34 +16381,35 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDLoc Loc(Op);
RTLIB::Libcall LC;
MakeLibCallOptions CallOptions;
- if (SrcSz == 16) {
- // Instruction from 16 -> 32
- if (Subtarget->hasFP16())
- SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, SrcVal);
- // Lib call from 16 -> 32
- else {
- LC = RTLIB::getFPEXT(MVT::f16, MVT::f32);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+ for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
+ bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
+ MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
+ MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
+ if (Supported) {
+ if (IsStrict) {
+ SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
+ {DstVT, MVT::Other}, {Chain, SrcVal});
+ Chain = SrcVal.getValue(1);
+ } else {
+ SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
+ }
+ } else {
+ LC = RTLIB::getFPEXT(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected type for custom-lowering FP_EXTEND");
- SrcVal =
- makeLibCall(DAG, LC, MVT::f32, SrcVal, CallOptions, Loc).first;
+ std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
+ Loc, Chain);
}
}
- if (DstSz != 64)
- return SrcVal;
- // For sure now SrcVal is 32 bits
- if (Subtarget->hasFP64()) // Instruction from 32 -> 64
- return DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f64, SrcVal);
-
- LC = RTLIB::getFPEXT(MVT::f32, MVT::f64);
- assert(LC != RTLIB::UNKNOWN_LIBCALL &&
- "Unexpected type for custom-lowering FP_EXTEND");
- return makeLibCall(DAG, LC, MVT::f64, SrcVal, CallOptions, Loc).first;
+ return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
}
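
Editor's note: the rewritten LowerFP_EXTEND replaces the two hand-unrolled 16->32 and 32->64 cases with a single widening loop. A minimal sketch of the control flow, assuming stand-in capability flags for Subtarget->hasFP16()/hasFP64() (the real code additionally threads the strict-FP chain and emits STRICT_FP_EXTEND or a libcall per step):

    #include <cstdio>
    #include <string>
    #include <vector>

    // Each iteration doubles the width, choosing a native extend or a libcall.
    std::vector<std::string> planFPExtend(unsigned SrcSz, unsigned DstSz,
                                          bool HasFP16, bool HasFP64) {
      std::vector<std::string> Steps;
      for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
        bool Native = (Sz == 16) ? HasFP16 : HasFP64;
        Steps.push_back(std::string(Native ? "fpext f" : "libcall f") +
                        std::to_string(Sz) + "->f" + std::to_string(Sz * 2));
      }
      return Steps;
    }

    int main() {
      // f16 -> f64 with FP16 but no FP64: one instruction, then one libcall.
      for (const std::string &S : planFPExtend(16, 64, true, false))
        std::puts(S.c_str());
    }

LowerFP_ROUND below uses the same mechanism in miniature: for strict operations the incoming chain is passed to makeLibCall, and the {result, chain} pair is returned through getMergeValues.
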
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
- SDValue SrcVal = Op.getOperand(0);
+ bool IsStrict = Op->isStrictFPOpcode();
+
+ SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
EVT DstVT = Op.getValueType();
const unsigned DstSz = Op.getValueType().getSizeInBits();
@@ -16118,7 +16432,11 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected type for custom-lowering FP_ROUND");
MakeLibCallOptions CallOptions;
- return makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions, Loc).first;
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+ SDValue Result;
+ std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
+ Loc, Chain);
+ return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
}
void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
@@ -16644,15 +16962,20 @@ ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
}
bool ARMTargetLowering::isLegalInterleavedAccessType(
- VectorType *VecTy, const DataLayout &DL) const {
+ unsigned Factor, VectorType *VecTy, const DataLayout &DL) const {
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+ if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
+ return false;
+
// Ensure the vector doesn't have f16 elements. Even though we could do an
// i16 vldN, we can't hold the f16 vectors and will end up converting via
// f32.
- if (VecTy->getElementType()->isHalfTy())
+ if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
+ return false;
+ if (Subtarget->hasMVEIntegerOps() && Factor == 3)
return false;
// Ensure the number of vector elements is greater than 1.
@@ -16665,12 +16988,16 @@ bool ARMTargetLowering::isLegalInterleavedAccessType(
// Ensure the total vector size is 64 or a multiple of 128. Types larger than
// 128 will be split into multiple interleaved accesses.
- return VecSize == 64 || VecSize % 128 == 0;
+ if (Subtarget->hasNEON() && VecSize == 64)
+ return true;
+ return VecSize % 128 == 0;
}
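
Editor's note: taken together, the new checks amount to a per-backend legality predicate. The following is a behavioral summary under stated assumptions (the parameter names are invented for illustration; the real function derives them from VecTy and the subtarget):

    bool legalInterleaved(bool HasNEON, bool HasMVE, unsigned Factor,
                          bool HalfFloatElts, unsigned NumElts,
                          unsigned VecBits) {
      if (!HasNEON && !HasMVE) return false;
      if (HasNEON && HalfFloatElts) return false; // no f16 vldN path on NEON
      if (HasMVE && Factor == 3) return false;    // MVE has only vld2q/vld4q
      if (NumElts < 2) return false;
      if (HasNEON && VecBits == 64) return true;  // single 64-bit dN access
      return VecBits % 128 == 0;                  // wider types split into 128s
    }

For example, a v4i32 with Factor 2 (128 bits) is legal on both backends, while Factor 3 remains NEON-only.
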
unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
if (Subtarget->hasNEON())
return 4;
+ if (Subtarget->hasMVEIntegerOps())
+ return MVEMaxSupportedInterleaveFactor;
return TargetLoweringBase::getMaxSupportedInterleaveFactor();
}
@@ -16702,7 +17029,7 @@ bool ARMTargetLowering::lowerInterleavedLoad(
// Skip illegal vector types and interleave factors; the NEON/MVE subtarget
// check now lives in isLegalInterleavedAccessType. We can "legalize" wide
// vector types into multiple interleaved accesses as long as the vector
// types are divisible by 128.
- if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
+ if (!isLegalInterleavedAccessType(Factor, VecTy, DL))
return false;
unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
@@ -16734,13 +17061,37 @@ bool ARMTargetLowering::lowerInterleavedLoad(
assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
- Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
- Type *Tys[] = {VecTy, Int8Ptr};
- static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
- Intrinsic::arm_neon_vld3,
- Intrinsic::arm_neon_vld4};
- Function *VldnFunc =
- Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
+ auto createLoadIntrinsic = [&](Value *BaseAddr) {
+ if (Subtarget->hasNEON()) {
+ Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
+ Type *Tys[] = {VecTy, Int8Ptr};
+ static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
+ Intrinsic::arm_neon_vld3,
+ Intrinsic::arm_neon_vld4};
+ Function *VldnFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
+
+ SmallVector<Value *, 2> Ops;
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
+ Ops.push_back(Builder.getInt32(LI->getAlignment()));
+
+ return Builder.CreateCall(VldnFunc, Ops, "vldN");
+ } else {
+ assert((Factor == 2 || Factor == 4) &&
+ "expected interleave factor of 2 or 4 for MVE");
+ Intrinsic::ID LoadInts =
+ Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
+ Type *VecEltTy = VecTy->getVectorElementType()->getPointerTo(
+ LI->getPointerAddressSpace());
+ Type *Tys[] = {VecTy, VecEltTy};
+ Function *VldnFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
+
+ SmallVector<Value *, 2> Ops;
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, VecEltTy));
+ return Builder.CreateCall(VldnFunc, Ops, "vldN");
+ }
+ };
// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will
@@ -16755,11 +17106,7 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Builder.CreateConstGEP1_32(VecTy->getVectorElementType(), BaseAddr,
VecTy->getVectorNumElements() * Factor);
- SmallVector<Value *, 2> Ops;
- Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
- Ops.push_back(Builder.getInt32(LI->getAlignment()));
-
- CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
+ CallInst *VldN = createLoadIntrinsic(BaseAddr);
// Replace uses of each shufflevector with the corresponding vector loaded
// by ldN.
@@ -16838,7 +17185,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
// Skip illegal vector types and interleave factors; the NEON/MVE subtarget
// check now lives in isLegalInterleavedAccessType. We can "legalize" wide
// vector types into multiple interleaved accesses as long as the vector
// types are divisible by 128.
- if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
+ if (!isLegalInterleavedAccessType(Factor, SubVecTy, DL))
return false;
unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
@@ -16882,11 +17229,46 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
auto Mask = SVI->getShuffleMask();
- Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
- Type *Tys[] = {Int8Ptr, SubVecTy};
- static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
- Intrinsic::arm_neon_vst3,
- Intrinsic::arm_neon_vst4};
+ auto createStoreIntrinsic = [&](Value *BaseAddr,
+ SmallVectorImpl<Value *> &Shuffles) {
+ if (Subtarget->hasNEON()) {
+ static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
+ Intrinsic::arm_neon_vst3,
+ Intrinsic::arm_neon_vst4};
+ Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
+ Type *Tys[] = {Int8Ptr, SubVecTy};
+
+ Function *VstNFunc = Intrinsic::getDeclaration(
+ SI->getModule(), StoreInts[Factor - 2], Tys);
+
+ SmallVector<Value *, 6> Ops;
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
+ for (auto S : Shuffles)
+ Ops.push_back(S);
+ Ops.push_back(Builder.getInt32(SI->getAlignment()));
+ Builder.CreateCall(VstNFunc, Ops);
+ } else {
+ assert((Factor == 2 || Factor == 4) &&
+ "expected interleave factor of 2 or 4 for MVE");
+ Intrinsic::ID StoreInts =
+ Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
+ Type *EltPtrTy = SubVecTy->getVectorElementType()->getPointerTo(
+ SI->getPointerAddressSpace());
+ Type *Tys[] = {EltPtrTy, SubVecTy};
+ Function *VstNFunc =
+ Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
+
+ SmallVector<Value *, 6> Ops;
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, EltPtrTy));
+ for (auto S : Shuffles)
+ Ops.push_back(S);
+ for (unsigned F = 0; F < Factor; F++) {
+ Ops.push_back(Builder.getInt32(F));
+ Builder.CreateCall(VstNFunc, Ops);
+ Ops.pop_back();
+ }
+ }
+ };
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
// If we are generating more than one store, we compute the base address of
@@ -16895,17 +17277,13 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getVectorElementType(),
BaseAddr, LaneLen * Factor);
- SmallVector<Value *, 6> Ops;
- Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
-
- Function *VstNFunc =
- Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
+ SmallVector<Value *, 4> Shuffles;
// Split the shufflevector operands into sub vectors for the new vstN call.
for (unsigned i = 0; i < Factor; i++) {
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
- Ops.push_back(Builder.CreateShuffleVector(
+ Shuffles.push_back(Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
} else {
unsigned StartMask = 0;
@@ -16922,13 +17300,12 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
// In the case of all undefs we're defaulting to using elems from 0
// Note: StartMask cannot be negative, it's checked in
// isReInterleaveMask
- Ops.push_back(Builder.CreateShuffleVector(
+ Shuffles.push_back(Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
}
}
- Ops.push_back(Builder.getInt32(SI->getAlignment()));
- Builder.CreateCall(VstNFunc, Ops);
+ createStoreIntrinsic(BaseAddr, Shuffles);
}
return true;
}
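
Editor's note: both lowering routines now funnel intrinsic creation through a local lambda so one loop body serves both backends. The shapes differ: NEON emits a single vldN/vstN call taking an i8* base and an explicit alignment operand, while MVE takes a pointer to the element type and, for stores, issues one vst2q/vst4q call per stage. A hedged sketch of the MVE store shape (the printed strings are illustrative, not real IR syntax):

    #include <cstdio>

    // For factor-F interleaving, MVE emits Factor calls that share the base
    // and shuffle operands and differ only in a trailing stage index.
    void emitMVEInterleavedStore(unsigned Factor) {
      for (unsigned Stage = 0; Stage < Factor; ++Stage)
        std::printf("call arm.mve.vst%uq(base, shuffles..., stage=%u)\n",
                    Factor, Stage);
    }
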
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
index 53813fad5afd..1baa22a4fa56 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -278,7 +278,11 @@ class VectorType;
VST4_UPD,
VST2LN_UPD,
VST3LN_UPD,
- VST4LN_UPD
+ VST4LN_UPD,
+
+ // Load/Store of dual registers
+ LDRD,
+ STRD
};
} // end namespace ARMISD
@@ -377,7 +381,7 @@ class VectorType;
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
- /// Returns true if the addresing mode representing by AM is legal
+ /// Returns true if the addressing mode represented by AM is legal
/// for the Thumb1 target, for a load/store of the specified type.
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
@@ -604,7 +608,7 @@ class VectorType;
/// Returns true if \p VecTy is a legal interleaved access type. This
/// function checks the vector element type and the overall width of the
/// vector.
- bool isLegalInterleavedAccessType(VectorType *VecTy,
+ bool isLegalInterleavedAccessType(unsigned Factor, VectorType *VecTy,
const DataLayout &DL) const;
bool alignLoopsWithOptSize() const override;
@@ -731,23 +735,17 @@ class VectorType;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
+ void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
- Register getRegisterByName(const char* RegName, EVT VT,
+ Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
- /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
- /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
- /// expanded to FMAs when this method returns true, otherwise fmuladd is
- /// expanded to fmul + fadd.
- ///
- /// ARM supports both fused and unfused multiply-add operations; we already
- /// lower a pair of fmul and fadd to the latter so it's not clear that there
- /// would be a gain or that the gain would be worthwhile enough to risk
- /// correctness bugs.
- bool isFMAFasterThanFMulAndFAdd(EVT VT) const override { return false; }
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
index fe696222ec70..ce67af6f1b49 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -243,6 +243,12 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
+def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
// Vector operations shared between NEON and MVE
def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
@@ -297,6 +303,28 @@ class RegConstraint<string C> {
string Constraints = C;
}
+// ARMCC condition codes. See ARMCC::CondCodes
+def ARMCCeq : PatLeaf<(i32 0)>;
+def ARMCCne : PatLeaf<(i32 1)>;
+def ARMCChs : PatLeaf<(i32 2)>;
+def ARMCClo : PatLeaf<(i32 3)>;
+def ARMCCmi : PatLeaf<(i32 4)>;
+def ARMCCpl : PatLeaf<(i32 5)>;
+def ARMCCvs : PatLeaf<(i32 6)>;
+def ARMCCvc : PatLeaf<(i32 7)>;
+def ARMCChi : PatLeaf<(i32 8)>;
+def ARMCCls : PatLeaf<(i32 9)>;
+def ARMCCge : PatLeaf<(i32 10)>;
+def ARMCClt : PatLeaf<(i32 11)>;
+def ARMCCgt : PatLeaf<(i32 12)>;
+def ARMCCle : PatLeaf<(i32 13)>;
+def ARMCCal : PatLeaf<(i32 14)>;
+
+// VCC predicates. See ARMVCC::VPTCodes
+def ARMVCCNone : PatLeaf<(i32 0)>;
+def ARMVCCThen : PatLeaf<(i32 1)>;
+def ARMVCCElse : PatLeaf<(i32 2)>;
+
//===----------------------------------------------------------------------===//
// ARM specific transformation functions and pattern fragments.
//
@@ -913,7 +941,10 @@ def MVEShiftImm1_7AsmOperand: ImmAsmOperand<1,7> {
// encodings allow.
let DiagnosticString = "operand must be an immediate in the range [1,8]";
}
-def mve_shift_imm1_7 : Operand<i32> {
+def mve_shift_imm1_7 : Operand<i32>,
+ // SelectImmediateInRange / isScaledConstantInRange use a
+ // half-open interval, so the parameters <1,8> mean 1-7 inclusive
+ ComplexPattern<i32, 1, "SelectImmediateInRange<1,8>", [], []> {
let ParserMatchClass = MVEShiftImm1_7AsmOperand;
let EncoderMethod = "getMVEShiftImmOpValue";
}
@@ -926,7 +957,10 @@ def MVEShiftImm1_15AsmOperand: ImmAsmOperand<1,15> {
// encodings allow.
let DiagnosticString = "operand must be an immediate in the range [1,16]";
}
-def mve_shift_imm1_15 : Operand<i32> {
+def mve_shift_imm1_15 : Operand<i32>,
+ // SelectImmediateInRange / isScaledConstantInRange use a
+ // half-open interval, so the parameters <1,16> mean 1-15 inclusive
+ ComplexPattern<i32, 1, "SelectImmediateInRange<1,16>", [], []> {
let ParserMatchClass = MVEShiftImm1_15AsmOperand;
let EncoderMethod = "getMVEShiftImmOpValue";
}
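
Editor's note: as the new comments say, the ComplexPattern parameters describe a half-open range. A minimal sketch of the interval check (the real SelectImmediateInRange lives in the ARM ISel DAG matcher and also builds the target constant; this stand-alone template shows only the range logic):

    // <Low,High> accepts Low..High-1, so SelectImmediateInRange<1,8> matches
    // shift amounts 1-7 and SelectImmediateInRange<1,16> matches 1-15.
    template <int Low, int High>
    bool immediateInRange(int Val) {
      return Val >= Low && Val < High;
    }
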
@@ -2667,6 +2701,14 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
Requires<[IsARM, HasV5TE]>;
}
+let mayLoad = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
+def LOADDUAL : ARMPseudoInst<(outs GPRPairOp:$Rt), (ins addrmode3:$addr),
+ 64, IIC_iLoad_d_r, []>,
+ Requires<[IsARM, HasV5TE]> {
+ let AM = AddrMode3;
+}
+}
+
def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "lda", "\t$Rt, $addr", []>;
def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
@@ -2942,6 +2984,19 @@ let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
}
}
+let mayStore = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
+def STOREDUAL : ARMPseudoInst<(outs), (ins GPRPairOp:$Rt, addrmode3:$addr),
+ 64, IIC_iStore_d_r, []>,
+ Requires<[IsARM, HasV5TE]> {
+ let AM = AddrMode3;
+}
+}
+
+let Predicates = [IsARM, HasV5TE] in {
+def : Pat<(ARMstrd GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
+ (STOREDUAL (REG_SEQUENCE GPRPair, GPR:$Rt, gsub_0, GPR:$Rt2, gsub_1), addrmode3:$addr)>;
+}
+
// Indexed stores
multiclass AI2_stridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
@@ -6214,7 +6269,7 @@ def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp),
}
def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary,
- [(atomic_fence imm:$ordering, 0)]> {
+ [(atomic_fence timm:$ordering, 0)]> {
let hasSideEffects = 1;
let Size = 0;
let AsmString = "@ COMPILER BARRIER";
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
index 4f67cd6e47cc..604291be822c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -275,6 +275,83 @@ class mve_addr_q_shift<int shift> : MemOperand {
let MIOperandInfo = (ops MQPR:$base, i32imm:$imm);
}
+// A family of classes wrapping up information about the vector types
+// used by MVE.
+class MVEVectorVTInfo<ValueType vec, ValueType dblvec, ValueType pred,
+ bits<2> size, string suffixletter, bit unsigned> {
+ // The LLVM ValueType representing the vector, so we can use it in
+ // ISel patterns.
+ ValueType Vec = vec;
+
+ // The LLVM ValueType representing a vector with elements double the size
+ // of those in Vec, so we can use it in ISel patterns. It is up to the
+ // invoker of this class to ensure that this is a correct choice.
+ ValueType DblVec = dblvec;
+
+ // An LLVM ValueType representing a corresponding vector of
+ // predicate bits, for use in ISel patterns that handle an IR
+ // intrinsic describing the predicated form of the instruction.
+ //
+ // Usually, for a vector of N things, this will be vNi1. But for
+ // vectors of 2 values, we make an exception, and use v4i1 instead
+ // of v2i1. Rationale: MVE codegen doesn't support doing all the
+ // auxiliary operations on v2i1 (vector shuffles etc), and also,
+ // there's no MVE compare instruction that will _generate_ v2i1
+ // directly.
+ ValueType Pred = pred;
+
+ // The most common representation of the vector element size in MVE
+ // instruction encodings: a 2-bit value V representing an (8<<V)-bit
+ // vector element.
+ bits<2> Size = size;
+
+ // For vectors explicitly mentioning a signedness of integers: 0 for
+ // signed and 1 for unsigned. For anything else, undefined.
+ bit Unsigned = unsigned;
+
+ // The number of bits in a vector element, in integer form.
+ int LaneBits = !shl(8, Size);
+
+ // The suffix used in assembly language on an instruction operating
+ // on this lane if it only cares about the number of bits.
+ string BitsSuffix = !if(!eq(suffixletter, "p"),
+ !if(!eq(unsigned, 0b0), "8", "16"),
+ !cast<string>(LaneBits));
+
+ // The suffix used on an instruction that mentions the whole type.
+ string Suffix = suffixletter ## BitsSuffix;
+
+ // The letter part of the suffix only.
+ string SuffixLetter = suffixletter;
+}
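
Editor's note: the TableGen arithmetic in this class is compact; the following is an illustrative C++ mirror of the two derived fields (an assumption for exposition, not generated code):

    #include <string>

    // Size is the 2-bit encoding of the lane width: 0b00 -> 8, 0b01 -> 16,
    // 0b10 -> 32, 0b11 -> 64, i.e. LaneBits = 8 << Size. The suffix is the
    // letter plus the width; the polynomial "p" types special-case
    // BitsSuffix exactly as the !if above does.
    std::string mveSuffix(unsigned Size, char Letter) {
      unsigned LaneBits = 8u << Size;
      return std::string(1, Letter) + std::to_string(LaneBits);
    }
    // mveSuffix(0b01, 's') == "s16", matching MVE_v8s16's Suffix field.
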
+
+// Integer vector types that don't treat signed and unsigned differently.
+def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "i", ?>;
+def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "i", ?>;
+def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "i", ?>;
+def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "i", ?>;
+
+// Explicitly signed and unsigned integer vectors. They map to the
+// same set of LLVM ValueTypes as above, but are represented
+// differently in assembly and instruction encodings.
+def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "s", 0b0>;
+def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "s", 0b0>;
+def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "s", 0b0>;
+def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "s", 0b0>;
+def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "u", 0b1>;
+def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "u", 0b1>;
+def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "u", 0b1>;
+def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "u", 0b1>;
+
+// FP vector types.
+def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1, 0b01, "f", ?>;
+def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, 0b10, "f", ?>;
+def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, 0b11, "f", ?>;
+
+// Polynomial vector types.
+def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b11, "p", 0b0>;
+def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b11, "p", 0b1>;
+
// --------- Start of base classes for the instructions themselves
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
@@ -346,9 +423,12 @@ class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr,
let Inst{19-16} = RdaDest{3-0};
}
-class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4, list<dag> pattern=[]>
+class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4>
: MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, long_shift:$imm),
- "$RdaSrc, $imm", "$RdaDest = $RdaSrc", pattern> {
+ "$RdaSrc, $imm", "$RdaDest = $RdaSrc",
+ [(set rGPR:$RdaDest,
+ (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
+ (i32 rGPR:$RdaSrc), (i32 imm:$imm))))]> {
bits<5> imm;
let Inst{15} = 0b0;
@@ -364,9 +444,12 @@ def MVE_SRSHR : MVE_ScalarShiftSRegImm<"srshr", 0b10>;
def MVE_UQSHL : MVE_ScalarShiftSRegImm<"uqshl", 0b00>;
def MVE_URSHR : MVE_ScalarShiftSRegImm<"urshr", 0b01>;
-class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4, list<dag> pattern=[]>
+class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4>
: MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, rGPR:$Rm),
- "$RdaSrc, $Rm", "$RdaDest = $RdaSrc", pattern> {
+ "$RdaSrc, $Rm", "$RdaDest = $RdaSrc",
+ [(set rGPR:$RdaDest,
+ (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
+ (i32 rGPR:$RdaSrc), (i32 rGPR:$Rm))))]> {
bits<4> Rm;
let Inst{15-12} = Rm{3-0};
@@ -487,10 +570,10 @@ class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
let Inst{4} = 0b0;
}
-class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+class MVE_VABAV<string suffix, bit U, bits<2> size>
: MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src",
- pattern> {
+ []> {
bits<4> Qm;
bits<4> Qn;
bits<4> Rda;
@@ -509,12 +592,35 @@ class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
let Inst{0} = 0b1;
}
-def MVE_VABAVs8 : MVE_VABAV<"s8", 0b0, 0b00>;
-def MVE_VABAVs16 : MVE_VABAV<"s16", 0b0, 0b01>;
-def MVE_VABAVs32 : MVE_VABAV<"s32", 0b0, 0b10>;
-def MVE_VABAVu8 : MVE_VABAV<"u8", 0b1, 0b00>;
-def MVE_VABAVu16 : MVE_VABAV<"u16", 0b1, 0b01>;
-def MVE_VABAVu32 : MVE_VABAV<"u32", 0b1, 0b10>;
+multiclass MVE_VABAV_m<MVEVectorVTInfo VTI> {
+ def "" : MVE_VABAV<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(i32 (int_arm_mve_vabav
+ (i32 VTI.Unsigned),
+ (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (i32 (Inst (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+ def : Pat<(i32 (int_arm_mve_vabav_predicated
+ (i32 VTI.Unsigned),
+ (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (i32 (Inst (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ }
+}
+
+defm MVE_VABAVs8 : MVE_VABAV_m<MVE_v16s8>;
+defm MVE_VABAVs16 : MVE_VABAV_m<MVE_v8s16>;
+defm MVE_VABAVs32 : MVE_VABAV_m<MVE_v4s32>;
+defm MVE_VABAVu8 : MVE_VABAV_m<MVE_v16u8>;
+defm MVE_VABAVu16 : MVE_VABAV_m<MVE_v8u16>;
+defm MVE_VABAVu32 : MVE_VABAV_m<MVE_v4u32>;
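
Editor's note: this multiclass establishes the pattern reused for the rest of the file: each predicated intrinsic carries a VCCR mask (and, for the vector-result operations further below, an $inactive merge source), and it selects to the same instruction with ARMVCCThen and the mask appended. As a scalar model of the vector-result case (illustrative semantics only, not codegen; vabav itself instead accumulates into Rda):

    #include <array>
    #include <cstddef>

    // Each lane takes the operation where the mask is set and the
    // corresponding lane of the inactive vector elsewhere.
    template <typename T, typename Op>
    std::array<T, 4> predicated(Op op, const std::array<T, 4> &qm,
                                const std::array<T, 4> &qn,
                                const std::array<bool, 4> &mask,
                                const std::array<T, 4> &inactive) {
      std::array<T, 4> out{};
      for (std::size_t i = 0; i < out.size(); ++i)
        out[i] = mask[i] ? op(qm[i], qn[i]) : inactive[i];
      return out;
    }
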
class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
bit A, bit U, bits<2> size, list<dag> pattern=[]>
@@ -658,17 +764,31 @@ class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
let Inst{0} = 0b0;
}
-multiclass MVE_VMINMAXV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
- def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b1, bit_7>;
- def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b1, bit_7>;
- def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b1, bit_7>;
- def u8 : MVE_VMINMAXV<iname, "u8", 0b1, 0b00, 0b1, bit_7>;
- def u16 : MVE_VMINMAXV<iname, "u16", 0b1, 0b01, 0b1, bit_7>;
- def u32 : MVE_VMINMAXV<iname, "u32", 0b1, 0b10, 0b1, bit_7>;
+multiclass MVE_VMINMAXV_p<string iname, bit bit_17, bit bit_7,
+ MVEVectorVTInfo VTI, Intrinsic intr> {
+ def "": MVE_VMINMAXV<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
+ bit_17, bit_7>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in
+ def _pat : Pat<(i32 (intr (i32 rGPR:$prev), (VTI.Vec MQPR:$vec))),
+ (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
+}
+
+multiclass MVE_VMINMAXV_ty<string iname, bit bit_7,
+ Intrinsic intr_s, Intrinsic intr_u> {
+ defm s8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16s8, intr_s>;
+ defm s16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8s16, intr_s>;
+ defm s32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4s32, intr_s>;
+ defm u8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16u8, intr_u>;
+ defm u16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8u16, intr_u>;
+ defm u32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4u32, intr_u>;
}
-defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>;
-defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>;
+defm MVE_VMINV : MVE_VMINMAXV_ty<
+ "vminv", 0b1, int_arm_mve_minv_s, int_arm_mve_minv_u>;
+defm MVE_VMAXV : MVE_VMINMAXV_ty<
+ "vmaxv", 0b0, int_arm_mve_maxv_s, int_arm_mve_maxv_u>;
let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
@@ -709,10 +829,9 @@ defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;
defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;
class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
- bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
- list<dag> pattern=[]>
+ bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
: MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
- "$RdaDest, $Qn, $Qm", cstr, pattern> {
+ "$RdaDest, $Qn, $Qm", cstr, []> {
bits<4> RdaDest;
bits<3> Qm;
bits<3> Qn;
@@ -730,47 +849,88 @@ class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
let Inst{0} = bit_0;
}
-multiclass MVE_VMLAMLSDAV_A<string iname, string x, string suffix,
- bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
- list<dag> pattern=[]> {
- def ""#x#suffix : MVE_VMLAMLSDAV<iname # x, suffix,
+multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
+ bit sz, bit bit_28, bit X, bit bit_8, bit bit_0> {
+ def ""#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # x, VTI.Suffix,
(ins MQPR:$Qn, MQPR:$Qm), "",
- sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;
- def "a"#x#suffix : MVE_VMLAMLSDAV<iname # "a" # x, suffix,
+ sz, bit_28, 0b0, X, bit_8, bit_0>;
+ def "a"#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # "a" # x, VTI.Suffix,
(ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
"$RdaDest = $RdaSrc",
- sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
+ sz, bit_28, 0b1, X, bit_8, bit_0>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(i32 (int_arm_mve_vmldava
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 0) /* accumulator */,
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+ def : Pat<(i32 (int_arm_mve_vmldava_predicated
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 0) /* accumulator */,
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+
+ def : Pat<(i32 (int_arm_mve_vmldava
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+ def : Pat<(i32 (int_arm_mve_vmldava_predicated
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ }
}
-multiclass MVE_VMLAMLSDAV_AX<string iname, string suffix, bit sz, bit bit_28,
- bit bit_8, bit bit_0, list<dag> pattern=[]> {
- defm "" : MVE_VMLAMLSDAV_A<iname, "", suffix, sz, bit_28,
- 0b0, bit_8, bit_0, pattern>;
- defm "" : MVE_VMLAMLSDAV_A<iname, "x", suffix, sz, bit_28,
- 0b1, bit_8, bit_0, pattern>;
+multiclass MVE_VMLAMLSDAV_AX<string iname, MVEVectorVTInfo VTI, bit sz,
+ bit bit_28, bit bit_8, bit bit_0> {
+ defm "" : MVE_VMLAMLSDAV_A<iname, "", VTI, sz, bit_28,
+ 0b0, bit_8, bit_0>;
+ defm "" : MVE_VMLAMLSDAV_A<iname, "x", VTI, sz, bit_28,
+ 0b1, bit_8, bit_0>;
}
-multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit bit_8,
- list<dag> pattern=[]> {
- defm "" : MVE_VMLAMLSDAV_AX<"vmladav", "s"#suffix,
- sz, 0b0, bit_8, 0b0, pattern>;
- defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", "u"#suffix,
- sz, 0b1, 0b0, bit_8, 0b0, pattern>;
+multiclass MVE_VMLADAV_multi<MVEVectorVTInfo SVTI, MVEVectorVTInfo UVTI,
+ bit sz, bit bit_8> {
+ defm "" : MVE_VMLAMLSDAV_AX<"vmladav", SVTI,
+ sz, 0b0, bit_8, 0b0>;
+ defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", UVTI,
+ sz, 0b1, 0b0, bit_8, 0b0>;
}
-multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28,
- list<dag> pattern=[]> {
- defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", "s"#suffix,
- sz, bit_28, 0b0, 0b1, pattern>;
+multiclass MVE_VMLSDAV_multi<MVEVectorVTInfo VTI, bit sz, bit bit_28> {
+ defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", VTI,
+ sz, bit_28, 0b0, 0b1>;
}
-defm MVE_VMLADAV : MVE_VMLADAV_multi< "8", 0b0, 0b1>;
-defm MVE_VMLADAV : MVE_VMLADAV_multi<"16", 0b0, 0b0>;
-defm MVE_VMLADAV : MVE_VMLADAV_multi<"32", 0b1, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v16s8, MVE_v16u8, 0b0, 0b1>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v8s16, MVE_v8u16, 0b0, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v4s32, MVE_v4u32, 0b1, 0b0>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi< "8", 0b0, 0b1>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"16", 0b0, 0b0>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"32", 0b1, 0b0>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v16s8, 0b0, 0b1>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v8s16, 0b0, 0b0>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v4s32, 0b1, 0b0>;
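
Editor's note: the four patterns per instance cover the accumulator and mask axes: an accumulator operand of (i32 0) selects the plain vmladav/vmlsdav encoding, a register accumulator selects the "a" form, and the subtract and exchange flags pick the vmlsdav and cross ("x") variants. A scalar model of the underlying reduction (an illustrative assumption for the s8 accumulating case):

    #include <cstdint>

    // vmladava.s8-style semantics: lanewise multiply, widen, and accumulate
    // into a 32-bit scalar, optionally seeded from a previous accumulator.
    int32_t vmladav_s8(const int8_t (&qn)[16], const int8_t (&qm)[16],
                       int32_t rda = 0) {
      for (int i = 0; i < 16; ++i)
        rda += int32_t(qn[i]) * int32_t(qm[i]);
      return rda;
    }
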
// vmlav aliases vmladav
foreach acc = ["", "a"] in {
@@ -932,6 +1092,16 @@ let Predicates = [HasMVEFloat] in {
(v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
(v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+ def : Pat<(v4f32 (int_arm_mve_max_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), (i32 0),
+ (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+ (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+ ARMVCCThen, (v4i1 VCCR:$mask),
+ (v4f32 MQPR:$inactive)))>;
+ def : Pat<(v8f16 (int_arm_mve_max_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), (i32 0),
+ (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+ (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+ ARMVCCThen, (v8i1 VCCR:$mask),
+ (v8f16 MQPR:$inactive)))>;
}
def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>;
@@ -942,6 +1112,16 @@ let Predicates = [HasMVEFloat] in {
(v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
(v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+ def : Pat<(v4f32 (int_arm_mve_min_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+ (i32 0), (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+ (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+ ARMVCCThen, (v4i1 VCCR:$mask),
+ (v4f32 MQPR:$inactive)))>;
+ def : Pat<(v8f16 (int_arm_mve_min_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+ (i32 0), (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+ (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+ ARMVCCThen, (v8i1 VCCR:$mask),
+ (v8f16 MQPR:$inactive)))>;
}
@@ -957,50 +1137,48 @@ class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
let Inst{8} = 0b0;
let Inst{6} = 0b1;
let Inst{4} = bit_4;
+ let validForTailPredication = 1;
}
-multiclass MVE_VMINMAX_all_sizes<string iname, bit bit_4> {
- def s8 : MVE_VMINMAX<iname, "s8", 0b0, 0b00, bit_4>;
- def s16 : MVE_VMINMAX<iname, "s16", 0b0, 0b01, bit_4>;
- def s32 : MVE_VMINMAX<iname, "s32", 0b0, 0b10, bit_4>;
- def u8 : MVE_VMINMAX<iname, "u8", 0b1, 0b00, bit_4>;
- def u16 : MVE_VMINMAX<iname, "u16", 0b1, 0b01, bit_4>;
- def u32 : MVE_VMINMAX<iname, "u32", 0b1, 0b10, bit_4>;
-}
+multiclass MVE_VMINMAX_m<string iname, bit bit_4, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VMINMAX<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, bit_4>;
+ defvar Inst = !cast<Instruction>(NAME);
-defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>;
-defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>;
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated min/max
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMINs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ // Predicated min/max
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
+multiclass MVE_VMAX<MVEVectorVTInfo VTI>
+ : MVE_VMINMAX_m<"vmax", 0b0, VTI, !if(VTI.Unsigned, umax, smax), int_arm_mve_max_predicated>;
+multiclass MVE_VMIN<MVEVectorVTInfo VTI>
+ : MVE_VMINMAX_m<"vmin", 0b1, VTI, !if(VTI.Unsigned, umin, smin), int_arm_mve_min_predicated>;
+
+defm MVE_VMINs8 : MVE_VMIN<MVE_v16s8>;
+defm MVE_VMINs16 : MVE_VMIN<MVE_v8s16>;
+defm MVE_VMINs32 : MVE_VMIN<MVE_v4s32>;
+defm MVE_VMINu8 : MVE_VMIN<MVE_v16u8>;
+defm MVE_VMINu16 : MVE_VMIN<MVE_v8u16>;
+defm MVE_VMINu32 : MVE_VMIN<MVE_v4u32>;
+
+defm MVE_VMAXs8 : MVE_VMAX<MVE_v16s8>;
+defm MVE_VMAXs16 : MVE_VMAX<MVE_v8s16>;
+defm MVE_VMAXs32 : MVE_VMAX<MVE_v4s32>;
+defm MVE_VMAXu8 : MVE_VMAX<MVE_v16u8>;
+defm MVE_VMAXu16 : MVE_VMAX<MVE_v8u16>;
+defm MVE_VMAXu32 : MVE_VMAX<MVE_v4u32>;
+
// end of mve_comp instructions
// start of mve_bit instructions
@@ -1150,53 +1328,61 @@ foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f
(MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
}
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
- def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
- (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
- def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
- (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
- def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
- (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
- (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
- (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
- (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
- def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
- (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
- (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
- (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
- (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
- def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
- (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+multiclass MVE_bit_op<MVEVectorVTInfo VTI, SDNode unpred_op, Intrinsic pred_int, MVE_bit_ops instruction> {
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated operation
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ // Predicated operation
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (instruction
+ (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+defm : MVE_bit_op<MVE_v16i8, and, int_arm_mve_and_predicated, MVE_VAND>;
+defm : MVE_bit_op<MVE_v8i16, and, int_arm_mve_and_predicated, MVE_VAND>;
+defm : MVE_bit_op<MVE_v4i32, and, int_arm_mve_and_predicated, MVE_VAND>;
+defm : MVE_bit_op<MVE_v2i64, and, int_arm_mve_and_predicated, MVE_VAND>;
+
+defm : MVE_bit_op<MVE_v16i8, or, int_arm_mve_orr_predicated, MVE_VORR>;
+defm : MVE_bit_op<MVE_v8i16, or, int_arm_mve_orr_predicated, MVE_VORR>;
+defm : MVE_bit_op<MVE_v4i32, or, int_arm_mve_orr_predicated, MVE_VORR>;
+defm : MVE_bit_op<MVE_v2i64, or, int_arm_mve_orr_predicated, MVE_VORR>;
+
+defm : MVE_bit_op<MVE_v16i8, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+defm : MVE_bit_op<MVE_v8i16, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+defm : MVE_bit_op<MVE_v4i32, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+defm : MVE_bit_op<MVE_v2i64, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+
+multiclass MVE_bit_op_with_inv<MVEVectorVTInfo VTI, SDNode unpred_op, Intrinsic pred_int, MVE_bit_ops instruction> {
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated operation
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (vnotq (VTI.Vec MQPR:$Qn)))),
+ (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ // Predicated operation
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (instruction
+ (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
+defm : MVE_bit_op_with_inv<MVE_v16i8, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+defm : MVE_bit_op_with_inv<MVE_v8i16, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+defm : MVE_bit_op_with_inv<MVE_v4i32, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+defm : MVE_bit_op_with_inv<MVE_v2i64, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+
+defm : MVE_bit_op_with_inv<MVE_v16i8, or, int_arm_mve_orn_predicated, MVE_VORN>;
+defm : MVE_bit_op_with_inv<MVE_v8i16, or, int_arm_mve_orn_predicated, MVE_VORN>;
+defm : MVE_bit_op_with_inv<MVE_v4i32, or, int_arm_mve_orn_predicated, MVE_VORN>;
+defm : MVE_bit_op_with_inv<MVE_v2i64, or, int_arm_mve_orn_predicated, MVE_VORN>;
+
class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
: MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
@@ -1429,8 +1615,9 @@ class MVE_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
let Inst{3-1} = Qm{2-0};
}
-class MVE_VMULt1<string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_int<"vmul", suffix, size, pattern> {
+class MVE_VMULt1<string iname, string suffix, bits<2> size,
+ list<dag> pattern=[]>
+ : MVE_int<iname, suffix, size, pattern> {
let Inst{28} = 0b0;
let Inst{25-23} = 0b110;
@@ -1438,22 +1625,36 @@ class MVE_VMULt1<string suffix, bits<2> size, list<dag> pattern=[]>
let Inst{12-8} = 0b01001;
let Inst{4} = 0b1;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
-def MVE_VMULt1i8 : MVE_VMULt1<"i8", 0b00>;
-def MVE_VMULt1i16 : MVE_VMULt1<"i16", 0b01>;
-def MVE_VMULt1i32 : MVE_VMULt1<"i32", 0b10>;
+multiclass MVE_VMUL_m<string iname, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VMULt1<iname, VTI.Suffix, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMULt1i8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMULt1i16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMULt1i32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated multiply
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated multiply
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
-class MVE_VQxDMULH<string iname, string suffix, bits<2> size, bit rounding,
+multiclass MVE_VMUL<MVEVectorVTInfo VTI>
+ : MVE_VMUL_m<"vmul", VTI, mul, int_arm_mve_mul_predicated>;
+
+defm MVE_VMULi8 : MVE_VMUL<MVE_v16i8>;
+defm MVE_VMULi16 : MVE_VMUL<MVE_v8i16>;
+defm MVE_VMULi32 : MVE_VMUL<MVE_v4i32>;
+
+class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding,
list<dag> pattern=[]>
: MVE_int<iname, suffix, size, pattern> {
@@ -1465,18 +1666,40 @@ class MVE_VQxDMULH<string iname, string suffix, bits<2> size, bit rounding,
let Inst{0} = 0b0;
}
-class MVE_VQDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_VQxDMULH<"vqdmulh", suffix, size, 0b0, pattern>;
-class MVE_VQRDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_VQxDMULH<"vqrdmulh", suffix, size, 0b1, pattern>;
+multiclass MVE_VQxDMULH_m<string iname, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int,
+ bit rounding> {
+ def "" : MVE_VQxDMULH_Base<iname, VTI.Suffix, VTI.Size, rounding>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VQDMULHi8 : MVE_VQDMULH<"s8", 0b00>;
-def MVE_VQDMULHi16 : MVE_VQDMULH<"s16", 0b01>;
-def MVE_VQDMULHi32 : MVE_VQDMULH<"s32", 0b10>;
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated multiply
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated multiply
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
-def MVE_VQRDMULHi8 : MVE_VQRDMULH<"s8", 0b00>;
-def MVE_VQRDMULHi16 : MVE_VQRDMULH<"s16", 0b01>;
-def MVE_VQRDMULHi32 : MVE_VQRDMULH<"s32", 0b10>;
+multiclass MVE_VQxDMULH<string iname, MVEVectorVTInfo VTI, bit rounding>
+ : MVE_VQxDMULH_m<iname, VTI, !if(rounding, int_arm_mve_vqrdmulh,
+ int_arm_mve_vqdmulh),
+ !if(rounding, int_arm_mve_qrdmulh_predicated,
+ int_arm_mve_qdmulh_predicated),
+ rounding>;
+
+defm MVE_VQDMULHi8 : MVE_VQxDMULH<"vqdmulh", MVE_v16s8, 0b0>;
+defm MVE_VQDMULHi16 : MVE_VQxDMULH<"vqdmulh", MVE_v8s16, 0b0>;
+defm MVE_VQDMULHi32 : MVE_VQxDMULH<"vqdmulh", MVE_v4s32, 0b0>;
+
+defm MVE_VQRDMULHi8 : MVE_VQxDMULH<"vqrdmulh", MVE_v16s8, 0b1>;
+defm MVE_VQRDMULHi16 : MVE_VQxDMULH<"vqrdmulh", MVE_v8s16, 0b1>;
+defm MVE_VQRDMULHi32 : MVE_VQxDMULH<"vqrdmulh", MVE_v4s32, 0b1>;
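
Editor's note: the rounding bit is the only difference between the two families: vqdmulh keeps the high half of a doubling multiply with saturation, and vqrdmulh adds a rounding bias before the shift. A scalar sketch for s16 lanes (semantics follow the architectural definition; illustrative only):

    #include <cstdint>

    int16_t vqdmulh_s16(int16_t a, int16_t b, bool rounding) {
      int64_t prod = 2 * int64_t(a) * int64_t(b);
      if (rounding)
        prod += int64_t(1) << 15;
      int64_t high = prod >> 16;
      if (high > INT16_MAX) high = INT16_MAX; // saturates -32768 * -32768
      if (high < INT16_MIN) high = INT16_MIN;
      return static_cast<int16_t>(high);
    }
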
class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
list<dag> pattern=[]>
@@ -1491,39 +1714,40 @@ class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
let validForTailPredication = 1;
}
-class MVE_VADD<string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_VADDSUB<"vadd", suffix, size, 0b0, pattern>;
-class MVE_VSUB<string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_VADDSUB<"vsub", suffix, size, 0b1, pattern>;
+multiclass MVE_VADDSUB_m<string iname, MVEVectorVTInfo VTI, bit subtract,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VADDSUB<iname, VTI.Suffix, VTI.Size, subtract>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VADDi8 : MVE_VADD<"i8", 0b00>;
-def MVE_VADDi16 : MVE_VADD<"i16", 0b01>;
-def MVE_VADDi32 : MVE_VADD<"i32", 0b10>;
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated add/subtract
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VADDi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VADDi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VADDi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ // Predicated add/subtract
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
-def MVE_VSUBi8 : MVE_VSUB<"i8", 0b00>;
-def MVE_VSUBi16 : MVE_VSUB<"i16", 0b01>;
-def MVE_VSUBi32 : MVE_VSUB<"i32", 0b10>;
+multiclass MVE_VADD<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_m<"vadd", VTI, 0b0, add, int_arm_mve_add_predicated>;
+multiclass MVE_VSUB<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_m<"vsub", VTI, 0b1, sub, int_arm_mve_sub_predicated>;
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VSUBi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VSUBi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VSUBi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-}
+defm MVE_VADDi8 : MVE_VADD<MVE_v16i8>;
+defm MVE_VADDi16 : MVE_VADD<MVE_v8i16>;
+defm MVE_VADDi32 : MVE_VADD<MVE_v4i32>;
+
+defm MVE_VSUBi8 : MVE_VSUB<MVE_v16i8>;
+defm MVE_VSUBi16 : MVE_VSUB<MVE_v8i16>;
+defm MVE_VSUBi32 : MVE_VSUB<MVE_v4i32>;
class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
- bits<2> size, ValueType vt>
+ bits<2> size>
: MVE_int<iname, suffix, size, []> {
let Inst{28} = U;
@@ -1535,50 +1759,75 @@ class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
let Inst{4} = 0b1;
let Inst{0} = 0b0;
let validForTailPredication = 1;
+}
- ValueType VT = vt;
+class MVE_VQADD_<string suffix, bit U, bits<2> size>
+ : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size>;
+class MVE_VQSUB_<string suffix, bit U, bits<2> size>
+ : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size>;
+
+multiclass MVE_VQADD_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VQADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated saturating add
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated saturating add
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
-class MVE_VQADD<string suffix, bit U, bits<2> size, ValueType VT>
- : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, VT>;
-class MVE_VQSUB<string suffix, bit U, bits<2> size, ValueType VT>
- : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, VT>;
+multiclass MVE_VQADD<MVEVectorVTInfo VTI, SDNode unpred_op>
+ : MVE_VQADD_m<VTI, unpred_op, int_arm_mve_qadd_predicated>;
+
+defm MVE_VQADDs8 : MVE_VQADD<MVE_v16s8, saddsat>;
+defm MVE_VQADDs16 : MVE_VQADD<MVE_v8s16, saddsat>;
+defm MVE_VQADDs32 : MVE_VQADD<MVE_v4s32, saddsat>;
+defm MVE_VQADDu8 : MVE_VQADD<MVE_v16u8, uaddsat>;
+defm MVE_VQADDu16 : MVE_VQADD<MVE_v8u16, uaddsat>;
+defm MVE_VQADDu32 : MVE_VQADD<MVE_v4u32, uaddsat>;
+
+multiclass MVE_VQSUB_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VQSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated saturating subtract
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated saturating subtract
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
-def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00, v16i8>;
-def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01, v8i16>;
-def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10, v4i32>;
-def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00, v16i8>;
-def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01, v8i16>;
-def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10, v4i32>;
+multiclass MVE_VQSUB<MVEVectorVTInfo VTI, SDNode unpred_op>
+ : MVE_VQSUB_m<VTI, unpred_op, int_arm_mve_qsub_predicated>;
-def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00, v16i8>;
-def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01, v8i16>;
-def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10, v4i32>;
-def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00, v16i8>;
-def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01, v8i16>;
-def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10, v4i32>;
+defm MVE_VQSUBs8 : MVE_VQSUB<MVE_v16s8, ssubsat>;
+defm MVE_VQSUBs16 : MVE_VQSUB<MVE_v8s16, ssubsat>;
+defm MVE_VQSUBs32 : MVE_VQSUB<MVE_v4s32, ssubsat>;
+defm MVE_VQSUBu8 : MVE_VQSUB<MVE_v16u8, usubsat>;
+defm MVE_VQSUBu16 : MVE_VQSUB<MVE_v8u16, usubsat>;
+defm MVE_VQSUBu32 : MVE_VQSUB<MVE_v4u32, usubsat>;
-let Predicates = [HasMVEInt] in {
- foreach instr = [MVE_VQADDu8, MVE_VQADDu16, MVE_VQADDu32] in
- foreach VT = [instr.VT] in
- def : Pat<(VT (uaddsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
- (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
- foreach instr = [MVE_VQADDs8, MVE_VQADDs16, MVE_VQADDs32] in
- foreach VT = [instr.VT] in
- def : Pat<(VT (saddsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
- (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
- foreach instr = [MVE_VQSUBu8, MVE_VQSUBu16, MVE_VQSUBu32] in
- foreach VT = [instr.VT] in
- def : Pat<(VT (usubsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
- (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
- foreach instr = [MVE_VQSUBs8, MVE_VQSUBs16, MVE_VQSUBs32] in
- foreach VT = [instr.VT] in
- def : Pat<(VT (ssubsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
- (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
-}
-
-
-class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+class MVE_VABD_int<string suffix, bit U, bits<2> size,
+ list<dag> pattern=[]>
: MVE_int<"vabd", suffix, size, pattern> {
let Inst{28} = U;
@@ -1590,14 +1839,38 @@ class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>;
-def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
-def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
-def MVE_VABDu8 : MVE_VABD_int<"u8", 0b1, 0b00>;
-def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
-def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
+multiclass MVE_VABD_m<MVEVectorVTInfo VTI,
+ Intrinsic unpred_int, Intrinsic pred_int> {
+ def "" : MVE_VABD_int<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated absolute difference
+ def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated absolute difference
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VABD<MVEVectorVTInfo VTI>
+ : MVE_VABD_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
-class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+defm MVE_VABDs8 : MVE_VABD<MVE_v16s8>;
+defm MVE_VABDs16 : MVE_VABD<MVE_v8s16>;
+defm MVE_VABDs32 : MVE_VABD<MVE_v4s32>;
+defm MVE_VABDu8 : MVE_VABD<MVE_v16u8>;
+defm MVE_VABDu16 : MVE_VABD<MVE_v8u16>;
+defm MVE_VABDu32 : MVE_VABD<MVE_v4u32>;
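
Per lane, the vabd intrinsics matched above compute the absolute difference
in a wider type so the subtract itself cannot overflow; a hedged C++ sketch
(names are ours):

    #include <cstdint>
    #include <cstdlib>

    uint8_t vabd_u8(uint8_t a, uint8_t b) {
      return uint8_t(a > b ? a - b : b - a);      // unsigned |a - b|
    }

    // Signed lanes: the magnitude is truncated back to the lane width,
    // so e.g. |(-128) - 127| stores as 0xFF, matching the hardware.
    uint8_t vabd_s8(int8_t a, int8_t b) {
      return uint8_t(std::abs(int(a) - int(b)));
    }
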
+
+class MVE_VRHADD_Base<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
: MVE_int<"vrhadd", suffix, size, pattern> {
let Inst{28} = U;
@@ -1609,12 +1882,36 @@ class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VRHADDs8 : MVE_VRHADD<"s8", 0b0, 0b00>;
-def MVE_VRHADDs16 : MVE_VRHADD<"s16", 0b0, 0b01>;
-def MVE_VRHADDs32 : MVE_VRHADD<"s32", 0b0, 0b10>;
-def MVE_VRHADDu8 : MVE_VRHADD<"u8", 0b1, 0b00>;
-def MVE_VRHADDu16 : MVE_VRHADD<"u16", 0b1, 0b01>;
-def MVE_VRHADDu32 : MVE_VRHADD<"u32", 0b1, 0b10>;
+multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VRHADD_Base<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated rounding add-with-divide-by-two
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+    // Predicated rounding add-with-divide-by-two
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VRHADD<MVEVectorVTInfo VTI>
+ : MVE_VRHADD_m<VTI, int_arm_mve_vrhadd, int_arm_mve_rhadd_predicated>;
+
+defm MVE_VRHADDs8 : MVE_VRHADD<MVE_v16s8>;
+defm MVE_VRHADDs16 : MVE_VRHADD<MVE_v8s16>;
+defm MVE_VRHADDs32 : MVE_VRHADD<MVE_v4s32>;
+defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8>;
+defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>;
+defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;
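
"Rounding add with divide by two" is the usual rhadd operation: each lane's
sum is formed in a wider type, one is added for rounding, and the result is
halved, so the 9-bit intermediate is never lost. A sketch (our naming):

    #include <cstdint>

    uint8_t vrhadd_u8(uint8_t a, uint8_t b) {
      return uint8_t((unsigned(a) + unsigned(b) + 1) >> 1);
    }

    int8_t vrhadd_s8(int8_t a, int8_t b) {
      return int8_t((int(a) + int(b) + 1) >> 1);  // arithmetic shift
    }
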
class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
bits<2> size, list<dag> pattern=[]>
@@ -1631,81 +1928,73 @@ class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
let validForTailPredication = 1;
}
-class MVE_VHADD<string suffix, bit U, bits<2> size,
+class MVE_VHADD_<string suffix, bit U, bits<2> size,
list<dag> pattern=[]>
: MVE_VHADDSUB<"vhadd", suffix, U, 0b0, size, pattern>;
-class MVE_VHSUB<string suffix, bit U, bits<2> size,
+class MVE_VHSUB_<string suffix, bit U, bits<2> size,
list<dag> pattern=[]>
: MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
-def MVE_VHADDs8 : MVE_VHADD<"s8", 0b0, 0b00>;
-def MVE_VHADDs16 : MVE_VHADD<"s16", 0b0, 0b01>;
-def MVE_VHADDs32 : MVE_VHADD<"s32", 0b0, 0b10>;
-def MVE_VHADDu8 : MVE_VHADD<"u8", 0b1, 0b00>;
-def MVE_VHADDu16 : MVE_VHADD<"u16", 0b1, 0b01>;
-def MVE_VHADDu32 : MVE_VHADD<"u32", 0b1, 0b10>;
-
-def MVE_VHSUBs8 : MVE_VHSUB<"s8", 0b0, 0b00>;
-def MVE_VHSUBs16 : MVE_VHSUB<"s16", 0b0, 0b01>;
-def MVE_VHSUBs32 : MVE_VHSUB<"s32", 0b0, 0b10>;
-def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>;
-def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>;
-def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>;
+multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated add-and-divide-by-two
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated add-and-divide-by-two
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (ARMvshrsImm
- (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
- (v16i8 (MVE_VHADDs8
- (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
- def : Pat<(v8i16 (ARMvshrsImm
- (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
- (v8i16 (MVE_VHADDs16
- (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
- def : Pat<(v4i32 (ARMvshrsImm
- (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
- (v4i32 (MVE_VHADDs32
- (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
- def : Pat<(v16i8 (ARMvshruImm
- (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
- (v16i8 (MVE_VHADDu8
- (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
- def : Pat<(v8i16 (ARMvshruImm
- (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
- (v8i16 (MVE_VHADDu16
- (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
- def : Pat<(v4i32 (ARMvshruImm
- (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
- (v4i32 (MVE_VHADDu32
- (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
- def : Pat<(v16i8 (ARMvshrsImm
- (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
- (v16i8 (MVE_VHSUBs8
- (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
- def : Pat<(v8i16 (ARMvshrsImm
- (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
- (v8i16 (MVE_VHSUBs16
- (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
- def : Pat<(v4i32 (ARMvshrsImm
- (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
- (v4i32 (MVE_VHSUBs32
- (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
- def : Pat<(v16i8 (ARMvshruImm
- (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
- (v16i8 (MVE_VHSUBu8
- (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
- def : Pat<(v8i16 (ARMvshruImm
- (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
- (v8i16 (MVE_VHSUBu16
- (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
- def : Pat<(v4i32 (ARMvshruImm
- (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
- (v4i32 (MVE_VHSUBu32
- (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
+multiclass MVE_VHADD<MVEVectorVTInfo VTI>
+ : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated>;
+
+defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8>;
+defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16>;
+defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32>;
+defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8>;
+defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16>;
+defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32>;
+
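The halving add/subtract pair computes (a ± b) >> 1 in a wider type, which
is exactly the shift-of-add/sub trees the removed ARMvshrsImm/ARMvshruImm
patterns used to match. One-lane C++ sketch (our naming):

    #include <cstdint>

    int8_t vhadd_s8(int8_t a, int8_t b) {
      return int8_t((int(a) + int(b)) >> 1);      // no overflow in int
    }
    int8_t vhsub_s8(int8_t a, int8_t b) {
      return int8_t((int(a) - int(b)) >> 1);
    }
    uint8_t vhadd_u8(uint8_t a, uint8_t b) {
      return uint8_t((unsigned(a) + unsigned(b)) >> 1);
    }
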
+multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VHSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated subtract-and-divide-by-two
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated subtract-and-divide-by-two
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
+multiclass MVE_VHSUB<MVEVectorVTInfo VTI>
+ : MVE_VHSUB_m<VTI, int_arm_mve_vhsub, int_arm_mve_hsub_predicated>;
+
+defm MVE_VHSUBs8 : MVE_VHSUB<MVE_v16s8>;
+defm MVE_VHSUBs16 : MVE_VHSUB<MVE_v8s16>;
+defm MVE_VHSUBs32 : MVE_VHSUB<MVE_v4s32>;
+defm MVE_VHSUBu8 : MVE_VHSUB<MVE_v16u8>;
+defm MVE_VHSUBu16 : MVE_VHSUB<MVE_v8u16>;
+defm MVE_VHSUBu32 : MVE_VHSUB<MVE_v4u32>;
+
class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
"vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
@@ -1873,6 +2162,49 @@ def MVE_VQNEGs8 : MVE_VQABSNEG<"vqneg", "s8", 0b00, 0b1>;
def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
+// int_min/int_max: vectors with INT_MIN/INT_MAX splatted across every lane
+// zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
+multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
+ dag zero_vec, MVE_VQABSNEG vqabs_instruction,
+ MVE_VQABSNEG vqneg_instruction> {
+ let Predicates = [HasMVEInt] in {
+ // The below tree can be replaced by a vqabs instruction, as it represents
+ // the following vectorized expression (r being the value in $reg):
+ // r > 0 ? r : (r == INT_MIN ? INT_MAX : -r)
+ def : Pat<(VTI.Vec (vselect
+ (VTI.Pred (ARMvcmpz (VTI.Vec MQPR:$reg), ARMCCgt)),
+ (VTI.Vec MQPR:$reg),
+ (VTI.Vec (vselect
+ (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)),
+ int_max,
+ (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))))),
+ (VTI.Vec (vqabs_instruction (VTI.Vec MQPR:$reg)))>;
+ // Similarly, this tree represents vqneg, i.e. the following vectorized expression:
+ // r == INT_MIN ? INT_MAX : -r
+ def : Pat<(VTI.Vec (vselect
+ (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)),
+ int_max,
+ (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))),
+ (VTI.Vec (vqneg_instruction (VTI.Vec MQPR:$reg)))>;
+ }
+}
+
+defm MVE_VQABSNEG_Ps8 : vqabsneg_pattern<MVE_v16i8,
+ (v16i8 (ARMvmovImm (i32 3712))),
+ (v16i8 (ARMvmovImm (i32 3711))),
+ (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
+ MVE_VQABSs8, MVE_VQNEGs8>;
+defm MVE_VQABSNEG_Ps16 : vqabsneg_pattern<MVE_v8i16,
+ (v8i16 (ARMvmovImm (i32 2688))),
+ (v8i16 (ARMvmvnImm (i32 2688))),
+ (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
+ MVE_VQABSs16, MVE_VQNEGs16>;
+defm MVE_VQABSNEG_Ps32 : vqabsneg_pattern<MVE_v4i32,
+ (v4i32 (ARMvmovImm (i32 1664))),
+ (v4i32 (ARMvmvnImm (i32 1664))),
+ (ARMvmovImm (i32 0)),
+ MVE_VQABSs32, MVE_VQNEGs32>;
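
For reference, the two selection trees above encode exactly the scalar
expressions from the comments, and the ARMvmovImm/ARMvmvnImm immediates
(3712, 3711, 2688, 1664) are, to our reading, the modified-immediate
encodings of INT_MIN/INT_MAX splatted at each lane width. A sketch of one
s8 lane (helper names ours):

    #include <cstdint>
    #include <limits>

    int8_t vqneg_s8(int8_t r) {                   // r == INT_MIN ? INT_MAX : -r
      return r == std::numeric_limits<int8_t>::min()
                 ? std::numeric_limits<int8_t>::max()
                 : int8_t(-r);
    }

    int8_t vqabs_s8(int8_t r) {                   // r > 0 ? r : vqneg(r)
      return r > 0 ? r : vqneg_s8(r);
    }
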
+
class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
dag iops, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
@@ -1956,6 +2288,7 @@ class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
let Inst{4} = 0b0;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b1;
+ let validForTailPredication = 1;
}
def MVE_VMAXAs8 : MVE_VMINMAXA<"vmaxa", "s8", 0b00, 0b0>;
@@ -2049,8 +2382,8 @@ let Predicates = [HasMVEInt] in {
class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
- dag immops, list<dag> pattern=[]>
- : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops),
+ Operand immtype, list<dag> pattern=[]>
+ : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm, immtype:$imm),
iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
let Inst{28} = U;
let Inst{25-23} = 0b101;
@@ -2059,6 +2392,9 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
let Inst{11-6} = 0b111101;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+
+ // For the MVE_VSHLL_patterns multiclass to refer to
+ Operand immediateType = immtype;
}
// The immediate VSHLL instructions accept shift counts from 1 up to
@@ -2067,7 +2403,7 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
class MVE_VSHLL_imm8<string iname, string suffix,
bit U, bit th, list<dag> pattern=[]>
- : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_7:$imm), pattern> {
+ : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_7, pattern> {
bits<3> imm;
let Inst{20-19} = 0b01;
let Inst{18-16} = imm;
@@ -2075,7 +2411,7 @@ class MVE_VSHLL_imm8<string iname, string suffix,
class MVE_VSHLL_imm16<string iname, string suffix,
bit U, bit th, list<dag> pattern=[]>
- : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_15:$imm), pattern> {
+ : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_15, pattern> {
bits<4> imm;
let Inst{20} = 0b1;
let Inst{19-16} = imm;
@@ -2119,11 +2455,50 @@ defm MVE_VSHLL_lws16 : MVE_VSHLL_lw<"vshll", "s16", 0b01, 0b0, "$Qd, $Qm, #16">;
defm MVE_VSHLL_lwu8 : MVE_VSHLL_lw<"vshll", "u8", 0b00, 0b1, "$Qd, $Qm, #8">;
defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
+multiclass MVE_VSHLL_patterns<MVEVectorVTInfo VTI, int top> {
+ defvar suffix = !strconcat(VTI.Suffix, !if(top, "th", "bh"));
+ defvar inst_imm = !cast<MVE_VSHLL_imm>("MVE_VSHLL_imm" # suffix);
+ defvar inst_lw = !cast<MVE_VSHLL_by_lane_width>("MVE_VSHLL_lw" # suffix);
+ defvar unpred_int = int_arm_mve_vshll_imm;
+ defvar pred_int = int_arm_mve_vshll_imm_predicated;
+ defvar imm = inst_imm.immediateType;
+
+ def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), imm:$imm,
+ (i32 VTI.Unsigned), (i32 top))),
+ (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm))>;
+ def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
+ (i32 VTI.Unsigned), (i32 top))),
+ (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src)))>;
+
+ def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), imm:$imm,
+ (i32 VTI.Unsigned), (i32 top),
+ (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive))),
+ (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+ def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
+ (i32 VTI.Unsigned), (i32 top),
+ (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive))),
+ (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
+ (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+}
+
+foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in
+ foreach top = [0, 1] in
+ defm : MVE_VSHLL_patterns<VTI, top>;
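vshll widens each source lane to twice its width (sign- or zero-extending
per VTI.Unsigned) and then shifts left; counts of 1 up to one less than the
lane width use the immediate form, while a count of exactly the lane width
selects the separate "lw" encoding, which is why two pattern pairs are
needed above. One-lane sketch (our naming):

    #include <cstdint>

    int16_t vshll_s8(int8_t lane, unsigned shift /* 1..8 */) {
      return int16_t(int16_t(lane) << shift);     // widen, then shift
    }
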
+
+class MVE_shift_imm_partial<Operand imm, string iname, string suffix>
+ : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$QdSrc, MQPR:$Qm, imm:$imm),
+ iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc"> {
+ Operand immediateType = imm;
+}
+
class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
- dag immops, list<dag> pattern=[]>
- : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
- iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
- pattern> {
+ Operand imm, list<dag> pattern=[]>
+ : MVE_shift_imm_partial<imm, iname, suffix> {
bits<5> imm;
let Inst{28} = bit_28;
@@ -2136,45 +2511,35 @@ class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
let Inst{0} = 0b1;
}
-def MVE_VRSHRNi16bh : MVE_VxSHRN<
- "vrshrnb", "i16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
-def MVE_VRSHRNi16th : MVE_VxSHRN<
- "vrshrnt", "i16", 0b1, 0b1,(ins shr_imm8:$imm)> {
+def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
-def MVE_VRSHRNi32bh : MVE_VxSHRN<
- "vrshrnb", "i32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
-def MVE_VRSHRNi32th : MVE_VxSHRN<
- "vrshrnt", "i32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
-def MVE_VSHRNi16bh : MVE_VxSHRN<
- "vshrnb", "i16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
-def MVE_VSHRNi16th : MVE_VxSHRN<
- "vshrnt", "i16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
-def MVE_VSHRNi32bh : MVE_VxSHRN<
- "vshrnb", "i32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
-def MVE_VSHRNi32th : MVE_VxSHRN<
- "vshrnt", "i32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
-class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag immops,
- list<dag> pattern=[]>
- : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
- iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
- pattern> {
+class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
+ Operand imm, list<dag> pattern=[]>
+ : MVE_shift_imm_partial<imm, iname, suffix> {
bits<5> imm;
let Inst{28} = bit_28;
@@ -2188,44 +2553,42 @@ class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag imm
}
def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
- "vqrshrunb", "s16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+ "vqrshrunb", "s16", 0b1, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
- "vqrshrunt", "s16", 0b1, 0b1, (ins shr_imm8:$imm)> {
+ "vqrshrunt", "s16", 0b1, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
- "vqrshrunb", "s32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+ "vqrshrunb", "s32", 0b1, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
- "vqrshrunt", "s32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+ "vqrshrunt", "s32", 0b1, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
- "vqshrunb", "s16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+ "vqshrunb", "s16", 0b0, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
- "vqshrunt", "s16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+ "vqshrunt", "s16", 0b0, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
- "vqshrunb", "s32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+ "vqshrunb", "s32", 0b0, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
- "vqshrunt", "s32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+ "vqshrunt", "s32", 0b0, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
- dag immops, list<dag> pattern=[]>
- : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
- iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
- pattern> {
+ Operand imm, list<dag> pattern=[]>
+ : MVE_shift_imm_partial<imm, iname, suffix> {
bits<5> imm;
let Inst{25-23} = 0b101;
@@ -2238,19 +2601,19 @@ class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
}
multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
- def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+ def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, shr_imm8> {
let Inst{28} = 0b0;
let Inst{20-19} = 0b01;
}
- def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+ def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, shr_imm8> {
let Inst{28} = 0b1;
let Inst{20-19} = 0b01;
}
- def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, (ins shr_imm16:$imm)> {
+ def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, shr_imm16> {
let Inst{28} = 0b0;
let Inst{20} = 0b1;
}
- def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, (ins shr_imm16:$imm)> {
+ def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, shr_imm16> {
let Inst{28} = 0b1;
let Inst{20} = 0b1;
}
@@ -2261,6 +2624,63 @@ defm MVE_VQRSHRNth : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>;
defm MVE_VQSHRNbh : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>;
defm MVE_VQSHRNth : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>;
+multiclass MVE_VSHRN_patterns<MVE_shift_imm_partial inst,
+ MVEVectorVTInfo OutVTI, MVEVectorVTInfo InVTI,
+ bit q, bit r, bit top> {
+ defvar inparams = (? (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
+ (inst.immediateType:$imm), (i32 q), (i32 r),
+ (i32 OutVTI.Unsigned), (i32 InVTI.Unsigned), (i32 top));
+ defvar outparams = (inst (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
+ (imm:$imm));
+
+ def : Pat<(OutVTI.Vec !setop(inparams, int_arm_mve_vshrn)),
+ (OutVTI.Vec outparams)>;
+ def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated
+ (InVTI.Pred VCCR:$pred)))),
+ (OutVTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
+}
+
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh, MVE_v16s8, MVE_v8s16, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16th, MVE_v16s8, MVE_v8s16, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh, MVE_v8s16, MVE_v4s32, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32th, MVE_v8s16, MVE_v4s32, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh, MVE_v16u8, MVE_v8u16, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16th, MVE_v16u8, MVE_v8u16, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh, MVE_v8u16, MVE_v4u32, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32th, MVE_v8u16, MVE_v4u32, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh, MVE_v16s8, MVE_v8s16, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th, MVE_v16s8, MVE_v8s16, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh, MVE_v8s16, MVE_v4s32, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th, MVE_v8s16, MVE_v4s32, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh, MVE_v16u8, MVE_v8u16, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th, MVE_v16u8, MVE_v8u16, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh, MVE_v8u16, MVE_v4u32, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th, MVE_v8u16, MVE_v4u32, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs16, MVE_v16s8, MVE_v8s16, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNths16, MVE_v16s8, MVE_v8s16, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs32, MVE_v8s16, MVE_v4s32, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNths32, MVE_v8s16, MVE_v4s32, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu16, MVE_v16u8, MVE_v8u16, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNthu16, MVE_v16u8, MVE_v8u16, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu32, MVE_v8u16, MVE_v4u32, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNthu32, MVE_v8u16, MVE_v4u32, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs16, MVE_v16s8, MVE_v8s16, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNths16, MVE_v16s8, MVE_v8s16, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs32, MVE_v8s16, MVE_v4s32, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNths32, MVE_v8s16, MVE_v4s32, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu16, MVE_v16u8, MVE_v8u16, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu16, MVE_v16u8, MVE_v8u16, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu32, MVE_v8u16, MVE_v4u32, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu32, MVE_v8u16, MVE_v4u32, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16bh, MVE_v16u8, MVE_v8s16, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16th, MVE_v16u8, MVE_v8s16, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32bh, MVE_v8u16, MVE_v4s32, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16bh, MVE_v16u8, MVE_v8s16, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16th, MVE_v16u8, MVE_v8s16, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32bh, MVE_v8u16, MVE_v4s32, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,1,1>;
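
A rough scalar model (our naming) of the q/r/top flags threaded through the
defm list above, for the s16 -> s8 case: r adds a rounding constant, q
saturates the narrowed result, and top only chooses whether the bottom or
top half of each destination lane is written (not modelled here):

    #include <algorithm>
    #include <cstdint>

    int8_t vshrn_s16(int16_t lane, unsigned shift, bool q, bool r) {
      int v = int(lane);
      if (r) v += 1 << (shift - 1);               // round: add half an LSB
      v >>= shift;
      if (q) v = std::clamp(v, -128, 127);        // saturating ("q") variants
      return int8_t(v);                           // plain variants truncate
    }
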
+
// end of mve_imm_shift instructions
// start of mve_shift instructions
@@ -2293,13 +2713,31 @@ class MVE_shift_by_vec<string iname, string suffix, bit U,
let validForTailPredication = 1;
}
+multiclass MVE_shift_by_vec_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
+ def "" : MVE_shift_by_vec<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_vector
+ (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh)))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_vector_predicated
+ (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+}
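
In the shift-by-vector family, each lane's shift count comes from the
corresponding lane of $sh and, per the architecture, a negative count
shifts right instead; q and r again select the saturating and rounding
variants. Sketch of the plain form on one s8 lane (our naming,
out-of-range counts elided):

    #include <cstdint>

    int8_t vshl_s8(int8_t in, int8_t sh) {
      return sh >= 0 ? int8_t(uint8_t(in) << sh)  // left shift, lane wraps
                     : int8_t(in >> -sh);         // negative count: right shift
    }
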
+
multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
- def s8 : MVE_shift_by_vec<iname, "s8", 0b0, 0b00, bit_4, bit_8>;
- def s16 : MVE_shift_by_vec<iname, "s16", 0b0, 0b01, bit_4, bit_8>;
- def s32 : MVE_shift_by_vec<iname, "s32", 0b0, 0b10, bit_4, bit_8>;
- def u8 : MVE_shift_by_vec<iname, "u8", 0b1, 0b00, bit_4, bit_8>;
- def u16 : MVE_shift_by_vec<iname, "u16", 0b1, 0b01, bit_4, bit_8>;
- def u32 : MVE_shift_by_vec<iname, "u32", 0b1, 0b10, bit_4, bit_8>;
+ defm s8 : MVE_shift_by_vec_p<iname, MVE_v16s8, bit_4, bit_8>;
+ defm s16 : MVE_shift_by_vec_p<iname, MVE_v8s16, bit_4, bit_8>;
+ defm s32 : MVE_shift_by_vec_p<iname, MVE_v4s32, bit_4, bit_8>;
+ defm u8 : MVE_shift_by_vec_p<iname, MVE_v16u8, bit_4, bit_8>;
+ defm u16 : MVE_shift_by_vec_p<iname, MVE_v8u16, bit_4, bit_8>;
+ defm u32 : MVE_shift_by_vec_p<iname, MVE_v4u32, bit_4, bit_8>;
}
defm MVE_VSHL_by_vec : mve_shift_by_vec_multi<"vshl", 0b0, 0b0>;
@@ -2340,11 +2778,18 @@ class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
let validForTailPredication = 1;
+
+ // For the MVE_shift_imm_patterns multiclass to refer to
+ MVEVectorVTInfo VTI;
+ Operand immediateType;
+ Intrinsic unpred_int;
+ Intrinsic pred_int;
+ dag unsignedFlag = (?);
}
-class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
+class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType>
: MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
- !con((ins MQPR:$Qd_src, MQPR:$Qm), imm),
+ (ins MQPR:$Qd_src, MQPR:$Qm, immType:$imm),
"$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
bits<6> imm;
let Inst{28} = 0b1;
@@ -2353,76 +2798,99 @@ class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
let Inst{10-9} = 0b10;
let Inst{8} = bit_8;
let validForTailPredication = 1;
+
+ Operand immediateType = immType;
}
-def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> {
+def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8> {
let Inst{21-19} = 0b001;
}
-def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, (ins shr_imm16:$imm)> {
+def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16> {
let Inst{21-20} = 0b01;
}
-def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, (ins shr_imm32:$imm)> {
+def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32> {
let Inst{21} = 0b1;
}
-def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, (ins imm0_7:$imm)> {
+def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7> {
let Inst{21-19} = 0b001;
}
-def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, (ins imm0_15:$imm)> {
+def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15> {
let Inst{21-20} = 0b01;
}
-def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,(ins imm0_31:$imm)> {
+def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1, imm0_31> {
let Inst{21} = 0b1;
}
-class MVE_VQSHL_imm<string suffix, dag imm>
- : MVE_shift_with_imm<"vqshl", suffix, (outs MQPR:$Qd),
- !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+multiclass MVE_VSxI_patterns<MVE_VSxI_imm inst, string name,
+ MVEVectorVTInfo VTI> {
+ defvar inparams = (? (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
+ (inst.immediateType:$imm));
+ defvar outparams = (inst (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
+ (inst.immediateType:$imm));
+ defvar unpred_int = !cast<Intrinsic>("int_arm_mve_" # name);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # name # "_predicated");
+
+ def : Pat<(VTI.Vec !setop(inparams, unpred_int)),
+ (VTI.Vec outparams)>;
+ def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))),
+ (VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
+}
+
+defm : MVE_VSxI_patterns<MVE_VSLIimm8, "vsli", MVE_v16i8>;
+defm : MVE_VSxI_patterns<MVE_VSLIimm16, "vsli", MVE_v8i16>;
+defm : MVE_VSxI_patterns<MVE_VSLIimm32, "vsli", MVE_v4i32>;
+defm : MVE_VSxI_patterns<MVE_VSRIimm8, "vsri", MVE_v16i8>;
+defm : MVE_VSxI_patterns<MVE_VSRIimm16, "vsri", MVE_v8i16>;
+defm : MVE_VSxI_patterns<MVE_VSRIimm32, "vsri", MVE_v4i32>;
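
vsli/vsri are shift-and-insert operations: the shifted source is merged
into the destination, and the bits the shift vacated keep their old
destination values, which is why these instructions tie $Qd to $Qd_src.
One u8 lane (our naming):

    #include <cstdint>

    uint8_t vsli_u8(uint8_t qd, uint8_t qm, unsigned imm /* 0..7 */) {
      uint8_t mask = uint8_t(0xFFu << imm);       // bits the shift writes
      return uint8_t(((qm << imm) & mask) | (qd & ~mask));
    }

    uint8_t vsri_u8(uint8_t qd, uint8_t qm, unsigned imm /* 1..8 */) {
      uint8_t mask = uint8_t(0xFFu >> imm);
      return uint8_t(((qm >> imm) & mask) | (qd & ~mask));
    }
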
+
+class MVE_VQSHL_imm<MVEVectorVTInfo VTI_, Operand immType>
+ : MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
vpred_r, ""> {
bits<6> imm;
+ let Inst{28} = VTI_.Unsigned;
let Inst{25-24} = 0b11;
let Inst{21-16} = imm;
let Inst{10-8} = 0b111;
-}
-
-def MVE_VSLIimms8 : MVE_VQSHL_imm<"s8", (ins imm0_7:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-19} = 0b001;
-}
-
-def MVE_VSLIimmu8 : MVE_VQSHL_imm<"u8", (ins imm0_7:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-19} = 0b001;
-}
-def MVE_VSLIimms16 : MVE_VQSHL_imm<"s16", (ins imm0_15:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-20} = 0b01;
+ let VTI = VTI_;
+ let immediateType = immType;
+ let unsignedFlag = (? (i32 VTI.Unsigned));
}
-def MVE_VSLIimmu16 : MVE_VQSHL_imm<"u16", (ins imm0_15:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-20} = 0b01;
-}
-
-def MVE_VSLIimms32 : MVE_VQSHL_imm<"s32", (ins imm0_31:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21} = 0b1;
-}
-
-def MVE_VSLIimmu32 : MVE_VQSHL_imm<"u32", (ins imm0_31:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21} = 0b1;
+let unpred_int = int_arm_mve_vqshl_imm,
+ pred_int = int_arm_mve_vqshl_imm_predicated in {
+ def MVE_VQSHLimms8 : MVE_VQSHL_imm<MVE_v16s8, imm0_7> {
+ let Inst{21-19} = 0b001;
+ }
+ def MVE_VQSHLimmu8 : MVE_VQSHL_imm<MVE_v16u8, imm0_7> {
+ let Inst{21-19} = 0b001;
+ }
+
+ def MVE_VQSHLimms16 : MVE_VQSHL_imm<MVE_v8s16, imm0_15> {
+ let Inst{21-20} = 0b01;
+ }
+ def MVE_VQSHLimmu16 : MVE_VQSHL_imm<MVE_v8u16, imm0_15> {
+ let Inst{21-20} = 0b01;
+ }
+
+ def MVE_VQSHLimms32 : MVE_VQSHL_imm<MVE_v4s32, imm0_31> {
+ let Inst{21} = 0b1;
+ }
+ def MVE_VQSHLimmu32 : MVE_VQSHL_imm<MVE_v4u32, imm0_31> {
+ let Inst{21} = 0b1;
+ }
}
-class MVE_VQSHLU_imm<string suffix, dag imm>
- : MVE_shift_with_imm<"vqshlu", suffix, (outs MQPR:$Qd),
- !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VQSHLU_imm<MVEVectorVTInfo VTI_, Operand immType>
+ : MVE_shift_with_imm<"vqshlu", VTI_.Suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
vpred_r, ""> {
bits<6> imm;
@@ -2430,61 +2898,103 @@ class MVE_VQSHLU_imm<string suffix, dag imm>
let Inst{25-24} = 0b11;
let Inst{21-16} = imm;
let Inst{10-8} = 0b110;
-}
-def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<"s8", (ins imm0_7:$imm)> {
- let Inst{21-19} = 0b001;
+ let VTI = VTI_;
+ let immediateType = immType;
}
-def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<"s16", (ins imm0_15:$imm)> {
- let Inst{21-20} = 0b01;
-}
+let unpred_int = int_arm_mve_vqshlu_imm,
+ pred_int = int_arm_mve_vqshlu_imm_predicated in {
+ def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<MVE_v16s8, imm0_7> {
+ let Inst{21-19} = 0b001;
+ }
-def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<"s32", (ins imm0_31:$imm)> {
- let Inst{21} = 0b1;
+ def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<MVE_v8s16, imm0_15> {
+ let Inst{21-20} = 0b01;
+ }
+
+ def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<MVE_v4s32, imm0_31> {
+ let Inst{21} = 0b1;
+ }
}
-class MVE_VRSHR_imm<string suffix, dag imm>
- : MVE_shift_with_imm<"vrshr", suffix, (outs MQPR:$Qd),
- !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VRSHR_imm<MVEVectorVTInfo VTI_, Operand immType>
+ : MVE_shift_with_imm<"vrshr", VTI_.Suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
vpred_r, ""> {
bits<6> imm;
+ let Inst{28} = VTI_.Unsigned;
let Inst{25-24} = 0b11;
let Inst{21-16} = imm;
let Inst{10-8} = 0b010;
-}
-def MVE_VRSHR_imms8 : MVE_VRSHR_imm<"s8", (ins shr_imm8:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-19} = 0b001;
+ let VTI = VTI_;
+ let immediateType = immType;
+ let unsignedFlag = (? (i32 VTI.Unsigned));
}
-def MVE_VRSHR_immu8 : MVE_VRSHR_imm<"u8", (ins shr_imm8:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-19} = 0b001;
-}
+let unpred_int = int_arm_mve_vrshr_imm,
+ pred_int = int_arm_mve_vrshr_imm_predicated in {
+ def MVE_VRSHR_imms8 : MVE_VRSHR_imm<MVE_v16s8, shr_imm8> {
+ let Inst{21-19} = 0b001;
+ }
-def MVE_VRSHR_imms16 : MVE_VRSHR_imm<"s16", (ins shr_imm16:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-20} = 0b01;
-}
+ def MVE_VRSHR_immu8 : MVE_VRSHR_imm<MVE_v16u8, shr_imm8> {
+ let Inst{21-19} = 0b001;
+ }
-def MVE_VRSHR_immu16 : MVE_VRSHR_imm<"u16", (ins shr_imm16:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-20} = 0b01;
-}
+ def MVE_VRSHR_imms16 : MVE_VRSHR_imm<MVE_v8s16, shr_imm16> {
+ let Inst{21-20} = 0b01;
+ }
-def MVE_VRSHR_imms32 : MVE_VRSHR_imm<"s32", (ins shr_imm32:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21} = 0b1;
-}
+ def MVE_VRSHR_immu16 : MVE_VRSHR_imm<MVE_v8u16, shr_imm16> {
+ let Inst{21-20} = 0b01;
+ }
-def MVE_VRSHR_immu32 : MVE_VRSHR_imm<"u32", (ins shr_imm32:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21} = 0b1;
+ def MVE_VRSHR_imms32 : MVE_VRSHR_imm<MVE_v4s32, shr_imm32> {
+ let Inst{21} = 0b1;
+ }
+
+ def MVE_VRSHR_immu32 : MVE_VRSHR_imm<MVE_v4u32, shr_imm32> {
+ let Inst{21} = 0b1;
+ }
}
+multiclass MVE_shift_imm_patterns<MVE_shift_with_imm inst> {
+ def : Pat<(inst.VTI.Vec !con((inst.unpred_int (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm),
+ inst.unsignedFlag)),
+ (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm))>;
+
+ def : Pat<(inst.VTI.Vec !con((inst.pred_int (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm),
+ inst.unsignedFlag,
+ (? (inst.VTI.Pred VCCR:$mask),
+ (inst.VTI.Vec MQPR:$inactive)))),
+ (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm,
+ ARMVCCThen, (inst.VTI.Pred VCCR:$mask),
+ (inst.VTI.Vec MQPR:$inactive)))>;
+}
+
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms32>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu32>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms32>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms8>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu8>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms16>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu16>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms32>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu32>;
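
The three immediate-shift families covered by these patterns differ only in
how they post-process the shift: vqshl saturates a left shift into the
signed range, vqshlu saturates a signed input into the unsigned range, and
vrshr is a rounding right shift. One s8 lane of each, as a hedged sketch
(names ours):

    #include <algorithm>
    #include <cstdint>

    int8_t vqshl_s8(int8_t in, unsigned imm /* 0..7 */) {
      return int8_t(std::clamp(int(in) * (1 << imm), -128, 127));
    }

    uint8_t vqshlu_s8(int8_t in, unsigned imm /* 0..7 */) {
      return uint8_t(std::clamp(int(in) * (1 << imm), 0, 255));
    }

    int8_t vrshr_s8(int8_t in, unsigned imm /* 1..8 */) {
      return int8_t((int(in) + (1 << (imm - 1))) >> imm);
    }
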
+
class MVE_VSHR_imm<string suffix, dag imm>
: MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
!con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
@@ -2550,27 +3060,39 @@ def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
let Inst{21} = 0b1;
}
+multiclass MVE_immediate_shift_patterns_inner<
+ MVEVectorVTInfo VTI, Operand imm_operand_type, SDNode unpred_op,
+ Intrinsic pred_int, Instruction inst, list<int> unsignedFlag = []> {
+
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src), imm_operand_type:$imm)),
+ (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm))>;
+
+ def : Pat<(VTI.Vec !con((pred_int (VTI.Vec MQPR:$src), imm_operand_type:$imm),
+ !dag(pred_int, unsignedFlag, ?),
+ (pred_int (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))),
+ (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm,
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+}
+
+multiclass MVE_immediate_shift_patterns<MVEVectorVTInfo VTI,
+ Operand imm_operand_type> {
+ defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+ ARMvshlImm, int_arm_mve_shl_imm_predicated,
+ !cast<Instruction>("MVE_VSHL_immi" # VTI.BitsSuffix)>;
+ defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+ ARMvshruImm, int_arm_mve_shr_imm_predicated,
+ !cast<Instruction>("MVE_VSHR_immu" # VTI.BitsSuffix), [1]>;
+ defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+ ARMvshrsImm, int_arm_mve_shr_imm_predicated,
+ !cast<Instruction>("MVE_VSHR_imms" # VTI.BitsSuffix), [0]>;
+}
+
let Predicates = [HasMVEInt] in {
- def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
- (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
- def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
- (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
- def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
- (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;
-
- def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
- (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
- def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
- (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
- def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
- (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;
-
- def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
- (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
- def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
- (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
- def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
- (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+ defm : MVE_immediate_shift_patterns<MVE_v16i8, imm0_7>;
+ defm : MVE_immediate_shift_patterns<MVE_v8i16, imm0_15>;
+ defm : MVE_immediate_shift_patterns<MVE_v4i32, imm0_31>;
}
// end of mve_shift instructions
@@ -2652,8 +3174,8 @@ class MVEFloatArithNeon<string iname, string suffix, bit size,
let Inst{16} = 0b0;
}
-class MVE_VMUL_fp<string suffix, bit size, list<dag> pattern=[]>
- : MVEFloatArithNeon<"vmul", suffix, size, (outs MQPR:$Qd),
+class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
+ : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", vpred_r, "",
pattern> {
bits<4> Qd;
@@ -2671,20 +3193,32 @@ class MVE_VMUL_fp<string suffix, bit size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VMULf32 : MVE_VMUL_fp<"f32", 0b0>;
-def MVE_VMULf16 : MVE_VMUL_fp<"f16", 0b1>;
+multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size{0}>;
+ defvar Inst = !cast<Instruction>(NAME);
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (fmul (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
- (v4f32 (MVE_VMULf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
- def : Pat<(v8f16 (fmul (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
- (v8f16 (MVE_VMULf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
-class MVE_VCMLA<string suffix, bit size, list<dag> pattern=[]>
+multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI>
+ : MVE_VMULT_fp_m<"vmul", 0, VTI, fmul, int_arm_mve_mul_predicated>;
+
+defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32>;
+defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16>;
+
+class MVE_VCMLA<string suffix, bit size>
: MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", pattern> {
+ "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", []> {
bits<4> Qd;
bits<4> Qn;
bits<2> rot;
@@ -2701,8 +3235,31 @@ class MVE_VCMLA<string suffix, bit size, list<dag> pattern=[]>
let Inst{4} = 0b0;
}
-def MVE_VCMLAf16 : MVE_VCMLA<"f16", 0b0>;
-def MVE_VCMLAf32 : MVE_VCMLA<"f32", 0b1>;
+multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, bit size> {
+ def "" : MVE_VCMLA<VTI.Suffix, size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (int_arm_mve_vcmlaq
+ imm:$rot, (VTI.Vec MQPR:$Qd_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vcmlaq_predicated
+ imm:$rot, (VTI.Vec MQPR:$Qd_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qn),
+ (VTI.Vec MQPR:$Qm), imm:$rot,
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+
+ }
+}
+
+defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16, 0b0>;
+defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, 0b1>;
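
vcmla treats adjacent float lanes as (real, imag) pairs, and each rotation
immediate accumulates one half of a complex product, so a full complex
multiply-accumulate is the rot=0 instruction followed by rot=90. A sketch
of one lane pair under that reading (std::complex stands in for the pair;
names ours):

    #include <complex>

    std::complex<float> vcmla_rot0(std::complex<float> acc,
                                   std::complex<float> n,
                                   std::complex<float> m) {
      return {acc.real() + n.real() * m.real(),
              acc.imag() + n.real() * m.imag()};
    }

    std::complex<float> vcmla_rot90(std::complex<float> acc,
                                    std::complex<float> n,
                                    std::complex<float> m) {
      return {acc.real() - n.imag() * m.imag(),
              acc.imag() + n.imag() * m.real()};
    }

    // vcmla_rot90(vcmla_rot0(acc, n, m), n, m) == acc + n*m in complex arithmetic.
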
class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
bit bit_8, bit bit_21, dag iops=(ins),
@@ -2736,63 +3293,50 @@ def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
(ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
-let Predicates = [HasMVEFloat, UseFusedMAC] in {
- def : Pat<(v8f16 (fadd (v8f16 MQPR:$src1),
- (fmul (v8f16 MQPR:$src2),
- (v8f16 MQPR:$src3)))),
- (v8f16 (MVE_VFMAf16 $src1, $src2, $src3))>;
- def : Pat<(v4f32 (fadd (v4f32 MQPR:$src1),
- (fmul (v4f32 MQPR:$src2),
- (v4f32 MQPR:$src3)))),
- (v4f32 (MVE_VFMAf32 $src1, $src2, $src3))>;
-
- def : Pat<(v8f16 (fsub (v8f16 MQPR:$src1),
- (fmul (v8f16 MQPR:$src2),
- (v8f16 MQPR:$src3)))),
- (v8f16 (MVE_VFMSf16 $src1, $src2, $src3))>;
- def : Pat<(v4f32 (fsub (v4f32 MQPR:$src1),
- (fmul (v4f32 MQPR:$src2),
- (v4f32 MQPR:$src3)))),
- (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>;
-}
-
let Predicates = [HasMVEFloat] in {
def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
(v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
(v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
+ def : Pat<(v8f16 (fma (fneg (v8f16 MQPR:$src1)), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
+ (v8f16 (MVE_VFMSf16 $src3, $src1, $src2))>;
+ def : Pat<(v4f32 (fma (fneg (v4f32 MQPR:$src1)), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
+ (v4f32 (MVE_VFMSf32 $src3, $src1, $src2))>;
}
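
The two added patterns fold a negated multiplicand into vfms; on one lane
this is just the identity fma(-a, b, c) == c - a*b, still with a single
rounding. Sketch (our naming):

    #include <cmath>

    float vfms_lane(float acc, float a, float b) {
      return std::fma(-a, b, acc);   // == acc - a*b, fused
    }
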
-
-let validForTailPredication = 1 in {
- def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>;
- def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>;
-}
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
- (v4f32 (MVE_VADDf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
- def : Pat<(v8f16 (fadd (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
- (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
+ let validForTailPredication = 1;
+ }
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
+multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated>;
+multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated>;
-let validForTailPredication = 1 in {
- def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>;
- def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>;
-}
+defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32>;
+defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16>;
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
- (v4f32 (MVE_VSUBf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
- def : Pat<(v8f16 (fsub (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
- (v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
-}
+defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32>;
+defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16>;
-class MVE_VCADD<string suffix, bit size, string cstr="", list<dag> pattern=[]>
+class MVE_VCADD<string suffix, bit size, string cstr="">
: MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
bits<4> Qd;
bits<4> Qn;
bit rot;
@@ -2810,8 +3354,29 @@ class MVE_VCADD<string suffix, bit size, string cstr="", list<dag> pattern=[]>
let Inst{4} = 0b0;
}
-def MVE_VCADDf16 : MVE_VCADD<"f16", 0b0>;
-def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1, "@earlyclobber $Qd">;
+multiclass MVE_VCADD_m<MVEVectorVTInfo VTI, bit size, string cstr=""> {
+ def "" : MVE_VCADD<VTI.Suffix, size, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (int_arm_mve_vcaddq (i32 1),
+ imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vcaddq_predicated (i32 1),
+ imm:$rot, (VTI.Vec MQPR:$inactive),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+
+ }
+}
+
+defm MVE_VCADDf16 : MVE_VCADD_m<MVE_v8f16, 0b0>;
+defm MVE_VCADDf32 : MVE_VCADD_m<MVE_v4f32, 0b1, "@earlyclobber $Qd">;
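
vcadd is an add in which the second operand is first rotated by 90 or 270
degrees on each (real, imag) lane pair; complexrotateopodd permits only
those two rotations. Per-pair sketch (names ours):

    #include <complex>

    std::complex<float> vcadd_rot90(std::complex<float> n, std::complex<float> m) {
      return {n.real() - m.imag(), n.imag() + m.real()};   // n + i*m
    }
    std::complex<float> vcadd_rot270(std::complex<float> n, std::complex<float> m) {
      return {n.real() + m.imag(), n.imag() - m.real()};   // n - i*m
    }
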
class MVE_VABD_fp<string suffix, bit size>
: MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
@@ -2833,8 +3398,29 @@ class MVE_VABD_fp<string suffix, bit size>
let validForTailPredication = 1;
}
-def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>;
-def MVE_VABDf16 : MVE_VABD_fp<"f16", 0b1>;
+multiclass MVE_VABDT_fp_m<MVEVectorVTInfo VTI,
+ Intrinsic unpred_int, Intrinsic pred_int> {
+ def "" : MVE_VABD_fp<VTI.Suffix, VTI.Size{0}>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 0))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 0), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VABD_fp_m<MVEVectorVTInfo VTI>
+ : MVE_VABDT_fp_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
+
+defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
+defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
Operand imm_operand_type, list<dag> pattern=[]>
@@ -3186,120 +3772,120 @@ def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>;
def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
-multiclass unpred_vcmp_z<string suffix, int fc> {
- def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))),
+multiclass unpred_vcmp_z<string suffix, PatLeaf fc> {
+ def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>;
- def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))),
+ def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>;
- def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))),
+ def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;
- def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
}
-multiclass unpred_vcmp_r<string suffix, int fc> {
- def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))),
+multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
+ def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>;
- def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))),
+ def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>;
- def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))),
+ def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
- def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))),
+ def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>;
- def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))),
+ def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>;
- def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))),
+ def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>;
- def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>;
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
}
-multiclass unpred_vcmpf_z<int fc> {
- def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))),
+multiclass unpred_vcmpf_z<PatLeaf fc> {
+ def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)),
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
- def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))),
+ def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))),
- (v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)))),
+            (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
}
multiclass unpred_vcmpf_r<int fc> {
- def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))),
+ def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)),
(v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
- def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))),
+ def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)),
(v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
- def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))),
+ def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)),
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>;
- def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))),
+ def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))),
- (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))),
- (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)))),
+ (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)))),
+ (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
}
let Predicates = [HasMVEInt] in {
- defm MVE_VCEQZ : unpred_vcmp_z<"i", 0>;
- defm MVE_VCNEZ : unpred_vcmp_z<"i", 1>;
- defm MVE_VCGEZ : unpred_vcmp_z<"s", 10>;
- defm MVE_VCLTZ : unpred_vcmp_z<"s", 11>;
- defm MVE_VCGTZ : unpred_vcmp_z<"s", 12>;
- defm MVE_VCLEZ : unpred_vcmp_z<"s", 13>;
- defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>;
- defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>;
-
- defm MVE_VCEQ : unpred_vcmp_r<"i", 0>;
- defm MVE_VCNE : unpred_vcmp_r<"i", 1>;
- defm MVE_VCGE : unpred_vcmp_r<"s", 10>;
- defm MVE_VCLT : unpred_vcmp_r<"s", 11>;
- defm MVE_VCGT : unpred_vcmp_r<"s", 12>;
- defm MVE_VCLE : unpred_vcmp_r<"s", 13>;
- defm MVE_VCGTU : unpred_vcmp_r<"u", 8>;
- defm MVE_VCGEU : unpred_vcmp_r<"u", 2>;
+ defm MVE_VCEQZ : unpred_vcmp_z<"i", ARMCCeq>;
+ defm MVE_VCNEZ : unpred_vcmp_z<"i", ARMCCne>;
+ defm MVE_VCGEZ : unpred_vcmp_z<"s", ARMCCge>;
+ defm MVE_VCLTZ : unpred_vcmp_z<"s", ARMCClt>;
+ defm MVE_VCGTZ : unpred_vcmp_z<"s", ARMCCgt>;
+ defm MVE_VCLEZ : unpred_vcmp_z<"s", ARMCCle>;
+ defm MVE_VCGTUZ : unpred_vcmp_z<"u", ARMCChi>;
+ defm MVE_VCGEUZ : unpred_vcmp_z<"u", ARMCChs>;
+
+ defm MVE_VCEQ : unpred_vcmp_r<"i", ARMCCeq>;
+ defm MVE_VCNE : unpred_vcmp_r<"i", ARMCCne>;
+ defm MVE_VCGE : unpred_vcmp_r<"s", ARMCCge>;
+ defm MVE_VCLT : unpred_vcmp_r<"s", ARMCClt>;
+ defm MVE_VCGT : unpred_vcmp_r<"s", ARMCCgt>;
+ defm MVE_VCLE : unpred_vcmp_r<"s", ARMCCle>;
+ defm MVE_VCGTU : unpred_vcmp_r<"u", ARMCChi>;
+ defm MVE_VCGEU : unpred_vcmp_r<"u", ARMCChs>;
}
let Predicates = [HasMVEFloat] in {
- defm MVE_VFCEQZ : unpred_vcmpf_z<0>;
- defm MVE_VFCNEZ : unpred_vcmpf_z<1>;
- defm MVE_VFCGEZ : unpred_vcmpf_z<10>;
- defm MVE_VFCLTZ : unpred_vcmpf_z<11>;
- defm MVE_VFCGTZ : unpred_vcmpf_z<12>;
- defm MVE_VFCLEZ : unpred_vcmpf_z<13>;
+ defm MVE_VFCEQZ : unpred_vcmpf_z<ARMCCeq>;
+ defm MVE_VFCNEZ : unpred_vcmpf_z<ARMCCne>;
+ defm MVE_VFCGEZ : unpred_vcmpf_z<ARMCCge>;
+ defm MVE_VFCLTZ : unpred_vcmpf_z<ARMCClt>;
+ defm MVE_VFCGTZ : unpred_vcmpf_z<ARMCCgt>;
+ defm MVE_VFCLEZ : unpred_vcmpf_z<ARMCCle>;
- defm MVE_VFCEQ : unpred_vcmpf_r<0>;
- defm MVE_VFCNE : unpred_vcmpf_r<1>;
- defm MVE_VFCGE : unpred_vcmpf_r<10>;
- defm MVE_VFCLT : unpred_vcmpf_r<11>;
- defm MVE_VFCGT : unpred_vcmpf_r<12>;
- defm MVE_VFCLE : unpred_vcmpf_r<13>;
+ defm MVE_VFCEQ : unpred_vcmpf_r<ARMCCeq>;
+ defm MVE_VFCNE : unpred_vcmpf_r<ARMCCne>;
+ defm MVE_VFCGE : unpred_vcmpf_r<ARMCCge>;
+ defm MVE_VFCLT : unpred_vcmpf_r<ARMCClt>;
+ defm MVE_VFCGT : unpred_vcmpf_r<ARMCCgt>;
+ defm MVE_VFCLE : unpred_vcmpf_r<ARMCCle>;
}
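
The named condition codes substituted throughout this hunk come from the ARMCC::CondCodes enum; a minimal sketch of the correspondence with the old integer literals, assuming the values in llvm/lib/Target/ARM/Utils/ARMBaseInfo.h at this revision:

    // Why the old literal 10 becomes ARMCCge, 8 becomes ARMCChi, etc.
    namespace ARMCC {
    enum CondCodes {
      EQ = 0,  // equal                 (old literal 0)
      NE = 1,  // not equal             (old literal 1)
      HS = 2,  // unsigned >=           (old literal 2)
      LO = 3, MI = 4, PL = 5, VS = 6, VC = 7,
      HI = 8,  // unsigned >            (old literal 8)
      LS = 9,
      GE = 10, // signed >=             (old literal 10)
      LT = 11, // signed <              (old literal 11)
      GT = 12, // signed >              (old literal 12)
      LE = 13, // signed <=             (old literal 13)
      AL = 14  // always
    };
    } // namespace ARMCC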
@@ -3403,10 +3989,10 @@ defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>;
defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>;
defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>;
-class MVE_VCMUL<string iname, string suffix, bit size, string cstr="", list<dag> pattern=[]>
+class MVE_VCMUL<string iname, string suffix, bit size, string cstr="">
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
bits<4> Qn;
bits<2> rot;
@@ -3422,8 +4008,30 @@ class MVE_VCMUL<string iname, string suffix, bit size, string cstr="", list<dag>
let Predicates = [HasMVEFloat];
}
-def MVE_VCMULf16 : MVE_VCMUL<"vcmul", "f16", 0b0>;
-def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1, "@earlyclobber $Qd">;
+multiclass MVE_VCMUL_m<string iname, MVEVectorVTInfo VTI,
+ bit size, string cstr=""> {
+ def "" : MVE_VCMUL<iname, VTI.Suffix, size, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (int_arm_mve_vcmulq
+ imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vcmulq_predicated
+ imm:$rot, (VTI.Vec MQPR:$inactive),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+
+ }
+}
+
+defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16, 0b0>;
+defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, 0b1, "@earlyclobber $Qd">;
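
All of the _predicated intrinsic patterns in MVE_VCMUL_m (and the similar multiclasses below) encode the same merging contract: the instruction executes under ARMVCCThen with the given mask, and inactive lanes take their value from $inactive. A lane-wise C++ sketch of that contract, not code from this commit:

    // result[i] = mask[i] ? op(qn[i], qm[i]) : inactive[i]
    template <typename T, typename Op>
    void predicatedOp(T *result, const bool *mask, const T *qn, const T *qm,
                      const T *inactive, unsigned lanes, Op op) {
      for (unsigned i = 0; i < lanes; ++i)
        result[i] = mask[i] ? op(qn[i], qm[i]) : inactive[i];
    }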
class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
bit T, string cstr, list<dag> pattern=[]>
@@ -3442,29 +4050,80 @@ class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
let Inst{8} = 0b0;
let Inst{7} = Qn{3};
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
-multiclass MVE_VMULL_multi<string iname, string suffix,
- bit bit_28, bits<2> bits_21_20, string cstr=""> {
- def bh : MVE_VMULL<iname # "b", suffix, bit_28, bits_21_20, 0b0, cstr>;
- def th : MVE_VMULL<iname # "t", suffix, bit_28, bits_21_20, 0b1, cstr>;
+multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int,
+ bit Top, string cstr=""> {
+ def "" : MVE_VMULL<"vmull" # !if(Top, "t", "b"), VTI.Suffix, VTI.Unsigned,
+ VTI.Size, Top, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ defvar uflag = !if(!eq(VTI.SuffixLetter, "p"), (?), (? (i32 VTI.Unsigned)));
+
+ // Unpredicated multiply
+ def : Pat<(VTI.DblVec !con((unpred_op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec MQPR:$Qn)),
+ uflag, (? (i32 Top)))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated multiply
+ def : Pat<(VTI.DblVec !con((pred_int (VTI.Vec MQPR:$Qm),
+ (VTI.Vec MQPR:$Qn)),
+ uflag, (? (i32 Top), (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+ }
}
-// For integer multiplies, bits 21:20 encode size, and bit 28 signedness.
-// For polynomial multiplies, bits 21:20 take the unused value 0b11, and
-// bit 28 switches to encoding the size.
-
-defm MVE_VMULLs8 : MVE_VMULL_multi<"vmull", "s8", 0b0, 0b00>;
-defm MVE_VMULLs16 : MVE_VMULL_multi<"vmull", "s16", 0b0, 0b01>;
-defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10, "@earlyclobber $Qd">;
-defm MVE_VMULLu8 : MVE_VMULL_multi<"vmull", "u8", 0b1, 0b00>;
-defm MVE_VMULLu16 : MVE_VMULL_multi<"vmull", "u16", 0b1, 0b01>;
-defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10, "@earlyclobber $Qd">;
-defm MVE_VMULLp8 : MVE_VMULL_multi<"vmull", "p8", 0b0, 0b11>;
-defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>;
-
-class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size,
- bit round, list<dag> pattern=[]>
+// For polynomial multiplies, the size bits take the unused value 0b11, and
+// the unsigned bit switches to encoding the size.
+
+defm MVE_VMULLBs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0,
+ "@earlyclobber $Qd">;
+defm MVE_VMULLTs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1,
+ "@earlyclobber $Qd">;
+
+defm MVE_VMULLBu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0,
+ "@earlyclobber $Qd">;
+defm MVE_VMULLTu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1,
+ "@earlyclobber $Qd">;
+
+defm MVE_VMULLBp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
+ int_arm_mve_mull_poly_predicated, 0b0>;
+defm MVE_VMULLTp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
+ int_arm_mve_mull_poly_predicated, 0b1>;
+defm MVE_VMULLBp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
+ int_arm_mve_mull_poly_predicated, 0b0>;
+defm MVE_VMULLTp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
+ int_arm_mve_mull_poly_predicated, 0b1>;
+
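A hedged sketch of the field packing the comment above describes, with the layout inferred from the old MVE_VMULL_multi instantiations rather than spelled out in this hunk:

    // Integer vmull:    bit 28 = unsigned, bits 21:20 = lane size
    //                   (8 -> 0b00, 16 -> 0b01, 32 -> 0b10).
    // Polynomial vmull: bits 21:20 take the unused value 0b11, and
    //                   bit 28 encodes the size (p8 -> 0, p16 -> 1).
    unsigned encodeVMULLFields(bool Poly, bool Unsigned, unsigned SizeBits) {
      unsigned Bit28 = Poly ? (SizeBits == 16) : Unsigned;
      unsigned Bits21_20 =
          Poly ? 0b11 : (SizeBits == 8 ? 0b00 : SizeBits == 16 ? 0b01 : 0b10);
      return (Bit28 << 28) | (Bits21_20 << 20);
    }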
+class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size, bit round,
+ list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
vpred_r, "", pattern> {
@@ -3480,19 +4139,46 @@ class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size,
let Inst{0} = 0b1;
}
-def MVE_VMULHs8 : MVE_VxMULH<"vmulh", "s8", 0b0, 0b00, 0b0>;
-def MVE_VMULHs16 : MVE_VxMULH<"vmulh", "s16", 0b0, 0b01, 0b0>;
-def MVE_VMULHs32 : MVE_VxMULH<"vmulh", "s32", 0b0, 0b10, 0b0>;
-def MVE_VMULHu8 : MVE_VxMULH<"vmulh", "u8", 0b1, 0b00, 0b0>;
-def MVE_VMULHu16 : MVE_VxMULH<"vmulh", "u16", 0b1, 0b01, 0b0>;
-def MVE_VMULHu32 : MVE_VxMULH<"vmulh", "u32", 0b1, 0b10, 0b0>;
+multiclass MVE_VxMULH_m<string iname, MVEVectorVTInfo VTI, SDNode unpred_op,
+ Intrinsic pred_int, bit round> {
+ def "" : MVE_VxMULH<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, round>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated multiply returning high bits
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated multiply returning high bits
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VMULT<string iname, MVEVectorVTInfo VTI, bit round>
+ : MVE_VxMULH_m<iname, VTI, !if(round, int_arm_mve_vrmulh, int_arm_mve_vmulh),
+ !if(round, int_arm_mve_rmulh_predicated,
+ int_arm_mve_mulh_predicated),
+ round>;
+
+defm MVE_VMULHs8 : MVE_VMULT<"vmulh", MVE_v16s8, 0b0>;
+defm MVE_VMULHs16 : MVE_VMULT<"vmulh", MVE_v8s16, 0b0>;
+defm MVE_VMULHs32 : MVE_VMULT<"vmulh", MVE_v4s32, 0b0>;
+defm MVE_VMULHu8 : MVE_VMULT<"vmulh", MVE_v16u8, 0b0>;
+defm MVE_VMULHu16 : MVE_VMULT<"vmulh", MVE_v8u16, 0b0>;
+defm MVE_VMULHu32 : MVE_VMULT<"vmulh", MVE_v4u32, 0b0>;
-def MVE_VRMULHs8 : MVE_VxMULH<"vrmulh", "s8", 0b0, 0b00, 0b1>;
-def MVE_VRMULHs16 : MVE_VxMULH<"vrmulh", "s16", 0b0, 0b01, 0b1>;
-def MVE_VRMULHs32 : MVE_VxMULH<"vrmulh", "s32", 0b0, 0b10, 0b1>;
-def MVE_VRMULHu8 : MVE_VxMULH<"vrmulh", "u8", 0b1, 0b00, 0b1>;
-def MVE_VRMULHu16 : MVE_VxMULH<"vrmulh", "u16", 0b1, 0b01, 0b1>;
-def MVE_VRMULHu32 : MVE_VxMULH<"vrmulh", "u32", 0b1, 0b10, 0b1>;
+defm MVE_VRMULHs8 : MVE_VMULT<"vrmulh", MVE_v16s8, 0b1>;
+defm MVE_VRMULHs16 : MVE_VMULT<"vrmulh", MVE_v8s16, 0b1>;
+defm MVE_VRMULHs32 : MVE_VMULT<"vrmulh", MVE_v4s32, 0b1>;
+defm MVE_VRMULHu8 : MVE_VMULT<"vrmulh", MVE_v16u8, 0b1>;
+defm MVE_VRMULHu16 : MVE_VMULT<"vrmulh", MVE_v8u16, 0b1>;
+defm MVE_VRMULHu32 : MVE_VMULT<"vrmulh", MVE_v4u32, 0b1>;
class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
bits<2> size, bit T, list<dag> pattern=[]>
@@ -3551,19 +4237,36 @@ class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
let Predicates = [HasMVEFloat];
}
-multiclass MVE_VCVT_ff_halves<string suffix, bit op> {
- def bh : MVE_VCVT_ff<"vcvtb", suffix, op, 0b0>;
- def th : MVE_VCVT_ff<"vcvtt", suffix, op, 0b1>;
+multiclass MVE_VCVT_f2h_m<string iname, int half> {
+ def "": MVE_VCVT_ff<iname, "f16.f32", 0b0, half>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(v8f16 (int_arm_mve_vcvt_narrow
+ (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half))),
+ (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>;
+ def : Pat<(v8f16 (int_arm_mve_vcvt_narrow_predicated
+ (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half),
+ (v4i1 VCCR:$mask))),
+ (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm),
+ ARMVCCThen, (v4i1 VCCR:$mask)))>;
+ }
}
-defm MVE_VCVTf16f32 : MVE_VCVT_ff_halves<"f16.f32", 0b0>;
-defm MVE_VCVTf32f16 : MVE_VCVT_ff_halves<"f32.f16", 0b1>;
+multiclass MVE_VCVT_h2f_m<string iname, int half> {
+ def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half>;
+}
+
+defm MVE_VCVTf16f32bh : MVE_VCVT_f2h_m<"vcvtb", 0b0>;
+defm MVE_VCVTf16f32th : MVE_VCVT_f2h_m<"vcvtt", 0b1>;
+defm MVE_VCVTf32f16bh : MVE_VCVT_h2f_m<"vcvtb", 0b0>;
+defm MVE_VCVTf32f16th : MVE_VCVT_h2f_m<"vcvtt", 0b1>;
class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
- string cstr="", list<dag> pattern=[]>
+ string cstr="">
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
bits<4> Qn;
bit rot;
@@ -3577,13 +4280,35 @@ class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
let Inst{0} = 0b0;
}
-def MVE_VCADDi8 : MVE_VxCADD<"vcadd", "i8", 0b00, 0b1>;
-def MVE_VCADDi16 : MVE_VxCADD<"vcadd", "i16", 0b01, 0b1>;
-def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1, "@earlyclobber $Qd">;
+multiclass MVE_VxCADD_m<string iname, MVEVectorVTInfo VTI,
+ bit halve, string cstr=""> {
+ def "" : MVE_VxCADD<iname, VTI.Suffix, VTI.Size, halve, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(VTI.Vec (int_arm_mve_vcaddq halve,
+ imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vcaddq_predicated halve,
+ imm:$rot, (VTI.Vec MQPR:$inactive),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+
+ }
+}
+
+defm MVE_VCADDi8 : MVE_VxCADD_m<"vcadd", MVE_v16i8, 0b1>;
+defm MVE_VCADDi16 : MVE_VxCADD_m<"vcadd", MVE_v8i16, 0b1>;
+defm MVE_VCADDi32 : MVE_VxCADD_m<"vcadd", MVE_v4i32, 0b1, "@earlyclobber $Qd">;
-def MVE_VHCADDs8 : MVE_VxCADD<"vhcadd", "s8", 0b00, 0b0>;
-def MVE_VHCADDs16 : MVE_VxCADD<"vhcadd", "s16", 0b01, 0b0>;
-def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0, "@earlyclobber $Qd">;
+defm MVE_VHCADDs8 : MVE_VxCADD_m<"vhcadd", MVE_v16s8, 0b0>;
+defm MVE_VHCADDs16 : MVE_VxCADD_m<"vhcadd", MVE_v8s16, 0b0>;
+defm MVE_VHCADDs32 : MVE_VxCADD_m<"vhcadd", MVE_v4s32, 0b0, "@earlyclobber $Qd">;
class MVE_VADCSBC<string iname, bit I, bit subtract,
dag carryin, list<dag> pattern=[]>
@@ -3627,6 +4352,7 @@ class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
let Inst{8} = 0b1;
let Inst{7} = Qn{3};
let Inst{0} = 0b1;
+ let validForTailPredication = 1;
}
multiclass MVE_VQDMULL_halves<string suffix, bit size, string cstr=""> {
@@ -3742,6 +4468,7 @@ class MVE_VQDMULL_qr<string iname, string suffix, bit size,
let Inst{12} = T;
let Inst{8} = 0b1;
let Inst{5} = 0b1;
+ let validForTailPredication = 1;
}
multiclass MVE_VQDMULL_qr_halves<string suffix, bit size, string cstr=""> {
@@ -3804,13 +4531,30 @@ class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
let validForTailPredication = 1;
}
+multiclass MVE_VxSHL_qr_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
+ def "" : MVE_VxSHL_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar
+ (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh)))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar_predicated
+ (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+}
+
multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
- def s8 : MVE_VxSHL_qr<iname, "s8", 0b0, 0b00, bit_7, bit_17>;
- def s16 : MVE_VxSHL_qr<iname, "s16", 0b0, 0b01, bit_7, bit_17>;
- def s32 : MVE_VxSHL_qr<iname, "s32", 0b0, 0b10, bit_7, bit_17>;
- def u8 : MVE_VxSHL_qr<iname, "u8", 0b1, 0b00, bit_7, bit_17>;
- def u16 : MVE_VxSHL_qr<iname, "u16", 0b1, 0b01, bit_7, bit_17>;
- def u32 : MVE_VxSHL_qr<iname, "u32", 0b1, 0b10, bit_7, bit_17>;
+ defm s8 : MVE_VxSHL_qr_p<iname, MVE_v16s8, bit_7, bit_17>;
+ defm s16 : MVE_VxSHL_qr_p<iname, MVE_v8s16, bit_7, bit_17>;
+ defm s32 : MVE_VxSHL_qr_p<iname, MVE_v4s32, bit_7, bit_17>;
+ defm u8 : MVE_VxSHL_qr_p<iname, MVE_v16u8, bit_7, bit_17>;
+ defm u16 : MVE_VxSHL_qr_p<iname, MVE_v8u16, bit_7, bit_17>;
+ defm u32 : MVE_VxSHL_qr_p<iname, MVE_v4u32, bit_7, bit_17>;
}
defm MVE_VSHL_qr : MVE_VxSHL_qr_types<"vshl", 0b0, 0b0>;
@@ -4054,7 +4798,7 @@ def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
let hasSideEffects = 1 in
-class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]>
+class MVE_VCTPInst<string suffix, bits<2> size, list<dag> pattern=[]>
: MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
"$Rn", vpred_n, "", pattern> {
bits<4> Rn;
@@ -4072,20 +4816,22 @@ class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VCTP8 : MVE_VCTP<"8", 0b00>;
-def MVE_VCTP16 : MVE_VCTP<"16", 0b01>;
-def MVE_VCTP32 : MVE_VCTP<"32", 0b10>;
-def MVE_VCTP64 : MVE_VCTP<"64", 0b11>;
+multiclass MVE_VCTP<MVEVectorVTInfo VTI, Intrinsic intr> {
+ def "": MVE_VCTPInst<VTI.BitsSuffix, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
-let Predicates = [HasMVEInt] in {
- def : Pat<(int_arm_vctp8 rGPR:$Rn),
- (v16i1 (MVE_VCTP8 rGPR:$Rn))>;
- def : Pat<(int_arm_vctp16 rGPR:$Rn),
- (v8i1 (MVE_VCTP16 rGPR:$Rn))>;
- def : Pat<(int_arm_vctp32 rGPR:$Rn),
- (v4i1 (MVE_VCTP32 rGPR:$Rn))>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(intr rGPR:$Rn), (VTI.Pred (Inst rGPR:$Rn))>;
+ def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)),
+ (VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask))>;
+ }
}
+defm MVE_VCTP8 : MVE_VCTP<MVE_v16i8, int_arm_mve_vctp8>;
+defm MVE_VCTP16 : MVE_VCTP<MVE_v8i16, int_arm_mve_vctp16>;
+defm MVE_VCTP32 : MVE_VCTP<MVE_v4i32, int_arm_mve_vctp32>;
+defm MVE_VCTP64 : MVE_VCTP<MVE_v2i64, int_arm_mve_vctp64>;
+
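VCTP (Create Vector Tail Predicate) sets the first Rn lanes of the result mask, which is why the and-with-an-existing-mask pattern above folds into a VPT-predicated VCTP. A sketch of the lane semantics, not from this commit:

    // vctp.N rn: lane i of the predicate is active iff i < rn.
    void vctp(bool *pred, unsigned lanes, unsigned rn) {
      for (unsigned i = 0; i < lanes; ++i)
        pred[i] = i < rn;
    }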
// end of mve_qDest_rSrc
// start of coproc mov
@@ -4258,6 +5004,29 @@ foreach wb = [MVE_vldst24_writeback<
"vst" # n.nvecs # stage # "." # s.lanesize>;
}
+multiclass MVE_vst24_patterns<int lanesize, ValueType VT> {
+ foreach stage = [0,1] in
+ def : Pat<(int_arm_mve_vst2q i32:$addr,
+ (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)),
+ (!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize)
+ (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
+ t2_addr_offset_none:$addr)>;
+
+ foreach stage = [0,1,2,3] in
+ def : Pat<(int_arm_mve_vst4q i32:$addr,
+ (VT MQPR:$v0), (VT MQPR:$v1),
+ (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)),
+ (!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize)
+ (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
+ VT:$v2, qsub_2, VT:$v3, qsub_3),
+ t2_addr_offset_none:$addr)>;
+}
+defm : MVE_vst24_patterns<8, v16i8>;
+defm : MVE_vst24_patterns<16, v8i16>;
+defm : MVE_vst24_patterns<32, v4i32>;
+defm : MVE_vst24_patterns<16, v8f16>;
+defm : MVE_vst24_patterns<32, v4f32>;
+
// end of MVE interleaving load/store
// start of MVE predicable load/store
@@ -4513,28 +5282,90 @@ class MVE_VLDRSTR_rq_b<MVE_ldst_direction dir, MVE_memsz memsz,
string asm, string suffix, bit U, bits<2> size>
: MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;
+// Multiclasses wrapping that to add ISel patterns for intrinsics.
+multiclass MVE_VLDR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
+ defm "": MVE_VLDRSTR_rq_w<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
+ VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar InstU = !cast<Instruction>(NAME # "_u");
+
+ foreach VTI = VTIs in
+ foreach UnsignedFlag = !if(!eq(VTI.Size, memsz.encoding),
+ [0,1], [VTI.Unsigned]) in {
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag)),
+ (VTI.Vec (InstU GPR:$base, MQPR:$offsets))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag)),
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag, (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (InstU GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag, (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ }
+}
+multiclass MVE_VLDR_rq_b<list<MVEVectorVTInfo> VTIs> {
+ def "": MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb",
+ VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ foreach VTI = VTIs in {
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned)),
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned, (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ }
+}
+multiclass MVE_VSTR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
+ defm "": MVE_VLDRSTR_rq_w<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
+ VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar InstU = !cast<Instruction>(NAME # "_u");
+
+ foreach VTI = VTIs in {
+ def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0),
+ (InstU MQPR:$data, GPR:$base, MQPR:$offsets)>;
+ def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift),
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
+ def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0, (VTI.Pred VCCR:$pred)),
+ (InstU MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift, (VTI.Pred VCCR:$pred)),
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ }
+}
+multiclass MVE_VSTR_rq_b<list<MVEVectorVTInfo> VTIs> {
+ def "": MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb",
+ VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ foreach VTI = VTIs in {
+ def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0),
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
+ def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0, (VTI.Pred VCCR:$pred)),
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ }
+}
+
// Actually define all the loads and stores in this family.
-def MVE_VLDRBU8_rq : MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u8", 1,0b00>;
-def MVE_VLDRBU16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u16", 1,0b01>;
-def MVE_VLDRBS16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s16", 0,0b01>;
-def MVE_VLDRBU32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u32", 1,0b10>;
-def MVE_VLDRBS32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s32", 0,0b10>;
+defm MVE_VLDRBU8_rq : MVE_VLDR_rq_b<[MVE_v16u8,MVE_v16s8]>;
+defm MVE_VLDRBU16_rq: MVE_VLDR_rq_b<[MVE_v8u16]>;
+defm MVE_VLDRBS16_rq: MVE_VLDR_rq_b<[MVE_v8s16]>;
+defm MVE_VLDRBU32_rq: MVE_VLDR_rq_b<[MVE_v4u32]>;
+defm MVE_VLDRBS32_rq: MVE_VLDR_rq_b<[MVE_v4s32]>;
-defm MVE_VLDRHU16_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u16", 1,0b01>;
-defm MVE_VLDRHU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u32", 1,0b10>;
-defm MVE_VLDRHS32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","s32", 0,0b10>;
-defm MVE_VLDRWU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memW, "vldrw","u32", 1,0b10>;
-defm MVE_VLDRDU64_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memD, "vldrd","u64", 1,0b11>;
+defm MVE_VLDRHU16_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v8u16,MVE_v8s16,MVE_v8f16]>;
+defm MVE_VLDRHU32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4u32]>;
+defm MVE_VLDRHS32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4s32]>;
+defm MVE_VLDRWU32_rq: MVE_VLDR_rq_w<MVE_memW, [MVE_v4u32,MVE_v4s32,MVE_v4f32]>;
+defm MVE_VLDRDU64_rq: MVE_VLDR_rq_w<MVE_memD, [MVE_v2u64,MVE_v2s64]>;
-def MVE_VSTRB8_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","8", 0,0b00>;
-def MVE_VSTRB16_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","16", 0,0b01>;
-def MVE_VSTRB32_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","32", 0,0b10>;
+defm MVE_VSTRB8_rq : MVE_VSTR_rq_b<[MVE_v16i8]>;
+defm MVE_VSTRB16_rq : MVE_VSTR_rq_b<[MVE_v8i16]>;
+defm MVE_VSTRB32_rq : MVE_VSTR_rq_b<[MVE_v4i32]>;
-defm MVE_VSTRH16_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","16", 0,0b01>;
-defm MVE_VSTRH32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","32", 0,0b10>;
-defm MVE_VSTRW32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memW, "vstrw","32", 0,0b10>;
-defm MVE_VSTRD64_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memD, "vstrd","64", 0,0b11>;
+defm MVE_VSTRH16_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v8i16,MVE_v8f16]>;
+defm MVE_VSTRH32_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v4i32]>;
+defm MVE_VSTRW32_rq : MVE_VSTR_rq_w<MVE_memW, [MVE_v4i32,MVE_v4f32]>;
+defm MVE_VSTRD64_rq : MVE_VSTR_rq_w<MVE_memD, [MVE_v2i64]>;
// Gather loads / scatter stores whose address operand is of the form
// [Qm,#imm], i.e. a vector containing a full base address for each
@@ -4573,11 +5404,58 @@ multiclass MVE_VLDRSTR_qi_m<MVE_ldst_direction dir, MVE_memsz memsz,
}
}
+// Multiclasses wrapping that one, adding selection patterns for the
+// non-writeback loads and all the stores. (The writeback loads must
+// deliver multiple output values, so they have to be selected by C++
+// code.)
+multiclass MVE_VLDR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
+ list<MVEVectorVTInfo> DVTIs> {
+ defm "" : MVE_VLDRSTR_qi_m<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
+ "u" # memsz.TypeBits>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ foreach DVTI = DVTIs in {
+ def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset))),
+ (DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset)))>;
+ def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base_predicated
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (AVTI.Pred VCCR:$pred))),
+ (DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset),
+ ARMVCCThen, VCCR:$pred))>;
+ }
+}
+multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
+ list<MVEVectorVTInfo> DVTIs> {
+ defm "" : MVE_VLDRSTR_qi_m<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
+ !cast<string>(memsz.TypeBits)>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar InstPre = !cast<Instruction>(NAME # "_pre");
+
+ foreach DVTI = DVTIs in {
+ def : Pat<(int_arm_mve_vstr_scatter_base
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data)),
+ (Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
+ (i32 imm:$offset))>;
+ def : Pat<(int_arm_mve_vstr_scatter_base_predicated
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred)),
+ (Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
+ (i32 imm:$offset), ARMVCCThen, VCCR:$pred)>;
+ def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data))),
+ (AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
+ (i32 imm:$offset)))>;
+ def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb_predicated
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred))),
+ (AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
+ (i32 imm:$offset), ARMVCCThen, VCCR:$pred))>;
+ }
+}
+
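The comment above notes that writeback gather loads have to be selected in C++ because a TableGen pattern can only produce a single result. A hedged sketch of what that selection looks like in a DAG ISel backend; the opcode and types here are illustrative, not taken from this commit:

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Build one machine node with two value results (the loaded data and
    // the updated base vector) plus the chain; a real selector would wire
    // the results up with ReplaceUses.
    static void selectGatherWB(SelectionDAG &DAG, const SDLoc &dl,
                               unsigned Opcode, SDValue Base, SDValue Offset,
                               SDValue Chain) {
      SDValue Ops[] = {Base, Offset, Chain};
      DAG.getMachineNode(Opcode, dl,
                         DAG.getVTList(MVT::v4i32, MVT::v4i32, MVT::Other),
                         Ops);
    }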
// Actual instruction definitions.
-defm MVE_VLDRWU32_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memW, "vldrw", "u32">;
-defm MVE_VLDRDU64_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memD, "vldrd", "u64">;
-defm MVE_VSTRW32_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memW, "vstrw", "32">;
-defm MVE_VSTRD64_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memD, "vstrd", "64">;
+defm MVE_VLDRWU32_qi: MVE_VLDR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
+defm MVE_VLDRDU64_qi: MVE_VLDR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;
+defm MVE_VSTRW32_qi: MVE_VSTR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
+defm MVE_VSTRD64_qi: MVE_VSTR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;
// Define aliases for all the instructions where memory size and
// vector lane size are the same. These are mnemonic aliases, so they
@@ -4595,21 +5473,21 @@ defm MVE_VSTRD64_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memD, "vstrd", "64">;
foreach vpt_cond = ["", "t", "e"] in
foreach memsz = [MVE_memB, MVE_memH, MVE_memW, MVE_memD] in
foreach suffix = memsz.suffixes in {
+ // Define an alias with every suffix in the list, except for the one
+ // used by the real Instruction record (i.e. the one that all the
+ // rest are aliases *for*).
+
+ if !ne(suffix, memsz.CanonLoadSuffix) then {
+ def : MnemonicAlias<
+ "vldr" # memsz.MnemonicLetter # vpt_cond # suffix,
+ "vldr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonLoadSuffix>;
+ }
- // These foreaches are conceptually ifs, implemented by iterating a
- // dummy variable over a list with 0 or 1 elements depending on the
- // condition. The idea is to iterate over _nearly_ all the suffixes
- // in memsz.suffixes, but omit the one we want all the others to alias.
-
- foreach _ = !if(!ne(suffix, memsz.CanonLoadSuffix), [1], []<int>) in
- def : MnemonicAlias<
- "vldr" # memsz.MnemonicLetter # vpt_cond # suffix,
- "vldr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonLoadSuffix>;
-
- foreach _ = !if(!ne(suffix, memsz.CanonStoreSuffix), [1], []<int>) in
- def : MnemonicAlias<
- "vstr" # memsz.MnemonicLetter # vpt_cond # suffix,
- "vstr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonStoreSuffix>;
+ if !ne(suffix, memsz.CanonStoreSuffix) then {
+ def : MnemonicAlias<
+ "vstr" # memsz.MnemonicLetter # vpt_cond # suffix,
+ "vstr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonStoreSuffix>;
+ }
}
// end of MVE predicable load/store
@@ -4632,7 +5510,6 @@ class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> patte
let Inst{4} = 0b0;
let Defs = [VPR];
- let validForTailPredication = 1;
}
class MVE_VPTt1<string suffix, bits<2> size, dag iops>
@@ -4644,7 +5521,6 @@ class MVE_VPTt1<string suffix, bits<2> size, dag iops>
let Inst{5} = Qm{3};
let Inst{3-1} = Qm{2-0};
let Inst{0} = fc{1};
- let validForTailPredication = 1;
}
class MVE_VPTt1i<string suffix, bits<2> size>
@@ -4746,7 +5622,6 @@ class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=
let Defs = [VPR];
let Predicates = [HasMVEFloat];
- let validForTailPredication = 1;
}
class MVE_VPTft1<string suffix, bit size>
@@ -4816,7 +5691,6 @@ def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
let Inst{4} = 0b0;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b1;
- let validForTailPredication = 1;
}
foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32",
@@ -4826,87 +5700,87 @@ def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
let Predicates = [HasMVEInt] in {
def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
- (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
- (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
- (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
- (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
- (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
- (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
- (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>;
+ (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
+ (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), ARMCCne)))>;
def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
- (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
- (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>;
+ (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
+ (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
- (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
- (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
+ (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
+ (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
- (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
- (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>;
+ (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
+ (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
- (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
- (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
+ (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
+ (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
// Pred <-> Int
def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))),
- (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, 1))>;
+ (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))),
- (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, 1))>;
+ (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))),
- (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, 1))>;
+ (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, ARMCCne))>;
}
let Predicates = [HasMVEFloat] in {
// Pred <-> Float
// 112 is 1.0 in float
def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
- (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
  // 2620 is 1.0 in half
def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
- (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
// 240 is -1.0 in float
def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
- (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
// 2748 is -1.0 in half
def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
- (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>;
}
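
The constants in the comments above are encoded immediates rather than raw float bit patterns. A sketch of the decoding, assuming the standard VFP imm8 expansion for the f32 cases and the AdvSIMD cmode:imm8 splat encoding for the i16 cases (112 -> 1.0f, 240 -> -1.0f, 2620 = 0xA3C -> 0x3C00 = fp16 1.0, 2748 = 0xABC -> 0xBC00 = fp16 -1.0):

    #include <cstdint>
    #include <cstring>

    // VFPExpandImm for f32: imm8 = a:b:cd:efgh ->
    //   sign a, exponent NOT(b):bbbbb:cd, fraction efgh0...0.
    float vfpExpandImm8(uint8_t imm8) {
      uint32_t sign = (imm8 >> 7) & 1;
      uint32_t b    = (imm8 >> 6) & 1;
      uint32_t cd   = (imm8 >> 4) & 3;
      uint32_t efgh = imm8 & 0xf;
      uint32_t exp  = ((b ^ 1) << 7) | (b ? 0x7c : 0) | cd;
      uint32_t bits = (sign << 31) | (exp << 23) | (efgh << 19);
      float f;
      std::memcpy(&f, &bits, sizeof f);
      return f; // vfpExpandImm8(112) == 1.0f, vfpExpandImm8(240) == -1.0f
    }

    // i16 splat encoding cmode:imm8; only the cmode used here is handled.
    uint16_t decodeI16Splat(uint16_t enc) {
      uint8_t cmode = (enc >> 8) & 0xf;
      uint8_t imm8  = enc & 0xff;
      return cmode == 0xA ? (uint16_t)(imm8 << 8)  // 0xA3C -> 0x3C00
                          : imm8;                  // cmode 0x8: no shift
    }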
def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary,
@@ -4955,6 +5829,8 @@ class MVE_WLSTP<string asm, bits<2> size>
let Inst{13} = 0b0;
let Inst{11} = label{0};
let Inst{10-1} = label{10-1};
+ let isBranch = 1;
+ let isTerminator = 1;
}
def MVE_DLSTP_8 : MVE_DLSTP<"dlstp.8", 0b00>;
@@ -4983,6 +5859,8 @@ def MVE_LETP : MVE_loltp_end<(outs GPRlr:$LRout),
let Inst{13} = 0b0;
let Inst{11} = label{0};
let Inst{10-1} = label{10-1};
+ let isBranch = 1;
+ let isTerminator = 1;
}
def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
@@ -4998,61 +5876,7 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
// Patterns
//===----------------------------------------------------------------------===//
-class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag StoreKind, int shift>
- : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
- (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
-class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag StoreKind, int shift>
- : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
- (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred)>;
-
-multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
- int shift> {
- def : MVE_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>;
-}
-
-class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag LoadKind, int shift>
- : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
- (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
-class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag LoadKind, int shift>
- : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))),
- (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred))>;
-
-multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
- int shift> {
- def : MVE_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
-}
-
-class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
- PatFrag StoreKind, int shift>
- : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr),
- (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>;
-
-multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
- int shift> {
- def : MVE_vector_offset_store_typed<v16i8, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v8i16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v8f16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v4i32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v4f32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v2i64, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v2f64, RegImmInst, StoreKind, shift>;
-}
+// PatFrags for loads and stores, trying to keep the names semi-consistent.
def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
(pre_store node:$val, node:$ptr, node:$offset), [{
@@ -5072,77 +5896,249 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
}]>;
-def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+def aligned_maskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
return Ld->getMemoryVT().getScalarType() == MVT::i8;
}]>;
-def sextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_sextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
}]>;
-def zextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_zextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
}]>;
-def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_extmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
}]>;
-def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+def aligned_maskedloadvi16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2;
}]>;
-def sextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_sextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
}]>;
-def zextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_zextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
}]>;
-def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_extmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
}]>;
-def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+def aligned_maskedloadvi32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4;
}]>;
-def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (masked_st node:$val, node:$ptr, node:$pred), [{
+def aligned_maskedstvi8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
-def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (maskedstore8 node:$val, node:$ptr, node:$pred), [{
- return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+def aligned_maskedstvi16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
+}]>;
+def aligned_maskedstvi32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
}]>;
-def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (masked_st node:$val, node:$ptr, node:$pred), [{
+
+def pre_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
+ (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
+}]>;
+def post_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
+ (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return AM == ISD::POST_INC || AM == ISD::POST_DEC;
+}]>;
+def aligned_pre_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def aligned_post_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def aligned_pre_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
auto *St = cast<MaskedStoreSDNode>(N);
EVT ScalarVT = St->getMemoryVT().getScalarType();
return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
}]>;
+def aligned_post_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
+}]>;
+def aligned_pre_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
+}]>;
+def aligned_post_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
+}]>;
+
+
+// PatFrags for "Aligned" extending / truncating
-def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (maskedstore16 node:$val, node:$ptr, node:$pred), [{
+def aligned_extloadvi8 : PatFrag<(ops node:$ptr), (extloadvi8 node:$ptr)>;
+def aligned_sextloadvi8 : PatFrag<(ops node:$ptr), (sextloadvi8 node:$ptr)>;
+def aligned_zextloadvi8 : PatFrag<(ops node:$ptr), (zextloadvi8 node:$ptr)>;
+
+def aligned_truncstvi8 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorevi8 node:$val, node:$ptr)>;
+def aligned_post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (post_truncstvi8 node:$val, node:$base, node:$offset)>;
+def aligned_pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncstvi8 node:$val, node:$base, node:$offset)>;
+
+let MinAlignment = 2 in {
+ def aligned_extloadvi16 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>;
+ def aligned_sextloadvi16 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>;
+ def aligned_zextloadvi16 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>;
+
+ def aligned_truncstvi16 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorevi16 node:$val, node:$ptr)>;
+ def aligned_post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (post_truncstvi16 node:$val, node:$base, node:$offset)>;
+ def aligned_pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncstvi16 node:$val, node:$base, node:$offset)>;
+}
+
+def truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$pred),
+ (masked_st node:$val, node:$base, undef, node:$pred), [{
return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
}]>;
-def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (masked_st node:$val, node:$ptr, node:$pred), [{
+def aligned_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$pred),
+ (truncmaskedst node:$val, node:$base, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def aligned_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$pred),
+ (truncmaskedst node:$val, node:$base, node:$pred), [{
auto *St = cast<MaskedStoreSDNode>(N);
EVT ScalarVT = St->getMemoryVT().getScalarType();
- return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
}]>;
+def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
+ (masked_st node:$val, node:$base, node:$offset, node:$pred), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && (AM == ISD::PRE_INC || AM == ISD::PRE_DEC);
+}]>;
+def aligned_pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
+ (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def aligned_pre_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
+ (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
+}]>;
+def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
+ (masked_st node:$val, node:$base, node:$offset, node:$postd), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && (AM == ISD::POST_INC || AM == ISD::POST_DEC);
+}]>;
+def aligned_post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
+ (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def aligned_post_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
+ (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
+}]>;
+
+// Load/store patterns
+
+class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
+ (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
+
+class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
+ (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+
+multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
+ int shift> {
+ def : MVE_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>;
+}
+
+class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag LoadKind, int shift>
+ : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
+ (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
+
+class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag LoadKind, int shift>
+ : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))),
+ (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+
+multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
+ int shift> {
+ def : MVE_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
+}
+
+class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr),
+ (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>;
+
+class MVE_vector_offset_maskedstore_typed<ValueType Ty, Instruction Opcode,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr, VCCR:$pred),
+ (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+
+multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
+ int shift> {
+ def : MVE_vector_offset_store_typed<v16i8, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v8i16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v8f16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v4i32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v4f32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v2i64, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v2f64, RegImmInst, StoreKind, shift>;
+}
+
let Predicates = [HasMVEInt, IsLE] in {
// Stores
@@ -5220,116 +6216,73 @@ let Predicates = [HasMVEInt, IsBE] in {
let Predicates = [HasMVEInt] in {
// Aligned masked store, shared between LE and BE
- def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8, maskedstore8, 0>;
- def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, maskedstore16, 1>;
- def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, maskedstore16, 1>;
- def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, maskedstore32, 2>;
- def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, maskedstore32, 2>;
- // Truncating stores
- def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
- (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
- def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
- (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
- def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred),
- (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>;
+ def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8, aligned_maskedstvi8, 0>;
+ def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, aligned_maskedstvi16, 1>;
+ def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, aligned_maskedstvi16, 1>;
+ def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, aligned_maskedstvi32, 2>;
+ def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, aligned_maskedstvi32, 2>;
+
+ // Pre/Post inc masked stores
+ def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_pre, aligned_pre_maskedstorevi8, 0>;
+ def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_post, aligned_post_maskedstorevi8, 0>;
+ def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_pre, aligned_pre_maskedstorevi16, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_pre, aligned_pre_maskedstorevi16, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_pre, aligned_pre_maskedstorevi32, 2>;
+ def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>;
+ def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_pre, aligned_pre_maskedstorevi32, 2>;
+ def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>;
+
// Aligned masked loads
- def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload8, 0>;
- def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, alignedmaskedload16, 1>;
- def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, alignedmaskedload16, 1>;
- def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, alignedmaskedload32, 2>;
- def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, alignedmaskedload32, 2>;
- // Extending masked loads.
- def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v8i16 NEONimmAllZerosV))),
- (v8i16 (MVE_VLDRBS16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRBS32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v8i16 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v8i16 NEONimmAllZerosV))),
- (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v8i16 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v8i16 NEONimmAllZerosV))),
- (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (sextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRHS32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (zextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (extmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
+ def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, aligned_maskedloadvi8, 0>;
+ def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>;
+ def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>;
+ def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>;
+ def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>;
}
// Widening/Narrowing Loads/Stores
-let MinAlignment = 2 in {
- def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr),
- (truncstorevi16 node:$val, node:$ptr)>;
- def post_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset),
- (post_truncstvi16 node:$val, node:$base, node:$offset)>;
- def pre_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset),
- (pre_truncstvi16 node:$val, node:$base, node:$offset)>;
-}
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr),
- (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>;
- def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr),
- (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>;
- def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr),
- (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>;
-
- def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
- (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
- def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
- (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
- def : Pat<(post_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr),
- (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>;
-
- def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
- (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
- def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
- (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
- def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr),
- (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>;
-}
-
-
-let MinAlignment = 2 in {
- def extloadvi16_align2 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>;
- def sextloadvi16_align2 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>;
- def zextloadvi16_align2 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>;
-}
-
-multiclass MVEExtLoad<string DestLanes, string DestElemBits,
- string SrcElemBits, string SrcElemType,
- string Align, Operand am> {
- def _Any : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("extloadvi" # SrcElemBits # Align) am:$addr)),
- (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
- am:$addr)>;
- def _Z : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("zextloadvi" # SrcElemBits # Align) am:$addr)),
- (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
- am:$addr)>;
- def _S : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("sextloadvi" # SrcElemBits # Align) am:$addr)),
- (!cast<Instruction>("MVE_VLDR" # SrcElemType # "S" # DestElemBits)
- am:$addr)>;
+multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string StoreInst,
+ string Amble, ValueType VT, int Shift> {
+ // Trunc stores
+ def : Pat<(!cast<PatFrag>("aligned_truncst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr),
+ (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr)>;
+ def : Pat<(!cast<PatFrag>("aligned_post_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr),
+ (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>;
+ def : Pat<(!cast<PatFrag>("aligned_pre_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr),
+ (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>;
+
+ // Masked trunc stores
+ def : Pat<(!cast<PatFrag>("aligned_truncmaskedst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr, VCCR:$pred),
+ (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ def : Pat<(!cast<PatFrag>("aligned_post_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
+ (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ def : Pat<(!cast<PatFrag>("aligned_pre_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
+ (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+
+ // Ext loads
+ def : Pat<(VT (!cast<PatFrag>("aligned_extload"#Amble) taddrmode_imm7<Shift>:$addr)),
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_sextload"#Amble) taddrmode_imm7<Shift>:$addr)),
+ (VT (LoadSInst taddrmode_imm7<Shift>:$addr))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_zextload"#Amble) taddrmode_imm7<Shift>:$addr)),
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
+
+ // Masked ext loads
+ def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ (VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
}
let Predicates = [HasMVEInt] in {
- defm : MVEExtLoad<"4", "32", "8", "B", "", taddrmode_imm7<0>>;
- defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>;
- defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>;
+ defm : MVEExtLoadStore<MVE_VLDRBS16, MVE_VLDRBU16, "MVE_VSTRB16", "vi8", v8i16, 0>;
+ defm : MVEExtLoadStore<MVE_VLDRBS32, MVE_VLDRBU32, "MVE_VSTRB32", "vi8", v4i32, 0>;
+ defm : MVEExtLoadStore<MVE_VLDRHS32, MVE_VLDRHU32, "MVE_VSTRH32", "vi16", v4i32, 1>;
}
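// (A sketch of what the vi16/v4i32 instantiation above produces once the
// !cast string concatenations are resolved, among other patterns:
//   def : Pat<(aligned_truncstvi16 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr),
//             (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>;
//   def : Pat<(v4i32 (aligned_sextloadvi16 taddrmode_imm7<1>:$addr)),
//             (v4i32 (MVE_VLDRHS32 taddrmode_imm7<1>:$addr))>;
// so one multiclass use covers the truncating-store, pre/post-indexed,
// masked and extending-load variants for a given element width.)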
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
index 60ca92e58041..6244d8d9e27e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -3314,30 +3314,30 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4, string opc, string Dt,
- string asm, int fc> {
+ string asm, PatFrag fc> {
// 64-bit vector types.
def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "8"), asm, "",
- [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), (i32 fc))))]>;
+ [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "16"), asm, "",
- [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), (i32 fc))))]>;
+ [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "32"), asm, "",
- [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), (i32 fc))))]>;
+ [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, "f32", asm, "",
- [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), (i32 fc))))]> {
+ [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
let Inst{10} = 1; // overwrite F = 1
}
def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, "f16", asm, "",
- [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), (i32 fc))))]>,
+ [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
Requires<[HasNEON,HasFullFP16]> {
let Inst{10} = 1; // overwrite F = 1
}
@@ -3346,25 +3346,25 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "8"), asm, "",
- [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), (i32 fc))))]>;
+ [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "16"), asm, "",
- [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), (i32 fc))))]>;
+ [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "32"), asm, "",
- [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), (i32 fc))))]>;
+ [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, "f32", asm, "",
- [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), (i32 fc))))]> {
+ [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
let Inst{10} = 1; // overwrite F = 1
}
def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, "f16", asm, "",
- [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), (i32 fc))))]>,
+ [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
Requires<[HasNEON,HasFullFP16]> {
let Inst{10} = 1; // overwrite F = 1
}
@@ -3373,11 +3373,11 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// Neon 3-register comparisons.
class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, int fc, bit Commutable>
+ ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
- [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), (i32 fc))))]> {
+ [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
@@ -3385,11 +3385,11 @@ class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, int fc, bit Commutable>
+ ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
- [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), (i32 fc))))]> {
+ [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
@@ -3399,7 +3399,7 @@ multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
- int fc, bit Commutable = 0> {
+ PatFrag fc, bit Commutable = 0> {
// 64-bit vector types.
def v8i8 : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "8"),
@@ -4287,10 +4287,10 @@ defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
// VQADD : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
- "vqadd", "s", int_arm_neon_vqadds, 1>;
+ "vqadd", "s", saddsat, 1>;
defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
- "vqadd", "u", int_arm_neon_vqaddu, 1>;
+ "vqadd", "u", uaddsat, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
@@ -4527,22 +4527,22 @@ let Predicates = [HasNEON, HasV8_1a] in {
defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
null_frag>;
- def : Pat<(v4i16 (int_arm_neon_vqadds
+ def : Pat<(v4i16 (saddsat
(v4i16 DPR:$src1),
(v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
(v4i16 DPR:$Vm))))),
(v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
- def : Pat<(v2i32 (int_arm_neon_vqadds
+ def : Pat<(v2i32 (saddsat
(v2i32 DPR:$src1),
(v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
(v2i32 DPR:$Vm))))),
(v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
- def : Pat<(v8i16 (int_arm_neon_vqadds
+ def : Pat<(v8i16 (saddsat
(v8i16 QPR:$src1),
(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
(v8i16 QPR:$Vm))))),
(v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
- def : Pat<(v4i32 (int_arm_neon_vqadds
+ def : Pat<(v4i32 (saddsat
(v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
(v4i32 QPR:$Vm))))),
@@ -4551,7 +4551,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
null_frag>;
- def : Pat<(v4i16 (int_arm_neon_vqadds
+ def : Pat<(v4i16 (saddsat
(v4i16 DPR:$src1),
(v4i16 (int_arm_neon_vqrdmulh
(v4i16 DPR:$Vn),
@@ -4559,7 +4559,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
imm:$lane)))))),
(v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
imm:$lane))>;
- def : Pat<(v2i32 (int_arm_neon_vqadds
+ def : Pat<(v2i32 (saddsat
(v2i32 DPR:$src1),
(v2i32 (int_arm_neon_vqrdmulh
(v2i32 DPR:$Vn),
@@ -4567,7 +4567,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
imm:$lane)))))),
(v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
imm:$lane))>;
- def : Pat<(v8i16 (int_arm_neon_vqadds
+ def : Pat<(v8i16 (saddsat
(v8i16 QPR:$src1),
(v8i16 (int_arm_neon_vqrdmulh
(v8i16 QPR:$src2),
@@ -4579,7 +4579,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
QPR:$src3,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
- def : Pat<(v4i32 (int_arm_neon_vqadds
+ def : Pat<(v4i32 (saddsat
(v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqrdmulh
(v4i32 QPR:$src2),
@@ -4597,22 +4597,22 @@ let Predicates = [HasNEON, HasV8_1a] in {
defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
null_frag>;
- def : Pat<(v4i16 (int_arm_neon_vqsubs
+ def : Pat<(v4i16 (ssubsat
(v4i16 DPR:$src1),
(v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
(v4i16 DPR:$Vm))))),
(v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
- def : Pat<(v2i32 (int_arm_neon_vqsubs
+ def : Pat<(v2i32 (ssubsat
(v2i32 DPR:$src1),
(v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
(v2i32 DPR:$Vm))))),
(v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
- def : Pat<(v8i16 (int_arm_neon_vqsubs
+ def : Pat<(v8i16 (ssubsat
(v8i16 QPR:$src1),
(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
(v8i16 QPR:$Vm))))),
(v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
- def : Pat<(v4i32 (int_arm_neon_vqsubs
+ def : Pat<(v4i32 (ssubsat
(v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
(v4i32 QPR:$Vm))))),
@@ -4621,14 +4621,14 @@ let Predicates = [HasNEON, HasV8_1a] in {
defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
null_frag>;
- def : Pat<(v4i16 (int_arm_neon_vqsubs
+ def : Pat<(v4i16 (ssubsat
(v4i16 DPR:$src1),
(v4i16 (int_arm_neon_vqrdmulh
(v4i16 DPR:$Vn),
(v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
imm:$lane)))))),
(v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
- def : Pat<(v2i32 (int_arm_neon_vqsubs
+ def : Pat<(v2i32 (ssubsat
(v2i32 DPR:$src1),
(v2i32 (int_arm_neon_vqrdmulh
(v2i32 DPR:$Vn),
@@ -4636,7 +4636,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
imm:$lane)))))),
(v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
imm:$lane))>;
- def : Pat<(v8i16 (int_arm_neon_vqsubs
+ def : Pat<(v8i16 (ssubsat
(v8i16 QPR:$src1),
(v8i16 (int_arm_neon_vqrdmulh
(v8i16 QPR:$src2),
@@ -4648,7 +4648,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
QPR:$src3,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
- def : Pat<(v4i32 (int_arm_neon_vqsubs
+ def : Pat<(v4i32 (ssubsat
(v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqrdmulh
(v4i32 QPR:$src2),
@@ -4667,20 +4667,20 @@ defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
let Predicates = [HasNEON] in {
-def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
(v4i16 DPR:$Vm))))),
(VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
-def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
(v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
(v2i32 DPR:$Vm))))),
(VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
-def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
(v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
imm:$lane)))))),
(VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
-def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
(v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
(v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
imm:$lane)))))),
@@ -4759,20 +4759,20 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
let Predicates = [HasNEON] in {
-def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
(v4i16 DPR:$Vm))))),
(VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
-def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
(v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
(v2i32 DPR:$Vm))))),
(VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
-def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
(v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
imm:$lane)))))),
(VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
-def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
(v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
(v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
imm:$lane)))))),
@@ -5012,6 +5012,27 @@ defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
+let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
+ def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
+ (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
+ def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
+ (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
+ def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
+ (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
+ def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
+ (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
+}
+let Predicates = [HasNEON,HasV8_3a] in {
+ def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
+ (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
+ def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
+ (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
+ def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
+ (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
+ def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
+ (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
+}
+
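// (For reference, read off the patterns above: the trailing immediate is the
// rotation selector, (i32 0) encoding a 90-degree and (i32 1) a 270-degree
// rotation, so e.g. int_arm_neon_vcadd_rot270 on v4f32 becomes
//   (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))
// in the output DAG.)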
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
@@ -5045,10 +5066,10 @@ defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
// VQSUB : Vector Saturating Subtract
defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vqsub", "s", int_arm_neon_vqsubs, 0>;
+ "vqsub", "s", ssubsat, 0>;
defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vqsub", "u", int_arm_neon_vqsubu, 0>;
+ "vqsub", "u", usubsat, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D = Q - Q)
@@ -5068,66 +5089,66 @@ def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
// VCEQ : Vector Compare Equal
defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vceq", "i", 0, 1>;
+ IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>;
def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
- 0, 1>;
+ ARMCCeq, 1>;
def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
- 0, 1>;
+ ARMCCeq, 1>;
def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
- 0, 1>,
+ ARMCCeq, 1>,
Requires<[HasNEON, HasFullFP16]>;
def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
- 0, 1>,
+ ARMCCeq, 1>,
Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
- "$Vd, $Vm, #0", 0>;
+ "$Vd, $Vm, #0", ARMCCeq>;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcge", "s", 10, 0>;
+ IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>;
defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcge", "u", 2, 0>;
+ IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>;
def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
- 10, 0>;
+ ARMCCge, 0>;
def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
- 10, 0>;
+ ARMCCge, 0>;
def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
- 10, 0>,
+ ARMCCge, 0>,
Requires<[HasNEON, HasFullFP16]>;
def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
- 10, 0>,
+ ARMCCge, 0>,
Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
- "$Vd, $Vm, #0", 10>;
+ "$Vd, $Vm, #0", ARMCCge>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
- "$Vd, $Vm, #0", 13>;
+ "$Vd, $Vm, #0", ARMCCle>;
}
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcgt", "s", 12, 0>;
+ IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>;
defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcgt", "u", 8, 0>;
+ IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>;
def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
- 12, 0>;
+ ARMCCgt, 0>;
def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
- 12, 0>;
+ ARMCCgt, 0>;
def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
- 12, 0>,
+ ARMCCgt, 0>,
Requires<[HasNEON, HasFullFP16]>;
def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
- 12, 0>,
+ ARMCCgt, 0>,
Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
- "$Vd, $Vm, #0", 12>;
+ "$Vd, $Vm, #0", ARMCCgt>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
- "$Vd, $Vm, #0", 11>;
+ "$Vd, $Vm, #0", ARMCClt>;
}
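// (For reference, the named condition-code PatFrags substituted above map to
// the raw condition-field values they replace, as read off this diff:
//   ARMCCeq = 0, ARMCChs = 2, ARMCChi = 8, ARMCCge = 10,
//   ARMCClt = 11, ARMCCgt = 12, ARMCCle = 13
// so the f32 VCGE definition, for example, now reads
//   def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32",
//                         v2i32, v2f32, ARMCCge, 0>;
// carrying a PatFrag instead of a bare int.)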
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
@@ -6797,9 +6818,12 @@ def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
let Predicates = [HasNEON] in {
-def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
-def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
-def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
+ def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))),
+ (VREV64q32 QPR:$Vm)>;
+ def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))),
+ (VREV64q16 QPR:$Vm)>;
+ def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))),
+ (VREV64d16 DPR:$Vm)>;
}
// VREV32 : Vector Reverse elements within 32-bit words
@@ -6821,6 +6845,13 @@ def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
+let Predicates = [HasNEON] in {
+ def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))),
+ (VREV32q16 QPR:$Vm)>;
+ def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))),
+ (VREV32d16 DPR:$Vm)>;
+}
+
// VREV16 : Vector Reverse elements within 16-bit halfwords
class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 25a45b39fa0c..4193e8147f47 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -270,7 +270,8 @@ def t2am_imm8_offset : MemOperand,
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
-class T2AddrMode_Imm8s4 : MemOperand {
+class T2AddrMode_Imm8s4 : MemOperand,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8<2>", []> {
let EncoderMethod = "getT2AddrModeImm8s4OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8s4";
let ParserMatchClass = MemImm8s4OffsetAsmOperand;
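// (A sketch of what the added ComplexPattern enables: t2addrmode_imm8s4 can
// now appear as a leaf in selection patterns, with SelectT2AddrModeImm8<2>
// decomposing the address into its two operands, base register and scaled
// immediate, at selection time. The ARMldrd/ARMstrd patterns attached to
// t2LDRDi8/t2STRDi8 further down depend on this, e.g. the load form
//   [(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]
// would not be selectable otherwise.)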
@@ -917,10 +918,26 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, SDNode opnode,
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
+ def spImm : T2sTwoRegImm<
+ (outs GPRsp:$Rd), (ins GPRsp:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ []>,
+ Sched<[WriteALU, ReadALU]> {
+ let Rn = 13;
+ let Rd = 13;
+
+ let Inst{31-27} = 0b11110;
+ let Inst{25-24} = 0b01;
+ let Inst{23-21} = op23_21;
+ let Inst{15} = 0;
+
+ let DecoderMethod = "DecodeT2AddSubSPImm";
+ }
+
def ri : T2sTwoRegImm<
- (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ (outs rGPR:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi,
opc, ".w\t$Rd, $Rn, $imm",
- [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]>,
+ [(set rGPR:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]>,
Sched<[WriteALU, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -931,9 +948,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, SDNode opnode,
}
// 12-bit imm
def ri12 : T2I<
- (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
+ (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
!strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
- [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]>,
+ [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]>,
Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
@@ -949,6 +966,26 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, SDNode opnode,
let Inst{11-8} = Rd;
let Inst{7-0} = imm{7-0};
}
+ def spImm12 : T2I<
+ (outs GPRsp:$Rd), (ins GPRsp:$Rn, imm0_4095:$imm), IIC_iALUi,
+ !strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
+ []>,
+ Sched<[WriteALU, ReadALU]> {
+ bits<4> Rd = 13;
+ bits<4> Rn = 13;
+ bits<12> imm;
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = imm{11};
+ let Inst{25-24} = 0b10;
+ let Inst{23-21} = op23_21;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14-12} = imm{10-8};
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = imm{7-0};
+ let DecoderMethod = "DecodeT2AddSubSPImm";
+ }
// register
def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm),
IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm",
@@ -1412,7 +1449,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>,
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "",
+ [(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]>,
Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1593,7 +1631,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
- IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>,
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "",
+ [(ARMstrd rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr)]>,
Sched<[WriteST]>;
// Indexed stores
@@ -2264,19 +2303,29 @@ def : t2InstSubst<"sbc${s}${p} $rd, $rn, $imm",
(t2ADCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm",
- (t2SUBri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
-def : t2InstSubst<"addw${p} $rd, $rn, $imm",
- (t2SUBri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>;
+ (t2SUBri rGPR:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"sub${s}${p}.w $rd, $rn, $imm",
+ (t2ADDri rGPR:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"subw${p} $Rd, $Rn, $imm",
+ (t2ADDri12 rGPR:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstSubst<"sub${s}${p} $rd, $rn, $imm",
+ (t2ADDri rGPR:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"sub${p} $rd, $rn, $imm",
+ (t2ADDri12 rGPR:$rd, GPR:$rn, imm0_4095_neg:$imm, pred:$p)>;
+
+// SP-to-SP and similar forms
+def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm",
+ (t2SUBspImm GPRsp:$rd, GPRsp:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
def : t2InstSubst<"sub${s}${p}.w $rd, $rn, $imm",
- (t2ADDri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
-def : t2InstSubst<"subw${p} $rd, $rn, $imm",
- (t2ADDri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>;
+ (t2ADDspImm GPRsp:$rd, GPRsp:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
def : t2InstSubst<"subw${p} $Rd, $Rn, $imm",
- (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+ (t2ADDspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095_neg:$imm, pred:$p)>;
def : t2InstSubst<"sub${s}${p} $rd, $rn, $imm",
- (t2ADDri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
+ (t2ADDspImm GPRsp:$rd, GPRsp:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
def : t2InstSubst<"sub${p} $rd, $rn, $imm",
- (t2ADDri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>;
+ (t2ADDspImm12 GPRsp:$rd, GPRsp:$rn, imm0_4095_neg:$imm, pred:$p)>;
+
+
// RSB
defm t2RSB : T2I_rbin_irs <0b1110, "rsb", sub>;
@@ -2292,12 +2341,12 @@ defm t2RSBS : T2I_rbin_s_is <ARMsubc>;
// The AddedComplexity prefers the first variant over the others since
// it can be shrunk to a 16-bit wide encoding, while the others cannot.
let AddedComplexity = 1 in
-def : T2Pat<(add GPR:$src, imm1_255_neg:$imm),
- (t2SUBri GPR:$src, imm1_255_neg:$imm)>;
-def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
- (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
-def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
- (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+def : T2Pat<(add rGPR:$src, imm1_255_neg:$imm),
+ (t2SUBri rGPR:$src, imm1_255_neg:$imm)>;
+def : T2Pat<(add rGPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBri rGPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add rGPR:$src, imm0_4095_neg:$imm),
+ (t2SUBri12 rGPR:$src, imm0_4095_neg:$imm)>;
def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm),
(t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
@@ -2796,10 +2845,10 @@ def : T2Pat<(t2_so_imm_not:$src),
// Thumb2SizeReduction's chances later on we select a t2ADD for an or where
// possible.
def : T2Pat<(or AddLikeOrOp:$Rn, t2_so_imm:$imm),
- (t2ADDri $Rn, t2_so_imm:$imm)>;
+ (t2ADDri rGPR:$Rn, t2_so_imm:$imm)>;
def : T2Pat<(or AddLikeOrOp:$Rn, imm0_4095:$Rm),
- (t2ADDri12 $Rn, imm0_4095:$Rm)>;
+ (t2ADDri12 rGPR:$Rn, imm0_4095:$Rm)>;
def : T2Pat<(or AddLikeOrOp:$Rn, non_imm32:$Rm),
(t2ADDrr $Rn, $Rm)>;
@@ -4551,10 +4600,18 @@ class T2TT<bits<2> at, string asm, list<dag> pattern>
let Unpredictable{5-0} = 0b111111;
}
-def t2TT : T2TT<0b00, "tt", []>, Requires<[IsThumb,Has8MSecExt]>;
-def t2TTT : T2TT<0b01, "ttt", []>, Requires<[IsThumb,Has8MSecExt]>;
-def t2TTA : T2TT<0b10, "tta", []>, Requires<[IsThumb,Has8MSecExt]>;
-def t2TTAT : T2TT<0b11, "ttat", []>, Requires<[IsThumb,Has8MSecExt]>;
+def t2TT : T2TT<0b00, "tt",
+ [(set rGPR:$Rt, (int_arm_cmse_tt GPRnopc:$Rn))]>,
+ Requires<[IsThumb, Has8MSecExt]>;
+def t2TTT : T2TT<0b01, "ttt",
+ [(set rGPR:$Rt, (int_arm_cmse_ttt GPRnopc:$Rn))]>,
+ Requires<[IsThumb, Has8MSecExt]>;
+def t2TTA : T2TT<0b10, "tta",
+ [(set rGPR:$Rt, (int_arm_cmse_tta GPRnopc:$Rn))]>,
+ Requires<[IsThumb, Has8MSecExt]>;
+def t2TTAT : T2TT<0b11, "ttat",
+ [(set rGPR:$Rt, (int_arm_cmse_ttat GPRnopc:$Rn))]>,
+ Requires<[IsThumb, Has8MSecExt]>;
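// (A sketch of the intent, assuming the usual CMSE flow: these are the
// "test target" intrinsics that Clang's arm_cmse.h helpers lower to, so a
// source-level cmse_TT(p) can now select the TT instruction directly; the
// rGPR result and GPRnopc operand classes mirror the ISA's register
// restrictions for TT.)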
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -4655,10 +4712,10 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
// Aliases for ADD without the ".w" optional width specifier.
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
- (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p,
+ (t2ADDri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p,
cc_out:$s)>;
def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
- (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+ (t2ADDri12 rGPR:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
(t2ADDrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm",
@@ -4666,9 +4723,11 @@ def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm",
pred:$p, cc_out:$s)>;
// ... and with the destination and source register combined.
def : t2InstAlias<"add${s}${p} $Rdn, $imm",
- (t2ADDri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+ (t2ADDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"add${p} $Rdn, $imm",
- (t2ADDri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+ (t2ADDri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"addw${p} $Rdn, $imm",
+ (t2ADDri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>;
def : t2InstAlias<"add${s}${p} $Rdn, $Rm",
(t2ADDrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm",
@@ -4677,33 +4736,33 @@ def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm",
// add w/ negative immediates is just a sub.
def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm",
- (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
def : t2InstSubst<"add${p} $Rd, $Rn, $imm",
- (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+ (t2SUBri12 rGPR:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
def : t2InstSubst<"add${s}${p} $Rdn, $imm",
- (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ (t2SUBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
def : t2InstSubst<"add${p} $Rdn, $imm",
- (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+ (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
def : t2InstSubst<"add${s}${p}.w $Rd, $Rn, $imm",
- (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
def : t2InstSubst<"addw${p} $Rd, $Rn, $imm",
- (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+ (t2SUBri12 rGPR:$Rd, rGPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
def : t2InstSubst<"add${s}${p}.w $Rdn, $imm",
- (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ (t2SUBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
def : t2InstSubst<"addw${p} $Rdn, $imm",
- (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+ (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
// Aliases for SUB without the ".w" optional width specifier.
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
- (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+ (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"sub${p} $Rd, $Rn, $imm",
- (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+ (t2SUBri12 rGPR:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm",
(t2SUBrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm",
@@ -4711,9 +4770,11 @@ def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm",
pred:$p, cc_out:$s)>;
// ... and with the destination and source register combined.
def : t2InstAlias<"sub${s}${p} $Rdn, $imm",
- (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+ (t2SUBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"sub${p} $Rdn, $imm",
- (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+ (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"subw${p} $Rdn, $imm",
+ (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>;
def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm",
(t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"sub${s}${p} $Rdn, $Rm",
@@ -4722,6 +4783,65 @@ def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm",
(t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm,
pred:$p, cc_out:$s)>;
+// SP-to-SP and similar aliases
+// Aliases for ADD without the ".w" optional width specifier.
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2ADDspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+ (t2ADDspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095:$imm, pred:$p)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+ (t2ADDspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+
+def : t2InstAlias<"add${s}${p}.w $Rdn, $imm",
+ (t2ADDspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+
+def : t2InstAlias<"add${p} $Rdn, $imm",
+ (t2ADDspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>;
+
+def : t2InstAlias<"addw${p} $Rdn, $imm",
+ (t2ADDspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>;
+
+// add w/ negative immediates is just a sub.
+def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstSubst<"add${p} $Rd, $Rn, $imm",
+ (t2SUBspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstSubst<"add${s}${p} $Rdn, $imm",
+ (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstSubst<"add${p} $Rdn, $imm",
+ (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+
+def : t2InstSubst<"add${s}${p}.w $Rd, $Rn, $imm",
+ (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstSubst<"addw${p} $Rd, $Rn, $imm",
+ (t2SUBspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstSubst<"add${s}${p}.w $Rdn, $imm",
+ (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstSubst<"addw${p} $Rdn, $imm",
+ (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+
+
+// Aliases for SUB without the ".w" optional width specifier.
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
+ (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rd, $Rn, $imm",
+ (t2SUBspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095:$imm, pred:$p)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"sub${s}${p} $Rdn, $imm",
+ (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p}.w $Rdn, $imm",
+ (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rdn, $imm",
+ (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"subw${p} $Rdn, $imm",
+ (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>;
+
// Alias for compares without the ".w" optional width specifier.
def : t2InstAlias<"cmn${p} $Rn, $Rm",
(t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
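// (Taken together, a sketch of the SP handling above: SP-relative add/sub
// now flows through the dedicated t2ADDspImm/t2SUBspImm[12] encodings, which
// hard-wire Rd = Rn = 13, while the rGPR forms no longer accept SP, so e.g.
//   add.w sp, sp, #16    @ matches t2ADDspImm
//   subw  sp, sp, #4095  @ matches t2SUBspImm12
// assemble via the aliases defined here.)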
@@ -4978,10 +5098,16 @@ def : t2InstSubst<"orr${s}${p} $Rdn, $imm",
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, t2_so_imm_neg" -> sub
def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm",
- (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
+ (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstSubst<"add${s}${p} $Rd, $imm",
+ (t2SUBri rGPR:$Rd, rGPR:$Rd, t2_so_imm_neg:$imm,
pred:$p, cc_out:$s)>;
def : t2InstSubst<"add${s}${p} $Rd, $imm",
- (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
+ (t2SUBspImm GPRsp:$Rd, GPRsp:$Rd, t2_so_imm_neg:$imm,
pred:$p, cc_out:$s)>;
// Same for CMP <--> CMN via t2_so_imm_neg
def : t2InstSubst<"cmp${p} $Rd, $imm",
@@ -5178,8 +5304,6 @@ class t2LOL<dag oops, dag iops, string asm, string ops>
let Inst{31-23} = 0b111100000;
let Inst{15-14} = 0b11;
let Inst{0} = 0b1;
- let isBranch = 1;
- let isTerminator = 1;
let DecoderMethod = "DecodeLOLoop";
let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
}
@@ -5196,13 +5320,13 @@ def t2WLS : t2LOL<(outs GPRlr:$LR),
let Inst{11} = label{0};
let Inst{10-1} = label{10-1};
let usesCustomInserter = 1;
+ let isBranch = 1;
+ let isTerminator = 1;
}
def t2DLS : t2LOL<(outs GPRlr:$LR), (ins rGPR:$Rn),
"dls", "$LR, $Rn"> {
bits<4> Rn;
- let isBranch = 0;
- let isTerminator = 0;
let Inst{22-20} = 0b100;
let Inst{19-16} = Rn{3-0};
let Inst{13-1} = 0b1000000000000;
@@ -5218,6 +5342,8 @@ def t2LEUpdate : t2LOL<(outs GPRlr:$LRout),
let Inst{11} = label{0};
let Inst{10-1} = label{10-1};
let usesCustomInserter = 1;
+ let isBranch = 1;
+ let isTerminator = 1;
}
def t2LE : t2LOL<(outs ), (ins lelabel_u11:$label), "le", "$label"> {
@@ -5226,6 +5352,8 @@ def t2LE : t2LOL<(outs ), (ins lelabel_u11:$label), "le", "$label"> {
let Inst{13-12} = 0b00;
let Inst{11} = label{0};
let Inst{10-1} = label{10-1};
+ let isBranch = 1;
+ let isTerminator = 1;
}
def t2DoLoopStart :
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
index fdd961bfbb2f..a41a483d1a4c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -277,7 +277,7 @@ def : MnemonicAlias<"vstm", "vstmia">;
//
let mayLoad = 1 in
def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
- IIC_fpLoad_m, "vlldm${p}\t$Rn", "", []>,
+ NoItinerary, "vlldm${p}\t$Rn", "", []>,
Requires<[HasV8MMainline, Has8MSecExt]> {
let Inst{24-23} = 0b00;
let Inst{22} = 0;
@@ -290,7 +290,7 @@ def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
let mayStore = 1 in
def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
- IIC_fpStore_m, "vlstm${p}\t$Rn", "", []>,
+ NoItinerary, "vlstm${p}\t$Rn", "", []>,
Requires<[HasV8MMainline, Has8MSecExt]> {
let Inst{24-23} = 0b00;
let Inst{22} = 0;
@@ -2143,6 +2143,9 @@ def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin)),
+ (VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fma x, (fneg y), z) -> (vfms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
@@ -2150,6 +2153,9 @@ def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin)),
+ (VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -2196,6 +2202,9 @@ def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(fneg (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
+ (VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
@@ -2203,6 +2212,9 @@ def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, (fneg HPR:$Sdin))),
+ (VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -2248,6 +2260,9 @@ def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, (fneg HPR:$Sdin))),
+ (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
@@ -2255,6 +2270,9 @@ def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(fneg (f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin))),
+ (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fneg (fma x, (fneg y), z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
@@ -2262,6 +2280,9 @@ def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(fneg (f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin))),
+ (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
//===----------------------------------------------------------------------===//
// FP Conditional moves.
@@ -2279,6 +2300,12 @@ def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
[(set (f32 SPR:$Sd),
(ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>;
+
+def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p),
+ IIC_fpUNA16,
+ [(set (f16 HPR:$Sd),
+ (ARMcmov HPR:$Sn, HPR:$Sm, cmovpred:$p))]>,
+ RegConstraint<"$Sd = $Sn">, Requires<[HasFPRegs]>;
} // hasSideEffects
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 8e5e474c0f59..67816bc2103f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "arm-isel"
@@ -137,8 +138,10 @@ private:
unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
unsigned Size) const;
- void renderVFPF32Imm(MachineInstrBuilder &New, const MachineInstr &Old) const;
- void renderVFPF64Imm(MachineInstrBuilder &New, const MachineInstr &Old) const;
+ void renderVFPF32Imm(MachineInstrBuilder &New, const MachineInstr &Old,
+ int OpIdx = -1) const;
+ void renderVFPF64Imm(MachineInstrBuilder &New, const MachineInstr &Old,
+ int OpIdx = -1) const;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "ARMGenGlobalISel.inc"
@@ -810,9 +813,10 @@ bool ARMInstructionSelector::selectShift(unsigned ShiftOpc,
}
void ARMInstructionSelector::renderVFPF32Imm(
- MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst) const {
+ MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst,
+ int OpIdx) const {
assert(OldInst.getOpcode() == TargetOpcode::G_FCONSTANT &&
- "Expected G_FCONSTANT");
+ OpIdx == -1 && "Expected G_FCONSTANT");
APFloat FPImmValue = OldInst.getOperand(1).getFPImm()->getValueAPF();
int FPImmEncoding = ARM_AM::getFP32Imm(FPImmValue);
@@ -822,9 +826,9 @@ void ARMInstructionSelector::renderVFPF32Imm(
}
void ARMInstructionSelector::renderVFPF64Imm(
- MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst) const {
+ MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst, int OpIdx) const {
assert(OldInst.getOpcode() == TargetOpcode::G_FCONSTANT &&
- "Expected G_FCONSTANT");
+ OpIdx == -1 && "Expected G_FCONSTANT");
APFloat FPImmValue = OldInst.getOperand(1).getFPImm()->getValueAPF();
int FPImmEncoding = ARM_AM::getFP64Imm(FPImmValue);
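// (A sketch of why the signature changed, assuming the updated GlobalISel
// custom-renderer interface: the generated selection tables now invoke
// renderers through a pointer-to-member of this shape, passing an operand
// index:
//   void (ARMInstructionSelector::*)(MachineInstrBuilder &,
//                                    const MachineInstr &, int OpIdx) const;
// These two renderers consume the whole G_FCONSTANT rather than a single
// operand, which is what the strengthened OpIdx == -1 assertions encode.)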
@@ -1061,7 +1065,7 @@ bool ARMInstructionSelector::select(MachineInstr &I) {
case G_SHL: {
return selectShift(ARM_AM::ShiftOpc::lsl, MIB);
}
- case G_GEP:
+ case G_PTR_ADD:
I.setDesc(TII.get(Opcodes.ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 81414e6d76fe..e2dff51ea61c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -162,7 +162,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
.legalFor({s32, p0})
.minScalar(0, s32);
- getActionDefinitionsBuilder(G_GEP)
+ getActionDefinitionsBuilder(G_PTR_ADD)
.legalFor({{p0, s32}})
.minScalar(1, s32);
@@ -264,7 +264,7 @@ void ARMLegalizerInfo::setFCmpLibcallsAEABI() {
{RTLIB::OLE_F32, CmpInst::BAD_ICMP_PREDICATE}};
FCmp32Libcalls[CmpInst::FCMP_OLT] = {
{RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}};
- FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F32, CmpInst::ICMP_EQ}};
FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_EQ}};
FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_EQ}};
FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_EQ}};
@@ -290,7 +290,7 @@ void ARMLegalizerInfo::setFCmpLibcallsAEABI() {
{RTLIB::OLE_F64, CmpInst::BAD_ICMP_PREDICATE}};
FCmp64Libcalls[CmpInst::FCMP_OLT] = {
{RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}};
- FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F64, CmpInst::ICMP_EQ}};
FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_EQ}};
FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_EQ}};
FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_EQ}};
@@ -315,7 +315,7 @@ void ARMLegalizerInfo::setFCmpLibcallsGNU() {
FCmp32Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}};
FCmp32Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F32, CmpInst::ICMP_SLE}};
FCmp32Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F32, CmpInst::ICMP_SLT}};
- FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F32, CmpInst::ICMP_EQ}};
FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_SGE}};
FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_SGT}};
FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SLE}};
@@ -333,7 +333,7 @@ void ARMLegalizerInfo::setFCmpLibcallsGNU() {
FCmp64Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}};
FCmp64Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F64, CmpInst::ICMP_SLE}};
FCmp64Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F64, CmpInst::ICMP_SLT}};
- FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F64, CmpInst::ICMP_EQ}};
FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_SGE}};
FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_SGT}};
FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SLE}};
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 4a193fed04a3..12dddd29ca84 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -696,18 +696,23 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
return nullptr;
}
- int BaseOpc =
- isThumb2 ? ARM::t2ADDri :
- (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
- (isThumb1 && Offset < 8) ? ARM::tADDi3 :
- isThumb1 ? ARM::tADDi8 : ARM::ADDri;
+ int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
+ : ARM::t2ADDri)
+ : (isThumb1 && Base == ARM::SP)
+ ? ARM::tADDrSPi
+ : (isThumb1 && Offset < 8)
+ ? ARM::tADDi3
+ : isThumb1 ? ARM::tADDi8 : ARM::ADDri;
if (Offset < 0) {
- Offset = - Offset;
- BaseOpc =
- isThumb2 ? ARM::t2SUBri :
- (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
- isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
+ // FIXME: There are no Thumb1 load/store instructions with negative
+ // offsets. So the Base != ARM::SP might be unnecessary.
+ Offset = -Offset;
+ BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
+ : ARM::t2SUBri)
+ : (isThumb1 && Offset < 8 && Base != ARM::SP)
+ ? ARM::tSUBi3
+ : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
}
if (!TL->isLegalAddImmediate(Offset))
@@ -1186,8 +1191,10 @@ static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
case ARM::t2SUBri:
+ case ARM::t2SUBspImm:
case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
case ARM::t2ADDri:
+ case ARM::t2ADDspImm:
case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index e1c5a9c3e223..6717d4706aef 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -15,6 +15,26 @@
/// - t2LoopDec - placed within the loop body.
/// - t2LoopEnd - the loop latch terminator.
///
+/// In addition to this, we also look for the presence of the VCTP instruction,
+/// which determines whether we can generate the tail-predicated low-overhead
+/// loop form.
+///
+/// Assumptions and Dependencies:
+/// Low-overhead loops are constructed and executed using a setup instruction:
+/// DLS, WLS, DLSTP or WLSTP and an instruction that loops back: LE or LETP.
+/// WLS(TP) and LE(TP) are branching instructions with a large but limited range
+/// and a fixed polarity: WLS can only branch forwards and LE can only branch
+/// backwards. These restrictions mean that this pass is dependent upon block
+/// layout and block sizes, which is why it's the last pass to run. The same is
+/// true for ConstantIslands, but this pass does not increase the size of the
+/// basic blocks, nor does it change the CFG. Instructions are mainly removed
+/// during the transform and pseudo instructions are replaced by real ones. In
+/// some cases, when we have to revert to a 'normal' loop, we have to introduce
+/// multiple instructions for a single pseudo (see RevertWhile and
+/// RevertLoopEnd). To handle this situation, t2WhileLoopStart and t2LoopEnd
+/// are defined to be as large as this maximum sequence of replacement
+/// instructions.
+///
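+/// As a rough sketch (illustrative only, not part of this patch), a counted
+/// loop of N iterations lowers to something like:
+///
+///   dls lr, rN          ; or: wls lr, rN, <exit> for the while-loop form
+/// loop:
+///   ...                 ; loop body
+///   le  lr, loop        ; decrement lr and branch back while non-zero
+///
+/// The tail-predicated forms use dlstp/wlstp and letp instead, with lr
+/// holding the number of elements still to be processed rather than the
+/// iteration count.
+///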
//===----------------------------------------------------------------------===//
#include "ARM.h"
@@ -22,9 +42,16 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMBasicBlockInfo.h"
#include "ARMSubtarget.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ReachingDefAnalysis.h"
+#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;
@@ -33,10 +60,154 @@ using namespace llvm;
namespace {
+ struct PredicatedMI {
+ MachineInstr *MI = nullptr;
+ SetVector<MachineInstr*> Predicates;
+
+ public:
+ PredicatedMI(MachineInstr *I, SetVector<MachineInstr*> &Preds) :
+ MI(I) {
+ Predicates.insert(Preds.begin(), Preds.end());
+ }
+ };
+
+  // Represents a VPT block: a list of instructions that begins with a VPST
+  // and contains a maximum of four subsequent instructions. All instructions
+  // within the block are predicated upon the vpr, and we allow instructions
+  // to define the vpr within the block too.
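+  // For illustration (hypothetical MIR, names abbreviated), we expect to
+  // record blocks of this shape:
+  //   %vpr = MVE_VCTP32 %elems    ; a vpr def ahead of the block
+  //   MVE_VPST <mask>             ; opens the block
+  //   <up to four instructions predicated on %vpr>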
+ class VPTBlock {
+ std::unique_ptr<PredicatedMI> VPST;
+ PredicatedMI *Divergent = nullptr;
+ SmallVector<PredicatedMI, 4> Insts;
+
+ public:
+ VPTBlock(MachineInstr *MI, SetVector<MachineInstr*> &Preds) {
+ VPST = std::make_unique<PredicatedMI>(MI, Preds);
+ }
+
+ void addInst(MachineInstr *MI, SetVector<MachineInstr*> &Preds) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Adding predicated MI: " << *MI);
+ if (!Divergent && !set_difference(Preds, VPST->Predicates).empty()) {
+ Divergent = &Insts.back();
+ LLVM_DEBUG(dbgs() << " - has divergent predicate: " << *Divergent->MI);
+ }
+ Insts.emplace_back(MI, Preds);
+ assert(Insts.size() <= 4 && "Too many instructions in VPT block!");
+ }
+
+ // Have we found an instruction within the block which defines the vpr? If
+ // so, not all the instructions in the block will have the same predicate.
+ bool HasNonUniformPredicate() const {
+ return Divergent != nullptr;
+ }
+
+ // Is the given instruction part of the predicate set controlling the entry
+  // to the block?
+ bool IsPredicatedOn(MachineInstr *MI) const {
+ return VPST->Predicates.count(MI);
+ }
+
+ // Is the given instruction the only predicate which controls the entry to
+  // the block?
+ bool IsOnlyPredicatedOn(MachineInstr *MI) const {
+ return IsPredicatedOn(MI) && VPST->Predicates.size() == 1;
+ }
+
+ unsigned size() const { return Insts.size(); }
+ SmallVectorImpl<PredicatedMI> &getInsts() { return Insts; }
+ MachineInstr *getVPST() const { return VPST->MI; }
+ PredicatedMI *getDivergent() const { return Divergent; }
+ };
+
+ struct LowOverheadLoop {
+
+ MachineLoop *ML = nullptr;
+ MachineFunction *MF = nullptr;
+ MachineInstr *InsertPt = nullptr;
+ MachineInstr *Start = nullptr;
+ MachineInstr *Dec = nullptr;
+ MachineInstr *End = nullptr;
+ MachineInstr *VCTP = nullptr;
+ VPTBlock *CurrentBlock = nullptr;
+ SetVector<MachineInstr*> CurrentPredicate;
+ SmallVector<VPTBlock, 4> VPTBlocks;
+ bool Revert = false;
+ bool CannotTailPredicate = false;
+
+ LowOverheadLoop(MachineLoop *ML) : ML(ML) {
+ MF = ML->getHeader()->getParent();
+ }
+
+ // If this is an MVE instruction, check that we know how to use tail
+ // predication with it. Record VPT blocks and return whether the
+ // instruction is valid for tail predication.
+ bool ValidateMVEInst(MachineInstr *MI);
+
+ void AnalyseMVEInst(MachineInstr *MI) {
+ CannotTailPredicate = !ValidateMVEInst(MI);
+ }
+
+ bool IsTailPredicationLegal() const {
+ // For now, let's keep things really simple and only support a single
+ // block for tail predication.
+ return !Revert && FoundAllComponents() && VCTP &&
+ !CannotTailPredicate && ML->getNumBlocks() == 1;
+ }
+
+ bool ValidateTailPredicate(MachineInstr *StartInsertPt,
+ ReachingDefAnalysis *RDA,
+ MachineLoopInfo *MLI);
+
+ // Is it safe to define LR with DLS/WLS?
+  // LR can be defined if it is the operand to Start, because it's the same
+ // value, or if it's going to be equivalent to the operand to Start.
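+  // For example (illustrative): given "mov lr, r0" followed by a loop start
+  // taking r0, the mov is a safe insertion point, because the DLS/WLS will
+  // write that same value to lr itself.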
+ MachineInstr *IsSafeToDefineLR(ReachingDefAnalysis *RDA);
+
+  // Check that the branch targets are within range and that we satisfy our
+  // restrictions.
+ void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA,
+ MachineLoopInfo *MLI);
+
+ bool FoundAllComponents() const {
+ return Start && Dec && End;
+ }
+
+ SmallVectorImpl<VPTBlock> &getVPTBlocks() { return VPTBlocks; }
+
+ // Return the loop iteration count, or the number of elements if we're tail
+ // predicating.
+ MachineOperand &getCount() {
+ return IsTailPredicationLegal() ?
+ VCTP->getOperand(1) : Start->getOperand(0);
+ }
+
+ unsigned getStartOpcode() const {
+ bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;
+ if (!IsTailPredicationLegal())
+ return IsDo ? ARM::t2DLS : ARM::t2WLS;
+
+ return VCTPOpcodeToLSTP(VCTP->getOpcode(), IsDo);
+ }
+
+ void dump() const {
+ if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
+ if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
+ if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;
+ if (VCTP) dbgs() << "ARM Loops: Found VCTP: " << *VCTP;
+      if (!Start && !Dec && !End)
+        dbgs() << "ARM Loops: Not a low-overhead loop.\n";
+      else if (!FoundAllComponents())
+        dbgs() << "ARM Loops: Failed to find all loop components.\n";
+ }
+ };
+
class ARMLowOverheadLoops : public MachineFunctionPass {
MachineFunction *MF = nullptr;
+ MachineLoopInfo *MLI = nullptr;
+ ReachingDefAnalysis *RDA = nullptr;
const ARMBaseInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
public:
@@ -47,6 +218,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<ReachingDefAnalysis>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -54,7 +226,8 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
+ MachineFunctionProperties::Property::NoVRegs).set(
+ MachineFunctionProperties::Property::TracksLiveness);
}
StringRef getPassName() const override {
@@ -64,8 +237,6 @@ namespace {
private:
bool ProcessLoop(MachineLoop *ML);
- MachineInstr * IsSafeToDefineLR(MachineInstr *MI);
-
bool RevertNonLoops();
void RevertWhile(MachineInstr *MI) const;
@@ -74,9 +245,13 @@ namespace {
void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;
- void Expand(MachineLoop *ML, MachineInstr *Start,
- MachineInstr *InsertPt, MachineInstr *Dec,
- MachineInstr *End, bool Revert);
+ void RemoveLoopUpdate(LowOverheadLoop &LoLoop);
+
+ void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
+
+ MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);
+
+ void Expand(LowOverheadLoop &LoLoop);
};
}
@@ -86,128 +261,321 @@ char ARMLowOverheadLoops::ID = 0;
INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
false, false)
-bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
- const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget());
- if (!ST.hasLOB())
- return false;
+MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) {
+ // We can define LR because LR already contains the same value.
+ if (Start->getOperand(0).getReg() == ARM::LR)
+ return Start;
- MF = &mf;
- LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n");
+ unsigned CountReg = Start->getOperand(0).getReg();
+ auto IsMoveLR = [&CountReg](MachineInstr *MI) {
+ return MI->getOpcode() == ARM::tMOVr &&
+ MI->getOperand(0).getReg() == ARM::LR &&
+ MI->getOperand(1).getReg() == CountReg &&
+ MI->getOperand(2).getImm() == ARMCC::AL;
+ };
- auto &MLI = getAnalysis<MachineLoopInfo>();
- MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
- MRI = &MF->getRegInfo();
- TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo());
- BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(*MF));
- BBUtils->computeAllBlockSizes();
- BBUtils->adjustBBOffsetsAfter(&MF->front());
+ MachineBasicBlock *MBB = Start->getParent();
- bool Changed = false;
- for (auto ML : MLI) {
- if (!ML->getParentLoop())
- Changed |= ProcessLoop(ML);
- }
- Changed |= RevertNonLoops();
- return Changed;
+ // Find an insertion point:
+ // - Is there a (mov lr, Count) before Start? If so, and nothing else writes
+ // to Count before Start, we can insert at that mov.
+ if (auto *LRDef = RDA->getReachingMIDef(Start, ARM::LR))
+ if (IsMoveLR(LRDef) && RDA->hasSameReachingDef(Start, LRDef, CountReg))
+ return LRDef;
+
+ // - Is there a (mov lr, Count) after Start? If so, and nothing else writes
+ // to Count after Start, we can insert at that mov.
+ if (auto *LRDef = RDA->getLocalLiveOutMIDef(MBB, ARM::LR))
+ if (IsMoveLR(LRDef) && RDA->hasSameReachingDef(Start, LRDef, CountReg))
+ return LRDef;
+
+ // We've found no suitable LR def and Start doesn't use LR directly. Can we
+ // just define LR anyway?
+ if (!RDA->isRegUsedAfter(Start, ARM::LR))
+ return Start;
+
+ return nullptr;
}
-static bool IsLoopStart(MachineInstr &MI) {
- return MI.getOpcode() == ARM::t2DoLoopStart ||
- MI.getOpcode() == ARM::t2WhileLoopStart;
+// Can we safely move 'From' to just before 'To'? To satisfy this, 'From' must
+// not define a register that is used by any instruction after, and including,
+// 'To'. These instructions must also not redefine any of From's operands.
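+// For example (illustrative): moving "%r0 = add %r1, %r2" is rejected if an
+// instruction between 'From' and 'To' redefines r1 or r2, since 'From' would
+// then compute a different value, and also if one of them reads r0, since it
+// would then observe the moved def.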
+template<typename Iterator>
+static bool IsSafeToMove(MachineInstr *From, MachineInstr *To, ReachingDefAnalysis *RDA) {
+ SmallSet<int, 2> Defs;
+ // First check that From would compute the same value if moved.
+ for (auto &MO : From->operands()) {
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg())
+ continue;
+ if (MO.isDef())
+ Defs.insert(MO.getReg());
+ else if (!RDA->hasSameReachingDef(From, To, MO.getReg()))
+ return false;
+ }
+
+ // Now walk checking that the rest of the instructions will compute the same
+ // value.
+ for (auto I = ++Iterator(From), E = Iterator(To); I != E; ++I) {
+ for (auto &MO : I->operands())
+ if (MO.isReg() && MO.getReg() && MO.isUse() && Defs.count(MO.getReg()))
+ return false;
+ }
+ return true;
}
-template<typename T>
-static MachineInstr* SearchForDef(MachineInstr *Begin, T End, unsigned Reg) {
- for(auto &MI : make_range(T(Begin), End)) {
- for (auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg)
+bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt,
+ ReachingDefAnalysis *RDA, MachineLoopInfo *MLI) {
+ assert(VCTP && "VCTP instruction expected but is not set");
+ // All predication within the loop should be based on vctp. If the block
+ // isn't predicated on entry, check whether the vctp is within the block
+ // and that all other instructions are then predicated on it.
+ for (auto &Block : VPTBlocks) {
+ if (Block.IsPredicatedOn(VCTP))
+ continue;
+ if (!Block.HasNonUniformPredicate() || !isVCTP(Block.getDivergent()->MI)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found unsupported diverging predicate: "
+ << *Block.getDivergent()->MI);
+ return false;
+ }
+ SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts();
+ for (auto &PredMI : Insts) {
+ if (PredMI.Predicates.count(VCTP) || isVCTP(PredMI.MI))
continue;
- return &MI;
+ LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *PredMI.MI
+ << " - which is predicated on:\n";
+ for (auto *MI : PredMI.Predicates)
+ dbgs() << " - " << *MI;
+ );
+ return false;
}
}
- return nullptr;
-}
-static MachineInstr* SearchForUse(MachineInstr *Begin,
- MachineBasicBlock::iterator End,
- unsigned Reg) {
- for(auto &MI : make_range(MachineBasicBlock::iterator(Begin), End)) {
- for (auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
- continue;
- return &MI;
+ // For tail predication, we need to provide the number of elements, instead
+ // of the iteration count, to the loop start instruction. The number of
+ // elements is provided to the vctp instruction, so we need to check that
+ // we can use this register at InsertPt.
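+  // E.g. (illustrative): for "$vpr = MVE_VCTP32 $r3" the expansion needs to
+  // emit "$lr = MVE_DLSTP_32 $r3", so r3 must hold the element count at the
+  // insertion point.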
+ Register NumElements = VCTP->getOperand(1).getReg();
+
+  // If the register is defined within the loop, then we can't perform TP.
+ // TODO: Check whether this is just a mov of a register that would be
+ // available.
+ if (RDA->getReachingDef(VCTP, NumElements) >= 0) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n");
+ return false;
+ }
+
+  // The element count register may be defined after InsertPt, in which case we
+ // need to try to move either InsertPt or the def so that the [w|d]lstp can
+ // use the value.
+ MachineBasicBlock *InsertBB = InsertPt->getParent();
+ if (!RDA->isReachingDefLiveOut(InsertPt, NumElements)) {
+ if (auto *ElemDef = RDA->getLocalLiveOutMIDef(InsertBB, NumElements)) {
+ if (IsSafeToMove<MachineBasicBlock::reverse_iterator>(ElemDef, InsertPt, RDA)) {
+ ElemDef->removeFromParent();
+ InsertBB->insert(MachineBasicBlock::iterator(InsertPt), ElemDef);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: "
+ << *ElemDef);
+ } else if (IsSafeToMove<MachineBasicBlock::iterator>(InsertPt, ElemDef, RDA)) {
+ InsertPt->removeFromParent();
+ InsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), InsertPt);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
+ } else {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unable to move element count to loop "
+ << "start instruction.\n");
+ return false;
+ }
}
}
- return nullptr;
+
+ // Especially in the case of while loops, InsertBB may not be the
+ // preheader, so we need to check that the register isn't redefined
+ // before entering the loop.
+ auto CannotProvideElements = [&RDA](MachineBasicBlock *MBB,
+ Register NumElements) {
+ // NumElements is redefined in this block.
+ if (RDA->getReachingDef(&MBB->back(), NumElements) >= 0)
+ return true;
+
+ // Don't continue searching up through multiple predecessors.
+ if (MBB->pred_size() > 1)
+ return true;
+
+ return false;
+ };
+
+ // First, find the block that looks like the preheader.
+ MachineBasicBlock *MBB = MLI->findLoopPreheader(ML, true);
+ if (!MBB) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find preheader.\n");
+ return false;
+ }
+
+ // Then search backwards for a def, until we get to InsertBB.
+ while (MBB != InsertBB) {
+ if (CannotProvideElements(MBB, NumElements)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");
+ return false;
+ }
+ MBB = *MBB->pred_begin();
+ }
+
+ LLVM_DEBUG(dbgs() << "ARM Loops: Will use tail predication.\n");
+ return true;
}
-// Is it safe to define LR with DLS/WLS?
-// LR can defined if it is the operand to start, because it's the same value,
-// or if it's going to be equivalent to the operand to Start.
-MachineInstr *ARMLowOverheadLoops::IsSafeToDefineLR(MachineInstr *Start) {
+void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils,
+ ReachingDefAnalysis *RDA,
+ MachineLoopInfo *MLI) {
+ if (Revert)
+ return;
- auto IsMoveLR = [](MachineInstr *MI, unsigned Reg) {
- return MI->getOpcode() == ARM::tMOVr &&
- MI->getOperand(0).getReg() == ARM::LR &&
- MI->getOperand(1).getReg() == Reg &&
- MI->getOperand(2).getImm() == ARMCC::AL;
- };
+ if (!End->getOperand(1).isMBB())
+ report_fatal_error("Expected LoopEnd to target basic block");
- MachineBasicBlock *MBB = Start->getParent();
- unsigned CountReg = Start->getOperand(0).getReg();
- // Walk forward and backward in the block to find the closest instructions
- // that define LR. Then also filter them out if they're not a mov lr.
- MachineInstr *PredLRDef = SearchForDef(Start, MBB->rend(), ARM::LR);
- if (PredLRDef && !IsMoveLR(PredLRDef, CountReg))
- PredLRDef = nullptr;
-
- MachineInstr *SuccLRDef = SearchForDef(Start, MBB->end(), ARM::LR);
- if (SuccLRDef && !IsMoveLR(SuccLRDef, CountReg))
- SuccLRDef = nullptr;
-
- // We've either found one, two or none mov lr instructions... Now figure out
- // if they are performing the equilvant mov that the Start instruction will.
- // Do this by scanning forward and backward to see if there's a def of the
- // register holding the count value. If we find a suitable def, return it as
- // the insert point. Later, if InsertPt != Start, then we can remove the
- // redundant instruction.
- if (SuccLRDef) {
- MachineBasicBlock::iterator End(SuccLRDef);
- if (!SearchForDef(Start, End, CountReg)) {
- return SuccLRDef;
- } else
- SuccLRDef = nullptr;
+  // TODO: Maybe there are cases where the target doesn't have to be the header,
+ // but for now be safe and revert.
+ if (End->getOperand(1).getMBB() != ML->getHeader()) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
+ Revert = true;
+ return;
}
- if (PredLRDef) {
- MachineBasicBlock::reverse_iterator End(PredLRDef);
- if (!SearchForDef(Start, End, CountReg)) {
- return PredLRDef;
- } else
- PredLRDef = nullptr;
+
+  // The WLS and LE instructions have 12 bits for the label offset. WLS
+ // requires a positive offset, while LE uses negative.
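+  // (Hence the 4094 in the checks below: with the low bit implicitly zero,
+  // this is the largest even offset that the 12-bit field can encode.)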
+ if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) ||
+ !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
+ Revert = true;
+ return;
}
- // We can define LR because LR already contains the same value.
- if (Start->getOperand(0).getReg() == ARM::LR)
- return Start;
+ if (Start->getOpcode() == ARM::t2WhileLoopStart &&
+ (BBUtils->getOffsetOf(Start) >
+ BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
+ !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
+ Revert = true;
+ return;
+ }
- // We've found no suitable LR def and Start doesn't use LR directly. Can we
- // just define LR anyway?
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- LivePhysRegs LiveRegs(*TRI);
- LiveRegs.addLiveOuts(*MBB);
+ InsertPt = Revert ? nullptr : IsSafeToDefineLR(RDA);
+ if (!InsertPt) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
+ Revert = true;
+ return;
+ } else
+ LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt);
- // Not if we've haven't found a suitable mov and LR is live out.
- if (LiveRegs.contains(ARM::LR))
- return nullptr;
+ if (!IsTailPredicationLegal()) {
+ LLVM_DEBUG(if (!VCTP)
+ dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";
+ dbgs() << "ARM Loops: Tail-predication is not valid.\n");
+ return;
+ }
- // If LR is not live out, we can insert the instruction if nothing else
- // uses LR after it.
- if (!SearchForUse(Start, MBB->end(), ARM::LR))
- return Start;
+ assert(ML->getBlocks().size() == 1 &&
+ "Shouldn't be processing a loop with more than one block");
+ CannotTailPredicate = !ValidateTailPredicate(InsertPt, RDA, MLI);
+ LLVM_DEBUG(if (CannotTailPredicate)
+ dbgs() << "ARM Loops: Couldn't validate tail predicate.\n");
+}
- LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for"
- << " LR\n");
- return nullptr;
+bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) {
+ if (CannotTailPredicate)
+ return false;
+
+ // Only support a single vctp.
+ if (isVCTP(MI) && VCTP)
+ return false;
+
+  // Start a new vpt block when we discover a vpst.
+ if (MI->getOpcode() == ARM::MVE_VPST) {
+ VPTBlocks.emplace_back(MI, CurrentPredicate);
+ CurrentBlock = &VPTBlocks.back();
+ return true;
+ } else if (isVCTP(MI))
+ VCTP = MI;
+ else if (MI->getOpcode() == ARM::MVE_VPSEL ||
+ MI->getOpcode() == ARM::MVE_VPNOT)
+ return false;
+
+  // TODO: Allow VPSEL and VPNOT; we currently cannot because:
+  // 1) It will use the VPR as a predicate operand, but doesn't have to be
+  //    inside a VPT block, which means we could assert while building up
+  //    the VPT block because we don't find another VPST to begin a new
+  //    one.
+ // 2) VPSEL still requires a VPR operand even after tail predicating,
+ // which means we can't remove it unless there is another
+ // instruction, such as vcmp, that can provide the VPR def.
+
+ bool IsUse = false;
+ bool IsDef = false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() != ARM::VPR)
+ continue;
+
+ if (MO.isDef()) {
+ CurrentPredicate.insert(MI);
+ IsDef = true;
+ } else if (ARM::isVpred(MCID.OpInfo[i].OperandType)) {
+ CurrentBlock->addInst(MI, CurrentPredicate);
+ IsUse = true;
+ } else {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI);
+ return false;
+ }
+ }
+
+ // If we find a vpr def that is not already predicated on the vctp, we've
+ // got disjoint predicates that may not be equivalent when we do the
+ // conversion.
+ if (IsDef && !IsUse && VCTP && !isVCTP(MI)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found disjoint vpr def: " << *MI);
+ return false;
+ }
+
+ uint64_t Flags = MCID.TSFlags;
+ if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
+ return true;
+
+ // If we find an instruction that has been marked as not valid for tail
+ // predication, only allow the instruction if it's contained within a valid
+ // VPT block.
+ if ((Flags & ARMII::ValidForTailPredication) == 0 && !IsUse) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Can't tail predicate: " << *MI);
+ return false;
+ }
+
+ return true;
+}
+
+bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
+ const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget());
+ if (!ST.hasLOB())
+ return false;
+
+ MF = &mf;
+ LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n");
+
+ MLI = &getAnalysis<MachineLoopInfo>();
+ RDA = &getAnalysis<ReachingDefAnalysis>();
+ MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
+ MRI = &MF->getRegInfo();
+ TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo());
+ TRI = ST.getRegisterInfo();
+ BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(*MF));
+ BBUtils->computeAllBlockSizes();
+ BBUtils->adjustBBOffsetsAfter(&MF->front());
+
+ bool Changed = false;
+ for (auto ML : *MLI) {
+ if (!ML->getParentLoop())
+ Changed |= ProcessLoop(ML);
+ }
+ Changed |= RevertNonLoops();
+ return Changed;
}
bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
@@ -218,14 +586,21 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
Changed |= ProcessLoop(*I);
- LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Processing loop containing:\n";
+ if (auto *Preheader = ML->getLoopPreheader())
+ dbgs() << " - " << Preheader->getName() << "\n";
+ else if (auto *Preheader = MLI->findLoopPreheader(ML))
+ dbgs() << " - " << Preheader->getName() << "\n";
+ for (auto *MBB : ML->getBlocks())
+ dbgs() << " - " << MBB->getName() << "\n";
+ );
// Search the given block for a loop start instruction. If one isn't found,
// and there's only one predecessor block, search that one too.
std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
[&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
for (auto &MI : *MBB) {
- if (IsLoopStart(MI))
+ if (isLoopStart(MI))
return &MI;
}
if (MBB->pred_size() == 1)
@@ -233,53 +608,43 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
return nullptr;
};
- MachineInstr *Start = nullptr;
- MachineInstr *Dec = nullptr;
- MachineInstr *End = nullptr;
- bool Revert = false;
-
- // Search the preheader for the start intrinsic, or look through the
- // predecessors of the header to find exactly one set.iterations intrinsic.
+ LowOverheadLoop LoLoop(ML);
+ // Search the preheader for the start intrinsic.
// FIXME: I don't see why we shouldn't be supporting multiple predecessors
// with potentially multiple set.loop.iterations, so we need to enable this.
- if (auto *Preheader = ML->getLoopPreheader()) {
- Start = SearchForStart(Preheader);
- } else {
- LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
- << " - Performing manual predecessor search.\n");
- MachineBasicBlock *Pred = nullptr;
- for (auto *MBB : ML->getHeader()->predecessors()) {
- if (!ML->contains(MBB)) {
- if (Pred) {
- LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
- Start = nullptr;
- break;
- }
- Pred = MBB;
- Start = SearchForStart(MBB);
- }
- }
- }
+ if (auto *Preheader = ML->getLoopPreheader())
+ LoLoop.Start = SearchForStart(Preheader);
+ else if (auto *Preheader = MLI->findLoopPreheader(ML, true))
+ LoLoop.Start = SearchForStart(Preheader);
+ else
+ return false;
// Find the low-overhead loop components and decide whether or not to fall
- // back to a normal loop.
+  // back to a normal loop. Also look for a vctp instruction and decide
+ // whether we can convert that predicate using tail predication.
for (auto *MBB : reverse(ML->getBlocks())) {
for (auto &MI : *MBB) {
if (MI.getOpcode() == ARM::t2LoopDec)
- Dec = &MI;
+ LoLoop.Dec = &MI;
else if (MI.getOpcode() == ARM::t2LoopEnd)
- End = &MI;
- else if (IsLoopStart(MI))
- Start = &MI;
+ LoLoop.End = &MI;
+ else if (isLoopStart(MI))
+ LoLoop.Start = &MI;
else if (MI.getDesc().isCall()) {
// TODO: Though the call will require LE to execute again, does this
// mean we should revert? Always executing LE hopefully should be
// faster than performing a sub,cmp,br or even subs,br.
- Revert = true;
+ LoLoop.Revert = true;
LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n");
+ } else {
+ // Record VPR defs and build up their corresponding vpt blocks.
+        // Check that we know how to tail-predicate any MVE instructions.
+ LoLoop.AnalyseMVEInst(&MI);
}
- if (!Dec || End)
+      // We need to ensure that LR is not used or defined in between LoopDec and
+ // LoopEnd.
+ if (!LoLoop.Dec || LoLoop.End || LoLoop.Revert)
continue;
// If we find that LR has been written or read between LoopDec and
@@ -294,61 +659,21 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() &&
MO.getReg() == ARM::LR) {
LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI);
- Revert = true;
+ LoLoop.Revert = true;
break;
}
}
}
-
- if (Dec && End && Revert)
- break;
}
- LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
- if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
- if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;);
-
- if (!Start && !Dec && !End) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
- return Changed;
- } else if (!(Start && Dec && End)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
+ LLVM_DEBUG(LoLoop.dump());
+ if (!LoLoop.FoundAllComponents()) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find loop start, update, end\n");
return false;
}
- if (!End->getOperand(1).isMBB())
- report_fatal_error("Expected LoopEnd to target basic block");
-
- // TODO Maybe there's cases where the target doesn't have to be the header,
- // but for now be safe and revert.
- if (End->getOperand(1).getMBB() != ML->getHeader()) {
- LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
- Revert = true;
- }
-
- // The WLS and LE instructions have 12-bits for the label offset. WLS
- // requires a positive offset, while LE uses negative.
- if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) ||
- !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
- Revert = true;
- }
- if (Start->getOpcode() == ARM::t2WhileLoopStart &&
- (BBUtils->getOffsetOf(Start) >
- BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
- !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
- LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
- Revert = true;
- }
-
- MachineInstr *InsertPt = Revert ? nullptr : IsSafeToDefineLR(Start);
- if (!InsertPt) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
- Revert = true;
- } else
- LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt);
-
- Expand(ML, Start, InsertPt, Dec, End, Revert);
+ LoLoop.CheckLegality(BBUtils.get(), RDA, MLI);
+ Expand(LoLoop);
return true;
}
@@ -365,7 +690,7 @@ void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
MIB.addImm(0);
MIB.addImm(ARMCC::AL);
MIB.addReg(ARM::NoRegister);
-
+
MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
ARM::tBcc : ARM::t2Bcc;
@@ -378,19 +703,15 @@ void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
}
bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI,
- bool AllowFlags) const {
+ bool SetFlags) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
- // If nothing uses or defines CPSR between LoopDec and LoopEnd, use a t2SUBS.
- bool SetFlags = false;
- if (AllowFlags) {
- if (auto *Def = SearchForDef(MI, MBB->end(), ARM::CPSR)) {
- if (!SearchForUse(MI, MBB->end(), ARM::CPSR) &&
- Def->getOpcode() == ARM::t2LoopEnd)
- SetFlags = true;
- }
- }
+ // If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS.
+ if (SetFlags &&
+ (RDA->isRegUsedAfter(MI, ARM::CPSR) ||
+ !RDA->hasSameReachingDef(MI, &MBB->back(), ARM::CPSR)))
+ SetFlags = false;
MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
TII->get(ARM::t2SUBri));
@@ -438,44 +759,223 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
MI->eraseFromParent();
}
-void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
- MachineInstr *InsertPt,
- MachineInstr *Dec, MachineInstr *End,
- bool Revert) {
+MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
+ MachineInstr *InsertPt = LoLoop.InsertPt;
+ MachineInstr *Start = LoLoop.Start;
+ MachineBasicBlock *MBB = InsertPt->getParent();
+ bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;
+ unsigned Opc = LoLoop.getStartOpcode();
+ MachineOperand &Count = LoLoop.getCount();
- auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start,
- MachineInstr *InsertPt) {
- MachineBasicBlock *MBB = InsertPt->getParent();
- unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
- ARM::t2DLS : ARM::t2WLS;
- MachineInstrBuilder MIB =
- BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
- MIB.addDef(ARM::LR);
- MIB.add(Start->getOperand(0));
- if (Opc == ARM::t2WLS)
- MIB.add(Start->getOperand(1));
-
- if (InsertPt != Start)
- InsertPt->eraseFromParent();
- Start->eraseFromParent();
- LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
- return &*MIB;
+ MIB.addDef(ARM::LR);
+ MIB.add(Count);
+ if (!IsDo)
+ MIB.add(Start->getOperand(1));
+
+ // When using tail-predication, try to delete the dead code that was used to
+ // calculate the number of loop iterations.
+ if (LoLoop.IsTailPredicationLegal()) {
+ SmallVector<MachineInstr*, 4> Killed;
+ SmallVector<MachineInstr*, 4> Dead;
+ if (auto *Def = RDA->getReachingMIDef(Start,
+ Start->getOperand(0).getReg())) {
+ Killed.push_back(Def);
+
+ while (!Killed.empty()) {
+ MachineInstr *Def = Killed.back();
+ Killed.pop_back();
+ Dead.push_back(Def);
+ for (auto &MO : Def->operands()) {
+ if (!MO.isReg() || !MO.isKill())
+ continue;
+
+ MachineInstr *Kill = RDA->getReachingMIDef(Def, MO.getReg());
+ if (Kill && RDA->getNumUses(Kill, MO.getReg()) == 1)
+ Killed.push_back(Kill);
+ }
+ }
+ for (auto *MI : Dead)
+ MI->eraseFromParent();
+ }
+ }
+
+ // If we're inserting at a mov lr, then remove it as it's redundant.
+ if (InsertPt != Start)
+ InsertPt->eraseFromParent();
+ Start->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
+ return &*MIB;
+}
+
+// The goal is to optimise and clean up these loops:
+//
+// vector.body:
+// renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
+// renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3(tied-def 0), 4
+// ..
+// $lr = MVE_DLSTP_32 renamable $r3
+//
+// The SUB is the old update of the loop iteration count expression, which
+// is no longer needed. This sub can be removed when the element count, which
+// is in r3 in this example, is defined by an instruction in the loop and has
+// no uses.
+//
+void ARMLowOverheadLoops::RemoveLoopUpdate(LowOverheadLoop &LoLoop) {
+ Register ElemCount = LoLoop.VCTP->getOperand(1).getReg();
+ MachineInstr *LastInstrInBlock = &LoLoop.VCTP->getParent()->back();
+
+ LLVM_DEBUG(dbgs() << "ARM Loops: Trying to remove loop update stmt\n");
+
+ if (LoLoop.ML->getNumBlocks() != 1) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Single block loop expected\n");
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "ARM Loops: Analyzing elemcount in operand: ";
+ LoLoop.VCTP->getOperand(1).dump());
+
+ // Find the definition we are interested in removing, if there is one.
+ MachineInstr *Def = RDA->getReachingMIDef(LastInstrInBlock, ElemCount);
+ if (!Def) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Can't find a def, nothing to do.\n");
+ return;
+ }
+
+ // Bail if we define CPSR and it is not dead
+ if (!Def->registerDefIsDead(ARM::CPSR, TRI)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: CPSR is not dead\n");
+ return;
+ }
+
+  // Bail if elemcount is used in exit blocks, i.e. if it is live-in to them.
+ if (isRegLiveInExitBlocks(LoLoop.ML, ElemCount)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Elemcount is live-out, can't remove stmt\n");
+ return;
+ }
+
+ // Bail if there are uses after this Def in the block.
+ SmallVector<MachineInstr*, 4> Uses;
+ RDA->getReachingLocalUses(Def, ElemCount, Uses);
+ if (Uses.size()) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Local uses in block, can't remove stmt\n");
+ return;
+ }
+
+ Uses.clear();
+ RDA->getAllInstWithUseBefore(Def, ElemCount, Uses);
+
+ // Remove Def if there are no uses, or if the only use is the VCTP
+ // instruction.
+ if (!Uses.size() || (Uses.size() == 1 && Uses[0] == LoLoop.VCTP)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing loop update instruction: ";
+ Def->dump());
+ Def->eraseFromParent();
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "ARM Loops: Can't remove loop update, it's used by:\n";
+ for (auto U : Uses) U->dump());
+}
+
+void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
+ auto RemovePredicate = [](MachineInstr *MI) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI);
+    int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
+    if (PIdx != -1) { // -1 means no predicate operand was found.
+ assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then &&
+ "Expected Then predicate!");
+ MI->getOperand(PIdx).setImm(ARMVCC::None);
+ MI->getOperand(PIdx+1).setReg(0);
+ } else
+ llvm_unreachable("trying to unpredicate a non-predicated instruction");
};
+ // There are a few scenarios which we have to fix up:
+  // 1) A VPT block which is only predicated by the vctp and has no internal vpr
+ // defs.
+ // 2) A VPT block which is only predicated by the vctp but has an internal
+ // vpr def.
+ // 3) A VPT block which is predicated upon the vctp as well as another vpr
+ // def.
+ // 4) A VPT block which is not predicated upon a vctp, but contains it and
+  //    all instructions within the block are predicated upon it.
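+  //
+  // In scenario 2, for example, the block is effectively split around the
+  // divergent def: the original vpst is removed, the instructions up to the
+  // divergent def are unpredicated, and a fresh vpst is created to predicate
+  // the instructions that follow it (see the handling below).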
+
+ for (auto &Block : LoLoop.getVPTBlocks()) {
+ SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts();
+ if (Block.HasNonUniformPredicate()) {
+ PredicatedMI *Divergent = Block.getDivergent();
+ if (isVCTP(Divergent->MI)) {
+ // The vctp will be removed, so the size of the vpt block needs to be
+ // modified.
+ uint64_t Size = getARMVPTBlockMask(Block.size() - 1);
+ Block.getVPST()->getOperand(0).setImm(Size);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Modified VPT block mask.\n");
+ } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP)) {
+        // The VPT block has a non-uniform predicate but its entry is guarded
+ // only by a vctp, which means we:
+ // - Need to remove the original vpst.
+ // - Then need to unpredicate any following instructions, until
+ // we come across the divergent vpr def.
+        //   - Insert a new vpst to predicate the instruction(s) that follow
+ // the divergent vpr def.
+ // TODO: We could be producing more VPT blocks than necessary and could
+        // fold the newly created one into a preceding one.
+ for (auto I = ++MachineBasicBlock::iterator(Block.getVPST()),
+ E = ++MachineBasicBlock::iterator(Divergent->MI); I != E; ++I)
+ RemovePredicate(&*I);
+
+ unsigned Size = 0;
+ auto E = MachineBasicBlock::reverse_iterator(Divergent->MI);
+ auto I = MachineBasicBlock::reverse_iterator(Insts.back().MI);
+ MachineInstr *InsertAt = nullptr;
+ while (I != E) {
+ InsertAt = &*I;
+ ++Size;
+ ++I;
+ }
+ MachineInstrBuilder MIB = BuildMI(*InsertAt->getParent(), InsertAt,
+ InsertAt->getDebugLoc(),
+ TII->get(ARM::MVE_VPST));
+ MIB.addImm(getARMVPTBlockMask(Size));
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST());
+ LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
+ Block.getVPST()->eraseFromParent();
+ }
+ } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP)) {
+ // A vpt block which is only predicated upon vctp and has no internal vpr
+ // defs:
+ // - Remove vpst.
+ // - Unpredicate the remaining instructions.
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST());
+ Block.getVPST()->eraseFromParent();
+ for (auto &PredMI : Insts)
+ RemovePredicate(PredMI.MI);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VCTP: " << *LoLoop.VCTP);
+ LoLoop.VCTP->eraseFromParent();
+}
+
+void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
+
// Combine the LoopDec and LoopEnd instructions into LE(TP).
- auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
- MachineInstr *End) {
+ auto ExpandLoopEnd = [this](LowOverheadLoop &LoLoop) {
+ MachineInstr *End = LoLoop.End;
MachineBasicBlock *MBB = End->getParent();
+ unsigned Opc = LoLoop.IsTailPredicationLegal() ?
+ ARM::MVE_LETP : ARM::t2LEUpdate;
MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
- TII->get(ARM::t2LEUpdate));
+ TII->get(Opc));
MIB.addDef(ARM::LR);
MIB.add(End->getOperand(0));
MIB.add(End->getOperand(1));
LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
- End->eraseFromParent();
- Dec->eraseFromParent();
+ LoLoop.End->eraseFromParent();
+ LoLoop.Dec->eraseFromParent();
return &*MIB;
};
@@ -496,18 +996,22 @@ void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
}
};
- if (Revert) {
- if (Start->getOpcode() == ARM::t2WhileLoopStart)
- RevertWhile(Start);
+ if (LoLoop.Revert) {
+ if (LoLoop.Start->getOpcode() == ARM::t2WhileLoopStart)
+ RevertWhile(LoLoop.Start);
else
- Start->eraseFromParent();
- bool FlagsAlreadySet = RevertLoopDec(Dec, true);
- RevertLoopEnd(End, FlagsAlreadySet);
+ LoLoop.Start->eraseFromParent();
+ bool FlagsAlreadySet = RevertLoopDec(LoLoop.Dec, true);
+ RevertLoopEnd(LoLoop.End, FlagsAlreadySet);
} else {
- Start = ExpandLoopStart(ML, Start, InsertPt);
- RemoveDeadBranch(Start);
- End = ExpandLoopEnd(ML, Dec, End);
- RemoveDeadBranch(End);
+ LoLoop.Start = ExpandLoopStart(LoLoop);
+ RemoveDeadBranch(LoLoop.Start);
+ LoLoop.End = ExpandLoopEnd(LoLoop);
+ RemoveDeadBranch(LoLoop.End);
+ if (LoLoop.IsTailPredicationLegal()) {
+ RemoveLoopUpdate(LoLoop);
+ ConvertVPTBlocks(LoLoop);
+ }
}
}
@@ -521,7 +1025,7 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
SmallVector<MachineInstr*, 4> Ends;
for (auto &I : MBB) {
- if (IsLoopStart(I))
+ if (isLoopStart(I))
Starts.push_back(&I);
else if (I.getOpcode() == ARM::t2LoopDec)
Decs.push_back(&I);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index ae5657a0a2c1..e2c9335db419 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -14,23 +14,24 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Statistic.h"
+#include "ARM.h"
+#include "ARMSubtarget.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/NoFolder.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/Debug.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "ARM.h"
-#include "ARMSubtarget.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
using namespace PatternMatch;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
index b008d3e2e296..dea1d767beb4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
@@ -182,11 +182,9 @@ def UseMulOps : Predicate<"Subtarget->useMulOps()">;
// But only select them if more precision in FP computation is allowed, and when
// they are not slower than a mul + add sequence.
// Do not use them for Darwin platforms.
-def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
- " FPOpFusion::Fast && "
- " Subtarget->hasVFP4Base()) && "
- "!Subtarget->isTargetDarwin() &&"
- "Subtarget->useFPVMLx()">;
+def UseFusedMAC : Predicate<"TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast && "
+ "Subtarget->useFPVFMx()">;
def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index b100150175fc..43c8cd5a89be 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -172,8 +172,9 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
#endif
}
-const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
- const TargetRegisterClass &RC) const {
+const RegisterBank &
+ARMRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const {
using namespace ARM;
switch (RC.getID()) {
@@ -249,7 +250,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_SEXT:
case G_ZEXT:
case G_ANYEXT:
- case G_GEP:
+ case G_PTR_ADD:
case G_INTTOPTR:
case G_PTRTOINT:
case G_CTLZ:
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
index 1961f7af49bb..b8aff65a967e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
@@ -32,8 +32,8 @@ class ARMRegisterBankInfo final : public ARMGenRegisterBankInfo {
public:
ARMRegisterBankInfo(const TargetRegisterInfo &TRI);
- const RegisterBank &
- getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+ const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const override;
const InstructionMapping &
getInstrMapping(const MachineInstr &MI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 09603057b2c8..eb4d39b01cbb 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -72,6 +72,9 @@ static cl::opt<bool>
ForceFastISel("arm-force-fast-isel",
cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableSubRegLiveness("arm-enable-subreg-liveness",
+ cl::init(false), cl::Hidden);
+
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -379,11 +382,23 @@ bool ARMSubtarget::enableMachineScheduler() const {
return useMachineScheduler();
}
+bool ARMSubtarget::enableSubRegLiveness() const { return EnableSubRegLiveness; }
+
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostRAScheduler() const {
+ if (enableMachineScheduler())
+ return false;
+ if (disablePostRAScheduler())
+ return false;
+ // Thumb1 cores will generally not benefit from post-ra scheduling
+ return !isThumb1Only();
+}
+
+bool ARMSubtarget::enablePostRAMachineScheduler() const {
+ if (!enableMachineScheduler())
+ return false;
if (disablePostRAScheduler())
return false;
- // Don't reschedule potential IT blocks.
return !isThumb1Only();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
index ef460342a69e..6bdd021970ef 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -203,6 +203,10 @@ protected:
/// whether the FP VML[AS] instructions are slow (if so, don't use them).
bool SlowFPVMLx = false;
+ /// SlowFPVFMx - If the VFP4 / NEON instructions are available, indicates
+ /// whether the FP VFM[AS] instructions are slow (if so, don't use them).
+ bool SlowFPVFMx = false;
+
/// HasVMLxForwarding - If true, NEON has special multiplier accumulator
/// forwarding to allow mul + mla being issued back to back.
bool HasVMLxForwarding = false;
@@ -223,9 +227,6 @@ protected:
/// register allocation.
bool DisablePostRAScheduler = false;
- /// UseAA - True if using AA during codegen (DAGCombine, MISched, etc)
- bool UseAA = false;
-
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2 = false;
@@ -635,6 +636,11 @@ public:
bool useMulOps() const { return UseMulOps; }
bool useFPVMLx() const { return !SlowFPVMLx; }
+ bool useFPVFMx() const {
+ return !isTargetDarwin() && hasVFP4Base() && !SlowFPVFMx;
+ }
+ bool useFPVFMx16() const { return useFPVFMx() && hasFullFP16(); }
+ bool useFPVFMx64() const { return useFPVFMx() && hasFP64(); }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool hasFP64() const { return HasFP64; }
@@ -806,9 +812,15 @@ public:
/// True for some subtargets at > -O0.
bool enablePostRAScheduler() const override;
+ /// True for some subtargets at > -O0.
+ bool enablePostRAMachineScheduler() const override;
+
+ /// Check whether this subtarget wants to use subregister liveness.
+ bool enableSubRegLiveness() const override;
+
/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
- bool useAA() const override { return UseAA; }
+ bool useAA() const override { return true; }
// enableAtomicExpand- True if we need to expand our atomics.
bool enableAtomicExpand() const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 5c8007f101d9..84876eda33a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -46,6 +46,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/CFGuard.h"
#include "llvm/Transforms/Scalar.h"
#include <cassert>
#include <memory>
@@ -78,7 +79,7 @@ namespace llvm {
void initializeARMExecutionDomainFixPass(PassRegistry&);
}
-extern "C" void LLVMInitializeARMTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget());
RegisterTargetMachine<ARMLETargetMachine> A(getTheThumbLETarget());
@@ -90,7 +91,6 @@ extern "C" void LLVMInitializeARMTarget() {
initializeARMLoadStoreOptPass(Registry);
initializeARMPreAllocLoadStoreOptPass(Registry);
initializeARMParallelDSPPass(Registry);
- initializeARMCodeGenPreparePass(Registry);
initializeARMConstantIslandsPass(Registry);
initializeARMExecutionDomainFixPass(Registry);
initializeARMExpandPseudoPass(Registry);
@@ -98,6 +98,7 @@ extern "C" void LLVMInitializeARMTarget() {
initializeMVEVPTBlockPass(Registry);
initializeMVETailPredicationPass(Registry);
initializeARMLowOverheadLoopsPass(Registry);
+ initializeMVEGatherScatterLoweringPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -321,14 +322,7 @@ namespace {
class ARMPassConfig : public TargetPassConfig {
public:
ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {
- if (TM.getOptLevel() != CodeGenOpt::None) {
- ARMGenSubtargetInfo STI(TM.getTargetTriple(), TM.getTargetCPU(),
- TM.getTargetFeatureString());
- if (STI.hasFeature(ARM::FeatureUseMISched))
- substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
- }
- }
+ : TargetPassConfig(TM, PM) {}
ARMBaseTargetMachine &getARMTargetMachine() const {
return getTM<ARMBaseTargetMachine>();
@@ -411,6 +405,8 @@ void ARMPassConfig::addIRPasses() {
return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
}));
+ addPass(createMVEGatherScatterLoweringPass());
+
TargetPassConfig::addIRPasses();
// Run the parallel DSP pass.
@@ -420,11 +416,15 @@ void ARMPassConfig::addIRPasses() {
// Match interleaved memory accesses to ldN/stN intrinsics.
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createInterleavedAccessPass());
+
+ // Add Control Flow Guard checks.
+ if (TM->getTargetTriple().isOSWindows())
+ addPass(createCFGuardCheckPass());
}
void ARMPassConfig::addCodeGenPrepare() {
if (getOptLevel() != CodeGenOpt::None)
- addPass(createARMCodeGenPreparePass());
+ addPass(createTypePromotionPass());
TargetPassConfig::addCodeGenPrepare();
}
@@ -518,6 +518,13 @@ void ARMPassConfig::addPreSched2() {
}
addPass(createMVEVPTBlockPass());
addPass(createThumb2ITBlockPass());
+
+ // Add both scheduling passes to give the subtarget an opportunity to pick
+ // between them.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(&PostMachineSchedulerID);
+ addPass(&PostRASchedulerID);
+ }
}
void ARMPassConfig::addPreEmitPass() {
@@ -534,4 +541,8 @@ void ARMPassConfig::addPreEmitPass() {
addPass(createARMConstantIslandPass());
addPass(createARMLowOverheadLoopsPass());
+
+ // Identify valid longjmp targets for Windows Control Flow Guard.
+ if (TM->getTargetTriple().isOSWindows())
+ addPass(createCFGuardLongjmpPass());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h
index cb8650d8139b..ac55d2bdcc2b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -70,6 +70,8 @@ public:
TargetTriple.isOSWindows() ||
TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
}
+
+  bool targetSchedulesPostRAScheduling() const override { return true; }
};
/// ARM/Thumb little endian target machine.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 86c8684d14dc..7ff05034c1f2 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
@@ -37,13 +38,17 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
static cl::opt<bool> EnableMaskedLoadStores(
- "enable-arm-maskedldst", cl::Hidden, cl::init(false),
+ "enable-arm-maskedldst", cl::Hidden, cl::init(true),
cl::desc("Enable the generation of masked loads and stores"));
static cl::opt<bool> DisableLowOverheadLoops(
"disable-arm-loloops", cl::Hidden, cl::init(false),
cl::desc("Disable the generation of low-overhead loops"));
+extern cl::opt<bool> DisableTailPredication;
+
+extern cl::opt<bool> EnableMaskedGatherScatters;
+
bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
@@ -104,7 +109,7 @@ int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
return 1;
}
-int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty) {
// Division by a constant can be turned into multiplication, but only if we
// know it's constant. So it's not so much that the immediate is cheap (it's
@@ -512,6 +517,27 @@ bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
(EltWidth == 8);
}
+bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, MaybeAlign Alignment) {
+ if (!EnableMaskedGatherScatters || !ST->hasMVEIntegerOps())
+ return false;
+
+ // This method is called in two places:
+ // - from the vectorizer with a scalar type, in which case we need to get
+ // this as good as we can with the limited info we have (and rely on the cost
+ // model for the rest).
+ // - from the masked intrinsic lowering pass with the actual vector type.
+ // For MVE, we have a custom lowering pass that will already have custom
+ // legalised any gathers that we can to MVE intrinsics, and want to expand all
+ // the rest. The pass runs before the masked intrinsic lowering pass, so if we
+ // are here, we know we want to expand.
+ if (isa<VectorType>(Ty))
+ return false;
+
+ unsigned EltWidth = Ty->getScalarSizeInBits();
+ return ((EltWidth == 32 && (!Alignment || Alignment >= 4)) ||
+ (EltWidth == 16 && (!Alignment || Alignment >= 2)) || EltWidth == 8);
+}
+
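For illustration, a minimal sketch of the two call sites described in the comment above. The names Ctx and TTI are assumptions (a live LLVMContext and TargetTransformInfo, not part of the patch): the vectorizer queries with the scalar element type and gets a capability answer, while the masked intrinsic lowering pass queries with the full vector type and is told to expand.

    // Hypothetical probe, assuming Ctx and TTI are in scope:
    Type *EltTy = Type::getInt32Ty(Ctx);         // vectorizer view: scalar type
    Type *VecTy = VectorType::get(EltTy, 4);     // lowering view: v4i32
    bool Capable = TTI.isLegalMaskedGather(EltTy, MaybeAlign(4)); // true on MVE
    bool KeepIt  = TTI.isLegalMaskedGather(VecTy, MaybeAlign(4)); // false: expand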
int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
const MemCpyInst *MI = dyn_cast<MemCpyInst>(I);
assert(MI && "MemcpyInst expected");
@@ -640,58 +666,60 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return BaseCost * BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-int ARMTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
- TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo,
- ArrayRef<const Value *> Args) {
+int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
- const unsigned FunctionCallDivCost = 20;
- const unsigned ReciprocalDivCost = 10;
- static const CostTblEntry CostTbl[] = {
- // Division.
- // These costs are somewhat random. Choose a cost of 20 to indicate that
- // vectorizing devision (added function call) is going to be very expensive.
- // Double registers types.
- { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
- { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
- { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
- { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
- { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
- { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
- { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
- { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
- { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
- { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
- { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
- { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
- { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
- { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
- { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
- { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
- // Quad register types.
- { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
- { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
- { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
- { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
- { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
- { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
- { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
- { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
- { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
- { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
- { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
- { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
- { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
- { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
- { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
- { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
- // Multiplication.
- };
-
if (ST->hasNEON()) {
+ const unsigned FunctionCallDivCost = 20;
+ const unsigned ReciprocalDivCost = 10;
+ static const CostTblEntry CostTbl[] = {
+ // Division.
+ // These costs are somewhat random. Choose a cost of 20 to indicate that
+ // vectorizing division (added function call) is going to be very expensive.
+ // Double register types.
+ { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
+ { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
+ { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
+ { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
+ { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
+ // Quad register types.
+ { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
+ // Multiplication.
+ };
+
if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
return LT.first * Entry->Cost;
@@ -712,6 +740,33 @@ int ARMTTIImpl::getArithmeticInstrCost(
return Cost;
}
+ // If this operation is a shift on arm/thumb2, it might well be folded into
+ // the following instruction, hence having a cost of 0.
+ auto LooksLikeAFreeShift = [&]() {
+ if (ST->isThumb1Only() || Ty->isVectorTy())
+ return false;
+
+ if (!CxtI || !CxtI->hasOneUse() || !CxtI->isShift())
+ return false;
+ if (Op2Info != TargetTransformInfo::OK_UniformConstantValue)
+ return false;
+
+ // Folded into an ADC/ADD/AND/BIC/CMP/EOR/MVN/ORR/ORN/RSB/SBC/SUB
+ switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Xor:
+ case Instruction::Or:
+ case Instruction::ICmp:
+ return true;
+ default:
+ return false;
+ }
+ };
+ if (LooksLikeAFreeShift())
+ return 0;
+
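As a hedged illustration of the pattern the lambda accepts (an assumed example, not from the patch): a single-use shift by a uniform constant feeding one of the listed instructions is folded into the consumer's shifted-operand form on ARM/Thumb2, so a cost of zero matches the emitted code.

    // Compiles to a single "add r0, r0, r1, lsl #2" on Thumb2:
    int scaledAdd(int a, int b) { return a + (b << 2); }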
int BaseCost = ST->hasMVEIntegerOps() && Ty->isVectorTy()
? ST->getMVEVectorCostFactor()
: 1;
@@ -735,11 +790,13 @@ int ARMTTIImpl::getArithmeticInstrCost(
return BaseCost;
}
-int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace, const Instruction *I) {
+int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ MaybeAlign Alignment, unsigned AddressSpace,
+ const Instruction *I) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
- if (ST->hasNEON() && Src->isVectorTy() && Alignment != 16 &&
+ if (ST->hasNEON() && Src->isVectorTy() &&
+ (Alignment && *Alignment != Align(16)) &&
Src->getVectorElementType()->isDoubleTy()) {
// Unaligned loads/stores are extremely inefficient.
// We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
@@ -751,13 +808,10 @@ int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
return BaseCost * LT.first;
}
-int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int ARMTTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
+ bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
@@ -772,9 +826,19 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
// Accesses having vector types that are a multiple of 128 bits can be
// matched to more than one vldN/vstN instruction.
+ int BaseCost = ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor() : 1;
if (NumElts % Factor == 0 &&
- TLI->isLegalInterleavedAccessType(SubVecTy, DL))
- return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
+ TLI->isLegalInterleavedAccessType(Factor, SubVecTy, DL))
+ return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy, DL);
+
+ // Some smaller than legal interleaved patterns are cheap as we can make
+ // use of the vmovn or vrev patterns to interleave a standard load. This is
+ // true for v4i8, v8i8 and v4i16 at least (but not for v4f16 as it is
+ // promoted differently). The cost of 2 here is then a load and vrev or
+ // vmovn.
+ if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
+ VecTy->isIntOrIntVectorTy() && DL.getTypeSizeInBits(SubVecTy) <= 64)
+ return 2 * BaseCost;
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
@@ -998,6 +1062,142 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
return true;
}
+static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) {
+ // We don't allow icmps, and because we only look at single-block loops,
+ // we simply count the icmps, i.e. there should only be 1 for the backedge.
+ if (isa<ICmpInst>(&I) && ++ICmpCount > 1)
+ return false;
+
+ if (isa<FCmpInst>(&I))
+ return false;
+
+ // We could allow extending/narrowing FP loads/stores, but codegen is
+ // too inefficient so reject this for now.
+ if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I))
+ return false;
+
+ // Extends have to be extending loads
+ if (isa<SExtInst>(&I) || isa<ZExtInst>(&I))
+ if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))
+ return false;
+
+ // Truncs have to be narrowing stores
+ if (isa<TruncInst>(&I))
+ if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))
+ return false;
+
+ return true;
+}
+
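An assumed source-level example of the extend rule above: the sign-extend of each loaded element fuses into an extending load (e.g. vldrh.s32), so it is accepted, whereas a sext of an arbitrary intermediate value would be rejected.

    // The sext of each loaded i16 has one use and comes from a load: allowed.
    void widen(const short *src, int *dst, int n) {
      for (int i = 0; i < n; ++i)
        dst[i] = src[i];
    }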
+// To set up a tail-predicated loop, we need to know the total number of
+// elements processed by that loop. Thus, we need to determine the element
+// size and:
+// 1) it should be uniform for all operations in the vector loop, so we
+// e.g. don't want any widening/narrowing operations.
+// 2) it should be smaller than i64s because we don't have vector operations
+// that work on i64s.
+// 3) we don't want elements to be reversed or shuffled, to make sure the
+// tail-predication masks/predicates the right lanes.
+//
+static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+ const DataLayout &DL,
+ const LoopAccessInfo *LAI) {
+ PredicatedScalarEvolution PSE = LAI->getPSE();
+ int ICmpCount = 0;
+ int Stride = 0;
+
+ LLVM_DEBUG(dbgs() << "tail-predication: checking allowed instructions\n");
+ SmallVector<Instruction *, 16> LoadStores;
+ for (BasicBlock *BB : L->blocks()) {
+ for (Instruction &I : BB->instructionsWithoutDebug()) {
+ if (isa<PHINode>(&I))
+ continue;
+ if (!canTailPredicateInstruction(I, ICmpCount)) {
+ LLVM_DEBUG(dbgs() << "Instruction not allowed: "; I.dump());
+ return false;
+ }
+
+ Type *T = I.getType();
+ if (T->isPointerTy())
+ T = T->getPointerElementType();
+
+ if (T->getScalarSizeInBits() > 32) {
+ LLVM_DEBUG(dbgs() << "Unsupported Type: "; T->dump());
+ return false;
+ }
+
+ if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
+ Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1);
+ int64_t NextStride = getPtrStride(PSE, Ptr, L);
+ // TODO: for now only allow consecutive strides of 1. We could support
+ // other strides as long as it is uniform, but let's keep it simple for
+ // now.
+ if (Stride == 0 && NextStride == 1) {
+ Stride = NextStride;
+ continue;
+ }
+ if (Stride != NextStride) {
+ LLVM_DEBUG(dbgs() << "Different strides found, can't "
+ "tail-predicate\n.");
+ return false;
+ }
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
+ return true;
+}
+
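A loop shape that satisfies all three conditions above (illustrative sketch only): a uniform 32-bit element size throughout, consecutive stride-1 accesses, and no reversals or shuffles.

    void addArrays(int *a, const int *b, int n) {
      for (int i = 0; i < n; ++i)
        a[i] += b[i];     // uniform i32 elements, stride 1, single block
    }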
+bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+ ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI) {
+ if (DisableTailPredication)
+ return false;
+
+ // Creating a predicated vector loop is the first step for generating a
+ // tail-predicated hardware loop, for which we need the MVE masked
+ // load/stores instructions:
+ if (!ST->hasMVEIntegerOps())
+ return false;
+
+ // For now, restrict this to single block loops.
+ if (L->getNumBlocks() > 1) {
+ LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
+ "loop.\n");
+ return false;
+ }
+
+ assert(L->empty() && "preferPredicateOverEpilogue: inner-loop expected");
+
+ HardwareLoopInfo HWLoopInfo(L);
+ if (!HWLoopInfo.canAnalyze(*LI)) {
+ LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+ "analyzable.\n");
+ return false;
+ }
+
+ // This checks if we have the low-overhead branch architecture
+ // extension, and if we will create a hardware-loop:
+ if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
+ LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+ "profitable.\n");
+ return false;
+ }
+
+ if (!HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT)) {
+ LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+ "a candidate.\n");
+ return false;
+ }
+
+ return canTailPredicateLoop(L, LI, SE, DL, LAI);
+}
+
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// Only currently enable these preferences for M-Class cores.
@@ -1035,6 +1235,11 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned Cost = 0;
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
+ // Don't unroll vectorised loops. MVE does not benefit from it as much as
+ // scalar code.
+ if (I.getType()->isVectorTy())
+ return;
+
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
ImmutableCallSite CS(&I);
if (const Function *F = CS.getCalledFunction()) {
@@ -1043,10 +1248,6 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
return;
}
- // Don't unroll vectorised loop. MVE does not benefit from it as much as
- // scalar code.
- if (I.getType()->isVectorTy())
- return;
SmallVector<const Value*, 4> Operands(I.value_op_begin(),
I.value_op_end());
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index a878fdcfe3c7..880588adfdfd 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -69,15 +69,15 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
- ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding,
- ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR,
- ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp,
- ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor,
- ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization,
- ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass,
- ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
- ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
- ARM::FeatureNoNegativeImmediates
+ ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
+ ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
+ ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
+ ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
+ ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
+ ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
+ ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
+ ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
+ ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
};
const ARMSubtarget *getST() const { return ST; }
@@ -115,7 +115,7 @@ public:
using BaseT::getIntImmCost;
int getIntImmCost(const APInt &Imm, Type *Ty);
- int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
/// @}
@@ -159,6 +159,10 @@ public:
return isLegalMaskedLoad(DataTy, Alignment);
}
+ bool isLegalMaskedGather(Type *Ty, MaybeAlign Alignment);
+
+ bool isLegalMaskedScatter(Type *Ty, MaybeAlign Alignment) { return false; }
+
int getMemcpyCost(const Instruction *I);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
@@ -187,9 +191,10 @@ public:
TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr);
- int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
@@ -203,7 +208,12 @@ public:
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo);
-
+ bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+ ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index d2c355c1da75..f6d76ee09534 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -6554,7 +6554,8 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// Check against T3. If the second register is the PC, this is an
// alternate form of ADR, which uses encoding T4, so check for that too.
if (static_cast<ARMOperand &>(*Operands[4]).getReg() != ARM::PC &&
- static_cast<ARMOperand &>(*Operands[5]).isT2SOImm())
+ (static_cast<ARMOperand &>(*Operands[5]).isT2SOImm() ||
+ static_cast<ARMOperand &>(*Operands[5]).isT2SOImmNeg()))
return false;
// Otherwise, we use encoding T4, which does not have a cc_out
@@ -6609,9 +6610,34 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
(static_cast<ARMOperand &>(*Operands[4]).isImm() ||
(Operands.size() == 6 &&
- static_cast<ARMOperand &>(*Operands[5]).isImm())))
- return true;
-
+ static_cast<ARMOperand &>(*Operands[5]).isImm()))) {
+ // Thumb2 (add|sub){s}{p}.w GPRnopc, sp, #{T2SOImm} has cc_out
+ return (!(isThumbTwo() &&
+ (static_cast<ARMOperand &>(*Operands[4]).isT2SOImm() ||
+ static_cast<ARMOperand &>(*Operands[4]).isT2SOImmNeg())));
+ }
+ // FIXME: Should join all the Thumb and Thumb2 (add|sub) handling in a single case.
+ // Thumb2 ADD r0, #4095 -> ADDW r0, r0, #4095 (T4)
+ // Thumb2 SUB r0, #4095 -> SUBW r0, r0, #4095
+ if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") &&
+ (Operands.size() == 5) &&
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[3]).getReg() != ARM::SP &&
+ static_cast<ARMOperand &>(*Operands[3]).getReg() != ARM::PC &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ static_cast<ARMOperand &>(*Operands[4]).isImm()) {
+ const ARMOperand &IMM = static_cast<ARMOperand &>(*Operands[4]);
+ if (IMM.isT2SOImm() || IMM.isT2SOImmNeg())
+ return false; // add.w / sub.w
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(IMM.getImm())) {
+ const int64_t Value = CE->getValue();
+ // Thumb1 imm8 sub / add
+ if ((Value < ((1 << 7) - 1) << 2) && inITBlock() && (!(Value & 3)) &&
+ isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()))
+ return false;
+ return true; // Thumb2 T4 addw / subw
+ }
+ }
return false;
}
@@ -6703,7 +6729,7 @@ static void applyMnemonicAliases(StringRef &Mnemonic,
// omitted. We don't have a way to do that in tablegen, so fix it up here.
//
// We have to be careful to not emit an invalid Rt2 here, because the rest of
-// the assmebly parser could then generate confusing diagnostics refering to
+// the assembly parser could then generate confusing diagnostics referring to
// it. If we do find anything that prevents us from doing the transformation we
// bail out, and let the assembly parser report an error on the instruction as
// it is written.
@@ -7707,12 +7733,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
}
break;
- case ARM::t2ADDri:
- case ARM::t2ADDri12:
case ARM::t2ADDrr:
case ARM::t2ADDrs:
- case ARM::t2SUBri:
- case ARM::t2SUBri12:
case ARM::t2SUBrr:
case ARM::t2SUBrs:
if (Inst.getOperand(0).getReg() == ARM::SP &&
@@ -7895,10 +7917,10 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
case ARM::MVE_VQDMULLs32bh:
case ARM::MVE_VQDMULLs32th:
case ARM::MVE_VCMULf32:
- case ARM::MVE_VMULLs32bh:
- case ARM::MVE_VMULLs32th:
- case ARM::MVE_VMULLu32bh:
- case ARM::MVE_VMULLu32th: {
+ case ARM::MVE_VMULLBs32:
+ case ARM::MVE_VMULLTs32:
+ case ARM::MVE_VMULLBu32:
+ case ARM::MVE_VMULLTu32: {
if (Operands[3]->getReg() == Operands[4]->getReg()) {
return Error (Operands[3]->getStartLoc(),
"Qd register and Qn register can't be identical");
@@ -9750,23 +9772,33 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
}
break;
case ARM::t2ADDri12:
- // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
- // mnemonic was used (not "addw"), encoding T3 is preferred.
- if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "add" ||
- ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
- break;
- Inst.setOpcode(ARM::t2ADDri);
- Inst.addOperand(MCOperand::createReg(0)); // cc_out
- break;
case ARM::t2SUBri12:
- // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
- // mnemonic was used (not "subw"), encoding T3 is preferred.
- if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "sub" ||
+ case ARM::t2ADDspImm12:
+ case ARM::t2SUBspImm12: {
+ // If the immediate fits for encoding T3 and the generic
+ // mnemonic was used, encoding T3 is preferred.
+ const StringRef Token = static_cast<ARMOperand &>(*Operands[0]).getToken();
+ if ((Token != "add" && Token != "sub") ||
ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
break;
- Inst.setOpcode(ARM::t2SUBri);
+ switch (Inst.getOpcode()) {
+ case ARM::t2ADDri12:
+ Inst.setOpcode(ARM::t2ADDri);
+ break;
+ case ARM::t2SUBri12:
+ Inst.setOpcode(ARM::t2SUBri);
+ break;
+ case ARM::t2ADDspImm12:
+ Inst.setOpcode(ARM::t2ADDspImm);
+ break;
+ case ARM::t2SUBspImm12:
+ Inst.setOpcode(ARM::t2SUBspImm);
+ break;
+ }
+
Inst.addOperand(MCOperand::createReg(0)); // cc_out
- break;
+ return true;
+ }
case ARM::tADDi8:
// If the immediate is in the range 0-7, we want tADDi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
@@ -9812,6 +9844,25 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
Inst = TmpInst;
return true;
}
+ case ARM::t2ADDspImm:
+ case ARM::t2SUBspImm: {
+ // Prefer T1 encoding if possible
+ if (Inst.getOperand(5).getReg() != 0 || HasWideQualifier)
+ break;
+ unsigned V = Inst.getOperand(2).getImm();
+ if (V & 3 || V > ((1 << 7) - 1) << 2)
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDspImm ? ARM::tADDspi
+ : ARM::tSUBspi);
+ TmpInst.addOperand(MCOperand::createReg(ARM::SP)); // destination reg
+ TmpInst.addOperand(MCOperand::createReg(ARM::SP)); // source reg
+ TmpInst.addOperand(MCOperand::createImm(V / 4)); // immediate
+ TmpInst.addOperand(Inst.getOperand(3)); // pred
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
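A worked check of the narrowing window used above, with an assumed immediate: tADDspi/tSUBspi encode imm/4 in seven bits, so the wide SP form only narrows to T1 when the value is a multiple of 4 no larger than 508.

    unsigned V = 16;                                       // from "add sp, sp, #16"
    bool FitsT1 = !(V & 3) && V <= (((1u << 7) - 1) << 2); // multiple of 4, <= 508
    // FitsT1 is true here, so the T1 form is emitted with immediate V / 4 == 4.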
case ARM::t2ADDrr: {
// If the destination and first source operand are the same, and
// there's no setting of the flags, use encoding T2 instead of T3.
@@ -11495,7 +11546,7 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
}
/// Force static initialization.
-extern "C" void LLVMInitializeARMAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMAsmParser() {
RegisterMCAsmParser<ARMAsmParser> X(getTheARMLETarget());
RegisterMCAsmParser<ARMAsmParser> Y(getTheARMBETarget());
RegisterMCAsmParser<ARMAsmParser> A(getTheThumbLETarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index eabc26d05f47..d26b04556abb 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -137,18 +137,15 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
private:
DecodeStatus getARMInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const;
DecodeStatus getThumbInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const;
mutable ITStatus ITBlock;
@@ -204,6 +201,9 @@ static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -566,6 +566,9 @@ static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
#include "ARMGenDisassemblerTables.inc"
static MCDisassembler *createARMDisassembler(const Target &T,
@@ -576,8 +579,7 @@ static MCDisassembler *createARMDisassembler(const Target &T,
// Post-decoding checks
static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
- uint64_t Address, raw_ostream &OS,
- raw_ostream &CS,
+ uint64_t Address, raw_ostream &CS,
uint32_t Insn,
DecodeStatus Result) {
switch (MI.getOpcode()) {
@@ -609,17 +611,16 @@ static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
- uint64_t Address, raw_ostream &OS,
+ uint64_t Address,
raw_ostream &CS) const {
if (STI.getFeatureBits()[ARM::ModeThumb])
- return getThumbInstruction(MI, Size, Bytes, Address, OS, CS);
- return getARMInstruction(MI, Size, Bytes, Address, OS, CS);
+ return getThumbInstruction(MI, Size, Bytes, Address, CS);
+ return getARMInstruction(MI, Size, Bytes, Address, CS);
}
DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &OS,
raw_ostream &CS) const {
CommentStream = &CS;
@@ -642,7 +643,7 @@ DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
decodeInstruction(DecoderTableARM32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
- return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result);
+ return checkDecodedInstruction(MI, Size, Address, CS, Insn, Result);
}
struct DecodeTable {
@@ -673,7 +674,7 @@ DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
decodeInstruction(DecoderTableCoProc32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
- return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result);
+ return checkDecodedInstruction(MI, Size, Address, CS, Insn, Result);
}
Size = 4;
@@ -906,7 +907,6 @@ void ARMDisassembler::UpdateThumbVFPPredicate(
DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &OS,
raw_ostream &CS) const {
CommentStream = &CS;
@@ -1010,7 +1010,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
if (Result != MCDisassembler::Fail) {
Size = 4;
Check(Result, AddThumbPredicate(MI));
- return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn32, Result);
+ return checkDecodedInstruction(MI, Size, Address, CS, Insn32, Result);
}
if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
@@ -1099,7 +1099,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::Fail;
}
-extern "C" void LLVMInitializeARMDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMDisassembler() {
TargetRegistry::RegisterMCDisassembler(getTheARMLETarget(),
createARMDisassembler);
TargetRegistry::RegisterMCDisassembler(getTheARMBETarget(),
@@ -1231,6 +1231,17 @@ static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
return S;
}
+static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo != 13)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
unsigned Register = 0;
@@ -5588,14 +5599,25 @@ static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
unsigned sign1 = fieldFromInstruction(Insn, 21, 1);
unsigned sign2 = fieldFromInstruction(Insn, 23, 1);
if (sign1 != sign2) return MCDisassembler::Fail;
+ const unsigned Rd = fieldFromInstruction(Insn, 8, 4);
+ assert(Inst.getNumOperands() == 0 && "We should receive an empty Inst");
+ DecodeStatus S = DecoderGPRRegisterClass(Inst, Rd, Address, Decoder);
unsigned Val = fieldFromInstruction(Insn, 0, 8);
Val |= fieldFromInstruction(Insn, 12, 3) << 8;
Val |= fieldFromInstruction(Insn, 26, 1) << 11;
- Val |= sign1 << 12;
- Inst.addOperand(MCOperand::createImm(SignExtend32<13>(Val)));
-
- return MCDisassembler::Success;
+ // If the sign bit is set, the offset decreases the address.
+ if (sign1) {
+ // Following the ARMv7 Architecture Reference Manual, when the offset
+ // is zero, it is decoded as a subw, not as an adr.w
+ if (!Val) {
+ Inst.setOpcode(ARM::t2SUBri12);
+ Inst.addOperand(MCOperand::createReg(ARM::PC));
+ } else
+ Val = -Val;
+ }
+ Inst.addOperand(MCOperand::createImm(Val));
+ return S;
}
static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
@@ -6595,3 +6617,40 @@ static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address
Inst.addOperand(MCOperand::createReg(ARM::VPR));
return S;
}
+
+static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ const unsigned Rd = fieldFromInstruction(Insn, 8, 4);
+ const unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ const unsigned Imm12 = fieldFromInstruction(Insn, 26, 1) << 11 |
+ fieldFromInstruction(Insn, 12, 3) << 8 |
+ fieldFromInstruction(Insn, 0, 8);
+ const unsigned TypeT3 = fieldFromInstruction(Insn, 25, 1);
+ unsigned sign1 = fieldFromInstruction(Insn, 21, 1);
+ unsigned sign2 = fieldFromInstruction(Insn, 23, 1);
+ unsigned S = fieldFromInstruction(Insn, 20, 1);
+ if (sign1 != sign2)
+ return MCDisassembler::Fail;
+
+ // T3 does a zext of imm12, whereas T2 does a ThumbExpandImm (T2SOImm)
+ DecodeStatus DS = MCDisassembler::Success;
+ if ((!Check(DS,
+ DecodeGPRspRegisterClass(Inst, Rd, Address, Decoder))) || // dst
+ (!Check(DS, DecodeGPRspRegisterClass(Inst, Rn, Address, Decoder))))
+ return MCDisassembler::Fail;
+ if (TypeT3) {
+ Inst.setOpcode(sign1 ? ARM::t2SUBspImm12 : ARM::t2ADDspImm12);
+ S = 0;
+ Inst.addOperand(MCOperand::createImm(Imm12)); // zext imm12
+ } else {
+ Inst.setOpcode(sign1 ? ARM::t2SUBspImm : ARM::t2ADDspImm);
+ if (!Check(DS, DecodeT2SOImm(Inst, Imm12, Address, Decoder))) // imm12
+ return MCDisassembler::Fail;
+ }
+ if (!Check(DS, DecodeCCOutOperand(Inst, S, Address, Decoder))) // cc_out
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(0)); // pred
+
+ return DS;
+}
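A hedged worked example of the i:imm3:imm8 reassembly performed above; the encoding word is made up purely for illustration.

    uint32_t Insn = 0xF60D1D25;                    // hypothetical T4 ADDW/SUBW word
    unsigned Imm12 = ((Insn >> 26) & 1) << 11      // i     -> 0x800
                   | ((Insn >> 12) & 7) << 8       // imm3  -> 0x100
                   | (Insn & 0xFF);                // imm8  -> 0x25, Imm12 == 0x925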
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 1fee38821a49..2c26dd388c05 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -255,8 +255,11 @@ void ARMELFObjectWriter::addTargetSectionFlags(MCContext &Ctx,
// execute-only section in the object.
MCSectionELF *TextSection =
static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection());
- if (Sec.getKind().isExecuteOnly() && !TextSection->hasInstructions() &&
- !TextSection->hasData()) {
+ if (Sec.getKind().isExecuteOnly() && !TextSection->hasInstructions()) {
+ for (auto &F : TextSection->getFragmentList())
+ if (auto *DF = dyn_cast<MCDataFragment>(&F))
+ if (!DF->getContents().empty())
+ return;
TextSection->setFlags(TextSection->getFlags() | ELF::SHF_ARM_PURECODE);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index f51fbdcd84da..f558ca8d2d9f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -441,10 +441,12 @@ public:
friend class ARMTargetELFStreamer;
ARMELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
- std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
- bool IsThumb)
- : MCELFStreamer(Context, std::move(TAB), std::move(OW), std::move(Emitter)),
- IsThumb(IsThumb) {
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter, bool IsThumb,
+ bool IsAndroid)
+ : MCELFStreamer(Context, std::move(TAB), std::move(OW),
+ std::move(Emitter)),
+ IsThumb(IsThumb), IsAndroid(IsAndroid) {
EHReset();
}
@@ -657,11 +659,10 @@ private:
uint64_t Offset) {
auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
Name + "." + Twine(MappingSymbolCounter++)));
- EmitLabel(Symbol, Loc, F);
+ EmitLabelAtPos(Symbol, Loc, F, Offset);
Symbol->setType(ELF::STT_NOTYPE);
Symbol->setBinding(ELF::STB_LOCAL);
Symbol->setExternal(false);
- Symbol->setOffset(Offset);
}
void EmitThumbFunc(MCSymbol *Func) override {
@@ -687,6 +688,7 @@ private:
void EmitFixup(const MCExpr *Expr, MCFixupKind Kind);
bool IsThumb;
+ bool IsAndroid;
int64_t MappingSymbolCounter = 0;
DenseMap<const MCSection *, std::unique_ptr<ElfMappingSymbolInfo>>
@@ -1269,7 +1271,12 @@ void ARMELFStreamer::emitFnEnd() {
// Emit the exception index table entry
SwitchToExIdxSection(*FnStart);
- if (PersonalityIndex < ARM::EHABI::NUM_PERSONALITY_INDEX)
+ // The EHABI requires a dependency preserving R_ARM_NONE relocation to the
+ // personality routine to protect it from an arbitrary platform's static
+ // linker garbage collection. We disable this for Android where the unwinder
+ // is either dynamically linked or directly references the personality
+ // routine.
+ if (PersonalityIndex < ARM::EHABI::NUM_PERSONALITY_INDEX && !IsAndroid)
EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex));
const MCSymbolRefExpr *FnStartRef =
@@ -1504,9 +1511,11 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context,
std::unique_ptr<MCAsmBackend> TAB,
std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> Emitter,
- bool RelaxAll, bool IsThumb) {
- ARMELFStreamer *S = new ARMELFStreamer(Context, std::move(TAB), std::move(OW),
- std::move(Emitter), IsThumb);
+ bool RelaxAll, bool IsThumb,
+ bool IsAndroid) {
+ ARMELFStreamer *S =
+ new ARMELFStreamer(Context, std::move(TAB), std::move(OW),
+ std::move(Emitter), IsThumb, IsAndroid);
// FIXME: This should eventually end up somewhere else where more
// intelligent flag decisions can be made. For now we are just maintaining
// the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
index a1def61b58d9..b36106a78b71 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
@@ -88,8 +88,9 @@ void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << markup("<reg:") << getRegisterName(RegNo, DefaultAltIdx) << markup(">");
}
-void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void ARMInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned Opcode = MI->getOpcode();
switch (Opcode) {
@@ -275,7 +276,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// Copy the rest operands into NewMI.
for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
NewMI.addOperand(MI->getOperand(i));
- printInstruction(&NewMI, STI, O);
+ printInstruction(&NewMI, Address, STI, O);
return;
}
break;
@@ -288,7 +289,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
switch (MI->getOperand(0).getImm()) {
default:
if (!printAliasInstr(MI, STI, O))
- printInstruction(MI, STI, O);
+ printInstruction(MI, Address, STI, O);
break;
case 0:
O << "\tssbb";
@@ -302,7 +303,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
if (!printAliasInstr(MI, STI, O))
- printInstruction(MI, STI, O);
+ printInstruction(MI, Address, STI, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
index eeb811e216fc..20f901033395 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
@@ -25,13 +25,13 @@ public:
bool applyTargetSpecificCLOption(StringRef Opt) override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 90022a8d88a6..9f60e70e0e02 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -187,7 +187,8 @@ static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) {
}
static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TheTriple) {
+ const Triple &TheTriple,
+ const MCTargetOptions &Options) {
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO())
MAI = new ARMMCAsmInfoDarwin(TheTriple);
@@ -211,7 +212,8 @@ static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
bool RelaxAll) {
return createARMELFStreamer(
Ctx, std::move(MAB), std::move(OW), std::move(Emitter), false,
- (T.getArch() == Triple::thumb || T.getArch() == Triple::thumbeb));
+ (T.getArch() == Triple::thumb || T.getArch() == Triple::thumbeb),
+ T.isAndroid());
}
static MCStreamer *
@@ -315,7 +317,7 @@ static MCInstrAnalysis *createThumbMCInstrAnalysis(const MCInstrInfo *Info) {
}
// Force static initialization.
-extern "C" void LLVMInitializeARMTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTargetMC() {
for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget(),
&getTheThumbLETarget(), &getTheThumbBETarget()}) {
// Register the MC asm info.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 38667d686b85..a9460b70da56 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the unwind opcode assmebler for ARM exception handling
+// This file implements the unwind opcode assembler for ARM exception handling
// table.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index c3134c04b33a..5fb7307159d1 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the unwind opcode assmebler for ARM exception handling
+// This file declares the unwind opcode assembler for ARM exception handling
// table.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
new file mode 100644
index 000000000000..9f64af02e698
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -0,0 +1,301 @@
+//===- MVEGatherScatterLowering.cpp - Gather/Scatter lowering -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// This pass custom lowers llvm.masked.gather and llvm.masked.scatter
+/// intrinsics to ARM MVE gather and scatter intrinsics, optimising the code to
+/// produce a better final result as we go.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mve-gather-scatter-lowering"
+
+cl::opt<bool> EnableMaskedGatherScatters(
+ "enable-arm-maskedgatscat", cl::Hidden, cl::init(false),
+ cl::desc("Enable the generation of masked gathers and scatters"));
+
+namespace {
+
+class MVEGatherScatterLowering : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ explicit MVEGatherScatterLowering() : FunctionPass(ID) {
+ initializeMVEGatherScatterLoweringPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override {
+ return "MVE gather/scatter lowering";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetPassConfig>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ // Check this is a valid gather with correct alignment
+ bool isLegalTypeAndAlignment(unsigned NumElements, unsigned ElemSize,
+ unsigned Alignment);
+ // Check whether Ptr is hidden behind a bitcast and look through it
+ void lookThroughBitcast(Value *&Ptr);
+ // Check for a getelementptr and deduce base and offsets from it, on success
+ // returning the base directly and the offsets indirectly using the Offsets
+ // argument
+ Value *checkGEP(Value *&Offsets, Type *Ty, Value *Ptr, IRBuilder<> Builder);
+
+ bool lowerGather(IntrinsicInst *I);
+ // Create a gather from a base + vector of offsets
+ Value *tryCreateMaskedGatherOffset(IntrinsicInst *I, Value *Ptr,
+ IRBuilder<> Builder);
+ // Create a gather from a vector of pointers
+ Value *tryCreateMaskedGatherBase(IntrinsicInst *I, Value *Ptr,
+ IRBuilder<> Builder);
+};
+
+} // end anonymous namespace
+
+char MVEGatherScatterLowering::ID = 0;
+
+INITIALIZE_PASS(MVEGatherScatterLowering, DEBUG_TYPE,
+ "MVE gather/scattering lowering pass", false, false)
+
+Pass *llvm::createMVEGatherScatterLoweringPass() {
+ return new MVEGatherScatterLowering();
+}
+
+bool MVEGatherScatterLowering::isLegalTypeAndAlignment(unsigned NumElements,
+ unsigned ElemSize,
+ unsigned Alignment) {
+ // Only allow non-extending gathers for now
+ if (((NumElements == 4 && ElemSize == 32) ||
+ (NumElements == 8 && ElemSize == 16) ||
+ (NumElements == 16 && ElemSize == 8)) &&
+ ElemSize / 8 <= Alignment)
+ return true;
+ LLVM_DEBUG(dbgs() << "masked gathers: instruction does not have valid "
+ << "alignment or vector type \n");
+ return false;
+}
+
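A standalone restatement of the predicate for quick experimentation (an assumed-equivalent sketch, not part of the pass): the three accepted shapes are exactly the 128-bit MVE vector types, each at natural element alignment or better.

    static bool legalGatherShape(unsigned NumElements, unsigned ElemSize,
                                 unsigned Alignment) {
      return ((NumElements == 4 && ElemSize == 32) ||   // v4i32
              (NumElements == 8 && ElemSize == 16) ||   // v8i16
              (NumElements == 16 && ElemSize == 8)) &&  // v16i8
             ElemSize / 8 <= Alignment;
    }
    // legalGatherShape(4, 32, 4) -> true; legalGatherShape(4, 32, 2) -> false.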
+Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, Type *Ty, Value *Ptr,
+ IRBuilder<> Builder) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP) {
+ LLVM_DEBUG(dbgs() << "masked gathers: no getelementpointer found\n");
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << "masked gathers: getelementpointer found. Loading"
+ << " from base + vector of offsets\n");
+ Value *GEPPtr = GEP->getPointerOperand();
+ if (GEPPtr->getType()->isVectorTy()) {
+ LLVM_DEBUG(dbgs() << "masked gathers: gather from a vector of pointers"
+ << " hidden behind a getelementptr currently not"
+ << " supported. Expanding.\n");
+ return nullptr;
+ }
+ if (GEP->getNumOperands() != 2) {
+ LLVM_DEBUG(dbgs() << "masked gathers: getelementptr with too many"
+ << " operands. Expanding.\n");
+ return nullptr;
+ }
+ Offsets = GEP->getOperand(1);
+ // SExt offsets inside masked gathers are not permitted by the architecture;
+ // we therefore can't fold them
+ if (ZExtInst *ZextOffs = dyn_cast<ZExtInst>(Offsets))
+ Offsets = ZextOffs->getOperand(0);
+ Type *OffsType = VectorType::getInteger(cast<VectorType>(Ty));
+ // If the offset we found does not have the type the intrinsic expects,
+ // i.e., the same type as the gather itself, we need to convert it (integer
+ // types only) or fall back to expanding the gather
+ if (OffsType != Offsets->getType()) {
+ if (OffsType->getScalarSizeInBits() >
+ Offsets->getType()->getScalarSizeInBits()) {
+ LLVM_DEBUG(dbgs() << "masked gathers: extending offsets\n");
+ Offsets = Builder.CreateZExt(Offsets, OffsType, "");
+ } else {
+ LLVM_DEBUG(dbgs() << "masked gathers: no correct offset type. Can't"
+ << " create masked gather\n");
+ return nullptr;
+ }
+ }
+ // If none of the checks failed, return the gep's base pointer
+ return GEPPtr;
+}
+
+void MVEGatherScatterLowering::lookThroughBitcast(Value *&Ptr) {
+ // Look through bitcast instruction if #elements is the same
+ if (auto *BitCast = dyn_cast<BitCastInst>(Ptr)) {
+ Type *BCTy = BitCast->getType();
+ Type *BCSrcTy = BitCast->getOperand(0)->getType();
+ if (BCTy->getVectorNumElements() == BCSrcTy->getVectorNumElements()) {
+ LLVM_DEBUG(dbgs() << "masked gathers: looking through bitcast\n");
+ Ptr = BitCast->getOperand(0);
+ }
+ }
+}
+
+bool MVEGatherScatterLowering::lowerGather(IntrinsicInst *I) {
+ using namespace PatternMatch;
+ LLVM_DEBUG(dbgs() << "masked gathers: checking transform preconditions\n");
+
+ // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
+ // Attempt to turn the masked gather in I into a MVE intrinsic
+ // Potentially optimising the addressing modes as we do so.
+ Type *Ty = I->getType();
+ Value *Ptr = I->getArgOperand(0);
+ unsigned Alignment = cast<ConstantInt>(I->getArgOperand(1))->getZExtValue();
+ Value *Mask = I->getArgOperand(2);
+ Value *PassThru = I->getArgOperand(3);
+
+ if (!isLegalTypeAndAlignment(Ty->getVectorNumElements(),
+ Ty->getScalarSizeInBits(), Alignment))
+ return false;
+ lookThroughBitcast(Ptr);
+ assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
+
+ IRBuilder<> Builder(I->getContext());
+ Builder.SetInsertPoint(I);
+ Builder.SetCurrentDebugLocation(I->getDebugLoc());
+ Value *Load = tryCreateMaskedGatherOffset(I, Ptr, Builder);
+ if (!Load)
+ Load = tryCreateMaskedGatherBase(I, Ptr, Builder);
+ if (!Load)
+ return false;
+
+ if (!isa<UndefValue>(PassThru) && !match(PassThru, m_Zero())) {
+ LLVM_DEBUG(dbgs() << "masked gathers: found non-trivial passthru - "
+ << "creating select\n");
+ Load = Builder.CreateSelect(Mask, Load, PassThru);
+ }
+
+ LLVM_DEBUG(dbgs() << "masked gathers: successfully built masked gather\n");
+ I->replaceAllUsesWith(Load);
+ I->eraseFromParent();
+ return true;
+}
+
+Value *MVEGatherScatterLowering::tryCreateMaskedGatherBase(
+ IntrinsicInst *I, Value *Ptr, IRBuilder<> Builder) {
+ using namespace PatternMatch;
+ LLVM_DEBUG(dbgs() << "masked gathers: loading from vector of pointers\n");
+ Type *Ty = I->getType();
+ if (Ty->getVectorNumElements() != 4)
+ // Can't build an intrinsic for this
+ return nullptr;
+ Value *Mask = I->getArgOperand(2);
+ if (match(Mask, m_One()))
+ return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base,
+ {Ty, Ptr->getType()},
+ {Ptr, Builder.getInt32(0)});
+ else
+ return Builder.CreateIntrinsic(
+ Intrinsic::arm_mve_vldr_gather_base_predicated,
+ {Ty, Ptr->getType(), Mask->getType()},
+ {Ptr, Builder.getInt32(0), Mask});
+}
+
+Value *MVEGatherScatterLowering::tryCreateMaskedGatherOffset(
+ IntrinsicInst *I, Value *Ptr, IRBuilder<> Builder) {
+ using namespace PatternMatch;
+ Type *Ty = I->getType();
+ Value *Offsets;
+ Value *BasePtr = checkGEP(Offsets, Ty, Ptr, Builder);
+ if (!BasePtr)
+ return nullptr;
+
+ unsigned Scale;
+ int GEPElemSize =
+ BasePtr->getType()->getPointerElementType()->getPrimitiveSizeInBits();
+ int ResultElemSize = Ty->getScalarSizeInBits();
+ // This can be a 32-bit load scaled by 4, a 16-bit load scaled by 2, or an
+ // 8-bit, 16-bit or 32-bit load scaled by 1
+ if (GEPElemSize == 32 && ResultElemSize == 32) {
+ Scale = 2;
+ } else if (GEPElemSize == 16 && ResultElemSize == 16) {
+ Scale = 1;
+ } else if (GEPElemSize == 8) {
+ Scale = 0;
+ } else {
+ LLVM_DEBUG(dbgs() << "masked gathers: incorrect scale for load. Can't"
+ << " create masked gather\n");
+ return nullptr;
+ }
+
+ Value *Mask = I->getArgOperand(2);
+ if (!match(Mask, m_One()))
+ return Builder.CreateIntrinsic(
+ Intrinsic::arm_mve_vldr_gather_offset_predicated,
+ {Ty, BasePtr->getType(), Offsets->getType(), Mask->getType()},
+ {BasePtr, Offsets, Builder.getInt32(Ty->getScalarSizeInBits()),
+ Builder.getInt32(Scale), Builder.getInt32(1), Mask});
+ else
+ return Builder.CreateIntrinsic(
+ Intrinsic::arm_mve_vldr_gather_offset,
+ {Ty, BasePtr->getType(), Offsets->getType()},
+ {BasePtr, Offsets, Builder.getInt32(Ty->getScalarSizeInBits()),
+ Builder.getInt32(Scale), Builder.getInt32(1)});
+}
+
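The scale operand passed to the intrinsic is log2 of the GEP element size in bytes; an assumed-equivalent restatement of the mapping above for the accepted cases:

    static unsigned gatherScale(unsigned GEPElemBits) {
      return GEPElemBits == 32 ? 2   // <<2: 32-bit elements scaled by 4
           : GEPElemBits == 16 ? 1   // <<1: 16-bit elements scaled by 2
           : 0;                      // <<0: 8-bit elements, unscaled
    }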
+bool MVEGatherScatterLowering::runOnFunction(Function &F) {
+ if (!EnableMaskedGatherScatters)
+ return false;
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ auto &TM = TPC.getTM<TargetMachine>();
+ auto *ST = &TM.getSubtarget<ARMSubtarget>(F);
+ if (!ST->hasMVEIntegerOps())
+ return false;
+ SmallVector<IntrinsicInst *, 4> Gathers;
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
+ Gathers.push_back(II);
+ }
+ }
+
+ if (Gathers.empty())
+ return false;
+
+ for (IntrinsicInst *I : Gathers)
+ lowerGather(I);
+
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
index 4db8ab17c49b..038c68739cdf 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -20,7 +20,14 @@
/// - A tail-predicated loop, with implicit predication.
/// - A loop containing multiple VCPT instructions, predicating multiple VPT
/// blocks of instructions operating on different vector types.
+///
+/// This pass inserts the VCTP intrinsic to represent the effect of
+/// tail predication. This will be picked up by the ARM Low-overhead loop pass,
+/// which performs the final transformation to a DLSTP or WLSTP tail-predicated
+/// loop.
+#include "ARM.h"
+#include "ARMSubtarget.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -28,20 +35,19 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "ARM.h"
-#include "ARMSubtarget.h"
using namespace llvm;
#define DEBUG_TYPE "mve-tail-predication"
#define DESC "Transform predicated vector loops to use MVE tail predication"
-static cl::opt<bool>
+cl::opt<bool>
DisableTailPredication("disable-mve-tail-predication", cl::Hidden,
cl::init(true),
cl::desc("Disable MVE Tail Predication"));
@@ -85,6 +91,12 @@ private:
/// Is the icmp that generates an i1 vector, based upon a loop counter
/// and a limit that is defined outside the loop.
bool isTailPredicate(Instruction *Predicate, Value *NumElements);
+
+ /// Insert the intrinsic to represent the effect of tail predication.
+ void InsertVCTPIntrinsic(Instruction *Predicate,
+ DenseMap<Instruction*, Instruction*> &NewPredicates,
+ VectorType *VecTy,
+ Value *NumElements);
};
} // end namespace
@@ -123,7 +135,7 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
// The MVE and LOB extensions are combined to enable tail-predication, but
// there's nothing preventing us from generating VCTP instructions for v8.1m.
if (!ST->hasMVEIntegerOps() || !ST->hasV8_1MMainlineOps()) {
- LLVM_DEBUG(dbgs() << "TP: Not a v8.1m.main+mve target.\n");
+ LLVM_DEBUG(dbgs() << "ARM TP: Not a v8.1m.main+mve target.\n");
return false;
}
@@ -148,7 +160,7 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
// Look for the hardware loop intrinsic that sets the iteration count.
IntrinsicInst *Setup = FindLoopIterations(Preheader);
- // The test.set iteration could live in the pre- preheader.
+ // The test.set iteration could live in the pre-preheader.
if (!Setup) {
if (!Preheader->getSinglePredecessor())
return false;
@@ -171,11 +183,9 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
if (!Decrement)
return false;
- LLVM_DEBUG(dbgs() << "TP: Running on Loop: " << *L
- << *Setup << "\n"
+ LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n"
<< *Decrement << "\n");
- bool Changed = TryConvert(Setup->getArgOperand(0));
- return Changed;
+ return TryConvert(Setup->getArgOperand(0));
}
bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) {
@@ -208,7 +218,7 @@ bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) {
// The vector icmp
if (!match(I, m_ICmp(Pred, m_Instruction(Induction),
m_Instruction(Shuffle))) ||
- Pred != ICmpInst::ICMP_ULE || !L->isLoopInvariant(Shuffle))
+ Pred != ICmpInst::ICMP_ULE)
return false;
// First find the stuff outside the loop which is setting up the limit
@@ -230,11 +240,11 @@ bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) {
if (!match(BECount, m_Add(m_Value(TripCount), m_AllOnes())))
return false;
- if (TripCount != NumElements)
+ if (TripCount != NumElements || !L->isLoopInvariant(BECount))
return false;
// Now back to searching inside the loop body...
- // Find the add with takes the index iv and adds a constant vector to it.
+ // Find the add which takes the index iv and adds a constant vector to it.
Instruction *BroadcastSplat = nullptr;
Constant *Const = nullptr;
if (!match(Induction, m_Add(m_Instruction(BroadcastSplat),
@@ -269,14 +279,14 @@ bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) {
Value *OnEntry = Phi->getIncomingValueForBlock(L->getLoopPreheader());
if (!match(OnEntry, m_Zero()))
return false;
-
+
Value *InLoop = Phi->getIncomingValueForBlock(L->getLoopLatch());
unsigned Lanes = cast<VectorType>(Insert->getType())->getNumElements();
Instruction *LHS = nullptr;
if (!match(InLoop, m_Add(m_Instruction(LHS), m_SpecificInt(Lanes))))
return false;
-
+
return LHS == Phi;
}
@@ -298,8 +308,8 @@ bool MVETailPredication::IsPredicatedVectorLoop() {
unsigned ElementWidth = VecTy->getScalarSizeInBits();
// MVE vectors are 128-bit, but don't support 128 x i1.
// TODO: Can we support vectors larger than 128-bits?
- unsigned MaxWidth = TTI->getRegisterBitWidth(true);
- if (Lanes * ElementWidth != MaxWidth || Lanes == MaxWidth)
+ unsigned MaxWidth = TTI->getRegisterBitWidth(true);
+ if (Lanes * ElementWidth > MaxWidth || Lanes == MaxWidth)
return false;
MaskedInsts.push_back(cast<IntrinsicInst>(&I));
} else if (auto *Int = dyn_cast<IntrinsicInst>(&I)) {
@@ -399,19 +409,25 @@ Value* MVETailPredication::ComputeElements(Value *TripCount,
// tail predicated loop.
static void Cleanup(DenseMap<Instruction*, Instruction*> &NewPredicates,
SetVector<Instruction*> &MaybeDead, Loop *L) {
- if (BasicBlock *Exit = L->getUniqueExitBlock()) {
- for (auto &Pair : NewPredicates) {
- Instruction *OldPred = Pair.first;
- Instruction *NewPred = Pair.second;
-
- for (auto &I : *Exit) {
- if (I.isSameOperationAs(OldPred)) {
- Instruction *PredClone = NewPred->clone();
- PredClone->insertBefore(&I);
- I.replaceAllUsesWith(PredClone);
- MaybeDead.insert(&I);
- break;
- }
+ BasicBlock *Exit = L->getUniqueExitBlock();
+ if (!Exit) {
+ LLVM_DEBUG(dbgs() << "ARM TP: can't find loop exit block\n");
+ return;
+ }
+
+ for (auto &Pair : NewPredicates) {
+ Instruction *OldPred = Pair.first;
+ Instruction *NewPred = Pair.second;
+
+ for (auto &I : *Exit) {
+ if (I.isSameOperationAs(OldPred)) {
+ Instruction *PredClone = NewPred->clone();
+ PredClone->insertBefore(&I);
+ I.replaceAllUsesWith(PredClone);
+ MaybeDead.insert(&I);
+ LLVM_DEBUG(dbgs() << "ARM TP: replacing: "; I.dump();
+ dbgs() << "ARM TP: with: "; PredClone->dump());
+ break;
}
}
}
@@ -432,23 +448,69 @@ static void Cleanup(DenseMap<Instruction*, Instruction*> &NewPredicates,
Dead.insert(I);
}
- for (auto *I : Dead)
+ for (auto *I : Dead) {
+ LLVM_DEBUG(dbgs() << "ARM TP: removing dead insn: "; I->dump());
I->eraseFromParent();
+ }
for (auto I : L->blocks())
DeleteDeadPHIs(I);
}
+void MVETailPredication::InsertVCTPIntrinsic(Instruction *Predicate,
+ DenseMap<Instruction*, Instruction*> &NewPredicates,
+ VectorType *VecTy, Value *NumElements) {
+ IRBuilder<> Builder(L->getHeader()->getFirstNonPHI());
+ Module *M = L->getHeader()->getModule();
+ Type *Ty = IntegerType::get(M->getContext(), 32);
+
+ // Insert a phi to count the number of elements processed by the loop.
+ PHINode *Processed = Builder.CreatePHI(Ty, 2);
+ Processed->addIncoming(NumElements, L->getLoopPreheader());
+
+ // Insert the intrinsic to represent the effect of tail predication.
+ Builder.SetInsertPoint(cast<Instruction>(Predicate));
+ ConstantInt *Factor =
+ ConstantInt::get(cast<IntegerType>(Ty), VecTy->getNumElements());
+
+ Intrinsic::ID VCTPID;
+ switch (VecTy->getNumElements()) {
+ default:
+ llvm_unreachable("unexpected number of lanes");
+ case 4: VCTPID = Intrinsic::arm_mve_vctp32; break;
+ case 8: VCTPID = Intrinsic::arm_mve_vctp16; break;
+ case 16: VCTPID = Intrinsic::arm_mve_vctp8; break;
+
+ // FIXME: vctp64 currently not supported because the predicate
+ // vector wants to be <2 x i1>, but v2i1 is not a legal MVE
+ // type, so problems happen at isel time.
+ // Intrinsic::arm_mve_vctp64 exists for ACLE intrinsics
+ // purposes, but takes a v4i1 instead of a v2i1.
+ }
+ Function *VCTP = Intrinsic::getDeclaration(M, VCTPID);
+ Value *TailPredicate = Builder.CreateCall(VCTP, Processed);
+ Predicate->replaceAllUsesWith(TailPredicate);
+ NewPredicates[Predicate] = cast<Instruction>(TailPredicate);
+
+ // Add the incoming value to the new phi.
+ // TODO: This add likely already exists in the loop.
+ Value *Remaining = Builder.CreateSub(Processed, Factor);
+ Processed->addIncoming(Remaining, L->getLoopLatch());
+ LLVM_DEBUG(dbgs() << "ARM TP: Insert processed elements phi: "
+ << *Processed << "\n"
+ << "ARM TP: Inserted VCTP: " << *TailPredicate << "\n");
+}
+
bool MVETailPredication::TryConvert(Value *TripCount) {
- if (!IsPredicatedVectorLoop())
+ if (!IsPredicatedVectorLoop()) {
+ LLVM_DEBUG(dbgs() << "ARM TP: no masked instructions in loop");
return false;
+ }
- LLVM_DEBUG(dbgs() << "TP: Found predicated vector loop.\n");
+ LLVM_DEBUG(dbgs() << "ARM TP: Found predicated vector loop.\n");
// Walk through the masked intrinsics and try to find whether the predicate
// operand is generated from an induction variable.
- Module *M = L->getHeader()->getModule();
- Type *Ty = IntegerType::get(M->getContext(), 32);
SetVector<Instruction*> Predicates;
DenseMap<Instruction*, Instruction*> NewPredicates;
@@ -465,43 +527,14 @@ bool MVETailPredication::TryConvert(Value *TripCount) {
continue;
if (!isTailPredicate(Predicate, NumElements)) {
- LLVM_DEBUG(dbgs() << "TP: Not tail predicate: " << *Predicate << "\n");
+ LLVM_DEBUG(dbgs() << "ARM TP: Not tail predicate: " << *Predicate << "\n");
continue;
}
- LLVM_DEBUG(dbgs() << "TP: Found tail predicate: " << *Predicate << "\n");
+ LLVM_DEBUG(dbgs() << "ARM TP: Found tail predicate: " << *Predicate << "\n");
Predicates.insert(Predicate);
- // Insert a phi to count the number of elements processed by the loop.
- IRBuilder<> Builder(L->getHeader()->getFirstNonPHI());
- PHINode *Processed = Builder.CreatePHI(Ty, 2);
- Processed->addIncoming(NumElements, L->getLoopPreheader());
-
- // Insert the intrinsic to represent the effect of tail predication.
- Builder.SetInsertPoint(cast<Instruction>(Predicate));
- ConstantInt *Factor =
- ConstantInt::get(cast<IntegerType>(Ty), VecTy->getNumElements());
- Intrinsic::ID VCTPID;
- switch (VecTy->getNumElements()) {
- default:
- llvm_unreachable("unexpected number of lanes");
- case 2: VCTPID = Intrinsic::arm_vctp64; break;
- case 4: VCTPID = Intrinsic::arm_vctp32; break;
- case 8: VCTPID = Intrinsic::arm_vctp16; break;
- case 16: VCTPID = Intrinsic::arm_vctp8; break;
- }
- Function *VCTP = Intrinsic::getDeclaration(M, VCTPID);
- Value *TailPredicate = Builder.CreateCall(VCTP, Processed);
- Predicate->replaceAllUsesWith(TailPredicate);
- NewPredicates[Predicate] = cast<Instruction>(TailPredicate);
-
- // Add the incoming value to the new phi.
- // TODO: This add likely already exists in the loop.
- Value *Remaining = Builder.CreateSub(Processed, Factor);
- Processed->addIncoming(Remaining, L->getLoopLatch());
- LLVM_DEBUG(dbgs() << "TP: Insert processed elements phi: "
- << *Processed << "\n"
- << "TP: Inserted VCTP: " << *TailPredicate << "\n");
+ InsertVCTPIntrinsic(Predicate, NewPredicates, VecTy, NumElements);
}
// Now clean up.
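
As a scalar model of what the phi/VCTP pair above computes (a sketch, not the pass's output): the phi tracks the elements left to process, and each vctp call yields a lane mask with min(remaining, lanes) active lanes, so the final iteration runs with a partial mask instead of falling back to a scalar tail.

    #include <algorithm>
    #include <cstdint>

    // Sketch: the lane mask a vctp produces for one vector iteration,
    // given the "elements remaining" phi. Remaining is decremented by
    // the lane count each iteration (the CreateSub above).
    static uint16_t vctpMask(int64_t Remaining, unsigned Lanes) {
      int64_t Active = std::min<int64_t>(std::max<int64_t>(Remaining, 0), Lanes);
      return Active >= 16 ? 0xFFFF : (uint16_t)((1u << Active) - 1);
    }
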
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
index bc0a80b177ed..a5df46c94f42 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
@@ -22,9 +22,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include <cassert>
#include <new>
@@ -37,16 +37,21 @@ namespace {
class MVEVPTBlock : public MachineFunctionPass {
public:
static char ID;
- const Thumb2InstrInfo *TII;
- const TargetRegisterInfo *TRI;
MVEVPTBlock() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &Fn) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<ReachingDefAnalysis>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
+ MachineFunctionProperties::Property::NoVRegs).set(
+ MachineFunctionProperties::Property::TracksLiveness);
}
StringRef getPassName() const override {
@@ -55,6 +60,9 @@ namespace {
private:
bool InsertVPTBlocks(MachineBasicBlock &MBB);
+
+ const Thumb2InstrInfo *TII = nullptr;
+ ReachingDefAnalysis *RDA = nullptr;
};
char MVEVPTBlock::ID = 0;
@@ -63,112 +71,32 @@ namespace {
INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
-enum VPTMaskValue {
- T = 8, // 0b1000
- TT = 4, // 0b0100
- TE = 12, // 0b1100
- TTT = 2, // 0b0010
- TTE = 6, // 0b0110
- TEE = 10, // 0b1010
- TET = 14, // 0b1110
- TTTT = 1, // 0b0001
- TTTE = 3, // 0b0011
- TTEE = 5, // 0b0101
- TTET = 7, // 0b0111
- TEEE = 9, // 0b1001
- TEET = 11, // 0b1011
- TETT = 13, // 0b1101
- TETE = 15 // 0b1111
-};
-
-static unsigned VCMPOpcodeToVPT(unsigned Opcode) {
- switch (Opcode) {
- case ARM::MVE_VCMPf32:
- return ARM::MVE_VPTv4f32;
- case ARM::MVE_VCMPf16:
- return ARM::MVE_VPTv8f16;
- case ARM::MVE_VCMPi8:
- return ARM::MVE_VPTv16i8;
- case ARM::MVE_VCMPi16:
- return ARM::MVE_VPTv8i16;
- case ARM::MVE_VCMPi32:
- return ARM::MVE_VPTv4i32;
- case ARM::MVE_VCMPu8:
- return ARM::MVE_VPTv16u8;
- case ARM::MVE_VCMPu16:
- return ARM::MVE_VPTv8u16;
- case ARM::MVE_VCMPu32:
- return ARM::MVE_VPTv4u32;
- case ARM::MVE_VCMPs8:
- return ARM::MVE_VPTv16s8;
- case ARM::MVE_VCMPs16:
- return ARM::MVE_VPTv8s16;
- case ARM::MVE_VCMPs32:
- return ARM::MVE_VPTv4s32;
-
- case ARM::MVE_VCMPf32r:
- return ARM::MVE_VPTv4f32r;
- case ARM::MVE_VCMPf16r:
- return ARM::MVE_VPTv8f16r;
- case ARM::MVE_VCMPi8r:
- return ARM::MVE_VPTv16i8r;
- case ARM::MVE_VCMPi16r:
- return ARM::MVE_VPTv8i16r;
- case ARM::MVE_VCMPi32r:
- return ARM::MVE_VPTv4i32r;
- case ARM::MVE_VCMPu8r:
- return ARM::MVE_VPTv16u8r;
- case ARM::MVE_VCMPu16r:
- return ARM::MVE_VPTv8u16r;
- case ARM::MVE_VCMPu32r:
- return ARM::MVE_VPTv4u32r;
- case ARM::MVE_VCMPs8r:
- return ARM::MVE_VPTv16s8r;
- case ARM::MVE_VCMPs16r:
- return ARM::MVE_VPTv8s16r;
- case ARM::MVE_VCMPs32r:
- return ARM::MVE_VPTv4s32r;
-
- default:
- return 0;
- }
-}
-
-static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI,
- const TargetRegisterInfo *TRI,
+static MachineInstr *findVCMPToFoldIntoVPST(MachineInstr *MI,
+ ReachingDefAnalysis *RDA,
unsigned &NewOpcode) {
- // Search backwards to the instruction that defines VPR. This may or not
- // be a VCMP, we check that after this loop. If we find another instruction
- // that reads cpsr, we return nullptr.
- MachineBasicBlock::iterator CmpMI = MI;
- while (CmpMI != MI->getParent()->begin()) {
- --CmpMI;
- if (CmpMI->modifiesRegister(ARM::VPR, TRI))
- break;
- if (CmpMI->readsRegister(ARM::VPR, TRI))
- break;
- }
-
- if (CmpMI == MI)
- return nullptr;
- NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode());
- if (NewOpcode == 0)
+ // First, search backwards to the instruction that defines VPR
+ auto *Def = RDA->getReachingMIDef(MI, ARM::VPR);
+ if (!Def)
return nullptr;
- // Search forward from CmpMI to MI, checking if either register was def'd
- if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI),
- MI, TRI))
+ // Now check that Def is a VCMP
+ if (!(NewOpcode = VCMPOpcodeToVPT(Def->getOpcode())))
return nullptr;
- if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI),
- MI, TRI))
+
+ // Check that Def's operands are not defined between the VCMP and MI, i.e.
+ // check that they have the same reaching def.
+ if (!RDA->hasSameReachingDef(Def, MI, Def->getOperand(1).getReg()) ||
+ !RDA->hasSameReachingDef(Def, MI, Def->getOperand(2).getReg()))
return nullptr;
- return &*CmpMI;
+
+ return Def;
}
bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
bool Modified = false;
MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
+ SmallSet<MachineInstr *, 4> RemovedVCMPs;
while (MBIter != EndIter) {
MachineInstr *MI = &*MBIter;
@@ -208,29 +136,13 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
++MBIter;
};
- unsigned BlockMask = 0;
- switch (VPTInstCnt) {
- case 1:
- BlockMask = VPTMaskValue::T;
- break;
- case 2:
- BlockMask = VPTMaskValue::TT;
- break;
- case 3:
- BlockMask = VPTMaskValue::TTT;
- break;
- case 4:
- BlockMask = VPTMaskValue::TTTT;
- break;
- default:
- llvm_unreachable("Unexpected number of instruction in a VPT block");
- };
+ unsigned BlockMask = getARMVPTBlockMask(VPTInstCnt);
// Search back for a VCMP that can be folded to create a VPT, or else create
// a VPST directly
MachineInstrBuilder MIBuilder;
unsigned NewOpcode;
- MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode);
+ MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, RDA, NewOpcode);
if (VCMP) {
LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump());
MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode));
@@ -238,7 +150,11 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
MIBuilder.add(VCMP->getOperand(1));
MIBuilder.add(VCMP->getOperand(2));
MIBuilder.add(VCMP->getOperand(3));
- VCMP->eraseFromParent();
+ // We delay removing the actual VCMP instruction by saving it to a list
+ // and deleting all instructions in this list in one go after we have
+ // created the VPT blocks. We do this in order not to invalidate the
+ // ReachingDefAnalysis that is queried by 'findVCMPToFoldIntoVPST'.
+ RemovedVCMPs.insert(VCMP);
} else {
MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST));
MIBuilder.addImm(BlockMask);
@@ -249,10 +165,17 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
Modified = true;
}
+
+ for (auto *I : RemovedVCMPs)
+ I->eraseFromParent();
+
return Modified;
}
bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(Fn.getFunction()))
+ return false;
+
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(Fn.getSubtarget());
@@ -260,7 +183,7 @@ bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
return false;
TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
- TRI = STI.getRegisterInfo();
+ RDA = &getAnalysis<ReachingDefAnalysis>();
LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
<< "********** Function: " << Fn.getName() << '\n');
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index 86cb907abfa3..a7f7d75e356e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -27,7 +27,7 @@ Target &llvm::getTheThumbBETarget() {
return TheThumbBETarget;
}
-extern "C" void LLVMInitializeARMTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTargetInfo() {
RegisterTarget<Triple::arm, /*HasJIT=*/true> X(getTheARMLETarget(), "arm",
"ARM", "ARM");
RegisterTarget<Triple::armeb, /*HasJIT=*/true> Y(getTheARMBETarget(), "armeb",
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index fccaa4c9cc8a..b08b71a4952d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -37,8 +37,8 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
// Need to check the arch.
MachineFunction &MF = *MBB.getParent();
const ARMSubtarget &st = MF.getSubtarget<ARMSubtarget>();
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index bc433e7a7a93..530289fe8c5d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -38,7 +38,7 @@ public:
const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index af1f0aeb27ba..e06bb9546c03 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -120,8 +120,8 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
// Handle SPR, DPR, and QPR copies.
if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
@@ -303,50 +303,45 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
continue;
}
- bool HasCCOut = true;
- if (BaseReg == ARM::SP) {
- // sub sp, sp, #imm7
- if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
- assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
- Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg)
- .addImm(ThisVal / 4)
- .setMIFlags(MIFlags)
- .add(predOps(ARMCC::AL));
- NumBytes = 0;
- continue;
- }
+ assert((DestReg != ARM::SP || BaseReg == ARM::SP) &&
+ "Writing to SP, from other register.");
- // sub rd, sp, so_imm
- Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
- if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
- NumBytes = 0;
- } else {
- // FIXME: Move this to ARMAddressingModes.h?
- unsigned RotAmt = countLeadingZeros(ThisVal);
- ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
- NumBytes &= ~ThisVal;
- assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
- "Bit extraction didn't work?");
- }
+ // Try to use T1, as it is smaller
+ if ((DestReg == ARM::SP) && (ThisVal < ((1 << 7) - 1) * 4)) {
+ assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg)
+ .addImm(ThisVal / 4)
+ .setMIFlags(MIFlags)
+ .add(predOps(ARMCC::AL));
+ break;
+ }
+ bool HasCCOut = true;
+ int ImmIsT2SO = ARM_AM::getT2SOImmVal(ThisVal);
+ bool ToSP = DestReg == ARM::SP;
+ unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
+ unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
+ unsigned t2SUBi12 = ToSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12;
+ unsigned t2ADDi12 = ToSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12;
+ Opc = isSub ? t2SUB : t2ADD;
+ // Prefer T2: sub rd, rn, so_imm | sub sp, sp, so_imm
+ if (ImmIsT2SO != -1) {
+ NumBytes = 0;
+ } else if (ThisVal < 4096) {
+ // Prefer T3 if we can make it in a single go: subw rd, rn, imm12 |
+ // subw sp, sp, imm12
+ Opc = isSub ? t2SUBi12 : t2ADDi12;
+ HasCCOut = false;
+ NumBytes = 0;
} else {
- assert(DestReg != ARM::SP && BaseReg != ARM::SP);
- Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
- if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
- NumBytes = 0;
- } else if (ThisVal < 4096) {
- Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
- HasCCOut = false;
- NumBytes = 0;
- } else {
- // FIXME: Move this to ARMAddressingModes.h?
- unsigned RotAmt = countLeadingZeros(ThisVal);
- ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
- NumBytes &= ~ThisVal;
- assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
- "Bit extraction didn't work?");
- }
+ // Use one T2 instruction to reduce NumBytes
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = countLeadingZeros(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
}
// Build the new ADD / SUB.
@@ -375,6 +370,8 @@ negativeOffsetOpcode(unsigned opcode)
case ARM::t2STRBi12: return ARM::t2STRBi8;
case ARM::t2STRHi12: return ARM::t2STRHi8;
case ARM::t2PLDi12: return ARM::t2PLDi8;
+ case ARM::t2PLDWi12: return ARM::t2PLDWi8;
+ case ARM::t2PLIi12: return ARM::t2PLIi8;
case ARM::t2LDRi8:
case ARM::t2LDRHi8:
@@ -385,13 +382,13 @@ negativeOffsetOpcode(unsigned opcode)
case ARM::t2STRBi8:
case ARM::t2STRHi8:
case ARM::t2PLDi8:
+ case ARM::t2PLDWi8:
+ case ARM::t2PLIi8:
return opcode;
default:
- break;
+ llvm_unreachable("unknown thumb2 opcode.");
}
-
- return 0;
}
static unsigned
@@ -407,6 +404,8 @@ positiveOffsetOpcode(unsigned opcode)
case ARM::t2STRBi8: return ARM::t2STRBi12;
case ARM::t2STRHi8: return ARM::t2STRHi12;
case ARM::t2PLDi8: return ARM::t2PLDi12;
+ case ARM::t2PLDWi8: return ARM::t2PLDWi12;
+ case ARM::t2PLIi8: return ARM::t2PLIi12;
case ARM::t2LDRi12:
case ARM::t2LDRHi12:
@@ -417,13 +416,13 @@ positiveOffsetOpcode(unsigned opcode)
case ARM::t2STRBi12:
case ARM::t2STRHi12:
case ARM::t2PLDi12:
+ case ARM::t2PLDWi12:
+ case ARM::t2PLIi12:
return opcode;
default:
- break;
+ llvm_unreachable("unknown thumb2 opcode.");
}
-
- return 0;
}
static unsigned
@@ -439,6 +438,8 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRBs: return ARM::t2STRBi12;
case ARM::t2STRHs: return ARM::t2STRHi12;
case ARM::t2PLDs: return ARM::t2PLDi12;
+ case ARM::t2PLDWs: return ARM::t2PLDWi12;
+ case ARM::t2PLIs: return ARM::t2PLIi12;
case ARM::t2LDRi12:
case ARM::t2LDRHi12:
@@ -449,6 +450,8 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRBi12:
case ARM::t2STRHi12:
case ARM::t2PLDi12:
+ case ARM::t2PLDWi12:
+ case ARM::t2PLIi12:
case ARM::t2LDRi8:
case ARM::t2LDRHi8:
case ARM::t2LDRBi8:
@@ -458,13 +461,13 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRBi8:
case ARM::t2STRHi8:
case ARM::t2PLDi8:
+ case ARM::t2PLDWi8:
+ case ARM::t2PLIi8:
return opcode;
default:
- break;
+ llvm_unreachable("unknown thumb2 opcode.");
}
-
- return 0;
}
bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
@@ -484,7 +487,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
- if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
+ const bool IsSP = Opcode == ARM::t2ADDspImm12 || Opcode == ARM::t2ADDspImm;
+ if (IsSP || Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
Offset += MI.getOperand(FrameRegIdx+1).getImm();
unsigned PredReg;
@@ -501,14 +505,14 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return true;
}
- bool HasCCOut = Opcode != ARM::t2ADDri12;
+ bool HasCCOut = (Opcode != ARM::t2ADDspImm12 && Opcode != ARM::t2ADDri12);
if (Offset < 0) {
Offset = -Offset;
isSub = true;
- MI.setDesc(TII.get(ARM::t2SUBri));
+ MI.setDesc(IsSP ? TII.get(ARM::t2SUBspImm) : TII.get(ARM::t2SUBri));
} else {
- MI.setDesc(TII.get(ARM::t2ADDri));
+ MI.setDesc(IsSP ? TII.get(ARM::t2ADDspImm) : TII.get(ARM::t2ADDri));
}
// Common case: small offset, fits into instruction.
@@ -524,7 +528,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Another common case: imm12.
if (Offset < 4096 &&
(!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) {
- unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ unsigned NewOpc = isSub ? IsSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12
+ : IsSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12;
MI.setDesc(TII.get(NewOpc));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
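
When neither a single modified immediate nor an imm12 fits, emitT2RegPlusImmediate above peels off the most significant byte-sized chunk per instruction: rotr32 aligns an 8-bit mask with the value's top set bit, and any such chunk is always encodable as a Thumb-2 modified immediate. A standalone sketch of that extraction (plain C++, with __builtin_clz standing in for countLeadingZeros):

    #include <cstdint>

    // Rotate right, the ARM_AM::rotr32 equivalent.
    static uint32_t rotr32(uint32_t V, unsigned Amt) {
      Amt &= 31;
      return Amt == 0 ? V : (V >> Amt) | (V << (32 - Amt));
    }

    // Extract the top 8 significant bits of V (V != 0). The remainder,
    // V & ~chunk, is what NumBytes keeps for the next loop iteration.
    static uint32_t topSOImmChunk(uint32_t V) {
      unsigned RotAmt = __builtin_clz(V);
      return V & rotr32(0xff000000u, RotAmt);
    }
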
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index a6712d5a0e72..7d8dff14e1e7 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -39,7 +39,7 @@ public:
MachineBasicBlock::iterator MBBI) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
index aa3aca359cb8..27605422983d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
@@ -64,6 +64,25 @@ inline static CondCodes getOppositeCondition(CondCodes CC) {
case LE: return GT;
}
}
+
+/// getSwappedCondition - assuming the flags are set by MI(a,b), return
+/// the condition code to use if we modify the instruction such that the
+/// flags are set by MI(b,a).
+inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: return ARMCC::AL;
+ case ARMCC::EQ: return ARMCC::EQ;
+ case ARMCC::NE: return ARMCC::NE;
+ case ARMCC::HS: return ARMCC::LS;
+ case ARMCC::LO: return ARMCC::HI;
+ case ARMCC::HI: return ARMCC::LO;
+ case ARMCC::LS: return ARMCC::HS;
+ case ARMCC::GE: return ARMCC::LE;
+ case ARMCC::LT: return ARMCC::GT;
+ case ARMCC::GT: return ARMCC::LT;
+ case ARMCC::LE: return ARMCC::GE;
+ }
+}
} // end namespace ARMCC
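
Typical use of getSwappedCondition, as a sketch (hypothetical caller): when the operands of a flag-setting compare are commuted, every consumer's condition must be rewritten, and AL serves as the "no mapping" sentinel.

    // CMP a, b followed by an LT-predicated use becomes
    // CMP b, a with a GT-predicated use.
    ARMCC::CondCodes CC = ARMCC::LT;
    ARMCC::CondCodes Swapped = ARMCC::getSwappedCondition(CC); // ARMCC::GT
    if (Swapped == ARMCC::AL) {
      // unswappable condition; a caller would bail out here
    }
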
namespace ARMVCC {
@@ -72,6 +91,40 @@ namespace ARMVCC {
Then,
Else
};
+
+ enum VPTMaskValue {
+ T = 8, // 0b1000
+ TT = 4, // 0b0100
+ TE = 12, // 0b1100
+ TTT = 2, // 0b0010
+ TTE = 6, // 0b0110
+ TEE = 10, // 0b1010
+ TET = 14, // 0b1110
+ TTTT = 1, // 0b0001
+ TTTE = 3, // 0b0011
+ TTEE = 5, // 0b0101
+ TTET = 7, // 0b0111
+ TEEE = 9, // 0b1001
+ TEET = 11, // 0b1011
+ TETT = 13, // 0b1101
+ TETE = 15 // 0b1111
+ };
+}
+
+inline static unsigned getARMVPTBlockMask(unsigned NumInsts) {
+ switch (NumInsts) {
+ case 1:
+ return ARMVCC::T;
+ case 2:
+ return ARMVCC::TT;
+ case 3:
+ return ARMVCC::TTT;
+ case 4:
+ return ARMVCC::TTTT;
+ default:
+ break;
+ };
+ llvm_unreachable("Unexpected number of instruction in a VPT block");
}
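
The mask values moved here encode VPT blocks as follows (an inference from the values above, stated as an assumption): after the implicit first Then slot, each mask bit read from bit 3 downward toggles the predicate relative to the previous slot, and the lowest set bit terminates the block, hence T = 0b1000 and TTTT = 0b0001. A sketch that decodes a mask under that assumption:

    #include <string>

    // Sketch: decode a 4-bit VPT block mask (Mask != 0) into "T..E..".
    static std::string decodeVPTMask(unsigned Mask) {
      std::string S = "T";              // first slot is always Then
      char Cur = 'T';
      for (int Bit = 3; Bit >= 0; --Bit) {
        bool Set = Mask & (1u << Bit);
        if (Set && (Mask & ((1u << Bit) - 1)) == 0)
          break;                        // lowest set bit: end of block
        if (Set)
          Cur = (Cur == 'T') ? 'E' : 'T';
        S += Cur;
      }
      return S;                         // e.g. 12 -> "TE", 7 -> "TTET"
    }
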
inline static const char *ARMVPTPredToString(ARMVCC::VPTCodes CC) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
index 1db6b2236b4f..9b09c7456543 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -178,7 +178,7 @@ void AVRAsmPrinter::EmitInstruction(const MachineInstr *MI) {
} // end of namespace llvm
-extern "C" void LLVMInitializeAVRAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRAsmPrinter() {
llvm::RegisterAsmPrinter<llvm::AVRAsmPrinter> X(llvm::getTheAVRTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 83d0f6845332..f466c5c053ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -52,6 +52,8 @@ private:
/// The register to be used for temporary storage.
const unsigned SCRATCH_REGISTER = AVR::R0;
+ /// The register that will always contain zero.
+ const unsigned ZERO_REGISTER = AVR::R1;
/// The IO address of the status register.
const unsigned SREG_ADDR = 0x3f;
@@ -1243,6 +1245,93 @@ bool AVRExpandPseudo::expand<AVR::POPWRd>(Block &MBB, BlockIt MBBI) {
}
template <>
+bool AVRExpandPseudo::expand<AVR::ROLBRd>(Block &MBB, BlockIt MBBI) {
+ // In AVR, the rotate instructions behave quite unintuitively. They rotate
+ // bits through the carry bit in SREG, effectively rotating over 9 bits,
+ // instead of 8. This is useful when we are dealing with numbers over
+ // multiple registers, but when we actually need a plain 8-bit rotate, we
+ // have to handle the carry bit explicitly.
+
+ MachineInstr &MI = *MBBI;
+ unsigned OpShift, OpCarry;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ OpShift = AVR::ADDRdRr;
+ OpCarry = AVR::ADCRdRr;
+
+ // add r16, r16
+ // adc r16, r1
+
+ // Shift part
+ buildMI(MBB, MBBI, OpShift)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addReg(DstReg);
+
+ // Add the carry bit
+ auto MIB = buildMI(MBB, MBBI, OpCarry)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addReg(ZERO_REGISTER);
+
+ // SREG is always implicitly killed
+ MIB->getOperand(2).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::RORBRd>(Block &MBB, BlockIt MBBI) {
+ // In AVR, the rotate instructions behave quite unintuitively. They rotate
+ // bits through the carry bit in SREG, effectively rotating over 9 bits,
+ // instead of 8. This is useful when we are dealing with numbers over
+ // multiple registers, but when we actually need a plain 8-bit rotate, we
+ // have to handle the carry bit explicitly.
+
+ MachineInstr &MI = *MBBI;
+ unsigned OpShiftOut, OpLoad, OpShiftIn, OpAdd;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ OpShiftOut = AVR::LSRRd;
+ OpLoad = AVR::LDIRdK;
+ OpShiftIn = AVR::RORRd;
+ OpAdd = AVR::ORRdRr;
+
+ // lsr r16
+ // ldi r0, 0
+ // ror r0
+ // or r16, r0
+
+ // Shift out
+ buildMI(MBB, MBBI, OpShiftOut)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ // Put 0 in temporary register
+ buildMI(MBB, MBBI, OpLoad)
+ .addReg(SCRATCH_REGISTER, RegState::Define | getDeadRegState(true))
+ .addImm(0x00);
+
+ // Shift in
+ buildMI(MBB, MBBI, OpShiftIn)
+ .addReg(SCRATCH_REGISTER, RegState::Define | getDeadRegState(true))
+ .addReg(SCRATCH_REGISTER);
+
+ // Add the results together using an or-instruction
+ auto MIB = buildMI(MBB, MBBI, OpAdd)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addReg(SCRATCH_REGISTER);
+
+ // SREG is always implicitly killed
+ MIB->getOperand(2).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned OpLo, OpHi, DstLoReg, DstHiReg;
@@ -1562,6 +1651,8 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
EXPAND(AVR::OUTWARr);
EXPAND(AVR::PUSHWRr);
EXPAND(AVR::POPWRd);
+ EXPAND(AVR::ROLBRd);
+ EXPAND(AVR::RORBRd);
EXPAND(AVR::LSLWRd);
EXPAND(AVR::LSRWRd);
EXPAND(AVR::RORWRd);
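
A C model of why the two expansions above produce plain 8-bit rotates (a sketch; it assumes, per the ZERO_REGISTER comment, that R1 holds zero):

    #include <cstdint>

    // ROLBRd: "add r,r" shifts left and puts the old bit 7 in carry;
    // "adc r, r1" (r1 == 0) feeds the carry back into bit 0.
    static uint8_t rolb(uint8_t R) {
      unsigned Carry = (R >> 7) & 1;
      return (uint8_t)((R << 1) | Carry);
    }

    // RORBRd: "lsr" puts bit 0 in carry; "ror" on a zeroed scratch
    // register turns the carry into bit 7; "or" merges it back in.
    static uint8_t rorb(uint8_t R) {
      unsigned Carry = R & 1;
      return (uint8_t)((R >> 1) | (Carry << 7));
    }
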
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
index f12c59b7d8c3..880688807702 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -1472,16 +1472,15 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
RC = &AVR::DREGSRegClass;
break;
case AVR::Rol8:
- Opc = AVR::ADCRdRr; // ROL is an alias of ADC Rd, Rd
+ Opc = AVR::ROLBRd;
RC = &AVR::GPR8RegClass;
- HasRepeatedOperand = true;
break;
case AVR::Rol16:
Opc = AVR::ROLWRd;
RC = &AVR::DREGSRegClass;
break;
case AVR::Ror8:
- Opc = AVR::RORRd;
+ Opc = AVR::RORBRd;
RC = &AVR::GPR8RegClass;
break;
case AVR::Ror16:
@@ -2006,11 +2005,11 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-Register AVRTargetLowering::getRegisterByName(const char *RegName, EVT VT,
+Register AVRTargetLowering::getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg;
- if (VT == MVT::i8) {
+ if (VT == LLT::scalar(8)) {
Reg = StringSwitch<unsigned>(RegName)
.Case("r0", AVR::R0).Case("r1", AVR::R1).Case("r2", AVR::R2)
.Case("r3", AVR::R3).Case("r4", AVR::R4).Case("r5", AVR::R5)
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
index 6c722fa5414b..aca1ea1d50e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -125,7 +125,7 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- Register getRegisterByName(const char* RegName, EVT VT,
+ Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL)
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td
index 347e683cd47f..ef596f5cebd5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td
@@ -42,8 +42,8 @@ class AVRInst32<dag outs, dag ins, string asmstr, list<dag> pattern>
}
// A class for pseudo instructions.
-// Psuedo instructions are not real AVR instructions. The DAG stores
-// psuedo instructions which are replaced by real AVR instructions by
+// Pseudo instructions are not real AVR instructions. The DAG stores
+// pseudo instructions which are replaced by real AVR instructions by
// AVRExpandPseudoInsts.cpp.
//
// For example, the ADDW (add wide, as in add 16 bit values) instruction
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index ba7a95e92c5c..a6832f282b31 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -40,8 +40,8 @@ AVRInstrInfo::AVRInstrInfo()
void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
const AVRRegisterInfo &TRI = *STI.getRegisterInfo();
unsigned Opc;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.h
index ba74af325474..bb00ca8c724a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -72,7 +72,7 @@ public:
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, unsigned SrcReg,
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
index caca9b617609..acf991dcfbb1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1310,7 +1310,7 @@ def AtomicLoadOr16 : AtomicLoadOp16<atomic_load_or_16>;
def AtomicLoadXor8 : AtomicLoadOp8<atomic_load_xor_8>;
def AtomicLoadXor16 : AtomicLoadOp16<atomic_load_xor_16>;
def AtomicFence : Pseudo<(outs), (ins), "atomic_fence",
- [(atomic_fence imm, imm)]>;
+ [(atomic_fence timm, timm)]>;
// Indirect store from register to data space.
def STSKRr : F32DM<0b1,
@@ -1676,6 +1676,16 @@ Defs = [SREG] in
{
// 8-bit ROL is an alias of ADC Rd, Rd
+ def ROLBRd : Pseudo<(outs GPR8:$rd),
+ (ins GPR8:$src),
+ "rolb\t$rd",
+ [(set i8:$rd, (AVRrol i8:$src)), (implicit SREG)]>;
+
+ def RORBRd : Pseudo<(outs GPR8:$rd),
+ (ins GPR8:$src),
+ "rorb\t$rd",
+ [(set i8:$rd, (AVRror i8:$src)), (implicit SREG)]>;
+
def ROLWRd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
"rolw\t$rd",
@@ -1686,7 +1696,7 @@ Defs = [SREG] in
(outs GPR8:$rd),
(ins GPR8:$src),
"ror\t$rd",
- [(set i8:$rd, (AVRror i8:$src)), (implicit SREG)]>;
+ []>;
def RORWRd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index 25304280d002..b33284b73d63 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -76,7 +76,7 @@ TargetPassConfig *AVRTargetMachine::createPassConfig(PassManagerBase &PM) {
return new AVRPassConfig(*this, PM);
}
-extern "C" void LLVMInitializeAVRTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() {
// Register the target.
RegisterTargetMachine<AVRTargetMachine> X(getTheAVRTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index af60bc4fdc90..fc34583ae573 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -682,7 +682,7 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
return (parseMany(parseOne));
}
-extern "C" void LLVMInitializeAVRAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRAsmParser() {
RegisterMCAsmParser<AVRAsmParser> X(getTheAVRTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index e203a5069c85..694aee818f7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -40,7 +40,6 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
};
}
@@ -52,7 +51,7 @@ static MCDisassembler *createAVRDisassembler(const Target &T,
}
-extern "C" void LLVMInitializeAVRDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheAVRTarget(),
createAVRDisassembler);
@@ -114,7 +113,6 @@ static const uint8_t *getDecoderTable(uint64_t Size) {
DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const {
uint32_t Insn;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
index 88ce9a25680e..832112406155 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
@@ -32,8 +32,9 @@ namespace llvm {
#define PRINT_ALIAS_INSTR
#include "AVRGenAsmWriter.inc"
-void AVRInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void AVRInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned Opcode = MI->getOpcode();
// First handle load and store instructions with postinc or predec
@@ -78,7 +79,7 @@ void AVRInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
break;
default:
if (!printAliasInstr(MI, O))
- printInstruction(MI, O);
+ printInstruction(MI, Address, O);
printAnnotation(O, Annot);
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
index 5b758a7503c9..247e9fc83989 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
@@ -29,8 +29,8 @@ public:
static const char *getPrettyRegisterName(unsigned RegNo,
MCRegisterInfo const &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
private:
static const char *getRegisterName(unsigned RegNo,
@@ -41,7 +41,7 @@ private:
void printMemri(const MCInst *MI, unsigned OpNo, raw_ostream &O);
// Autogenerated by TableGen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
bool printAliasInstr(const MCInst *MI, raw_ostream &O);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
unsigned PrintMethodIdx, raw_ostream &O);
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
index 99b2172c562f..c25a2b232013 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
@@ -16,7 +16,7 @@
namespace llvm {
-AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT) {
+AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) {
CodePointerSize = 2;
CalleeSaveStackSlotSize = 2;
CommentString = ";";
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
index b2fa18777bc0..17dd77f6266a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
@@ -22,7 +22,7 @@ class Triple;
/// Specifies the format of AVR assembly files.
class AVRMCAsmInfo : public MCAsmInfo {
public:
- explicit AVRMCAsmInfo(const Triple &TT);
+ explicit AVRMCAsmInfo(const Triple &TT, const MCTargetOptions &Options);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
index bc0488778685..db995e247562 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -25,6 +25,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "mccodeemitter"
@@ -268,14 +269,11 @@ unsigned AVRMCCodeEmitter::getMachineOpValue(const MCInst &MI,
void AVRMCCodeEmitter::emitInstruction(uint64_t Val, unsigned Size,
const MCSubtargetInfo &STI,
raw_ostream &OS) const {
- const uint16_t *Words = reinterpret_cast<uint16_t const *>(&Val);
size_t WordCount = Size / 2;
for (int64_t i = WordCount - 1; i >= 0; --i) {
- uint16_t Word = Words[i];
-
- OS << (uint8_t) ((Word & 0x00ff) >> 0);
- OS << (uint8_t) ((Word & 0xff00) >> 8);
+ uint16_t Word = (Val >> (i * 16)) & 0xFFFF;
+ support::endian::write(OS, Word, support::endianness::little);
}
}
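
The old code reinterpreted the value as an array of uint16_t, which silently depended on host endianness; the rewrite extracts each word by shifting and lets support::endian::write emit it little-endian. A freestanding sketch of the resulting byte order:

    #include <cstdint>
    #include <cstdio>

    // Sketch: AVR multi-word instructions are emitted most significant
    // word first, each word little-endian. Val=0x11223344, Size=4
    // prints "22 11 44 33 ".
    static void emitWords(uint64_t Val, unsigned Size) {
      for (int64_t I = Size / 2 - 1; I >= 0; --I) {
        unsigned Word = (unsigned)((Val >> (I * 16)) & 0xFFFF);
        std::printf("%02x %02x ", Word & 0xFF, Word >> 8);
      }
    }
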
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
index f6607b26a065..bfc274d9cdcc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
@@ -89,7 +89,7 @@ static MCTargetStreamer *createMCAsmTargetStreamer(MCStreamer &S,
return new AVRTargetAsmStreamer(S);
}
-extern "C" void LLVMInitializeAVRTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfo<AVRMCAsmInfo> X(getTheAVRTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
index c62d5cb85bc4..69b509b33e88 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
@@ -15,7 +15,7 @@ Target &getTheAVRTarget() {
}
}
-extern "C" void LLVMInitializeAVRTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTargetInfo() {
llvm::RegisterTarget<llvm::Triple::avr> X(llvm::getTheAVRTarget(), "avr",
"Atmel AVR Microcontroller", "AVR");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index ce1d2ecd9d26..1f5d5025bc7b 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -493,7 +493,7 @@ bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
-extern "C" void LLVMInitializeBPFAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index 400701c4e5c2..a28816cc87b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -117,6 +117,7 @@ public:
struct CallInfo {
uint32_t Kind;
uint32_t AccessIndex;
+ uint32_t RecordAlignment;
MDNode *Metadata;
Value *Base;
};
@@ -130,6 +131,8 @@ private:
BPFPreserveFieldInfoAI = 4,
};
+ const DataLayout *DL = nullptr;
+
std::map<std::string, GlobalVariable *> GEPGlobals;
// A map to link preserve_*_access_index instrinsic calls.
std::map<CallInst *, std::pair<CallInst *, CallInfo>> AIChain;
@@ -154,11 +157,11 @@ private:
void replaceWithGEP(std::vector<CallInst *> &CallList,
uint32_t NumOfZerosIndex, uint32_t DIIndex);
bool HasPreserveFieldInfoCall(CallInfoStack &CallStack);
- void GetStorageBitRange(DICompositeType *CTy, DIDerivedType *MemberTy,
- uint32_t AccessIndex, uint32_t &StartBitOffset,
- uint32_t &EndBitOffset);
+ void GetStorageBitRange(DIDerivedType *MemberTy, uint32_t RecordAlignment,
+ uint32_t &StartBitOffset, uint32_t &EndBitOffset);
uint32_t GetFieldInfo(uint32_t InfoKind, DICompositeType *CTy,
- uint32_t AccessIndex, uint32_t PatchImm);
+ uint32_t AccessIndex, uint32_t PatchImm,
+ uint32_t RecordAlignment);
Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo,
std::string &AccessKey, MDNode *&BaseMeta);
@@ -182,6 +185,7 @@ bool BPFAbstractMemberAccess::runOnModule(Module &M) {
if (M.debug_compile_units().empty())
return false;
+ DL = &M.getDataLayout();
return doTransformation(M);
}
@@ -243,6 +247,8 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
report_fatal_error("Missing metadata for llvm.preserve.array.access.index intrinsic");
CInfo.AccessIndex = getConstant(Call->getArgOperand(2));
CInfo.Base = Call->getArgOperand(0);
+ CInfo.RecordAlignment =
+ DL->getABITypeAlignment(CInfo.Base->getType()->getPointerElementType());
return true;
}
if (GV->getName().startswith("llvm.preserve.union.access.index")) {
@@ -252,6 +258,8 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic");
CInfo.AccessIndex = getConstant(Call->getArgOperand(1));
CInfo.Base = Call->getArgOperand(0);
+ CInfo.RecordAlignment =
+ DL->getABITypeAlignment(CInfo.Base->getType()->getPointerElementType());
return true;
}
if (GV->getName().startswith("llvm.preserve.struct.access.index")) {
@@ -261,6 +269,8 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
report_fatal_error("Missing metadata for llvm.preserve.struct.access.index intrinsic");
CInfo.AccessIndex = getConstant(Call->getArgOperand(2));
CInfo.Base = Call->getArgOperand(0);
+ CInfo.RecordAlignment =
+ DL->getABITypeAlignment(CInfo.Base->getType()->getPointerElementType());
return true;
}
if (GV->getName().startswith("llvm.bpf.preserve.field.info")) {
@@ -509,40 +519,29 @@ uint64_t BPFAbstractMemberAccess::getConstant(const Value *IndexValue) {
}
/// Get the start and the end of storage offset for \p MemberTy.
-/// The storage bits are corresponding to the LLVM internal types,
-/// and the storage bits for the member determines what load width
-/// to use in order to extract the bitfield value.
-void BPFAbstractMemberAccess::GetStorageBitRange(DICompositeType *CTy,
- DIDerivedType *MemberTy,
- uint32_t AccessIndex,
+void BPFAbstractMemberAccess::GetStorageBitRange(DIDerivedType *MemberTy,
+ uint32_t RecordAlignment,
uint32_t &StartBitOffset,
uint32_t &EndBitOffset) {
- auto SOff = dyn_cast<ConstantInt>(MemberTy->getStorageOffsetInBits());
- assert(SOff);
- StartBitOffset = SOff->getZExtValue();
-
- EndBitOffset = CTy->getSizeInBits();
- uint32_t Index = AccessIndex + 1;
- for (; Index < CTy->getElements().size(); ++Index) {
- auto Member = cast<DIDerivedType>(CTy->getElements()[Index]);
- if (!Member->getStorageOffsetInBits()) {
- EndBitOffset = Member->getOffsetInBits();
- break;
- }
- SOff = dyn_cast<ConstantInt>(Member->getStorageOffsetInBits());
- assert(SOff);
- unsigned BitOffset = SOff->getZExtValue();
- if (BitOffset != StartBitOffset) {
- EndBitOffset = BitOffset;
- break;
- }
- }
+ uint32_t MemberBitSize = MemberTy->getSizeInBits();
+ uint32_t MemberBitOffset = MemberTy->getOffsetInBits();
+ uint32_t AlignBits = RecordAlignment * 8;
+ if (RecordAlignment > 8 || MemberBitSize > AlignBits)
+ report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, "
+ "requiring too big alignment");
+
+ StartBitOffset = MemberBitOffset & ~(AlignBits - 1);
+ if ((StartBitOffset + AlignBits) < (MemberBitOffset + MemberBitSize))
+ report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, "
+ "cross alignment boundary");
+ EndBitOffset = StartBitOffset + AlignBits;
}
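
Worked through with illustrative numbers (not from the patch): for a bitfield at bit offset 34 of size 3 in a record with 4-byte alignment, AlignBits = 32, StartBitOffset = 34 & ~31 = 32, EndBitOffset = 64, and neither fatal check fires. A self-contained sketch of the arithmetic:

    #include <cstdint>

    // Sketch of GetStorageBitRange above: the storage unit is the
    // AlignBits-wide aligned window containing the whole bitfield;
    // the real code fatal-errors when the field crosses that window.
    static void storageBitRange(uint32_t MemberBitOffset, uint32_t MemberBitSize,
                                uint32_t RecordAlignment, // bytes, <= 8
                                uint32_t &StartBitOffset,
                                uint32_t &EndBitOffset) {
      uint32_t AlignBits = RecordAlignment * 8;
      StartBitOffset = MemberBitOffset & ~(AlignBits - 1);
      EndBitOffset = StartBitOffset + AlignBits;
      (void)MemberBitSize; // only used by the cross-boundary check
    }
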
uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
DICompositeType *CTy,
uint32_t AccessIndex,
- uint32_t PatchImm) {
+ uint32_t PatchImm,
+ uint32_t RecordAlignment) {
if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE)
return 1;
@@ -557,9 +556,10 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
if (!MemberTy->isBitField()) {
PatchImm += MemberTy->getOffsetInBits() >> 3;
} else {
- auto SOffset = dyn_cast<ConstantInt>(MemberTy->getStorageOffsetInBits());
- assert(SOffset);
- PatchImm += SOffset->getZExtValue() >> 3;
+ unsigned SBitOffset, NextSBitOffset;
+ GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset,
+ NextSBitOffset);
+ PatchImm += SBitOffset >> 3;
}
}
return PatchImm;
@@ -576,7 +576,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
return SizeInBits >> 3;
unsigned SBitOffset, NextSBitOffset;
- GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset);
+ GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset);
SizeInBits = NextSBitOffset - SBitOffset;
if (SizeInBits & (SizeInBits - 1))
report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info");
@@ -636,7 +636,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
}
unsigned SBitOffset, NextSBitOffset;
- GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset);
+ GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset);
if (NextSBitOffset - SBitOffset > 64)
report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
@@ -667,7 +667,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
}
unsigned SBitOffset, NextSBitOffset;
- GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset);
+ GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset);
if (NextSBitOffset - SBitOffset > 64)
report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
@@ -822,14 +822,20 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
AccessKey += ":" + std::to_string(AccessIndex);
MDNode *MDN = CInfo.Metadata;
+ uint32_t RecordAlignment = CInfo.RecordAlignment;
// At this stage, it cannot be pointer type.
auto *CTy = cast<DICompositeType>(stripQualifiers(cast<DIType>(MDN)));
- PatchImm = GetFieldInfo(InfoKind, CTy, AccessIndex, PatchImm);
+ PatchImm = GetFieldInfo(InfoKind, CTy, AccessIndex, PatchImm,
+ RecordAlignment);
}
- // Access key is the type name + reloc type + patched imm + access string,
+ // Access key is the
+ // "llvm." + type name + ":" + reloc type + ":" + patched imm + "$" +
+ // access string,
// uniquely identifying one relocation.
- AccessKey = TypeName + ":" + std::to_string(InfoKind) + ":" +
+ // The prefix "llvm." indicates this is a temporary global, which should
+ // not be emitted to ELF file.
+ AccessKey = "llvm." + TypeName + ":" + std::to_string(InfoKind) + ":" +
std::to_string(PatchImm) + "$" + AccessKey;
return Base;
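
A sketch assembling an access key in the format the comment above documents (all concrete values hypothetical):

    #include <string>

    // Key format: "llvm." + type name + ":" + reloc kind + ":" +
    // patched imm + "$" + access string. The "llvm." prefix keeps the
    // temporary global out of the emitted ELF file.
    static std::string makeAccessKey(const std::string &TypeName,
                                     unsigned InfoKind, unsigned PatchImm,
                                     const std::string &Access) {
      return "llvm." + TypeName + ":" + std::to_string(InfoKind) + ":" +
             std::to_string(PatchImm) + "$" + Access; // e.g. "llvm.s:0:8$0:2"
    }
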
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index 218b0302927c..b81386f479d3 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -148,7 +148,7 @@ void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// Force static initialization.
-extern "C" void LLVMInitializeBPFAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmPrinter() {
RegisterAsmPrinter<BPFAsmPrinter> X(getTheBPFleTarget());
RegisterAsmPrinter<BPFAsmPrinter> Y(getTheBPFbeTarget());
RegisterAsmPrinter<BPFAsmPrinter> Z(getTheBPFTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index f2be0ff070d2..6f5f58554d09 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 6de3a4084d3d..626c204e99e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -30,8 +30,8 @@ BPFInstrInfo::BPFInstrInfo()
void BPFInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
if (BPF::GPRRegClass.contains(DestReg, SrcReg))
BuildMI(MBB, I, DL, get(BPF::MOV_rr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.h
index e4bd757da560..22413d530e17 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.h
@@ -30,7 +30,7 @@ public:
const BPFRegisterInfo &getRegisterInfo() const { return RI; }
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
bool expandPostRAPseudo(MachineInstr &MI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
index ae5a82a99303..0f39294daa2b 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -437,6 +437,25 @@ class LOAD<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
class LOADi64<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
: LOAD<SizeOp, OpcodeStr, [(set i64:$dst, (OpNode ADDRri:$addr))]>;
+let isCodeGenOnly = 1 in {
+ def CORE_MEM : TYPE_LD_ST<BPF_MEM.Value, BPF_W.Value,
+ (outs GPR:$dst),
+ (ins u64imm:$opcode, GPR:$src, u64imm:$offset),
+ "$dst = core_mem($opcode, $src, $offset)",
+ []>;
+ def CORE_ALU32_MEM : TYPE_LD_ST<BPF_MEM.Value, BPF_W.Value,
+ (outs GPR32:$dst),
+ (ins u64imm:$opcode, GPR:$src, u64imm:$offset),
+ "$dst = core_alu32_mem($opcode, $src, $offset)",
+ []>;
+ let Constraints = "$dst = $src" in {
+ def CORE_SHIFT : ALU_RR<BPF_ALU64, BPF_LSH,
+ (outs GPR:$dst),
+ (ins u64imm:$opcode, GPR:$src, u64imm:$offset),
+ "$dst = core_shift($opcode, $src, $offset)",
+ []>;
+ }
+}
let Predicates = [BPFNoALU32] in {
def LDW : LOADi64<BPF_W, "u32", zextloadi32>;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index e9eecc55c3c3..022267fbe3c2 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -51,9 +51,14 @@ private:
// Initialize class variables.
void initialize(MachineFunction &MFParm);
+ bool isCopyFrom32Def(MachineInstr *CopyMI);
+ bool isInsnFrom32Def(MachineInstr *DefInsn);
+ bool isPhiFrom32Def(MachineInstr *MovMI);
bool isMovFrom32Def(MachineInstr *MovMI);
bool eliminateZExtSeq(void);
+ std::set<MachineInstr *> PhiInsns;
+
public:
// Main entry point for this pass.
@@ -75,42 +80,86 @@ void BPFMIPeephole::initialize(MachineFunction &MFParm) {
LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
}
-bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
+bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
{
- MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
+ MachineOperand &opnd = CopyMI->getOperand(1);
- LLVM_DEBUG(dbgs() << " Def of Mov Src:");
- LLVM_DEBUG(DefInsn->dump());
+ if (!opnd.isReg())
+ return false;
- if (!DefInsn)
+ // Return false if the value comes from a 32-bit physical register.
+ // Most likely, this physical register is aliased to a function call
+ // return value or to the current function's parameters.
+ Register Reg = opnd.getReg();
+ if (!Register::isVirtualRegister(Reg))
return false;
- if (DefInsn->isPHI()) {
- for (unsigned i = 1, e = DefInsn->getNumOperands(); i < e; i += 2) {
- MachineOperand &opnd = DefInsn->getOperand(i);
+ if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
+ return false;
- if (!opnd.isReg())
- return false;
+ MachineInstr *DefInsn = MRI->getVRegDef(Reg);
+ if (!isInsnFrom32Def(DefInsn))
+ return false;
- MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
- // quick check on PHI incoming definitions.
- if (!PhiDef || PhiDef->isPHI() || PhiDef->getOpcode() == BPF::COPY)
+ return true;
+}
+
+bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
+{
+ for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
+ MachineOperand &opnd = PhiMI->getOperand(i);
+
+ if (!opnd.isReg())
+ return false;
+
+ MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
+ if (!PhiDef)
+ return false;
+ if (PhiDef->isPHI()) {
+ if (PhiInsns.find(PhiDef) != PhiInsns.end())
+ return false;
+ PhiInsns.insert(PhiDef);
+ if (!isPhiFrom32Def(PhiDef))
return false;
}
+ if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
+ return false;
}
- if (DefInsn->getOpcode() == BPF::COPY) {
- MachineOperand &opnd = DefInsn->getOperand(1);
+ return true;
+}
- if (!opnd.isReg())
- return false;
+// The \p DefInsn instruction defines a virtual register.
+bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
+{
+ if (!DefInsn)
+ return false;
- Register Reg = opnd.getReg();
- if ((Register::isVirtualRegister(Reg) &&
- MRI->getRegClass(Reg) == &BPF::GPRRegClass))
+ if (DefInsn->isPHI()) {
+ if (PhiInsns.find(DefInsn) != PhiInsns.end())
+ return false;
+ PhiInsns.insert(DefInsn);
+ if (!isPhiFrom32Def(DefInsn))
+ return false;
+ } else if (DefInsn->getOpcode() == BPF::COPY) {
+ if (!isCopyFrom32Def(DefInsn))
return false;
}
+ return true;
+}
+
+bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
+{
+ MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
+
+ LLVM_DEBUG(dbgs() << " Def of Mov Src:");
+ LLVM_DEBUG(DefInsn->dump());
+
+ PhiInsns.clear();
+ if (!isInsnFrom32Def(DefInsn))
+ return false;
+
LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");
return true;
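The PhiInsns set above is what breaks the recursion on cyclic PHI graphs; a self-contained sketch of that visited-set pattern, with hypothetical node and type names standing in for MachineInstr and virtual-register defs:

#include <set>
#include <vector>
struct Node {                       // hypothetical stand-in for MachineInstr
  std::vector<Node *> Inputs;       // PHI incoming definitions
  bool Is32BitDef = false;          // leaf property being queried
};
static bool from32Def(Node *N, std::set<Node *> &Visited) {
  if (!N)
    return false;
  if (!Visited.insert(N).second)    // already visited: PHI cycle, give up
    return false;
  if (N->Inputs.empty())
    return N->Is32BitDef;
  for (Node *In : N->Inputs)        // every incoming path must qualify
    if (!from32Def(In, Visited))
      return false;
  return true;
}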
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 9c689aed6417..5310f0f07b65 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -53,6 +53,19 @@ private:
void initialize(MachineFunction &MFParm);
bool removeLD(void);
+ void processCandidate(MachineRegisterInfo *MRI, MachineBasicBlock &MBB,
+ MachineInstr &MI, Register &SrcReg, Register &DstReg,
+ const GlobalValue *GVal);
+ void processDstReg(MachineRegisterInfo *MRI, Register &DstReg,
+ Register &SrcReg, const GlobalValue *GVal,
+ bool doSrcRegProp);
+ void processInst(MachineRegisterInfo *MRI, MachineInstr *Inst,
+ MachineOperand *RelocOp, const GlobalValue *GVal);
+ void checkADDrr(MachineRegisterInfo *MRI, MachineOperand *RelocOp,
+ const GlobalValue *GVal);
+ void checkShift(MachineRegisterInfo *MRI, MachineBasicBlock &MBB,
+ MachineOperand *RelocOp, const GlobalValue *GVal,
+ unsigned Opcode);
public:
// Main entry point for this pass.
@@ -71,6 +84,146 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) {
LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n");
}
+void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
+ MachineOperand *RelocOp, const GlobalValue *GVal) {
+ const MachineInstr *Inst = RelocOp->getParent();
+ const MachineOperand *Op1 = &Inst->getOperand(1);
+ const MachineOperand *Op2 = &Inst->getOperand(2);
+ const MachineOperand *BaseOp = (RelocOp == Op1) ? Op2 : Op1;
+
+ // Go through all uses of %1 as in %1 = ADD_rr %2, %3
+ const MachineOperand Op0 = Inst->getOperand(0);
+ auto Begin = MRI->use_begin(Op0.getReg()), End = MRI->use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ // The candidate needs to have a unique definition.
+ if (!MRI->getUniqueVRegDef(I->getReg()))
+ continue;
+
+ MachineInstr *DefInst = I->getParent();
+ unsigned Opcode = DefInst->getOpcode();
+ unsigned COREOp;
+ if (Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
+ Opcode == BPF::LDD || Opcode == BPF::STB || Opcode == BPF::STH ||
+ Opcode == BPF::STW || Opcode == BPF::STD)
+ COREOp = BPF::CORE_MEM;
+ else if (Opcode == BPF::LDB32 || Opcode == BPF::LDH32 ||
+ Opcode == BPF::LDW32 || Opcode == BPF::STB32 ||
+ Opcode == BPF::STH32 || Opcode == BPF::STW32)
+ COREOp = BPF::CORE_ALU32_MEM;
+ else
+ continue;
+
+ // It must be a form of %1 = *(type *)(%2 + 0) or *(type *)(%2 + 0) = %1.
+ const MachineOperand &ImmOp = DefInst->getOperand(2);
+ if (!ImmOp.isImm() || ImmOp.getImm() != 0)
+ continue;
+
+ BuildMI(*DefInst->getParent(), *DefInst, DefInst->getDebugLoc(), TII->get(COREOp))
+ .add(DefInst->getOperand(0)).addImm(Opcode).add(*BaseOp)
+ .addGlobalAddress(GVal);
+ DefInst->eraseFromParent();
+ }
+}
+
+void BPFMISimplifyPatchable::checkShift(MachineRegisterInfo *MRI,
+ MachineBasicBlock &MBB, MachineOperand *RelocOp, const GlobalValue *GVal,
+ unsigned Opcode) {
+ // The relocation operand should be operand #2.
+ MachineInstr *Inst = RelocOp->getParent();
+ if (RelocOp != &Inst->getOperand(2))
+ return;
+
+ BuildMI(MBB, *Inst, Inst->getDebugLoc(), TII->get(BPF::CORE_SHIFT))
+ .add(Inst->getOperand(0)).addImm(Opcode)
+ .add(Inst->getOperand(1)).addGlobalAddress(GVal);
+ Inst->eraseFromParent();
+}
+
+void BPFMISimplifyPatchable::processCandidate(MachineRegisterInfo *MRI,
+ MachineBasicBlock &MBB, MachineInstr &MI, Register &SrcReg,
+ Register &DstReg, const GlobalValue *GVal) {
+ if (MRI->getRegClass(DstReg) == &BPF::GPR32RegClass) {
+ // We can optimize such a pattern:
+ // %1:gpr = LD_imm64 @"llvm.s:0:4$0:2"
+ // %2:gpr32 = LDW32 %1:gpr, 0
+ // %3:gpr = SUBREG_TO_REG 0, %2:gpr32, %subreg.sub_32
+ // %4:gpr = ADD_rr %0:gpr, %3:gpr
+ // or similar patterns below for non-alu32 case.
+ auto Begin = MRI->use_begin(DstReg), End = MRI->use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ if (!MRI->getUniqueVRegDef(I->getReg()))
+ continue;
+
+ unsigned Opcode = I->getParent()->getOpcode();
+ if (Opcode == BPF::SUBREG_TO_REG) {
+ Register TmpReg = I->getParent()->getOperand(0).getReg();
+ processDstReg(MRI, TmpReg, DstReg, GVal, false);
+ }
+ }
+
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::COPY), DstReg)
+ .addReg(SrcReg, 0, BPF::sub_32);
+ return;
+ }
+
+ // All uses of DstReg are replaced by SrcReg.
+ processDstReg(MRI, DstReg, SrcReg, GVal, true);
+}
+
+void BPFMISimplifyPatchable::processDstReg(MachineRegisterInfo *MRI,
+ Register &DstReg, Register &SrcReg, const GlobalValue *GVal,
+ bool doSrcRegProp) {
+ auto Begin = MRI->use_begin(DstReg), End = MRI->use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ if (doSrcRegProp)
+ I->setReg(SrcReg);
+
+ // The candidate needs to have a unique definition.
+ if (MRI->getUniqueVRegDef(I->getReg()))
+ processInst(MRI, I->getParent(), &*I, GVal);
+ }
+}
+
+// Check to see whether we can do some optimization
+// to attach the relocation to downstream dependent instructions.
+// Two kinds of patterns are recognized below:
+// Pattern 1:
+// %1 = LD_imm64 @"llvm.b:0:4$0:1" <== patch_imm = 4
+// %2 = LDD %1, 0 <== this insn will be removed
+// %3 = ADD_rr %0, %2
+// %4 = LDW[32] %3, 0 OR STW[32] %4, %3, 0
+// The `%4 = ...` will be transformed to
+// CORE_[ALU32_]MEM(%4, mem_opcode, %0, @"llvm.b:0:4$0:1")
+// and later on, BTF emit phase will translate to
+// %4 = LDW[32] %0, 4 OR STW[32] %4, %0, 4
+// and attach a relocation to it.
+// Pattern 2:
+// %15 = LD_imm64 @"llvm.t:5:63$0:2" <== relocation type 5
+// %16 = LDD %15, 0 <== this insn will be removed
+// %17 = SRA_rr %14, %16
+// The `%17 = ...` will be transformed to
+// %17 = CORE_SHIFT(SRA_ri, %14, @"llvm.t:5:63$0:2")
+// and later on, BTF emit phase will translate to
+// %r4 = SRA_ri %r4, 63
+void BPFMISimplifyPatchable::processInst(MachineRegisterInfo *MRI,
+ MachineInstr *Inst, MachineOperand *RelocOp, const GlobalValue *GVal) {
+ unsigned Opcode = Inst->getOpcode();
+ if (Opcode == BPF::ADD_rr)
+ checkADDrr(MRI, RelocOp, GVal);
+ else if (Opcode == BPF::SLL_rr)
+ checkShift(MRI, *Inst->getParent(), RelocOp, GVal, BPF::SLL_ri);
+ else if (Opcode == BPF::SRA_rr)
+ checkShift(MRI, *Inst->getParent(), RelocOp, GVal, BPF::SRA_ri);
+ else if (Opcode == BPF::SRL_rr)
+ checkShift(MRI, *Inst->getParent(), RelocOp, GVal, BPF::SRL_ri);
+}
+
/// Remove unneeded Load instructions.
bool BPFMISimplifyPatchable::removeLD() {
MachineRegisterInfo *MRI = &MF->getRegInfo();
@@ -105,10 +258,11 @@ bool BPFMISimplifyPatchable::removeLD() {
continue;
bool IsCandidate = false;
+ const GlobalValue *GVal = nullptr;
if (DefInst->getOpcode() == BPF::LD_imm64) {
const MachineOperand &MO = DefInst->getOperand(1);
if (MO.isGlobal()) {
- const GlobalValue *GVal = MO.getGlobal();
+ GVal = MO.getGlobal();
auto *GVar = dyn_cast<GlobalVariable>(GVal);
if (GVar) {
// Global variables representing structure offset or
@@ -124,12 +278,7 @@ bool BPFMISimplifyPatchable::removeLD() {
if (!IsCandidate)
continue;
- auto Begin = MRI->use_begin(DstReg), End = MRI->use_end();
- decltype(End) NextI;
- for (auto I = Begin; I != End; I = NextI) {
- NextI = std::next(I);
- I->setReg(SrcReg);
- }
+ processCandidate(MRI, MBB, MI, SrcReg, DstReg, GVal);
ToErase = &MI;
Changed = true;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp
index ab3452501b95..f3cb03b1f1f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -52,6 +52,7 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPU == "v3") {
HasJmpExt = true;
HasJmp32 = true;
+ HasAlu32 = true;
return;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 0c4f2c74e7a4..40375bc88bff 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -27,7 +27,7 @@ static cl::
opt<bool> DisableMIPeephole("disable-bpf-peephole", cl::Hidden,
cl::desc("Disable machine peepholes for BPF"));
-extern "C" void LLVMInitializeBPFTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
// Register the target.
RegisterTargetMachine<BPFTargetMachine> X(getTheBPFleTarget());
RegisterTargetMachine<BPFTargetMachine> Y(getTheBPFbeTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BTF.h b/contrib/llvm-project/llvm/lib/Target/BPF/BTF.h
index a13c862bf840..ad3dcc14c38a 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BTF.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BTF.h
@@ -176,12 +176,18 @@ struct BTFParam {
uint32_t Type;
};
+/// BTF_KIND_FUNC can be global, static, or extern.
+enum : uint8_t {
+ FUNC_STATIC = 0,
+ FUNC_GLOBAL = 1,
+ FUNC_EXTERN = 2,
+};
+
/// Variable scoping information.
enum : uint8_t {
VAR_STATIC = 0, ///< Linkage: InternalLinkage
VAR_GLOBAL_ALLOCATED = 1, ///< Linkage: ExternalLinkage
- VAR_GLOBAL_TENTATIVE = 2, ///< Linkage: CommonLinkage
- VAR_GLOBAL_EXTERNAL = 3, ///< Linkage: ExternalLinkage
+ VAR_GLOBAL_EXTERNAL = 2, ///< Linkage: ExternalLinkage
};
/// BTF_KIND_DATASEC are followed by multiple "struct BTFDataSecVar".
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
index db551e739bd7..a9fb04f20d1c 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -308,10 +308,11 @@ void BTFTypeFuncProto::emitType(MCStreamer &OS) {
}
}
-BTFTypeFunc::BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId)
+BTFTypeFunc::BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId,
+ uint32_t Scope)
: Name(FuncName) {
Kind = BTF::BTF_KIND_FUNC;
- BTFType.Info = Kind << 24;
+ BTFType.Info = (Kind << 24) | Scope;
BTFType.Type = ProtoTypeId;
}
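A hedged sketch of the Info packing introduced in this constructor; the exact mask widths below are assumptions based on the expression (Kind << 24) | Scope, with the kind in the top byte and the scope reusing the low vlen bits:

#include <cassert>
#include <cstdint>
static uint32_t encodeInfo(uint32_t Kind, uint32_t Scope) {
  return (Kind << 24) | Scope;          // as in the BTFTypeFunc constructor
}
int main() {
  // Hypothetical: BTF_KIND_FUNC assumed to be 12, FUNC_EXTERN is 2.
  uint32_t Info = encodeInfo(12, 2);
  assert(((Info >> 24) & 0x0f) == 12);  // kind; mask width is an assumption
  assert((Info & 0xffff) == 2);         // scope, stored in the vlen slot
  return 0;
}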
@@ -897,8 +898,9 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
visitSubroutineType(SP->getType(), true, FuncArgNames, ProtoTypeId);
// Construct subprogram func type
+ uint8_t Scope = SP->isLocalToUnit() ? BTF::FUNC_STATIC : BTF::FUNC_GLOBAL;
auto FuncTypeEntry =
- std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId);
+ std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId, Scope);
uint32_t FuncTypeId = addType(std::move(FuncTypeEntry));
for (const auto &TypeEntry : TypeEntries)
@@ -937,9 +939,8 @@ unsigned BTFDebug::populateStructType(const DIType *Ty) {
}
/// Generate a struct member field relocation.
-void BTFDebug::generateFieldReloc(const MachineInstr *MI,
- const MCSymbol *ORSym, DIType *RootTy,
- StringRef AccessPattern) {
+void BTFDebug::generateFieldReloc(const MCSymbol *ORSym, DIType *RootTy,
+ StringRef AccessPattern) {
unsigned RootId = populateStructType(RootTy);
size_t FirstDollar = AccessPattern.find_first_of('$');
size_t FirstColon = AccessPattern.find_first_of(':');
@@ -959,33 +960,8 @@ void BTFDebug::generateFieldReloc(const MachineInstr *MI,
FieldRelocTable[SecNameOff].push_back(FieldReloc);
}
-void BTFDebug::processLDimm64(const MachineInstr *MI) {
- // If the insn is an LD_imm64, the following two cases
- // will generate an .BTF.ext record.
- //
- // If the insn is "r2 = LD_imm64 @__BTF_...",
- // add this insn into the .BTF.ext FieldReloc subsection.
- // Relocation looks like:
- // . SecName:
- // . InstOffset
- // . TypeID
- // . OffSetNameOff
- // Later, the insn is replaced with "r2 = <offset>"
- // where "<offset>" equals to the offset based on current
- // type definitions.
- //
- // If the insn is "r2 = LD_imm64 @VAR" and VAR is
- // a patchable external global, add this insn into the .BTF.ext
- // ExternReloc subsection.
- // Relocation looks like:
- // . SecName:
- // . InstOffset
- // . ExternNameOff
- // Later, the insn is replaced with "r2 = <value>" or
- // "LD_imm64 r2, <value>" where "<value>" = 0.
-
+void BTFDebug::processReloc(const MachineOperand &MO) {
// Check whether this is a candidate or not.
- const MachineOperand &MO = MI->getOperand(1);
if (MO.isGlobal()) {
const GlobalValue *GVal = MO.getGlobal();
auto *GVar = dyn_cast<GlobalVariable>(GVal);
@@ -995,7 +971,7 @@ void BTFDebug::processLDimm64(const MachineInstr *MI) {
MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
DIType *Ty = dyn_cast<DIType>(MDN);
- generateFieldReloc(MI, ORSym, Ty, GVar->getName());
+ generateFieldReloc(ORSym, Ty, GVar->getName());
}
}
}
@@ -1020,8 +996,31 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) {
return;
}
- if (MI->getOpcode() == BPF::LD_imm64)
- processLDimm64(MI);
+ if (MI->getOpcode() == BPF::LD_imm64) {
+ // If the insn is "r2 = LD_imm64 @<an AmaAttr global>",
+ // add this insn into the .BTF.ext FieldReloc subsection.
+ // Relocation looks like:
+ // . SecName:
+ // . InstOffset
+ // . TypeID
+ // . OffSetNameOff
+ // . RelocType
+ // Later, the insn is replaced with "r2 = <offset>"
+ // where "<offset>" equals to the offset based on current
+ // type definitions.
+ processReloc(MI->getOperand(1));
+ } else if (MI->getOpcode() == BPF::CORE_MEM ||
+ MI->getOpcode() == BPF::CORE_ALU32_MEM ||
+ MI->getOpcode() == BPF::CORE_SHIFT) {
+ // The relocation insn is a load, store, or shift insn.
+ processReloc(MI->getOperand(3));
+ } else if (MI->getOpcode() == BPF::JAL) {
+ // Check for extern function references.
+ const MachineOperand &MO = MI->getOperand(0);
+ if (MO.isGlobal()) {
+ processFuncPrototypes(dyn_cast<Function>(MO.getGlobal()));
+ }
+ }
// Skip this instruction if no DebugLoc or the DebugLoc
// is the same as the previous instruction.
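For reference, a hedged sketch of one FieldReloc record as enumerated in the relocation comment above (field names follow the BTFFieldReloc struct later in this diff; treating it as the on-disk layout is an assumption):

#include <cstdint>
struct FieldRelocRecord {           // sketch only, not the emitted layout
  uint32_t InsnOffset;              // InstOffset within SecName
  uint32_t TypeID;                  // BTF type id of the root type
  uint32_t OffsetNameOff;           // string-table offset of access string
  uint32_t RelocKind;               // RelocType, e.g. field byte offset
};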
@@ -1055,20 +1054,20 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
// Collect all types referenced by globals.
const Module *M = MMI->getModule();
for (const GlobalVariable &Global : M->globals()) {
- // Ignore external globals for now.
- if (!Global.hasInitializer() && Global.hasExternalLinkage())
- continue;
-
// Decide the section name.
StringRef SecName;
if (Global.hasSection()) {
SecName = Global.getSection();
- } else {
+ } else if (Global.hasInitializer()) {
// data, bss, or readonly sections
if (Global.isConstant())
SecName = ".rodata";
else
SecName = Global.getInitializer()->isZeroValue() ? ".bss" : ".data";
+ } else {
+ // Extern variables without an explicit section
+ // are put into the ".extern" section.
+ SecName = ".extern";
}
if (ProcessingMapDef != SecName.startswith(".maps"))
@@ -1076,6 +1075,11 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
SmallVector<DIGlobalVariableExpression *, 1> GVs;
Global.getDebugInfo(GVs);
+
+ // No type information; mostly an internal global, so skip it.
+ if (GVs.size() == 0)
+ continue;
+
uint32_t GVTypeId = 0;
for (auto *GVE : GVs) {
if (SecName.startswith(".maps"))
@@ -1087,25 +1091,33 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
// Only support the following globals:
// . static variables
- // . non-static global variables with section attributes
- // Essentially means:
- // . .bcc/.data/.rodata DataSec entities only contain static data
- // . Other DataSec entities contain static or initialized global data.
- // Initialized global data are mostly used for finding map key/value type
- // id's. Whether DataSec is readonly or not can be found from
- // corresponding ELF section flags.
+ // . non-static weak or non-weak global variables
+ // . weak or non-weak extern global variables
+ // Whether a DataSec is readonly or not can be found from the corresponding
+ // ELF section flags. Whether a BTF_KIND_VAR is a weak symbol or not
+ // can be found from the corresponding ELF symbol table.
auto Linkage = Global.getLinkage();
if (Linkage != GlobalValue::InternalLinkage &&
- (Linkage != GlobalValue::ExternalLinkage || !Global.hasSection()))
+ Linkage != GlobalValue::ExternalLinkage &&
+ Linkage != GlobalValue::WeakAnyLinkage &&
+ Linkage != GlobalValue::ExternalWeakLinkage)
continue;
- uint32_t GVarInfo = Linkage == GlobalValue::ExternalLinkage
- ? BTF::VAR_GLOBAL_ALLOCATED
- : BTF::VAR_STATIC;
+ uint32_t GVarInfo;
+ if (Linkage == GlobalValue::InternalLinkage) {
+ GVarInfo = BTF::VAR_STATIC;
+ } else if (Global.hasInitializer()) {
+ GVarInfo = BTF::VAR_GLOBAL_ALLOCATED;
+ } else {
+ GVarInfo = BTF::VAR_GLOBAL_EXTERNAL;
+ }
+
auto VarEntry =
std::make_unique<BTFKindVar>(Global.getName(), GVTypeId, GVarInfo);
uint32_t VarId = addType(std::move(VarEntry));
+ assert(!SecName.empty());
+
// Find or create a DataSec
if (DataSecEntries.find(SecName) == DataSecEntries.end()) {
DataSecEntries[SecName] = std::make_unique<BTFKindDataSec>(Asm, SecName);
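A hedged illustration, as hypothetical C input, of the linkage-to-BTF mapping chosen above (section names follow the earlier processGlobals logic):

static int a;   // InternalLinkage              -> BTF::VAR_STATIC
int b = 1;      // ExternalLinkage, initialized -> BTF::VAR_GLOBAL_ALLOCATED (.data)
extern int c;   // no initializer               -> BTF::VAR_GLOBAL_EXTERNAL (.extern)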
@@ -1135,10 +1147,52 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
return true;
}
}
+ } else if (MI->getOpcode() == BPF::CORE_MEM ||
+ MI->getOpcode() == BPF::CORE_ALU32_MEM ||
+ MI->getOpcode() == BPF::CORE_SHIFT) {
+ const MachineOperand &MO = MI->getOperand(3);
+ if (MO.isGlobal()) {
+ const GlobalValue *GVal = MO.getGlobal();
+ auto *GVar = dyn_cast<GlobalVariable>(GVal);
+ if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
+ uint32_t Imm = PatchImms[GVar->getName().str()];
+ OutMI.setOpcode(MI->getOperand(1).getImm());
+ if (MI->getOperand(0).isImm())
+ OutMI.addOperand(MCOperand::createImm(MI->getOperand(0).getImm()));
+ else
+ OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ OutMI.addOperand(MCOperand::createReg(MI->getOperand(2).getReg()));
+ OutMI.addOperand(MCOperand::createImm(Imm));
+ return true;
+ }
+ }
}
return false;
}
+void BTFDebug::processFuncPrototypes(const Function *F) {
+ if (!F)
+ return;
+
+ const DISubprogram *SP = F->getSubprogram();
+ if (!SP || SP->isDefinition())
+ return;
+
+ // Do not emit again if already emitted.
+ if (ProtoFunctions.find(F) != ProtoFunctions.end())
+ return;
+ ProtoFunctions.insert(F);
+
+ uint32_t ProtoTypeId;
+ const std::unordered_map<uint32_t, StringRef> FuncArgNames;
+ visitSubroutineType(SP->getType(), false, FuncArgNames, ProtoTypeId);
+
+ uint8_t Scope = BTF::FUNC_EXTERN;
+ auto FuncTypeEntry =
+ std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId, Scope);
+ addType(std::move(FuncTypeEntry));
+}
+
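A hedged example of the case handled above, as hypothetical C source: a call through a declaration-only prototype reaches processFuncPrototypes() via the JAL operand, so it gets a BTF_KIND_FUNC entry with scope FUNC_EXTERN (the defined caller gets FUNC_GLOBAL via beginFunctionImpl):

extern int helper(void *ctx);                // declaration only -> FUNC_EXTERN
int prog(void *ctx) { return helper(ctx); }  // definition -> FUNC_GLOBAL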
void BTFDebug::endModule() {
// Collect MapDef globals if not collected yet.
if (MapDefNotCollected) {
@@ -1148,6 +1202,7 @@ void BTFDebug::endModule() {
// Collect global types/variables except MapDef globals.
processGlobals(false);
+
for (auto &DataSec : DataSecEntries)
addType(std::move(DataSec.second));
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h
index c01e0d1d1612..0812c4f7915d 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/DebugHandlerBase.h"
+#include <set>
#include <unordered_map>
#include "BTF.h"
@@ -151,7 +152,7 @@ class BTFTypeFunc : public BTFTypeBase {
StringRef Name;
public:
- BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId);
+ BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId, uint32_t Scope);
uint32_t getSize() { return BTFTypeBase::getSize(); }
void completeType(BTFDebug &BDebug);
void emitType(MCStreamer &OS);
@@ -223,7 +224,7 @@ struct BTFLineInfo {
uint32_t ColumnNum; ///< the column number
};
-/// Represent one offset relocation.
+/// Represent one field relocation.
struct BTFFieldReloc {
const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc
uint32_t TypeID; ///< Type ID
@@ -251,6 +252,7 @@ class BTFDebug : public DebugHandlerBase {
std::map<std::string, uint32_t> PatchImms;
std::map<StringRef, std::pair<bool, std::vector<BTFTypeDerived *>>>
FixupDerivedTypes;
+ std::set<const Function *> ProtoFunctions;
/// Add types to TypeEntries.
/// @{
@@ -293,15 +295,18 @@ class BTFDebug : public DebugHandlerBase {
/// Generate types and variables for globals.
void processGlobals(bool ProcessingMapDef);
- /// Generate one offset relocation record.
- void generateFieldReloc(const MachineInstr *MI, const MCSymbol *ORSym,
- DIType *RootTy, StringRef AccessPattern);
+ /// Generate types for function prototypes.
+ void processFuncPrototypes(const Function *);
+
+ /// Generate one field relocation record.
+ void generateFieldReloc(const MCSymbol *ORSym, DIType *RootTy,
+ StringRef AccessPattern);
/// Populating unprocessed struct type.
unsigned populateStructType(const DIType *Ty);
- /// Process LD_imm64 instructions.
- void processLDimm64(const MachineInstr *MI);
+ /// Process relocation instructions.
+ void processReloc(const MachineOperand &MO);
/// Emit common header of .BTF and .BTF.ext sections.
void emitCommonHeader();
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index c845524ad657..75f963b5448a 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -67,7 +67,6 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
uint8_t getInstClass(uint64_t Inst) const { return (Inst >> 56) & 0x7; }
@@ -84,7 +83,7 @@ static MCDisassembler *createBPFDisassembler(const Target &T,
}
-extern "C" void LLVMInitializeBPFDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheBPFTarget(),
createBPFDisassembler);
@@ -162,7 +161,6 @@ static DecodeStatus readInstruction64(ArrayRef<uint8_t> Bytes, uint64_t Address,
DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const {
bool IsLittleEndian = getContext().getAsmInfo()->isLittleEndian();
uint64_t Insn, Hi;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
index 079202994c8d..e0aeec989879 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
@@ -24,9 +24,10 @@ using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#include "BPFGenAsmWriter.inc"
-void BPFInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
- printInstruction(MI, O);
+void BPFInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h
index 8c9a0bc94cff..2181bb575cdd 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h
@@ -22,8 +22,8 @@ public:
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = nullptr);
void printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
@@ -32,7 +32,7 @@ public:
void printBrTargetOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
};
}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 04a6a87cebc9..97f0cbd58608 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -21,7 +21,7 @@ class Target;
class BPFMCAsmInfo : public MCAsmInfo {
public:
- explicit BPFMCAsmInfo(const Triple &TT) {
+ explicit BPFMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) {
if (TT.getArch() == Triple::bpfeb)
IsLittleEndian = false;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index fa27b335f3a1..58da0830d002 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -97,7 +97,7 @@ static MCInstrAnalysis *createBPFInstrAnalysis(const MCInstrInfo *Info) {
return new BPFMCInstrAnalysis(Info);
}
-extern "C" void LLVMInitializeBPFTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTargetMC() {
for (Target *T :
{&getTheBPFleTarget(), &getTheBPFbeTarget(), &getTheBPFTarget()}) {
// Register the MC asm info.
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
index 5dfa915034ba..49eb9ad62c56 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
@@ -24,7 +24,7 @@ Target &llvm::getTheBPFTarget() {
return TheBPFTarget;
}
-extern "C" void LLVMInitializeBPFTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTargetInfo() {
TargetRegistry::RegisterTarget(getTheBPFTarget(), "bpf", "BPF (host endian)",
"BPF", [](Triple::ArchType) { return false; },
true);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 590c4a2eb69d..cee1954e369b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -813,10 +813,10 @@ bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const {
return true;
}
-// extern "C" void LLVMInitializeHexagonAsmLexer();
+// extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonAsmLexer();
/// Force static initialization.
-extern "C" void LLVMInitializeHexagonAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonAsmParser() {
RegisterMCAsmParser<HexagonAsmParser> X(getTheHexagonTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
index efd5ed915127..8a07b991ff5a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -860,7 +860,7 @@ void BT::visitNonBranch(const MachineInstr &MI) {
<< " cell: " << ME.getCell(RU, Map) << "\n";
}
dbgs() << "Outputs:\n";
- for (const std::pair<unsigned, RegisterCell> &P : ResMap) {
+ for (const std::pair<const unsigned, RegisterCell> &P : ResMap) {
RegisterRef RD(P.first);
dbgs() << " " << printReg(P.first, &ME.TRI) << " cell: "
<< ME.getCell(RD, ResMap) << "\n";
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 99e3ee871570..7a90d585eb9a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -53,11 +53,9 @@ public:
DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream, raw_ostream &CStream,
- bool &Complete) const;
+ raw_ostream &CStream, bool &Complete) const;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
void remapInstruction(MCInst &Instr) const;
};
@@ -158,7 +156,7 @@ static MCDisassembler *createHexagonDisassembler(const Target &T,
return new HexagonDisassembler(STI, Ctx, T.createMCInstrInfo());
}
-extern "C" void LLVMInitializeHexagonDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonDisassembler() {
TargetRegistry::RegisterMCDisassembler(getTheHexagonTarget(),
createHexagonDisassembler);
}
@@ -166,7 +164,6 @@ extern "C" void LLVMInitializeHexagonDisassembler() {
DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &os,
raw_ostream &cs) const {
DecodeStatus Result = DecodeStatus::Success;
bool Complete = false;
@@ -179,7 +176,7 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Bytes.size() < HEXAGON_INSTR_SIZE)
return MCDisassembler::Fail;
MCInst *Inst = new (getContext()) MCInst;
- Result = getSingleInstruction(*Inst, MI, Bytes, Address, os, cs, Complete);
+ Result = getSingleInstruction(*Inst, MI, Bytes, Address, cs, Complete);
MI.addOperand(MCOperand::createInst(Inst));
Size += HEXAGON_INSTR_SIZE;
Bytes = Bytes.slice(HEXAGON_INSTR_SIZE);
@@ -290,9 +287,11 @@ static void adjustDuplex(MCInst &MI, MCContext &Context) {
}
}
-DecodeStatus HexagonDisassembler::getSingleInstruction(
- MCInst &MI, MCInst &MCB, ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &os, raw_ostream &cs, bool &Complete) const {
+DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &cs,
+ bool &Complete) const {
assert(Bytes.size() >= HEXAGON_INSTR_SIZE);
uint32_t Instruction = support::endian::read32le(Bytes.data());
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 3d771d388e28..30fdde70d01a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -771,6 +771,6 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer->EmitInstruction(MCB, getSubtargetInfo());
}
-extern "C" void LLVMInitializeHexagonAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonAsmPrinter() {
RegisterAsmPrinter<HexagonAsmPrinter> X(getTheHexagonTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 3068fb6f9629..799b85ed48b4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index cf1b0a0f7daa..6d2aadb066cf 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -6,17 +6,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "commgep"
-
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -30,6 +27,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
@@ -37,6 +35,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -47,6 +46,8 @@
#include <utility>
#include <vector>
+#define DEBUG_TYPE "commgep"
+
using namespace llvm;
static cl::opt<bool> OptSpeculate("commgep-speculate", cl::init(true),
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index ddc9b847ef1c..aa9a715718bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -554,7 +555,7 @@ namespace {
LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) {
OS << "{\n";
- for (const std::pair<HCE::ExtenderInit,HCE::IndexList> &Q : P.IMap) {
+ for (const std::pair<const HCE::ExtenderInit, HCE::IndexList> &Q : P.IMap) {
OS << " " << PrintInit(Q.first, P.HRI) << " -> {";
for (unsigned I : Q.second)
OS << ' ' << I;
@@ -1638,7 +1639,7 @@ bool HCE::replaceInstrExact(const ExtDesc &ED, Register ExtR) {
return true;
}
- if ((MI.mayLoad() || MI.mayStore()) && !isStoreImmediate(ExtOpc)) {
+ if (MI.mayLoadOrStore() && !isStoreImmediate(ExtOpc)) {
// For memory instructions, there is an asymmetry in the addressing
// modes. Addressing modes allowing extenders can be replaced with
// addressing modes that use registers, but the order of operands
@@ -1793,7 +1794,7 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
return true;
}
- if (MI.mayLoad() || MI.mayStore()) {
+ if (MI.mayLoadOrStore()) {
unsigned IdxOpc = getRegOffOpcode(ExtOpc);
assert(IdxOpc && "Expecting indexed opcode");
MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(IdxOpc));
@@ -1843,7 +1844,7 @@ bool HCE::replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI) {
// These two addressing modes must be converted into indexed forms
// regardless of what the initializer looks like.
bool IsAbs = false, IsAbsSet = false;
- if (MI.mayLoad() || MI.mayStore()) {
+ if (MI.mayLoadOrStore()) {
unsigned AM = HII->getAddrMode(MI);
IsAbs = AM == HexagonII::Absolute;
IsAbsSet = AM == HexagonII::AbsoluteSet;
@@ -1894,7 +1895,7 @@ bool HCE::replaceExtenders(const AssignmentMap &IMap) {
LocDefList Defs;
bool Changed = false;
- for (const std::pair<ExtenderInit,IndexList> &P : IMap) {
+ for (const std::pair<const ExtenderInit, IndexList> &P : IMap) {
const IndexList &Idxs = P.second;
if (Idxs.size() < CountThreshold)
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index a82501cabb9b..5b61d1084e08 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -134,11 +134,21 @@ namespace {
uint32_t properties() const;
unsigned size() const { return Size; }
- LatticeCell &operator= (const LatticeCell &L) {
+ LatticeCell(const LatticeCell &L) {
+ // This memcpy also copies Properties (when L.Size == 0).
+ uint32_t N =
+ L.IsSpecial ? sizeof L.Properties : L.Size * sizeof(const Constant *);
+ memcpy(Values, L.Values, N);
+ Kind = L.Kind;
+ Size = L.Size;
+ IsSpecial = L.IsSpecial;
+ }
+
+ LatticeCell &operator=(const LatticeCell &L) {
if (this != &L) {
// This memcpy also copies Properties (when L.Size == 0).
uint32_t N = L.IsSpecial ? sizeof L.Properties
- : L.Size*sizeof(const Constant*);
+ : L.Size * sizeof(const Constant *);
memcpy(Values, L.Values, N);
Kind = L.Kind;
Size = L.Size;
@@ -260,7 +270,7 @@ namespace {
void propagate(MachineFunction &MF);
bool rewrite(MachineFunction &MF);
- MachineRegisterInfo *MRI;
+ MachineRegisterInfo *MRI = nullptr;
MachineConstEvaluator &MCE;
using CFGEdge = std::pair<unsigned, unsigned>;
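The copy constructor added in the hunk above pairs with the pre-existing copy assignment; a minimal stand-alone sketch (hypothetical names, array sizes assumed) of why the pair matters once a user-provided assignment exists:

#include <cstring>
struct Cell {                        // rough sketch of the LatticeCell shape
  unsigned Size = 0;
  const void *Values[4] = {};
  Cell() = default;
  // With only operator= defined, the implicitly generated copy constructor
  // is deprecated since C++11; defining it explicitly keeps both copy
  // paths on the same memcpy-based logic the class relies on.
  Cell(const Cell &C) { *this = C; }
  Cell &operator=(const Cell &C) {
    if (this != &C) {
      std::memcpy(Values, C.Values, C.Size * sizeof(const void *));
      Size = C.Size;
    }
    return *this;
  }
};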
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index e4a2ba0ec29c..61a1df5eb94b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -1,4 +1,4 @@
-//===----------------------------------------------------------------------===//
+//===-------------------------------------------------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 0844fb8a8629..d0285a7aa377 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -217,7 +217,7 @@ namespace {
MachineDominatorTree *MDT = nullptr;
MachineLoopInfo *MLI = nullptr;
BlockSetType Deleted;
- const MachineBranchProbabilityInfo *MBPI;
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
};
} // end anonymous namespace
@@ -282,6 +282,7 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
// can fall through into the other, in other words, it will be executed
// in both cases. We only want to predicate the block that is executed
// conditionally.
+ assert(TB && FB && "Failed to find triangle control flow blocks");
unsigned TNP = TB->pred_size(), FNP = FB->pred_size();
unsigned TNS = TB->succ_size(), FNS = FB->succ_size();
@@ -682,7 +683,7 @@ bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI)
bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI)
const {
- if (MI->mayLoad() || MI->mayStore())
+ if (MI->mayLoadOrStore())
return false;
if (MI->isCall() || MI->isBarrier() || MI->isBranch())
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 8984ee82960d..c1d0599830cc 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -106,6 +106,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -1040,7 +1041,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
bool CanDown = canMoveOver(*DefI, Defs, Uses);
// The TfrI does not access memory, but DefI could. Check if it's safe
// to move DefI down to TfrI.
- if (DefI->mayLoad() || DefI->mayStore())
+ if (DefI->mayLoadOrStore())
if (!canMoveMemTo(*DefI, TfrI, true))
CanDown = false;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index bfa3372d7faf..aff8e57b0a94 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Attributes.h"
@@ -223,8 +224,7 @@ namespace {
bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
- bool NeedCFI = MF.getMMI().hasDebugInfo() ||
- MF.getFunction().needsUnwindTableEntry();
+ bool NeedCFI = MF.needsFrameMoves();
if (!NeedCFI)
return false;
@@ -1363,6 +1363,7 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
if (!HasAlloca || !NeedsAlign)
return;
+ SmallSet<int, 4> DealignSlots;
unsigned LFS = MFI.getLocalFrameSize();
for (int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
if (!MFI.isSpillSlotObjectIndex(i) || MFI.isDeadObjectIndex(i))
@@ -1373,7 +1374,8 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
unsigned A = std::max(MFI.getObjectAlignment(i), 8U);
MFI.setObjectAlignment(i, 8);
LFS = alignTo(LFS+S, A);
- MFI.mapLocalFrameObject(i, -LFS);
+ MFI.mapLocalFrameObject(i, -static_cast<int64_t>(LFS));
+ DealignSlots.insert(i);
}
MFI.setLocalFrameSize(LFS);
@@ -1383,6 +1385,38 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
MFI.setLocalFrameMaxAlign(Align(8));
MFI.setUseLocalStackAllocationBlock(true);
+ // Go over all MachineMemOperands in the code, and change the ones that
+ // refer to the dealigned stack slots to reflect the new alignment.
+ if (!DealignSlots.empty()) {
+ for (MachineBasicBlock &BB : MF) {
+ for (MachineInstr &MI : BB) {
+ bool KeepOld = true;
+ ArrayRef<MachineMemOperand*> memops = MI.memoperands();
+ SmallVector<MachineMemOperand*,1> new_memops;
+ for (MachineMemOperand *MMO : memops) {
+ auto *PV = MMO->getPseudoValue();
+ if (auto *FS = dyn_cast_or_null<FixedStackPseudoSourceValue>(PV)) {
+ int FI = FS->getFrameIndex();
+ if (DealignSlots.count(FI)) {
+ unsigned A = MFI.getObjectAlignment(FI);
+ auto *NewMMO = MF.getMachineMemOperand(MMO->getPointerInfo(),
+ MMO->getFlags(), MMO->getSize(), A,
+ MMO->getAAInfo(), MMO->getRanges(),
+ MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getFailureOrdering());
+ new_memops.push_back(NewMMO);
+ KeepOld = false;
+ continue;
+ }
+ }
+ new_memops.push_back(MMO);
+ }
+ if (!KeepOld)
+ MI.setMemRefs(MF, new_memops);
+ }
+ }
+ }
+
// Set the physical aligned-stack base address register.
unsigned AP = 0;
if (const MachineInstr *AI = getAlignaInstr(MF))
@@ -1750,16 +1784,21 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
Register SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);
bool IsKill = MI->getOperand(2).isKill();
int FI = MI->getOperand(0).getIndex();
+ bool NeedsAligna = needsAligna(MF);
unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
unsigned HasAlign = MFI.getObjectAlignment(FI);
unsigned StoreOpc;
+ auto UseAligned = [&] (unsigned NeedAlign, unsigned HasAlign) {
+ return !NeedsAligna && (NeedAlign <= HasAlign);
+ };
+
// Store low part.
if (LPR.contains(SrcLo)) {
- StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
- : Hexagon::V6_vS32Ub_ai;
+ StoreOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vS32b_ai
+ : Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
.addFrameIndex(FI)
.addImm(0)
@@ -1769,8 +1808,8 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
// Store high part.
if (LPR.contains(SrcHi)) {
- StoreOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vS32b_ai
- : Hexagon::V6_vS32Ub_ai;
+ StoreOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vS32b_ai
+ : Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
.addFrameIndex(FI)
.addImm(Size)
@@ -1797,23 +1836,28 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
Register DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);
Register DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);
int FI = MI->getOperand(1).getIndex();
+ bool NeedsAligna = needsAligna(MF);
unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
unsigned HasAlign = MFI.getObjectAlignment(FI);
unsigned LoadOpc;
+ auto UseAligned = [&] (unsigned NeedAlign, unsigned HasAlign) {
+ return !NeedsAligna && (NeedAlign <= HasAlign);
+ };
+
// Load low part.
- LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
- : Hexagon::V6_vL32Ub_ai;
+ LoadOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vL32b_ai
+ : Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
.addFrameIndex(FI)
.addImm(0)
.cloneMemRefs(*MI);
// Load high part.
- LoadOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vL32b_ai
- : Hexagon::V6_vL32Ub_ai;
+ LoadOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vL32b_ai
+ : Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
.addFrameIndex(FI)
.addImm(Size)
@@ -1832,6 +1876,7 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
if (!MI->getOperand(0).isFI())
return false;
+ bool NeedsAligna = needsAligna(MF);
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
DebugLoc DL = MI->getDebugLoc();
Register SrcR = MI->getOperand(2).getReg();
@@ -1840,8 +1885,9 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
unsigned HasAlign = MFI.getObjectAlignment(FI);
- unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
- : Hexagon::V6_vS32Ub_ai;
+ bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign);
+ unsigned StoreOpc = UseAligned ? Hexagon::V6_vS32b_ai
+ : Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
.addFrameIndex(FI)
.addImm(0)
@@ -1861,6 +1907,7 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
if (!MI->getOperand(1).isFI())
return false;
+ bool NeedsAligna = needsAligna(MF);
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
DebugLoc DL = MI->getDebugLoc();
Register DstR = MI->getOperand(0).getReg();
@@ -1868,8 +1915,9 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
unsigned HasAlign = MFI.getObjectAlignment(FI);
- unsigned LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
- : Hexagon::V6_vL32Ub_ai;
+ bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign);
+ unsigned LoadOpc = UseAligned ? Hexagon::V6_vL32b_ai
+ : Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
.addFrameIndex(FI)
.addImm(0)
@@ -1912,11 +1960,9 @@ bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs);
break;
case Hexagon::PS_vloadrw_ai:
- case Hexagon::PS_vloadrwu_ai:
Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs);
break;
case Hexagon::PS_vstorerw_ai:
- case Hexagon::PS_vstorerwu_ai:
Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs);
break;
}
@@ -1961,7 +2007,15 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
for (auto *RC : SpillRCs) {
if (!needToReserveScavengingSpillSlots(MF, HRI, RC))
continue;
- unsigned Num = RC == &Hexagon::IntRegsRegClass ? NumberScavengerSlots : 1;
+ unsigned Num = 1;
+ switch (RC->getID()) {
+ case Hexagon::IntRegsRegClassID:
+ Num = NumberScavengerSlots;
+ break;
+ case Hexagon::HvxQRRegClassID:
+ Num = 2; // Vector predicate spills also need a vector register.
+ break;
+ }
unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC);
for (unsigned i = 0; i < Num; i++) {
int NewFI = MFI.CreateSpillStackObject(S, A);
@@ -2389,9 +2443,9 @@ bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.hasVarSizedObjects())
return false;
- unsigned MaxA = MFI.getMaxAlignment();
- if (MaxA <= getStackAlignment())
- return false;
+ // Do not check for max stack object alignment here, because the stack
+ // may not be complete yet. Assume that we will need PS_aligna if there
+ // are variable-sized objects.
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
index caa0e4d80397..342ca21525c5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -17,9 +17,11 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include <algorithm>
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 48881e02f4d3..2f29e88bc989 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -92,6 +93,10 @@ namespace {
RegisterSet() = default;
explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
+ RegisterSet &operator=(const RegisterSet &RS) {
+ BitVector::operator=(RS);
+ return *this;
+ }
using BitVector::clear;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
index b559e7bbbb60..9585b14dbf80 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -332,6 +332,12 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
if (!MxOpc)
continue;
+ // Basic sanity check: since we are deleting instructions, validate the
+ // iterators. It is possible that one of Def1 or Def2 has already been
+ // turned into a "mux" and is still listed as a candidate for other
+ // "mux" instructions.
+ if (!MX.At->getParent() || !MX.Def1->getParent() || !MX.Def2->getParent())
+ continue;
+
MachineBasicBlock &B = *MX.At->getParent();
const DebugLoc &DL = B.findDebugLoc(MX.At);
auto NewMux = BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
@@ -339,8 +345,8 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
.add(*MX.SrcT)
.add(*MX.SrcF);
NewMux->clearKillInfo();
- B.erase(MX.Def1);
- B.erase(MX.Def2);
+ B.remove(MX.Def1);
+ B.remove(MX.Def2);
Changed = true;
}
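
The switch from erase to remove is what makes the new getParent() guard sound: MachineBasicBlock::remove only unlinks the instruction, leaving the MachineInstr alive with a null parent, whereas erase would free it and leave dangling pointers in later mux candidates. A small sketch of the pattern, with hypothetical helper names:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

// Sketch: unlink a replaced definition instead of deleting it, so other
// pending candidates can detect the removal via getParent().
void unlinkDef(llvm::MachineBasicBlock &B, llvm::MachineInstr *Def) {
  B.remove(Def);  // Def stays allocated; Def->getParent() becomes nullptr
}

bool stillInBlock(const llvm::MachineInstr *MI) {
  return MI->getParent() != nullptr;  // the guard used before forming a mux
}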
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 24d33c91a29b..903287e68c99 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 62291790f0fe..1cf1500bc832 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -44,6 +44,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 4684d8e4781a..9cf5b257a00a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Hexagon.h"
#include "HexagonISelDAGToDAG.h"
+#include "Hexagon.h"
#include "HexagonISelLowering.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonTargetMachine.h"
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -915,7 +916,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
switch (ConstraintID) {
default:
return true;
- case InlineAsm::Constraint_i:
case InlineAsm::Constraint_o: // Offsetable.
case InlineAsm::Constraint_v: // Not offsetable.
case InlineAsm::Constraint_m: // Memory.
@@ -1261,7 +1261,7 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
}
void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
- auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
+ auto &HST = MF->getSubtarget<HexagonSubtarget>();
auto &HFI = *HST.getFrameLowering();
if (!HFI.needsAligna(*MF))
return;
@@ -1269,12 +1269,23 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
MachineFrameInfo &MFI = MF->getFrameInfo();
MachineBasicBlock *EntryBB = &MF->front();
unsigned AR = FuncInfo->CreateReg(MVT::i32);
- unsigned MaxA = MFI.getMaxAlignment();
+ unsigned EntryMaxA = MFI.getMaxAlignment();
BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR)
- .addImm(MaxA);
+ .addImm(EntryMaxA);
MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
}
+void HexagonDAGToDAGISel::updateAligna() {
+ auto &HFI = *MF->getSubtarget<HexagonSubtarget>().getFrameLowering();
+ if (!HFI.needsAligna(*MF))
+ return;
+ auto *AlignaI = const_cast<MachineInstr*>(HFI.getAlignaInstr(*MF));
+ assert(AlignaI != nullptr);
+ unsigned MaxA = MF->getFrameInfo().getMaxAlignment();
+ if (AlignaI->getOperand(1).getImm() < MaxA)
+ AlignaI->getOperand(1).setImm(MaxA);
+}
+
// Match a frame index that can be used in an addressing mode.
bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) {
if (N.getOpcode() != ISD::FrameIndex)
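
updateAligna runs once after selection (hooked into runOnMachineFunction in the header change that follows) and only ever raises the PS_aligna immediate. A sketch of the core fix-up, assuming getAlignaInstr returns the function's PS_aligna instruction as in the frame-lowering code:

#include "llvm/CodeGen/MachineInstr.h"

// Sketch: widen the alignment recorded on PS_aligna to the final
// MachineFrameInfo maximum; never shrink it.
void raiseAlignaImm(llvm::MachineInstr &AlignaI, unsigned MaxA) {
  llvm::MachineOperand &Op = AlignaI.getOperand(1);
  if (Op.getImm() < (int64_t)MaxA)
    Op.setImm(MaxA);
}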
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
index 65edb09603b3..6c77d8803359 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
@@ -43,6 +43,7 @@ public:
HII = HST->getInstrInfo();
HRI = HST->getRegisterInfo();
SelectionDAGISel::runOnMachineFunction(MF);
+ updateAligna();
return true;
}
@@ -144,6 +145,9 @@ private:
void ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes);
void ppHoistZextI1(std::vector<SDNode*> &&Nodes);
+ // Function postprocessing.
+ void updateAligna();
+
SmallDenseMap<SDNode *,int> RootWeights;
SmallDenseMap<SDNode *,int> RootHeights;
SmallDenseMap<const Value *,int> GAUsesInFunction;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index e7f1c345af1d..7e143a349400 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 8a8986e232a0..e11ecdc7d035 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -39,8 +39,9 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -232,19 +233,76 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// If either no tail call or told not to tail call at all, don't.
- auto Attr =
- CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
- if (!CI->isTailCall() || Attr.getValueAsString() == "true")
- return false;
-
- return true;
+ return CI->isTailCall();
}
-Register HexagonTargetLowering::getRegisterByName(const char* RegName, EVT VT,
- const MachineFunction &) const {
+Register HexagonTargetLowering::getRegisterByName(
+ const char* RegName, LLT VT, const MachineFunction &) const {
// Just support r19, the linux kernel uses it.
Register Reg = StringSwitch<Register>(RegName)
+ .Case("r0", Hexagon::R0)
+ .Case("r1", Hexagon::R1)
+ .Case("r2", Hexagon::R2)
+ .Case("r3", Hexagon::R3)
+ .Case("r4", Hexagon::R4)
+ .Case("r5", Hexagon::R5)
+ .Case("r6", Hexagon::R6)
+ .Case("r7", Hexagon::R7)
+ .Case("r8", Hexagon::R8)
+ .Case("r9", Hexagon::R9)
+ .Case("r10", Hexagon::R10)
+ .Case("r11", Hexagon::R11)
+ .Case("r12", Hexagon::R12)
+ .Case("r13", Hexagon::R13)
+ .Case("r14", Hexagon::R14)
+ .Case("r15", Hexagon::R15)
+ .Case("r16", Hexagon::R16)
+ .Case("r17", Hexagon::R17)
+ .Case("r18", Hexagon::R18)
.Case("r19", Hexagon::R19)
+ .Case("r20", Hexagon::R20)
+ .Case("r21", Hexagon::R21)
+ .Case("r22", Hexagon::R22)
+ .Case("r23", Hexagon::R23)
+ .Case("r24", Hexagon::R24)
+ .Case("r25", Hexagon::R25)
+ .Case("r26", Hexagon::R26)
+ .Case("r27", Hexagon::R27)
+ .Case("r28", Hexagon::R28)
+ .Case("r29", Hexagon::R29)
+ .Case("r30", Hexagon::R30)
+ .Case("r31", Hexagon::R31)
+ .Case("r1:0", Hexagon::D0)
+ .Case("r3:2", Hexagon::D1)
+ .Case("r5:4", Hexagon::D2)
+ .Case("r7:6", Hexagon::D3)
+ .Case("r9:8", Hexagon::D4)
+ .Case("r11:10", Hexagon::D5)
+ .Case("r13:12", Hexagon::D6)
+ .Case("r15:14", Hexagon::D7)
+ .Case("r17:16", Hexagon::D8)
+ .Case("r19:18", Hexagon::D9)
+ .Case("r21:20", Hexagon::D10)
+ .Case("r23:22", Hexagon::D11)
+ .Case("r25:24", Hexagon::D12)
+ .Case("r27:26", Hexagon::D13)
+ .Case("r29:28", Hexagon::D14)
+ .Case("r31:30", Hexagon::D15)
+ .Case("sp", Hexagon::R29)
+ .Case("fp", Hexagon::R30)
+ .Case("lr", Hexagon::R31)
+ .Case("p0", Hexagon::P0)
+ .Case("p1", Hexagon::P1)
+ .Case("p2", Hexagon::P2)
+ .Case("p3", Hexagon::P3)
+ .Case("sa0", Hexagon::SA0)
+ .Case("lc0", Hexagon::LC0)
+ .Case("sa1", Hexagon::SA1)
+ .Case("lc1", Hexagon::LC1)
+ .Case("m0", Hexagon::M0)
+ .Case("m1", Hexagon::M1)
+ .Case("usr", Hexagon::USR)
+ .Case("ugp", Hexagon::UGP)
.Default(Register());
if (Reg)
return Reg;
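
The expanded table makes every GPR, each register pair, and the common aliases resolvable by name (the unchanged "r19" comment above is now merely conservative); note the hook's type parameter also changed from EVT to LLT to match the updated TargetLowering interface. A trimmed sketch of the StringSwitch idiom, covering a hypothetical subset of cases and assuming the generated Hexagon register enums:

#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/Register.h"

// Sketch: name-to-register lookup for a few of the newly added aliases.
llvm::Register lookupByName(llvm::StringRef Name) {
  return llvm::StringSwitch<llvm::Register>(Name)
      .Case("sp", llvm::Hexagon::R29)  // stack pointer alias
      .Case("fp", llvm::Hexagon::R30)  // frame pointer alias
      .Case("lr", llvm::Hexagon::R31)  // link register alias
      .Default(llvm::Register());      // unknown name: invalid register
}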
@@ -345,10 +403,6 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
- auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
- if (Attr.getValueAsString() == "true")
- CLI.IsTailCall = false;
-
if (CLI.IsTailCall) {
bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
@@ -413,7 +467,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
if (ArgAlign)
LargestAlignSeen = std::max(LargestAlignSeen,
- VA.getLocVT().getStoreSizeInBits() >> 3);
+ (unsigned)VA.getLocVT().getStoreSizeInBits() >> 3);
if (Flags.isByVal()) {
// The argument is a struct passed by value. According to LLVM, "Arg"
// is a pointer.
@@ -1473,6 +1527,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+
// Types natively supported:
for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
@@ -1847,7 +1905,8 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
}
-bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
+ const MachineFunction &MF, EVT VT) const {
return isOperationLegalOrCustom(ISD::FMA, VT);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 75f553bfec7f..e79646de6287 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -137,7 +137,8 @@ namespace HexagonISD {
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
/// method returns true (and FMAs are legal), otherwise fmuladd is
/// expanded to mul + add.
- bool isFMAFasterThanFMulAndFAdd(EVT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &,
+ EVT) const override;
// Should we expand the build vector with shuffles?
bool shouldExpandBuildVectorWithShuffles(EVT VT,
@@ -229,7 +230,7 @@ namespace HexagonISD {
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
- Register getRegisterByName(const char* RegName, EVT VT,
+ Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
/// If a physical register, this returns the register that receives the
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index bc8a9959c917..204950f9010e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -194,6 +194,13 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::XOR, BoolV, Legal);
}
+ if (Use64b)
+ for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
+ else
+ for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
+
setTargetDAGCombine(ISD::VSELECT);
}
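
Use64b distinguishes the 64-byte HVX configuration from the 128-byte one; the two lists are the same element widths scaled to the vector length. A compact equivalent of the registration, as a fragment assumed to sit inside HexagonTargetLowering::initializeHVXLowering():

// Sketch: the same SIGN_EXTEND_INREG legalization, with the type list
// chosen by HVX vector length (64-byte vs 128-byte mode).
static const MVT Types64[] = {MVT::v32i8, MVT::v32i16, MVT::v16i8,
                              MVT::v16i16, MVT::v16i32};
static const MVT Types128[] = {MVT::v64i8, MVT::v64i16, MVT::v32i8,
                               MVT::v32i16, MVT::v32i32};
for (MVT T : Use64b ? makeArrayRef(Types64) : makeArrayRef(Types128))
  setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);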
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 767538f92ed6..39ec8936214e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -786,8 +786,8 @@ bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
unsigned KillFlag = getKillRegState(KillSrc);
@@ -888,10 +888,7 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned SlotAlign = MFI.getObjectAlignment(FI);
- unsigned RegAlign = TRI->getSpillAlignment(*RC);
unsigned KillFlag = getKillRegState(isKill);
- bool HasAlloca = MFI.hasVarSizedObjects();
- const HexagonFrameLowering &HFI = *Subtarget.getFrameLowering();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
@@ -918,29 +915,13 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else if (Hexagon::HvxVRRegClass.hasSubClassEq(RC)) {
- // If there are variable-sized objects, spills will not be aligned.
- if (HasAlloca)
- SlotAlign = HFI.getStackAlignment();
- unsigned Opc = SlotAlign < RegAlign ? Hexagon::V6_vS32Ub_ai
- : Hexagon::V6_vS32b_ai;
- MachineMemOperand *MMOA = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
- MFI.getObjectSize(FI), SlotAlign);
- BuildMI(MBB, I, DL, get(Opc))
+ BuildMI(MBB, I, DL, get(Hexagon::PS_vstorerv_ai))
.addFrameIndex(FI).addImm(0)
- .addReg(SrcReg, KillFlag).addMemOperand(MMOA);
+ .addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else if (Hexagon::HvxWRRegClass.hasSubClassEq(RC)) {
- // If there are variable-sized objects, spills will not be aligned.
- if (HasAlloca)
- SlotAlign = HFI.getStackAlignment();
- unsigned Opc = SlotAlign < RegAlign ? Hexagon::PS_vstorerwu_ai
- : Hexagon::PS_vstorerw_ai;
- MachineMemOperand *MMOA = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
- MFI.getObjectSize(FI), SlotAlign);
- BuildMI(MBB, I, DL, get(Opc))
+ BuildMI(MBB, I, DL, get(Hexagon::PS_vstorerw_ai))
.addFrameIndex(FI).addImm(0)
- .addReg(SrcReg, KillFlag).addMemOperand(MMOA);
+ .addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else {
llvm_unreachable("Unimplemented");
}
@@ -954,9 +935,6 @@ void HexagonInstrInfo::loadRegFromStackSlot(
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned SlotAlign = MFI.getObjectAlignment(FI);
- unsigned RegAlign = TRI->getSpillAlignment(*RC);
- bool HasAlloca = MFI.hasVarSizedObjects();
- const HexagonFrameLowering &HFI = *Subtarget.getFrameLowering();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
@@ -978,27 +956,11 @@ void HexagonInstrInfo::loadRegFromStackSlot(
BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrq_ai), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (Hexagon::HvxVRRegClass.hasSubClassEq(RC)) {
- // If there are variable-sized objects, spills will not be aligned.
- if (HasAlloca)
- SlotAlign = HFI.getStackAlignment();
- unsigned Opc = SlotAlign < RegAlign ? Hexagon::V6_vL32Ub_ai
- : Hexagon::V6_vL32b_ai;
- MachineMemOperand *MMOA = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI), SlotAlign);
- BuildMI(MBB, I, DL, get(Opc), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMOA);
+ BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrv_ai), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (Hexagon::HvxWRRegClass.hasSubClassEq(RC)) {
- // If there are variable-sized objects, spills will not be aligned.
- if (HasAlloca)
- SlotAlign = HFI.getStackAlignment();
- unsigned Opc = SlotAlign < RegAlign ? Hexagon::PS_vloadrwu_ai
- : Hexagon::PS_vloadrw_ai;
- MachineMemOperand *MMOA = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI), SlotAlign);
- BuildMI(MBB, I, DL, get(Opc), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMOA);
+ BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrw_ai), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else {
llvm_unreachable("Can't store this register to stack slot");
}
@@ -1040,6 +1002,15 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
};
+ auto UseAligned = [&] (const MachineInstr &MI, unsigned NeedAlign) {
+ if (MI.memoperands().empty())
+ return false;
+ return all_of(MI.memoperands(),
+ [NeedAlign] (const MachineMemOperand *MMO) {
+ return NeedAlign <= MMO->getAlignment();
+ });
+ };
+
switch (Opc) {
case TargetOpcode::COPY: {
MachineOperand &MD = MI.getOperand(0);
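
The UseAligned lambda answers "may this spill use the aligned opcode?" purely from the instruction's memory operands, replacing the old SlotAlign/RegAlign bookkeeping. A standalone sketch of the same check (hypothetical function name):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"

// Sketch: an instruction may use the aligned vector opcode only if every
// attached MachineMemOperand guarantees at least NeedAlign bytes.
bool useAlignedSketch(const llvm::MachineInstr &MI, unsigned NeedAlign) {
  if (MI.memoperands().empty())
    return false;  // no information: be conservative, use the unaligned form
  return llvm::all_of(MI.memoperands(),
                      [NeedAlign](const llvm::MachineMemOperand *MMO) {
                        return NeedAlign <= MMO->getAlignment();
                      });
}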
@@ -1086,47 +1057,78 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MRI.clearKillFlags(SrcSubHi);
return true;
}
- case Hexagon::PS_vstorerw_ai:
- case Hexagon::PS_vstorerwu_ai: {
- bool Aligned = Opc == Hexagon::PS_vstorerw_ai;
- Register SrcReg = MI.getOperand(2).getReg();
- Register SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
- Register SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
- unsigned NewOpc = Aligned ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32Ub_ai;
- unsigned Offset = HRI.getSpillSize(Hexagon::HvxVRRegClass);
-
- MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(SrcSubLo)
- .cloneMemRefs(MI);
- MI1New->getOperand(0).setIsKill(false);
- BuildMI(MBB, MI, DL, get(NewOpc))
- .add(MI.getOperand(0))
- // The Vectors are indexed in multiples of vector size.
- .addImm(MI.getOperand(1).getImm() + Offset)
- .addReg(SrcSubHi)
+ case Hexagon::PS_vloadrv_ai: {
+ Register DstReg = MI.getOperand(0).getReg();
+ const MachineOperand &BaseOp = MI.getOperand(1);
+ assert(BaseOp.getSubReg() == 0);
+ int Offset = MI.getOperand(2).getImm();
+ unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
+ unsigned NewOpc = UseAligned(MI, NeedAlign) ? Hexagon::V6_vL32b_ai
+ : Hexagon::V6_vL32Ub_ai;
+ BuildMI(MBB, MI, DL, get(NewOpc), DstReg)
+ .addReg(BaseOp.getReg(), getRegState(BaseOp))
+ .addImm(Offset)
.cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
- case Hexagon::PS_vloadrw_ai:
- case Hexagon::PS_vloadrwu_ai: {
- bool Aligned = Opc == Hexagon::PS_vloadrw_ai;
+ case Hexagon::PS_vloadrw_ai: {
Register DstReg = MI.getOperand(0).getReg();
- unsigned NewOpc = Aligned ? Hexagon::V6_vL32b_ai : Hexagon::V6_vL32Ub_ai;
- unsigned Offset = HRI.getSpillSize(Hexagon::HvxVRRegClass);
-
- MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc),
- HRI.getSubReg(DstReg, Hexagon::vsub_lo))
- .add(MI.getOperand(1))
- .addImm(MI.getOperand(2).getImm())
- .cloneMemRefs(MI);
- MI1New->getOperand(1).setIsKill(false);
- BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_hi))
- .add(MI.getOperand(1))
- // The Vectors are indexed in multiples of vector size.
- .addImm(MI.getOperand(2).getImm() + Offset)
+ const MachineOperand &BaseOp = MI.getOperand(1);
+ assert(BaseOp.getSubReg() == 0);
+ int Offset = MI.getOperand(2).getImm();
+ unsigned VecOffset = HRI.getSpillSize(Hexagon::HvxVRRegClass);
+ unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
+ unsigned NewOpc = UseAligned(MI, NeedAlign) ? Hexagon::V6_vL32b_ai
+ : Hexagon::V6_vL32Ub_ai;
+ BuildMI(MBB, MI, DL, get(NewOpc),
+ HRI.getSubReg(DstReg, Hexagon::vsub_lo))
+ .addReg(BaseOp.getReg(), getRegState(BaseOp) & ~RegState::Kill)
+ .addImm(Offset)
+ .cloneMemRefs(MI);
+ BuildMI(MBB, MI, DL, get(NewOpc),
+ HRI.getSubReg(DstReg, Hexagon::vsub_hi))
+ .addReg(BaseOp.getReg(), getRegState(BaseOp))
+ .addImm(Offset + VecOffset)
+ .cloneMemRefs(MI);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::PS_vstorerv_ai: {
+ const MachineOperand &SrcOp = MI.getOperand(2);
+ assert(SrcOp.getSubReg() == 0);
+ const MachineOperand &BaseOp = MI.getOperand(0);
+ assert(BaseOp.getSubReg() == 0);
+ int Offset = MI.getOperand(1).getImm();
+ unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
+ unsigned NewOpc = UseAligned(MI, NeedAlign) ? Hexagon::V6_vS32b_ai
+ : Hexagon::V6_vS32Ub_ai;
+ BuildMI(MBB, MI, DL, get(NewOpc))
+ .addReg(BaseOp.getReg(), getRegState(BaseOp))
+ .addImm(Offset)
+ .addReg(SrcOp.getReg(), getRegState(SrcOp))
+ .cloneMemRefs(MI);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::PS_vstorerw_ai: {
+ Register SrcReg = MI.getOperand(2).getReg();
+ const MachineOperand &BaseOp = MI.getOperand(0);
+ assert(BaseOp.getSubReg() == 0);
+ int Offset = MI.getOperand(1).getImm();
+ unsigned VecOffset = HRI.getSpillSize(Hexagon::HvxVRRegClass);
+ unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
+ unsigned NewOpc = UseAligned(MI, NeedAlign) ? Hexagon::V6_vS32b_ai
+ : Hexagon::V6_vS32Ub_ai;
+ BuildMI(MBB, MI, DL, get(NewOpc))
+ .addReg(BaseOp.getReg(), getRegState(BaseOp) & ~RegState::Kill)
+ .addImm(Offset)
+ .addReg(HRI.getSubReg(SrcReg, Hexagon::vsub_lo))
+ .cloneMemRefs(MI);
+ BuildMI(MBB, MI, DL, get(NewOpc))
+ .addReg(BaseOp.getReg(), getRegState(BaseOp))
+ .addImm(Offset + VecOffset)
+ .addReg(HRI.getSubReg(SrcReg, Hexagon::vsub_hi))
.cloneMemRefs(MI);
MBB.erase(MI);
return true;
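
Each pair pseudo expands into two single-vector instructions at Offset and Offset + VecOffset, where VecOffset = getSpillSize(HvxVRRegClass) is one vector's byte size, and the kill flag is masked off the first base-register use so the second instruction may still read the base. An illustration under the assumption of 128-byte HVX mode (VecOffset = 128):

#include "llvm/CodeGen/MachineInstrBuilder.h"

// Sketch: the two halves of a vector-pair access, plus the kill-flag
// fix-up that keeps the base register live for the second instruction.
//   V6_vS32b_ai  base, Offset,             vsub_lo   (kill cleared)
//   V6_vS32b_ai  base, Offset + VecOffset, vsub_hi   (original flags)
unsigned firstUseRegState(const llvm::MachineOperand &BaseOp) {
  return llvm::getRegState(BaseOp) & ~llvm::RegState::Kill;
}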
@@ -2145,7 +2147,7 @@ bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa,
}
bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const {
- if (MI.mayLoad() || MI.mayStore() || MI.isCompare())
+ if (MI.mayLoadOrStore() || MI.isCompare())
return true;
// Multiply
@@ -2683,9 +2685,11 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
// misaligns with respect to load size.
switch (Opcode) {
case Hexagon::PS_vstorerq_ai:
+ case Hexagon::PS_vstorerv_ai:
case Hexagon::PS_vstorerw_ai:
case Hexagon::PS_vstorerw_nt_ai:
case Hexagon::PS_vloadrq_ai:
+ case Hexagon::PS_vloadrv_ai:
case Hexagon::PS_vloadrw_ai:
case Hexagon::PS_vloadrw_nt_ai:
case Hexagon::V6_vL32b_ai:
@@ -2941,10 +2945,7 @@ bool HexagonInstrInfo::getMemOperandWithOffset(
const TargetRegisterInfo *TRI) const {
unsigned AccessSize = 0;
BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
- assert((!BaseOp || BaseOp->isReg()) &&
- "getMemOperandWithOffset only supports base "
- "operands of type register.");
- return BaseOp != nullptr;
+ return BaseOp != nullptr && BaseOp->isReg();
}
/// Can these instructions execute at the same time in a bundle.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 60298cd666bb..676f6f0a2a8c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -173,7 +173,7 @@ public:
/// careful implementation when multiple copy instructions are required for
/// large registers. See for example the ARM target.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
/// Store the specified register of the given register class to the specified
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index c5e3cfd080d6..8ae55b207188 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -353,9 +353,6 @@ def: Pat<(v64i16 (trunc v64i32:$Vdd)),
(v32i32 (V6_lo HvxWR:$Vdd))))>,
Requires<[UseHVX]>;
-def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, IntRegs:$src2),
- (S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV55]>;
-
multiclass T_VI_pat <InstHexagon MI, Intrinsic IntID> {
def: Pat<(IntID HvxVR:$src1, u3_0ImmPred:$src2),
(MI HvxVR:$src1, HvxVR:$src1, u3_0ImmPred:$src2)>,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index bda3eccac0cd..ffaf71e23690 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -6,8 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hexagon-lir"
-
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
@@ -25,7 +23,6 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -42,11 +39,13 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -57,6 +56,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <array>
#include <cassert>
@@ -70,6 +70,8 @@
#include <utility>
#include <vector>
+#define DEBUG_TYPE "hexagon-lir"
+
using namespace llvm;
static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
@@ -2273,14 +2275,12 @@ CleanupAndExit:
: CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
} else {
- NewCall = CondBuilder.CreateMemMove(StoreBasePtr, SI->getAlignment(),
- LoadBasePtr, LI->getAlignment(),
- NumBytes);
+ NewCall = CondBuilder.CreateMemMove(
+ StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes);
}
} else {
- NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
- LoadBasePtr, LI->getAlignment(),
- NumBytes);
+ NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
+ LI->getAlign(), NumBytes);
// Okay, the memcpy has been formed. Zap the original store and
// anything that feeds into it.
RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
@@ -2335,7 +2335,7 @@ bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
continue;
if (!Worklist.count(&In) && In.mayHaveSideEffects())
return false;
- for (const auto &K : In.users()) {
+ for (auto K : In.users()) {
Instruction *UseI = dyn_cast<Instruction>(K);
if (!UseI)
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 680d01e12af0..e3579dfa9ba9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -20,6 +20,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/InitializePasses.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 9121115020a2..886034d9601a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
index d00fc23102a5..d818e0897f75 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
@@ -15,6 +15,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 485e658e1c84..cf711058823c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -1817,9 +1817,9 @@ def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64L:$u6), 0)),
def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64H:$u6), 0)),
(S4_ntstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 $u6))))>;
def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64L:$u6), 0)),
- (S2_tstbit_i (LoReg $Rs), (Log2_32 imm:$u6))>;
+ (S2_tstbit_i (LoReg $Rs), (Log2_64 imm:$u6))>;
def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64H:$u6), 0)),
- (S2_tstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_32 imm:$u6))))>;
+ (S2_tstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 imm:$u6))))>;
// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
// represented as a compare against "value & 0xFF", which is an exact match
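
The Log2_32 to Log2_64 change fixes the bit-index computation for power-of-two immediates whose set bit lies in the upper 32 bits: truncating first loses the bit entirely. Assuming the .td transforms mirror the MathExtras helpers, a standalone check of the arithmetic:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Mask = 1ULL << 37;         // an IsPow2_64H-style immediate
  assert(llvm::Log2_64(Mask) == 37);  // correct 64-bit bit index
  // A 32-bit computation would first truncate Mask to 0 and misfire:
  assert((uint32_t)Mask == 0);
  return 0;
}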
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPseudo.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPseudo.td
index 7dd25d7d93d5..d2b6d64e3c92 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -408,15 +408,17 @@ let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
// Vector store pseudos
let Predicates = [HasV60,UseHVX], isPseudo = 1, isCodeGenOnly = 1,
mayStore = 1, accessSize = HVXVectorAccess, hasSideEffects = 0 in
-class STrivv_template<RegisterClass RC, InstHexagon rootInst>
+class STriv_template<RegisterClass RC, InstHexagon rootInst>
: InstHexagon<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src),
"", [], "", rootInst.Itinerary, rootInst.Type>;
-def PS_vstorerw_ai: STrivv_template<HvxWR, V6_vS32b_ai>,
+def PS_vstorerv_ai: STriv_template<HvxVR, V6_vS32b_ai>,
Requires<[HasV60,UseHVX]>;
-def PS_vstorerw_nt_ai: STrivv_template<HvxWR, V6_vS32b_nt_ai>,
+def PS_vstorerv_nt_ai: STriv_template<HvxVR, V6_vS32b_nt_ai>,
Requires<[HasV60,UseHVX]>;
-def PS_vstorerwu_ai: STrivv_template<HvxWR, V6_vS32Ub_ai>,
+def PS_vstorerw_ai: STriv_template<HvxWR, V6_vS32b_ai>,
+ Requires<[HasV60,UseHVX]>;
+def PS_vstorerw_nt_ai: STriv_template<HvxWR, V6_vS32b_nt_ai>,
Requires<[HasV60,UseHVX]>;
let isPseudo = 1, isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0 in
@@ -427,15 +429,17 @@ def PS_vstorerq_ai: Pseudo<(outs),
// Vector load pseudos
let Predicates = [HasV60, UseHVX], isPseudo = 1, isCodeGenOnly = 1,
mayLoad = 1, accessSize = HVXVectorAccess, hasSideEffects = 0 in
-class LDrivv_template<RegisterClass RC, InstHexagon rootInst>
+class LDriv_template<RegisterClass RC, InstHexagon rootInst>
: InstHexagon<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off),
"", [], "", rootInst.Itinerary, rootInst.Type>;
-def PS_vloadrw_ai: LDrivv_template<HvxWR, V6_vL32b_ai>,
+def PS_vloadrv_ai: LDriv_template<HvxVR, V6_vL32b_ai>,
+ Requires<[HasV60,UseHVX]>;
+def PS_vloadrv_nt_ai: LDriv_template<HvxVR, V6_vL32b_nt_ai>,
Requires<[HasV60,UseHVX]>;
-def PS_vloadrw_nt_ai: LDrivv_template<HvxWR, V6_vL32b_nt_ai>,
+def PS_vloadrw_ai: LDriv_template<HvxWR, V6_vL32b_ai>,
Requires<[HasV60,UseHVX]>;
-def PS_vloadrwu_ai: LDrivv_template<HvxWR, V6_vL32Ub_ai>,
+def PS_vloadrw_nt_ai: LDriv_template<HvxWR, V6_vL32b_nt_ai>,
Requires<[HasV60,UseHVX]>;
let isPseudo = 1, isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in
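
With the wu/ru unaligned pseudos gone, one pseudo per access shape remains (PS_vloadrv/PS_vstorerv for a single HVX vector, PS_vloadrw/PS_vstorerw for a pair), and the aligned-vs-unaligned choice moves to expandPostRAPseudo, keyed off memoperands as shown earlier. A sketch of that late opcode choice, with a hypothetical helper name:

// Sketch: the aligned/unaligned decision that the removed *_u pseudos
// used to encode statically, now made once at pseudo expansion.
unsigned pickVecMemOpcode(bool UseAligned, bool IsStore) {
  if (IsStore)
    return UseAligned ? llvm::Hexagon::V6_vS32b_ai
                      : llvm::Hexagon::V6_vS32Ub_ai;
  return UseAligned ? llvm::Hexagon::V6_vL32b_ai
                    : llvm::Hexagon::V6_vL32Ub_ai;
}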
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 910a17540e6e..517ad1c6ee7b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index b7171fb14272..d55aeaf10852 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -73,6 +73,9 @@ HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF,
static const MCPhysReg VecDbl[] = {
W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, 0
};
+ static const MCPhysReg VecPred[] = {
+ Q0, Q1, Q2, Q3, 0
+ };
switch (RC->getID()) {
case IntRegsRegClassID:
@@ -85,6 +88,8 @@ HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF,
return VecSgl;
case HvxWRRegClassID:
return VecDbl;
+ case HvxQRRegClassID:
+ return VecPred;
default:
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index f12189052699..c23b837bb62f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -119,7 +119,7 @@ let Namespace = "Hexagon" in {
def P2 : Rp<2, "p2">, DwarfRegNum<[65]>;
def P3 : Rp<3, "p3">, DwarfRegNum<[66]>;
- // Fake register to represent USR.OVF bit. Artihmetic/saturating instruc-
+ // Fake register to represent USR.OVF bit. Arithmetic/saturating instruc-
// tions modify this bit, and multiple such instructions are allowed in the
// same packet. We need to ignore output dependencies on this bit, but not
// on the entire USR.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
index 55f31c628854..d80e0ed50c93 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -159,7 +159,7 @@ bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const {
}
bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
- if (MI->mayLoad() || MI->mayStore())
+ if (MI->mayLoadOrStore())
if (MemRefsFixed || isVolatileInstr(MI))
return true;
if (MI->isDebugInstr())
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
index 27fefa5f5e2b..aab37393ed36 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -20,8 +20,6 @@
// per packet, it also means fewer packets, and ultimately fewer cycles.
//===---------------------------------------------------------------------===//
-#define DEBUG_TYPE "hexagon-widen-stores"
-
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
@@ -37,6 +35,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -49,6 +48,8 @@
#include <iterator>
#include <vector>
+#define DEBUG_TYPE "hexagon-widen-stores"
+
using namespace llvm;
namespace llvm {
@@ -270,7 +271,7 @@ void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore,
if (MI->isCall() || MI->hasUnmodeledSideEffects())
return;
- if (MI->mayLoad() || MI->mayStore()) {
+ if (MI->mayLoadOrStore()) {
if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI))
return;
Other.push_back(MI);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index d709a82be660..9e9ce209a825 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -180,7 +180,7 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-extern "C" void LLVMInitializeHexagonTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() {
// Register the target.
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index ddbc5543348d..4d4627cd2071 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -152,7 +152,9 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
}
unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
+ MaybeAlign Alignment,
+ unsigned AddressSpace,
+ const Instruction *I) {
assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
if (Opcode == Instruction::Store)
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
@@ -166,24 +168,30 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// Cost of HVX loads.
if (VecWidth % RegWidth == 0)
return VecWidth / RegWidth;
- // Cost of constructing HVX vector from scalar loads.
- Alignment = std::min(Alignment, RegWidth / 8);
- unsigned AlignWidth = 8 * std::max(1u, Alignment);
+ // Cost of constructing HVX vector from scalar loads.
+ const Align RegAlign(RegWidth / 8);
+ if (!Alignment || *Alignment > RegAlign)
+ Alignment = RegAlign;
+ assert(Alignment);
+ unsigned AlignWidth = 8 * Alignment->value();
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
return 3 * NumLoads;
}
// Non-HVX vectors.
// Add extra cost for floating point types.
- unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
- : 1;
- Alignment = std::min(Alignment, 8u);
- unsigned AlignWidth = 8 * std::max(1u, Alignment);
+ unsigned Cost =
+ VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;
+
+ // At this point unspecified alignment is treated as Align::None().
+ const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8));
+ unsigned AlignWidth = 8 * BoundAlignment.value();
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
- if (Alignment == 4 || Alignment == 8)
+ if (Alignment == Align(4) || Alignment == Align(8))
return Cost * NumLoads;
// Loads of less than 32 bits will need extra inserts to compose a vector.
- unsigned LogA = Log2_32(Alignment);
+ assert(BoundAlignment <= Align(8));
+ unsigned LogA = Log2(BoundAlignment);
return (3 - LogA) * Cost * NumLoads;
}
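
Worked through the new non-HVX branch: a 128-bit vector load with 2-byte alignment gives AlignWidth = 16, NumLoads = alignTo(128, 16) / 16 = 8, LogA = 1, so the cost is (3 - 1) * Cost * 8. A standalone sketch of the arithmetic, with FloatFactor scaling omitted and a hypothetical function name:

#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>

// Sketch: per-load cost model for non-HVX vector loads, following the
// MaybeAlign-based code above.
unsigned vecLoadCost(unsigned VecWidthBits, llvm::MaybeAlign Alignment) {
  const llvm::Align Bound = std::min(Alignment.valueOrOne(), llvm::Align(8));
  unsigned AlignWidth = 8 * Bound.value();
  unsigned NumLoads = llvm::alignTo(VecWidthBits, AlignWidth) / AlignWidth;
  if (Bound == llvm::Align(4) || Bound == llvm::Align(8))
    return NumLoads;                          // natural 32/64-bit loads
  return (3 - llvm::Log2(Bound)) * NumLoads;  // sub-word loads need inserts
}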
@@ -214,7 +222,8 @@ unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace,
UseMaskForCond, UseMaskForGaps);
- return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
+ return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
+ nullptr);
}
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
@@ -227,17 +236,18 @@ unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
-unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
- TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value*> Args) {
+unsigned HexagonTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
if (Ty->isVectorTy()) {
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
if (LT.second.isFloatingPoint())
return LT.first + FloatFactor * getTypeNumElements(Ty);
}
return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo, Args);
+ Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 12ede503af83..ace0d797bbdb 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -112,8 +112,9 @@ public:
unsigned ScalarizationCostPassed = UINT_MAX);
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
const SCEV *S);
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr);
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
+ unsigned AddressSpace,
+ const Instruction *I = nullptr);
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
@@ -126,12 +127,14 @@ public:
bool UseMaskForGaps = false);
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I);
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
- TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
- TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr);
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I = nullptr);
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVExtract.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVExtract.cpp
index 0c0266a6839a..b7d6dbe21c74 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVExtract.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVExtract.cpp
@@ -11,6 +11,7 @@
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/SmallVector.h"
@@ -103,7 +104,10 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
const auto &HRI = *HST->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ Register AR =
+ MF.getInfo<HexagonMachineFunctionInfo>()->getStackAlignBaseVReg();
std::map<unsigned, SmallVector<MachineInstr*,4>> VExtractMap;
+ unsigned MaxAlign = 0;
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
@@ -116,22 +120,41 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
}
}
+ auto EmitAddr = [&] (MachineBasicBlock &BB, MachineBasicBlock::iterator At,
+ DebugLoc dl, int FI, unsigned Offset) {
+ Register AddrR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ unsigned FiOpc = AR != 0 ? Hexagon::PS_fia : Hexagon::PS_fi;
+ auto MIB = BuildMI(BB, At, dl, HII->get(FiOpc), AddrR);
+ if (AR)
+ MIB.addReg(AR);
+ MIB.addFrameIndex(FI).addImm(Offset);
+ return AddrR;
+ };
+
for (auto &P : VExtractMap) {
unsigned VecR = P.first;
if (P.second.size() <= VExtractThreshold)
continue;
const auto &VecRC = *MRI.getRegClass(VecR);
- int FI = MFI.CreateSpillStackObject(HRI.getSpillSize(VecRC),
- HRI.getSpillAlignment(VecRC));
+ unsigned Align = HRI.getSpillAlignment(VecRC);
+ MaxAlign = std::max(MaxAlign, Align);
+ // Make sure this is not a spill slot: spill slots cannot be aligned
+ // if there are variable-sized objects on the stack. They must be
+ // accessible via FP (which is not aligned), because SP is unknown,
+ // and AP may not be available at the location of the load/store.
+ int FI = MFI.CreateStackObject(HRI.getSpillSize(VecRC), Align,
+ /*isSpillSlot*/false);
+
MachineInstr *DefI = MRI.getVRegDef(VecR);
MachineBasicBlock::iterator At = std::next(DefI->getIterator());
MachineBasicBlock &DefB = *DefI->getParent();
unsigned StoreOpc = VecRC.getID() == Hexagon::HvxVRRegClassID
? Hexagon::V6_vS32b_ai
: Hexagon::PS_vstorerw_ai;
+ Register AddrR = EmitAddr(DefB, At, DefI->getDebugLoc(), FI, 0);
BuildMI(DefB, At, DefI->getDebugLoc(), HII->get(StoreOpc))
- .addFrameIndex(FI)
+ .addReg(AddrR)
.addImm(0)
.addReg(VecR);
@@ -144,10 +167,8 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock &ExtB = *ExtI->getParent();
DebugLoc DL = ExtI->getDebugLoc();
- Register BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::PS_fi), BaseR)
- .addFrameIndex(FI)
- .addImm(SR == 0 ? 0 : VecSize/2);
+ Register BaseR = EmitAddr(ExtB, ExtI, ExtI->getDebugLoc(), FI,
+ SR == 0 ? 0 : VecSize/2);
unsigned ElemR = genElemLoad(ExtI, BaseR, MRI);
Register ExtR = ExtI->getOperand(0).getReg();
@@ -157,6 +178,15 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
}
}
+ if (AR) {
+ // Update the required stack alignment.
+ MachineInstr *AlignaI = MRI.getVRegDef(AR);
+ assert(AlignaI->getOpcode() == Hexagon::PS_aligna);
+ MachineOperand &Op = AlignaI->getOperand(1);
+ if (MaxAlign > Op.getImm())
+ Op.setImm(MaxAlign);
+ }
+
return Changed;
}
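
The EmitAddr lambda abstracts "address of slot FI plus Offset": with an aligned base register AR (produced by PS_aligna) it emits PS_fia, otherwise plain PS_fi; and because the slot is a regular stack object rather than a spill slot, FP-relative addressing stays valid in the presence of dynamic allocas. A sketch mirroring the lambda, assuming the Hexagon target headers:

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// Sketch: materialize "&slot(FI) + Offset", using PS_fia when an aligned
// stack base AR exists and PS_fi otherwise.
llvm::Register emitAddr(llvm::MachineBasicBlock &BB,
                        llvm::MachineBasicBlock::iterator At,
                        const llvm::DebugLoc &DL, int FI, unsigned Offset,
                        llvm::Register AR, llvm::MachineRegisterInfo &MRI,
                        const llvm::HexagonInstrInfo &HII) {
  llvm::Register AddrR =
      MRI.createVirtualRegister(&llvm::Hexagon::IntRegsRegClass);
  unsigned Opc = AR ? llvm::Hexagon::PS_fia : llvm::Hexagon::PS_fi;
  auto MIB = llvm::BuildMI(BB, At, DL, HII.get(Opc), AddrR);
  if (AR)
    MIB.addReg(AR);  // aligned stack base comes first
  MIB.addFrameIndex(FI).addImm(Offset);
  return AddrR;
}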
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index fab5edefb553..36d71c41da54 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -111,9 +112,9 @@ namespace {
}
private:
- const HexagonInstrInfo *HII;
- const HexagonRegisterInfo *HRI;
- const bool Minimal;
+ const HexagonInstrInfo *HII = nullptr;
+ const HexagonRegisterInfo *HRI = nullptr;
+ const bool Minimal = false;
};
} // end anonymous namespace
@@ -308,7 +309,7 @@ bool HexagonPacketizerList::isCallDependent(const MachineInstr &MI,
// r0 = ...
// J2_jumpr r0
if (DepType == SDep::Data) {
- for (const MachineOperand MO : MI.operands())
+ for (const MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.getReg() == DepReg && !MO.isImplicit())
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index e5df1d456c1e..42451e02ba36 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -127,9 +127,11 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -235,7 +237,7 @@ namespace {
Instruction *Inst2Replace = nullptr;
// In the new PHI node that we'll construct this is the value that'll be
- // used over the backedge. This is teh value that gets reused from a
+ // used over the backedge. This is the value that gets reused from a
// previous iteration.
Instruction *BackedgeInst = nullptr;
std::map<Instruction *, DepChain *> DepChains;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 75cb398d4097..8f1e5c1c3a97 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -60,7 +60,7 @@ class HexagonAsmBackend : public MCAsmBackend {
public:
HexagonAsmBackend(const Target &T, const Triple &TT, uint8_t OSABI,
StringRef CPU)
- : MCAsmBackend(support::little), OSABI(OSABI), CPU(CPU),
+ : MCAsmBackend(support::little), OSABI(OSABI), CPU(CPU), relaxedCnt(0),
MCII(T.createMCInstrInfo()), RelaxTarget(new MCInst *),
Extender(nullptr) {}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 6b9e63f5ac9e..698dcbd4b8b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -30,8 +30,9 @@ void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
O << getRegisterName(RegNo);
}
-void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void HexagonInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &OS) {
assert(HexagonMCInstrInfo::isBundle(*MI));
assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
assert(HexagonMCInstrInfo::bundleSize(*MI) > 0);
@@ -39,12 +40,12 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) {
MCInst const &MCI = *I.getInst();
if (HexagonMCInstrInfo::isDuplex(MII, MCI)) {
- printInstruction(MCI.getOperand(1).getInst(), OS);
+ printInstruction(MCI.getOperand(1).getInst(), Address, OS);
OS << '\v';
HasExtender = false;
- printInstruction(MCI.getOperand(0).getInst(), OS);
+ printInstruction(MCI.getOperand(0).getInst(), Address, OS);
} else
- printInstruction(&MCI, OS);
+ printInstruction(&MCI, Address, OS);
HasExtender = HexagonMCInstrInfo::isImmext(MCI);
OS << "\n";
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index ca32c3c1f50f..cd96a23e1b94 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -28,13 +28,13 @@ public:
MCRegisterInfo const &MRI)
: MCInstPrinter(MAI, MII, MRI), MII(MII) {}
- void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(MCInst const *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
void printRegName(raw_ostream &O, unsigned RegNo) const override;
static char const *getRegisterName(unsigned RegNo);
- void printInstruction(MCInst const *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index ed571188c1e8..2b0bbdafa381 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -92,7 +92,7 @@ static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtu:
if (IsExtended)
- return false;
+ return HexagonII::HCG_None;
DstReg = MI.getOperand(0).getReg();
Src1Reg = MI.getOperand(1).getReg();
Src2Reg = MI.getOperand(2).getReg();
@@ -105,7 +105,7 @@ static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
case Hexagon::C2_cmpgti:
case Hexagon::C2_cmpgtui:
if (IsExtended)
- return false;
+ return HexagonII::HCG_None;
// P0 = cmp.eq(Rs,#u2)
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
@@ -117,7 +117,7 @@ static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
break;
case Hexagon::A2_tfr:
if (IsExtended)
- return false;
+ return HexagonII::HCG_None;
// Rd = Rs
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
@@ -127,7 +127,7 @@ static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
break;
case Hexagon::A2_tfrsi:
if (IsExtended)
- return false;
+ return HexagonII::HCG_None;
// Rd = #u6
DstReg = MI.getOperand(0).getReg();
if (HexagonMCInstrInfo::minConstant(MI, 1) <= 63 &&
@@ -137,7 +137,7 @@ static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
break;
case Hexagon::S2_tstbit_i:
if (IsExtended)
- return false;
+ return HexagonII::HCG_None;
DstReg = MI.getOperand(0).getReg();
Src1Reg = MI.getOperand(1).getReg();
if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 870ab9e94a63..f8dc0547baad 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -137,14 +137,15 @@ public:
MCInstPrinter &IP)
: HexagonTargetStreamer(S) {}
- void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
- const MCInst &Inst, const MCSubtargetInfo &STI) override {
+ void prettyPrintAsm(MCInstPrinter &InstPrinter, uint64_t Address,
+ const MCInst &Inst, const MCSubtargetInfo &STI,
+ raw_ostream &OS) override {
assert(HexagonMCInstrInfo::isBundle(Inst));
assert(HexagonMCInstrInfo::bundleSize(Inst) <= HEXAGON_PACKET_SIZE);
std::string Buffer;
{
raw_string_ostream TempStream(Buffer);
- InstPrinter.printInst(&Inst, TempStream, "", STI);
+ InstPrinter.printInst(&Inst, Address, "", STI, TempStream);
}
StringRef Contents(Buffer);
auto PacketBundle = Contents.rsplit('\n');
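
For reference, the scoped raw_string_ostream above is the usual LLVM idiom for capturing printer output into a std::string before post-processing it; a self-contained sketch:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/raw_ostream.h"
  #include <string>

  int main() {
    std::string Buffer;
    {
      llvm::raw_string_ostream TempStream(Buffer);
      TempStream << "packet header\n\tinstruction";
    } // the stream flushes into Buffer when it leaves scope
    llvm::StringRef Contents(Buffer);
    // Split off the last line, as prettyPrintAsm does with the bundle.
    auto PacketBundle = Contents.rsplit('\n');
    llvm::outs() << PacketBundle.second << "\n"; // prints "\tinstruction"
  }
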
@@ -219,7 +220,8 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
}
static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TT) {
+ const Triple &TT,
+ const MCTargetOptions &Options) {
MCAsmInfo *MAI = new HexagonMCAsmInfo(TT);
// VirtualFP = (R30 + #0).
@@ -454,7 +456,7 @@ static MCInstrAnalysis *createHexagonMCInstrAnalysis(const MCInstrInfo *Info) {
}
// Force static initialization.
-extern "C" void LLVMInitializeHexagonTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(getTheHexagonTarget(), createHexagonMCAsmInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFLiveness.cpp
index 7d7b89462ff9..e2c007c9d01a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFLiveness.cpp
@@ -620,7 +620,7 @@ void Liveness::computePhiInfo() {
for (NodeAddr<UseNode*> UA : PUs) {
std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id];
RegisterRef UR = PRI.normalize(UA.Addr->getRegRef(DFG));
- for (const std::pair<NodeId,RegisterAggr> &P : PUM) {
+ for (const std::pair<const NodeId, RegisterAggr> &P : PUM) {
bool Changed = false;
const RegisterAggr &MidDefs = P.second;
@@ -636,7 +636,7 @@ void Liveness::computePhiInfo() {
// if MidDefs does not cover (R,U)
// then add (R-MidDefs,U) to RealUseMap[P]
//
- for (const std::pair<RegisterId,NodeRefSet> &T : RUM) {
+ for (const std::pair<const RegisterId, NodeRefSet> &T : RUM) {
RegisterRef R(T.first);
// The current phi (PA) could be a phi for a regmask. It could
// reach a whole variety of uses that are not related to the
@@ -768,7 +768,7 @@ void Liveness::computeLiveIns() {
auto PrA = DFG.addr<BlockNode*>(PUA.Addr->getPredecessor());
RefMap &LOX = PhiLOX[PrA.Addr->getCode()];
- for (const std::pair<RegisterId,NodeRefSet> &RS : RUs) {
+ for (const std::pair<const RegisterId, NodeRefSet> &RS : RUs) {
// We need to visit each individual use.
for (std::pair<NodeId,LaneBitmask> P : RS.second) {
// Create a register ref corresponding to the use, and find
@@ -991,7 +991,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
RefMap LiveInCopy = LiveIn;
LiveIn.clear();
- for (const std::pair<RegisterId,NodeRefSet> &LE : LiveInCopy) {
+ for (const std::pair<const RegisterId, NodeRefSet> &LE : LiveInCopy) {
RegisterRef LRef(LE.first);
NodeRefSet &NewDefs = LiveIn[LRef.Reg]; // To be filled.
const NodeRefSet &OldDefs = LE.second;
@@ -1105,7 +1105,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
for (auto C : IIDF[B]) {
RegisterAggr &LiveC = LiveMap[C];
- for (const std::pair<RegisterId,NodeRefSet> &S : LiveIn)
+ for (const std::pair<const RegisterId, NodeRefSet> &S : LiveIn)
for (auto R : S.second)
if (MDT.properlyDominates(getBlockWithRef(R.first), C))
LiveC.insert(RegisterRef(S.first, R.second));
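
The added `const` on the pair keys above is not cosmetic: for a std::map<K,V>, value_type is std::pair<const K, V>, so binding a range-for reference to std::pair<K,V> forces a converting copy of every element. A minimal sketch of the difference:

  #include <map>

  int main() {
    std::map<int, long> M{{1, 10}, {2, 20}};
    // Binds directly to the elements: no copies.
    for (const std::pair<const int, long> &P : M)
      (void)P;
    // Also compiles, but each iteration constructs a temporary
    // std::pair<int, long> from the element and binds the reference to it.
    for (const std::pair<int, long> &P : M)
      (void)P;
  }
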
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index d77b235d0077..48770be3e301 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -15,7 +15,7 @@ Target &llvm::getTheHexagonTarget() {
return TheHexagonTarget;
}
-extern "C" void LLVMInitializeHexagonTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTargetInfo() {
RegisterTarget<Triple::hexagon, /*HasJIT=*/true> X(
getTheHexagonTarget(), "hexagon", "Hexagon", "Hexagon");
}
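
The LLVM_EXTERNAL_VISIBILITY attribute added to the LLVMInitialize* entry points throughout this merge keeps them exported when LLVM is built as a shared library with hidden symbol visibility. A rough reconstruction of the macro's shape -- an assumption for illustration, not a quote of llvm/Support/Compiler.h, where the real definition is gated on the build configuration:

  // Assumed shape of the macro, for illustration only.
  #if defined(__GNUC__)
  #define LLVM_EXTERNAL_VISIBILITY __attribute__((visibility("default")))
  #else
  #define LLVM_EXTERNAL_VISIBILITY
  #endif

  extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeExampleTargetInfo() {}

  int main() { LLVMInitializeExampleTargetInfo(); }
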
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index ec82e3a41f2a..8b8504978c75 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -1224,6 +1224,6 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/,
#define GET_MATCHER_IMPLEMENTATION
#include "LanaiGenAsmMatcher.inc"
-extern "C" void LLVMInitializeLanaiAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLanaiAsmParser() {
RegisterMCAsmParser<LanaiAsmParser> x(getTheLanaiTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
index 25ae7c521706..b6f372657d59 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
@@ -36,7 +36,7 @@ static MCDisassembler *createLanaiDisassembler(const Target & /*T*/,
return new LanaiDisassembler(STI, Ctx);
}
-extern "C" void LLVMInitializeLanaiDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLanaiDisassembler() {
// Register the disassembler
TargetRegistry::RegisterMCDisassembler(getTheLanaiTarget(),
createLanaiDisassembler);
@@ -128,9 +128,10 @@ static void PostOperandDecodeAdjust(MCInst &Instr, uint32_t Insn) {
}
}
-DecodeStatus LanaiDisassembler::getInstruction(
- MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream & /*VStream*/, raw_ostream & /*CStream*/) const {
+DecodeStatus
+LanaiDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream & /*CStream*/) const {
uint32_t Insn;
DecodeStatus Result = readInstruction32(Bytes, Size, Insn);
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h b/contrib/llvm-project/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
index ae821df303d8..6ca9b1df86c6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
@@ -28,8 +28,7 @@ public:
// getInstruction - See MCDisassembler.
MCDisassembler::DecodeStatus
getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes,
- uint64_t Address, raw_ostream &VStream,
- raw_ostream &CStream) const override;
+ uint64_t Address, raw_ostream &CStream) const override;
};
} // end namespace llvm
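
With VStream gone from the interface, a caller of the disassembler now passes only the comment stream; a hedged caller sketch (Disasm and Bytes are assumed to come from the surrounding tool):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/MC/MCDisassembler/MCDisassembler.h"
  #include "llvm/MC/MCInst.h"
  #include "llvm/Support/raw_ostream.h"
  #include <cstdint>

  llvm::MCDisassembler::DecodeStatus
  decodeOne(const llvm::MCDisassembler &Disasm, llvm::ArrayRef<uint8_t> Bytes,
            uint64_t Address, llvm::MCInst &Inst, uint64_t &Size) {
    // Only CStream remains; llvm::nulls() discards any comment text.
    return Disasm.getInstruction(Inst, Size, Bytes, Address, llvm::nulls());
  }
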
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
index 12a3202446a8..c13ee08e1213 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
@@ -237,6 +237,6 @@ bool LanaiAsmPrinter::isBlockOnlyReachableByFallthrough(
}
// Force static initialization.
-extern "C" void LLVMInitializeLanaiAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLanaiAsmPrinter() {
RegisterAsmPrinter<LanaiAsmPrinter> X(getTheLanaiTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index 43933d062a7e..6fa0c93d4a05 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -213,7 +213,7 @@ SDValue LanaiTargetLowering::LowerOperation(SDValue Op,
//===----------------------------------------------------------------------===//
Register LanaiTargetLowering::getRegisterByName(
- const char *RegName, EVT /*VT*/,
+ const char *RegName, LLT /*VT*/,
const MachineFunction & /*MF*/) const {
// Only unallocatable registers should be matched here.
Register Reg = StringSwitch<unsigned>(RegName)
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.h
index 4c35a2c6fb8e..d29d69eaadb0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.h
@@ -90,7 +90,7 @@ public:
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- Register getRegisterByName(const char *RegName, EVT VT,
+ Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index b950fd0424ef..4ce72f9621ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -34,8 +34,8 @@ LanaiInstrInfo::LanaiInstrInfo()
void LanaiInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Position,
const DebugLoc &DL,
- unsigned DestinationRegister,
- unsigned SourceRegister,
+ MCRegister DestinationRegister,
+ MCRegister SourceRegister,
bool KillSource) const {
if (!Lanai::GPRRegClass.contains(DestinationRegister, SourceRegister)) {
llvm_unreachable("Impossible reg-to-reg copy");
@@ -788,8 +788,10 @@ bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
BaseOp = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm();
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+
+ if (!BaseOp->isReg())
+ return false;
+
return true;
}
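
The pattern above -- demoting a precondition assert to an early `return false` -- turns an unsupported memory-operand shape from a crash into a probe failure the caller can skip. The general shape, as a self-contained sketch:

  #include <cassert>

  struct Operand { bool IsReg; };

  // Old behavior: aborts on a non-register base in asserts builds.
  bool getBaseAsserting(const Operand &Op) {
    assert(Op.IsReg && "only register bases supported");
    return true;
  }

  // New behavior: the caller decides what to do with unsupported shapes.
  bool getBaseChecked(const Operand &Op) {
    if (!Op.IsReg)
      return false;
    return true;
  }

  int main() { return getBaseChecked({false}) ? 1 : 0; }
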
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index 59a04d2cc388..c7741dd7437f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -48,8 +48,8 @@ public:
int &FrameIndex) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator Position,
- const DebugLoc &DL, unsigned DestinationRegister,
- unsigned SourceRegister, bool KillSource) const override;
+ const DebugLoc &DL, MCRegister DestinationRegister,
+ MCRegister SourceRegister, bool KillSource) const override;
void
storeRegToStackSlot(MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
index 8ae0225629ab..69387119f1f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -30,7 +30,7 @@ namespace llvm {
void initializeLanaiMemAluCombinerPass(PassRegistry &);
} // namespace llvm
-extern "C" void LLVMInitializeLanaiTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLanaiTarget() {
// Register the target.
RegisterTargetMachine<LanaiTargetMachine> registered_target(
getTheLanaiTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
index 63cc47dedce3..a22d3a34f98c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
@@ -66,12 +66,12 @@ public:
return 4 * TTI::TCC_Basic;
}
- int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) {
+  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
+                        Type *Ty) {
return getIntImmCost(Imm, Ty);
}
- int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
return getIntImmCost(Imm, Ty);
}
@@ -81,7 +81,8 @@ public:
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
switch (ISD) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
index 0d42612824b4..ccc413995917 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
@@ -137,11 +137,12 @@ bool LanaiInstPrinter::printAlias(const MCInst *MI, raw_ostream &OS) {
}
}
-void LanaiInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+void LanaiInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annotation,
- const MCSubtargetInfo & /*STI*/) {
+ const MCSubtargetInfo & /*STI*/,
+ raw_ostream &OS) {
if (!printAlias(MI, OS) && !printAliasInstr(MI, OS))
- printInstruction(MI, OS);
+ printInstruction(MI, Address, OS);
printAnnotation(OS, Annotation);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
index 721a129a859e..a71a9497c691 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
@@ -24,8 +24,8 @@ public:
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = nullptr);
void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -43,7 +43,7 @@ public:
void printMemImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
unsigned PrintMethodIdx, raw_ostream &O);
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
index 14d3dac26d1f..f1c174897047 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
@@ -18,7 +18,8 @@ using namespace llvm;
void LanaiMCAsmInfo::anchor() {}
-LanaiMCAsmInfo::LanaiMCAsmInfo(const Triple & /*TheTriple*/) {
+LanaiMCAsmInfo::LanaiMCAsmInfo(const Triple & /*TheTriple*/,
+ const MCTargetOptions &Options) {
IsLittleEndian = false;
PrivateGlobalPrefix = ".L";
WeakRefDirective = "\t.weak\t";
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h
index 265af425d037..f0352d021291 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h
@@ -22,7 +22,8 @@ class LanaiMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
public:
- explicit LanaiMCAsmInfo(const Triple &TheTriple);
+ explicit LanaiMCAsmInfo(const Triple &TheTriple,
+ const MCTargetOptions &Options);
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index a9de0416fcac..9de15bf61c8c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -123,7 +123,7 @@ static MCInstrAnalysis *createLanaiInstrAnalysis(const MCInstrInfo *Info) {
return new LanaiMCInstrAnalysis(Info);
}
-extern "C" void LLVMInitializeLanaiTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLanaiTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfo<LanaiMCAsmInfo> X(getTheLanaiTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
index 93deb891dec5..2bb9f6ed1e97 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
@@ -16,7 +16,7 @@ Target &llvm::getTheLanaiTarget() {
return TheLanaiTarget;
}
-extern "C" void LLVMInitializeLanaiTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLanaiTargetInfo() {
RegisterTarget<Triple::lanai> X(getTheLanaiTarget(), "lanai", "Lanai",
"Lanai");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index 85dcc0f152f9..0995e80a0a09 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -529,7 +529,7 @@ bool MSP430AsmParser::ParseLiteralValues(unsigned Size, SMLoc L) {
return (parseMany(parseOne));
}
-extern "C" void LLVMInitializeMSP430AsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430AsmParser() {
RegisterMCAsmParser<MSP430AsmParser> X(getTheMSP430Target());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
index 59c12e24e8bf..6aa76156bf14 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
@@ -32,17 +32,14 @@ namespace {
class MSP430Disassembler : public MCDisassembler {
DecodeStatus getInstructionI(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const;
DecodeStatus getInstructionII(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const;
DecodeStatus getInstructionCJ(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const;
public:
@@ -51,7 +48,6 @@ public:
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
};
} // end anonymous namespace
@@ -62,7 +58,7 @@ static MCDisassembler *createMSP430Disassembler(const Target &T,
return new MSP430Disassembler(STI, Ctx);
}
-extern "C" void LLVMInitializeMSP430Disassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430Disassembler() {
TargetRegistry::RegisterMCDisassembler(getTheMSP430Target(),
createMSP430Disassembler);
}
@@ -233,7 +229,6 @@ static const uint8_t *getDecoderTable(AddrMode SrcAM, unsigned Words) {
DecodeStatus MSP430Disassembler::getInstructionI(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const {
uint64_t Insn = support::endian::read16le(Bytes.data());
AddrMode SrcAM = DecodeSrcAddrModeI(Insn);
@@ -289,7 +284,6 @@ DecodeStatus MSP430Disassembler::getInstructionI(MCInst &MI, uint64_t &Size,
DecodeStatus MSP430Disassembler::getInstructionII(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const {
uint64_t Insn = support::endian::read16le(Bytes.data());
AddrMode SrcAM = DecodeSrcAddrModeII(Insn);
@@ -345,7 +339,6 @@ static MSP430CC::CondCodes getCondCode(unsigned Cond) {
DecodeStatus MSP430Disassembler::getInstructionCJ(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const {
uint64_t Insn = support::endian::read16le(Bytes.data());
unsigned Cond = fieldFromInstruction(Insn, 10, 3);
@@ -367,7 +360,6 @@ DecodeStatus MSP430Disassembler::getInstructionCJ(MCInst &MI, uint64_t &Size,
DecodeStatus MSP430Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const {
if (Bytes.size() < 2) {
Size = 0;
@@ -378,10 +370,10 @@ DecodeStatus MSP430Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
unsigned Opc = fieldFromInstruction(Insn, 13, 3);
switch (Opc) {
case 0:
- return getInstructionII(MI, Size, Bytes, Address, VStream, CStream);
+ return getInstructionII(MI, Size, Bytes, Address, CStream);
case 1:
- return getInstructionCJ(MI, Size, Bytes, Address, VStream, CStream);
+ return getInstructionCJ(MI, Size, Bytes, Address, CStream);
default:
- return getInstructionI(MI, Size, Bytes, Address, VStream, CStream);
+ return getInstructionI(MI, Size, Bytes, Address, CStream);
}
}
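
The dispatch at the end of getInstruction keys on three opcode bits: fieldFromInstruction(Insn, 13, 3) extracts bits [15:13] of the first word, with 0 selecting format II, 1 the conditional-jump format, and anything else format I. A small self-checking model (the encoding below is illustrative):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint16_t Insn = 0x3C00;            // a jump-format word: top bits 001
    unsigned Opc = (Insn >> 13) & 0x7; // fieldFromInstruction(Insn, 13, 3)
    assert(Opc == 1);                  // would be routed to getInstructionCJ
  }
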
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
index 2f3c6ed3c17e..0c6da5a35c68 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
@@ -26,10 +26,11 @@ using namespace llvm;
#define PRINT_ALIAS_INSTR
#include "MSP430GenAsmWriter.inc"
-void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void MSP430InstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
if (!printAliasInstr(MI, O))
- printInstruction(MI, O);
+ printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
index 25451033236e..200dc0e6db60 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
@@ -22,11 +22,11 @@ namespace llvm {
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
bool printAliasInstr(const MCInst *MI, raw_ostream &O);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
unsigned PrintMethodIdx, raw_ostream &O);
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index db5a49dd22a7..cfdc44ada771 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -15,7 +15,8 @@ using namespace llvm;
void MSP430MCAsmInfo::anchor() { }
-MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) {
+MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT,
+ const MCTargetOptions &Options) {
CodePointerSize = CalleeSaveStackSlotSize = 2;
CommentString = ";";
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index 93979df037e6..c4ff4a9eefb1 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -22,7 +22,7 @@ class MSP430MCAsmInfo : public MCAsmInfoELF {
void anchor() override;
public:
- explicit MSP430MCAsmInfo(const Triple &TT);
+ explicit MSP430MCAsmInfo(const Triple &TT, const MCTargetOptions &Options);
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index da928733015f..f207d24ce04b 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -57,7 +57,7 @@ static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T,
return nullptr;
}
-extern "C" void LLVMInitializeMSP430TargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430TargetMC() {
Target &T = getTheMSP430Target();
RegisterMCAsmInfo<MSP430MCAsmInfo> X(T);
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index a3b91acdc6d0..2f871b959a71 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -185,6 +185,6 @@ bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
// Force static initialization.
-extern "C" void LLVMInitializeMSP430AsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430AsmPrinter() {
RegisterAsmPrinter<MSP430AsmPrinter> X(getTheMSP430Target());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 23449585505e..8550230155c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -37,25 +37,22 @@ namespace {
enum {
RegBase,
FrameIndexBase
- } BaseType;
+ } BaseType = RegBase;
struct { // This is really a union, discriminated by BaseType!
SDValue Reg;
- int FrameIndex;
+ int FrameIndex = 0;
} Base;
- int16_t Disp;
- const GlobalValue *GV;
- const Constant *CP;
- const BlockAddress *BlockAddr;
- const char *ES;
- int JT;
- unsigned Align; // CP alignment.
-
- MSP430ISelAddressMode()
- : BaseType(RegBase), Disp(0), GV(nullptr), CP(nullptr),
- BlockAddr(nullptr), ES(nullptr), JT(-1), Align(0) {
- }
+ int16_t Disp = 0;
+ const GlobalValue *GV = nullptr;
+ const Constant *CP = nullptr;
+ const BlockAddress *BlockAddr = nullptr;
+ const char *ES = nullptr;
+ int JT = -1;
+ unsigned Align = 0; // CP alignment.
+
+ MSP430ISelAddressMode() = default;
bool hasSymbolicDisplacement() const {
return GV != nullptr || CP != nullptr || ES != nullptr || JT != -1;
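
The rewrite of MSP430ISelAddressMode above swaps a handwritten constructor for in-class default member initializers and a defaulted constructor; the defaults then live next to the declarations and cannot be forgotten when a field is added. The pattern in miniature:

  #include <cstdint>

  // Before: every default repeated in the init list.
  struct AddressModeOld {
    int16_t Disp;
    int JT;
    AddressModeOld() : Disp(0), JT(-1) {}
  };

  // After: the compiler-generated constructor picks up the defaults.
  struct AddressModeNew {
    int16_t Disp = 0;
    int JT = -1;
    AddressModeNew() = default;
  };

  int main() { return AddressModeNew().Disp; }
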
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index 64169d1f5eb1..37e6ea24d088 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -37,6 +37,11 @@ using namespace llvm;
#define DEBUG_TYPE "msp430-lower"
+static cl::opt<bool> MSP430NoLegalImmediate(
+    "msp430-no-legal-immediate", cl::Hidden,
+    cl::desc("Enable non-legal immediates (for testing purposes only)"),
+    cl::init(false));
+
MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
const MSP430Subtarget &STI)
: TargetLowering(TM) {
@@ -353,9 +358,20 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
}
}
-unsigned MSP430TargetLowering::getShiftAmountThreshold(EVT VT) const {
- return 2;
+// Define which transforms into shifts are not profitable.
+bool MSP430TargetLowering::shouldAvoidTransformToShift(EVT VT,
+                                                       unsigned Amount) const {
+  return !(Amount == 8 || Amount == 9 || Amount <= 2);
+}
+
+// Implemented to verify test case assertions in
+// tests/codegen/msp430/shift-amount-threshold-b.ll
+bool MSP430TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
+ if (MSP430NoLegalImmediate)
+ return Immed >= -32 && Immed < 32;
+ return TargetLowering::isLegalICmpImmediate(Immed);
}
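
Taken together, the first hook treats only shift amounts of 0-2, 8, and 9 as worth transforming into on MSP430 -- presumably the cases with compact lowerings -- while the second restricts compare immediates to [-32, 32) under the hidden testing flag. A self-checking mirror of the shift rule:

  #include <cassert>

  // Mirror of MSP430TargetLowering::shouldAvoidTransformToShift above.
  static bool avoidShift(unsigned Amount) {
    return !(Amount == 8 || Amount == 9 || Amount <= 2);
  }

  int main() {
    for (unsigned A : {0u, 1u, 2u, 8u, 9u})
      assert(!avoidShift(A)); // cheap enough to emit as shifts
    for (unsigned A : {3u, 4u, 7u, 10u, 15u})
      assert(avoidShift(A)); // would expand into a costly sequence
  }
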
+
//===----------------------------------------------------------------------===//
// MSP430 Inline Assembly Support
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.h
index 9224e5e3d005..650f9a704062 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -124,7 +124,8 @@ namespace llvm {
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
- unsigned getShiftAmountThreshold(EVT VT) const override;
+ bool isLegalICmpImmediate(int64_t) const override;
+ bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index 5c3a3fc69266..9e03334d6b62 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -89,8 +89,8 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
unsigned Opc;
if (MSP430::GR16RegClass.contains(DestReg, SrcReg))
Opc = MSP430::MOV16rr;
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.h
index 13c50ad23adc..e3838772c061 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -36,7 +36,7 @@ public:
const TargetRegisterInfo &getRegisterInfo() const { return RI; }
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 2b2c8967a749..712519cfe38a 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -24,21 +24,21 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
/// CalleeSavedFrameSize - Size of the callee-saved register portion of the
/// stack frame in bytes.
- unsigned CalleeSavedFrameSize;
+ unsigned CalleeSavedFrameSize = 0;
/// ReturnAddrIndex - FrameIndex for return slot.
- int ReturnAddrIndex;
+ int ReturnAddrIndex = 0;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
+ int VarArgsFrameIndex = 0;
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
/// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg;
+ unsigned SRetReturnReg = 0;
public:
- MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
+ MSP430MachineFunctionInfo() = default;
explicit MSP430MachineFunctionInfo(MachineFunction &MF)
: CalleeSavedFrameSize(0), ReturnAddrIndex(0), SRetReturnReg(0) {}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.h
index ab2b71e3bb1a..2348d984d7e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.h
@@ -36,8 +36,8 @@ public:
private:
virtual void anchor();
- bool ExtendedInsts;
- HWMultEnum HWMultMode;
+ bool ExtendedInsts = false;
+ HWMultEnum HWMultMode = NoHWMult;
MSP430FrameLowering FrameLowering;
MSP430InstrInfo InstrInfo;
MSP430TargetLowering TLInfo;
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index e9aeba76de85..81851427c0ed 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -21,7 +21,7 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-extern "C" void LLVMInitializeMSP430Target() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430Target() {
// Register the target.
RegisterTargetMachine<MSP430TargetMachine> X(getTheMSP430Target());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
index 5da7d588079f..9d4a8f141cc4 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -15,7 +15,7 @@ Target &llvm::getTheMSP430Target() {
return TheMSP430Target;
}
-extern "C" void LLVMInitializeMSP430TargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430TargetInfo() {
RegisterTarget<Triple::msp430> X(getTheMSP430Target(), "msp430",
"MSP430 [experimental]", "MSP430");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 4e701e0d98b4..e467ed36938b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -134,6 +134,8 @@ namespace {
class MipsAsmParser : public MCTargetAsmParser {
MipsTargetStreamer &getTargetStreamer() {
+ assert(getParser().getStreamer().getTargetStreamer() &&
+ "do not have a target streamer");
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<MipsTargetStreamer &>(TS);
}
@@ -252,8 +254,10 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
- void expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
- const MCSubtargetInfo *STI, bool IsLoad);
+ void expandMem16Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI, bool IsLoad);
+ void expandMem9Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI, bool IsLoad);
bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
@@ -339,7 +343,6 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
- bool isEvaluated(const MCExpr *Expr);
bool parseSetMips0Directive();
bool parseSetArchDirective();
bool parseSetFeature(uint64_t Feature);
@@ -562,6 +565,19 @@ public:
return getSTI().getFeatureBits()[Mips::FeatureFP64Bit];
}
+ bool isJalrRelocAvailable(const MCExpr *JalExpr) {
+ if (!EmitJalrReloc)
+ return false;
+ MCValue Res;
+ if (!JalExpr->evaluateAsRelocatable(Res, nullptr, nullptr))
+ return false;
+ if (Res.getSymB() != nullptr)
+ return false;
+ if (Res.getConstant() != 0)
+ return ABI.IsN32() || ABI.IsN64();
+ return true;
+ }
+
const MipsABIInfo &getABI() const { return ABI; }
bool isABI_N32() const { return ABI.IsN32(); }
bool isABI_N64() const { return ABI.IsN64(); }
@@ -1303,8 +1319,6 @@ public:
}
// Allow relocation operators.
- // FIXME: This predicate and others need to look through binary expressions
- // and determine whether a Value is a constant or not.
template <unsigned Bits, unsigned ShiftAmount = 0>
bool isMemWithSimmOffset() const {
if (!isMem())
@@ -1785,17 +1799,71 @@ static unsigned countMCSymbolRefExpr(const MCExpr *Expr) {
return 0;
}
+static bool isEvaluated(const MCExpr *Expr) {
+ switch (Expr->getKind()) {
+ case MCExpr::Constant:
+ return true;
+ case MCExpr::SymbolRef:
+ return (cast<MCSymbolRefExpr>(Expr)->getKind() != MCSymbolRefExpr::VK_None);
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+ if (!isEvaluated(BE->getLHS()))
+ return false;
+ return isEvaluated(BE->getRHS());
+ }
+ case MCExpr::Unary:
+ return isEvaluated(cast<MCUnaryExpr>(Expr)->getSubExpr());
+ case MCExpr::Target:
+ return true;
+ }
+ return false;
+}
+
+static bool needsExpandMemInst(MCInst &Inst) {
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+
+ unsigned NumOp = MCID.getNumOperands();
+ if (NumOp != 3 && NumOp != 4)
+ return false;
+
+ const MCOperandInfo &OpInfo = MCID.OpInfo[NumOp - 1];
+ if (OpInfo.OperandType != MCOI::OPERAND_MEMORY &&
+ OpInfo.OperandType != MCOI::OPERAND_UNKNOWN &&
+ OpInfo.OperandType != MipsII::OPERAND_MEM_SIMM9)
+ return false;
+
+ MCOperand &Op = Inst.getOperand(NumOp - 1);
+ if (Op.isImm()) {
+ if (OpInfo.OperandType == MipsII::OPERAND_MEM_SIMM9)
+ return !isInt<9>(Op.getImm());
+    // Offset can't exceed a 16-bit value.
+ return !isInt<16>(Op.getImm());
+ }
+
+ if (Op.isExpr()) {
+ const MCExpr *Expr = Op.getExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ return !isEvaluated(Expr);
+
+ // Expand symbol.
+ const MCSymbolRefExpr *SR = static_cast<const MCSymbolRefExpr *>(Expr);
+ return SR->getKind() == MCSymbolRefExpr::VK_None;
+ }
+
+ return false;
+}
+
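
The isInt<N> predicates above (from llvm/Support/MathExtras.h) test the signed N-bit range, which is exactly what decides whether needsExpandMemInst fires. A self-checking sketch of the two boundaries:

  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  int main() {
    // SIMM9 memory operands cover [-256, 255].
    assert(llvm::isInt<9>(255) && !llvm::isInt<9>(256));
    // Plain 16-bit offsets cover [-32768, 32767].
    assert(llvm::isInt<16>(32767) && !llvm::isInt<16>(32768));
    // Offsets outside these ranges force the mem9/mem16 expansions.
  }
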
bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out,
const MCSubtargetInfo *STI) {
MipsTargetStreamer &TOut = getTargetStreamer();
- const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ const unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &MCID = getInstDesc(Opcode);
bool ExpandedJalSym = false;
Inst.setLoc(IDLoc);
if (MCID.isBranch() || MCID.isCall()) {
- const unsigned Opcode = Inst.getOpcode();
MCOperand Offset;
switch (Opcode) {
@@ -1909,14 +1977,13 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// SSNOP is deprecated on MIPS32r6/MIPS64r6
// We still accept it but it is a normal nop.
- if (hasMips32r6() && Inst.getOpcode() == Mips::SSNOP) {
+ if (hasMips32r6() && Opcode == Mips::SSNOP) {
std::string ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";
Warning(IDLoc, "ssnop is deprecated for " + ISA + " and is equivalent to a "
"nop instruction");
}
if (hasCnMips()) {
- const unsigned Opcode = Inst.getOpcode();
MCOperand Opnd;
int Imm;
@@ -1966,7 +2033,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// not in the operands.
unsigned FirstOp = 1;
unsigned SecondOp = 2;
- switch (Inst.getOpcode()) {
+ switch (Opcode) {
default:
break;
case Mips::SDivIMacro:
@@ -2012,8 +2079,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
// For PIC code convert unconditional jump to unconditional branch.
- if ((Inst.getOpcode() == Mips::J || Inst.getOpcode() == Mips::J_MM) &&
- inPicMode()) {
+ if ((Opcode == Mips::J || Opcode == Mips::J_MM) && inPicMode()) {
MCInst BInst;
BInst.setOpcode(inMicroMipsMode() ? Mips::BEQ_MM : Mips::BEQ);
BInst.addOperand(MCOperand::createReg(Mips::ZERO));
@@ -2024,8 +2090,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// This expansion is not in a function called by tryExpandInstruction()
// because the pseudo-instruction doesn't have a distinct opcode.
- if ((Inst.getOpcode() == Mips::JAL || Inst.getOpcode() == Mips::JAL_MM) &&
- inPicMode()) {
+ if ((Opcode == Mips::JAL || Opcode == Mips::JAL_MM) && inPicMode()) {
warnIfNoMacro(IDLoc);
const MCExpr *JalExpr = Inst.getOperand(0).getExpr();
@@ -2039,62 +2104,19 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// of the assembler. We ought to leave it to those later stages.
const MCSymbol *JalSym = getSingleMCSymbol(JalExpr);
- // FIXME: Add support for label+offset operands (currently causes an error).
- // FIXME: Add support for forward-declared local symbols.
- // FIXME: Add expansion for when the LargeGOT option is enabled.
- if (JalSym->isInSection() || JalSym->isTemporary() ||
- (JalSym->isELF() &&
- cast<MCSymbolELF>(JalSym)->getBinding() == ELF::STB_LOCAL)) {
- if (isABI_O32()) {
- // If it's a local symbol and the O32 ABI is being used, we expand to:
- // lw $25, 0($gp)
- // R_(MICRO)MIPS_GOT16 label
- // addiu $25, $25, 0
- // R_(MICRO)MIPS_LO16 label
- // jalr $25
- const MCExpr *Got16RelocExpr =
- MipsMCExpr::create(MipsMCExpr::MEK_GOT, JalExpr, getContext());
- const MCExpr *Lo16RelocExpr =
- MipsMCExpr::create(MipsMCExpr::MEK_LO, JalExpr, getContext());
-
- TOut.emitRRX(Mips::LW, Mips::T9, GPReg,
- MCOperand::createExpr(Got16RelocExpr), IDLoc, STI);
- TOut.emitRRX(Mips::ADDiu, Mips::T9, Mips::T9,
- MCOperand::createExpr(Lo16RelocExpr), IDLoc, STI);
- } else if (isABI_N32() || isABI_N64()) {
- // If it's a local symbol and the N32/N64 ABIs are being used,
- // we expand to:
- // lw/ld $25, 0($gp)
- // R_(MICRO)MIPS_GOT_DISP label
- // jalr $25
- const MCExpr *GotDispRelocExpr =
- MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, JalExpr, getContext());
-
- TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9,
- GPReg, MCOperand::createExpr(GotDispRelocExpr), IDLoc,
- STI);
- }
- } else {
- // If it's an external/weak symbol, we expand to:
- // lw/ld $25, 0($gp)
- // R_(MICRO)MIPS_CALL16 label
- // jalr $25
- const MCExpr *Call16RelocExpr =
- MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, JalExpr, getContext());
-
- TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, GPReg,
- MCOperand::createExpr(Call16RelocExpr), IDLoc, STI);
- }
+ if (expandLoadAddress(Mips::T9, Mips::NoRegister, Inst.getOperand(0),
+ !isGP64bit(), IDLoc, Out, STI))
+ return true;
MCInst JalrInst;
- if (IsCpRestoreSet && inMicroMipsMode())
- JalrInst.setOpcode(Mips::JALRS_MM);
+ if (inMicroMipsMode())
+ JalrInst.setOpcode(IsCpRestoreSet ? Mips::JALRS_MM : Mips::JALR_MM);
else
- JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR);
+ JalrInst.setOpcode(Mips::JALR);
JalrInst.addOperand(MCOperand::createReg(Mips::RA));
JalrInst.addOperand(MCOperand::createReg(Mips::T9));
- if (EmitJalrReloc) {
+ if (isJalrRelocAvailable(JalExpr)) {
// As an optimization hint for the linker, before the JALR we add:
// .reloc tmplabel, R_{MICRO}MIPS_JALR, symbol
// tmplabel:
@@ -2114,43 +2136,25 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
ExpandedJalSym = true;
}
- bool IsPCRelativeLoad = (MCID.TSFlags & MipsII::IsPCRelativeLoad) != 0;
- if ((MCID.mayLoad() || MCID.mayStore()) && !IsPCRelativeLoad) {
+ if (MCID.mayLoad() || MCID.mayStore()) {
    // Check the offset of the memory operand; if it is a symbol
    // reference or an immediate we may have to expand instructions.
- for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
- const MCOperandInfo &OpInfo = MCID.OpInfo[i];
- if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
- (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
- MCOperand &Op = Inst.getOperand(i);
- if (Op.isImm()) {
- int64_t MemOffset = Op.getImm();
- if (MemOffset < -32768 || MemOffset > 32767) {
- // Offset can't exceed 16bit value.
- expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad());
- return getParser().hasPendingError();
- }
- } else if (Op.isExpr()) {
- const MCExpr *Expr = Op.getExpr();
- if (Expr->getKind() == MCExpr::SymbolRef) {
- const MCSymbolRefExpr *SR =
- static_cast<const MCSymbolRefExpr *>(Expr);
- if (SR->getKind() == MCSymbolRefExpr::VK_None) {
- // Expand symbol.
- expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad());
- return getParser().hasPendingError();
- }
- } else if (!isEvaluated(Expr)) {
- expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad());
- return getParser().hasPendingError();
- }
- }
+ if (needsExpandMemInst(Inst)) {
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ switch (MCID.OpInfo[MCID.getNumOperands() - 1].OperandType) {
+ case MipsII::OPERAND_MEM_SIMM9:
+ expandMem9Inst(Inst, IDLoc, Out, STI, MCID.mayLoad());
+ break;
+ default:
+ expandMem16Inst(Inst, IDLoc, Out, STI, MCID.mayLoad());
+ break;
}
- } // for
- } // if load/store
+ return getParser().hasPendingError();
+ }
+ }
if (inMicroMipsMode()) {
- if (MCID.mayLoad() && Inst.getOpcode() != Mips::LWP_MM) {
+ if (MCID.mayLoad() && Opcode != Mips::LWP_MM) {
// Try to create 16-bit GP relative load instruction.
for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
const MCOperandInfo &OpInfo = MCID.OpInfo[i];
@@ -2181,7 +2185,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
MCOperand Opnd;
int Imm;
- switch (Inst.getOpcode()) {
+ switch (Opcode) {
default:
break;
case Mips::ADDIUSP_MM:
@@ -2329,8 +2333,8 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
TOut.emitDirectiveSetReorder();
}
- if ((Inst.getOpcode() == Mips::JalOneReg ||
- Inst.getOpcode() == Mips::JalTwoReg || ExpandedJalSym) &&
+ if ((Opcode == Mips::JalOneReg || Opcode == Mips::JalTwoReg ||
+ ExpandedJalSym) &&
isPicAndNotNxxAbi()) {
if (IsCpRestoreSet) {
// We need a NOP between the JALR and the LW:
@@ -2848,13 +2852,9 @@ bool MipsAsmParser::expandLoadAddress(unsigned DstReg, unsigned BaseReg,
const MCSubtargetInfo *STI) {
// la can't produce a usable address when addresses are 64-bit.
if (Is32BitAddress && ABI.ArePtrs64bit()) {
- // FIXME: Demote this to a warning and continue as if we had 'dla' instead.
- // We currently can't do this because we depend on the equality
- // operator and N64 can end up with a GPR32/GPR64 mismatch.
- Error(IDLoc, "la used to load 64-bit address");
+ Warning(IDLoc, "la used to load 64-bit address");
// Continue as if we had 'dla' instead.
Is32BitAddress = false;
- return true;
}
// dla requires 64-bit addresses.
@@ -2944,6 +2944,9 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
TmpReg = ATReg;
}
+  // FIXME: In case of the N32 / N64 ABIs with XGOT enabled, local addresses
+  // are loaded using the R_MIPS_GOT_PAGE / R_MIPS_GOT_OFST pair of relocations.
+ // FIXME: Implement XGOT for microMIPS.
if (UseXGOT) {
// Loading address from XGOT
// External GOT: lui $tmp, %got_hi(symbol)($gp)
@@ -2980,7 +2983,7 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
const MipsMCExpr *GotExpr = nullptr;
const MCExpr *LoExpr = nullptr;
- if (IsPtr64) {
+ if (ABI.IsN32() || ABI.IsN64()) {
// The remaining cases are:
// Small offset: ld $tmp, %got_disp(symbol)($gp)
// >daddiu $tmp, $tmp, offset
@@ -3643,20 +3646,26 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return false;
}
-void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
- const MCSubtargetInfo *STI, bool IsLoad) {
- const MCOperand &DstRegOp = Inst.getOperand(0);
+void MipsAsmParser::expandMem16Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI, bool IsLoad) {
+ unsigned NumOp = Inst.getNumOperands();
+  assert((NumOp == 3 || NumOp == 4) && "unexpected number of operands");
+ unsigned StartOp = NumOp == 3 ? 0 : 1;
+
+ const MCOperand &DstRegOp = Inst.getOperand(StartOp);
assert(DstRegOp.isReg() && "expected register operand kind");
- const MCOperand &BaseRegOp = Inst.getOperand(1);
+ const MCOperand &BaseRegOp = Inst.getOperand(StartOp + 1);
assert(BaseRegOp.isReg() && "expected register operand kind");
+ const MCOperand &OffsetOp = Inst.getOperand(StartOp + 2);
MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned OpCode = Inst.getOpcode();
unsigned DstReg = DstRegOp.getReg();
unsigned BaseReg = BaseRegOp.getReg();
unsigned TmpReg = DstReg;
- const MCInstrDesc &Desc = getInstDesc(Inst.getOpcode());
- int16_t DstRegClass = Desc.OpInfo[0].RegClass;
+ const MCInstrDesc &Desc = getInstDesc(OpCode);
+ int16_t DstRegClass = Desc.OpInfo[StartOp].RegClass;
unsigned DstRegClassID =
getContext().getRegisterInfo()->getRegClass(DstRegClass).getID();
bool IsGPR = (DstRegClassID == Mips::GPR32RegClassID) ||
@@ -3670,25 +3679,12 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return;
}
- if (Inst.getNumOperands() > 3) {
- const MCOperand &BaseRegOp = Inst.getOperand(2);
- assert(BaseRegOp.isReg() && "expected register operand kind");
- const MCOperand &ExprOp = Inst.getOperand(3);
- assert(ExprOp.isExpr() && "expected expression oprand kind");
-
- unsigned BaseReg = BaseRegOp.getReg();
- const MCExpr *ExprOffset = ExprOp.getExpr();
-
- MCOperand LoOperand = MCOperand::createExpr(
- MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext()));
- MCOperand HiOperand = MCOperand::createExpr(
- MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext()));
- TOut.emitSCWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand,
- LoOperand, TmpReg, IDLoc, STI);
- return;
- }
-
- const MCOperand &OffsetOp = Inst.getOperand(2);
+ auto emitInstWithOffset = [&](const MCOperand &Off) {
+ if (NumOp == 3)
+ TOut.emitRRX(OpCode, DstReg, TmpReg, Off, IDLoc, STI);
+ else
+ TOut.emitRRRX(OpCode, DstReg, DstReg, TmpReg, Off, IDLoc, STI);
+ };
if (OffsetOp.isImm()) {
int64_t LoOffset = OffsetOp.getImm() & 0xffff;
@@ -3702,16 +3698,16 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
bool IsLargeOffset = HiOffset != 0;
if (IsLargeOffset) {
- bool Is32BitImm = (HiOffset >> 32) == 0;
+ bool Is32BitImm = isInt<32>(OffsetOp.getImm());
if (loadImmediate(HiOffset, TmpReg, Mips::NoRegister, Is32BitImm, true,
IDLoc, Out, STI))
return;
}
if (BaseReg != Mips::ZERO && BaseReg != Mips::ZERO_64)
- TOut.emitRRR(isGP64bit() ? Mips::DADDu : Mips::ADDu, TmpReg, TmpReg,
- BaseReg, IDLoc, STI);
- TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, LoOffset, IDLoc, STI);
+ TOut.emitRRR(ABI.ArePtrs64bit() ? Mips::DADDu : Mips::ADDu, TmpReg,
+ TmpReg, BaseReg, IDLoc, STI);
+ emitInstWithOffset(MCOperand::createImm(int16_t(LoOffset)));
return;
}
@@ -3735,26 +3731,41 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
loadAndAddSymbolAddress(Res.getSymA(), TmpReg, BaseReg,
!ABI.ArePtrs64bit(), IDLoc, Out, STI);
- TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, Res.getConstant(), IDLoc,
- STI);
+ emitInstWithOffset(MCOperand::createImm(int16_t(Res.getConstant())));
} else {
// FIXME: Implement 64-bit case.
// 1) lw $8, sym => lui $8, %hi(sym)
// lw $8, %lo(sym)($8)
// 2) sw $8, sym => lui $at, %hi(sym)
// sw $8, %lo(sym)($at)
- const MCExpr *ExprOffset = OffsetOp.getExpr();
+ const MCExpr *OffExpr = OffsetOp.getExpr();
MCOperand LoOperand = MCOperand::createExpr(
- MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext()));
+ MipsMCExpr::create(MipsMCExpr::MEK_LO, OffExpr, getContext()));
MCOperand HiOperand = MCOperand::createExpr(
- MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext()));
-
- // Generate the base address in TmpReg.
- TOut.emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI);
- if (BaseReg != Mips::ZERO)
- TOut.emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI);
- // Emit the load or store with the adjusted base and offset.
- TOut.emitRRX(Inst.getOpcode(), DstReg, TmpReg, LoOperand, IDLoc, STI);
+ MipsMCExpr::create(MipsMCExpr::MEK_HI, OffExpr, getContext()));
+
+ if (ABI.IsN64()) {
+ MCOperand HighestOperand = MCOperand::createExpr(
+ MipsMCExpr::create(MipsMCExpr::MEK_HIGHEST, OffExpr, getContext()));
+ MCOperand HigherOperand = MCOperand::createExpr(
+ MipsMCExpr::create(MipsMCExpr::MEK_HIGHER, OffExpr, getContext()));
+
+ TOut.emitRX(Mips::LUi, TmpReg, HighestOperand, IDLoc, STI);
+ TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, HigherOperand, IDLoc, STI);
+ TOut.emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, STI);
+ TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, HiOperand, IDLoc, STI);
+ TOut.emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, STI);
+ if (BaseReg != Mips::ZERO && BaseReg != Mips::ZERO_64)
+ TOut.emitRRR(Mips::DADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI);
+ emitInstWithOffset(LoOperand);
+ } else {
+ // Generate the base address in TmpReg.
+ TOut.emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI);
+ if (BaseReg != Mips::ZERO)
+ TOut.emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI);
+ // Emit the load or store with the adjusted base and offset.
+ emitInstWithOffset(LoOperand);
+ }
}
return;
}
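
The N64 branch above materializes a 64-bit symbol address from four 16-bit relocations (%highest, %higher, %hi, %lo), interleaved with 16-bit shifts; each piece is pre-biased so that the sign-extending adds below it come out right. A self-checking model of that arithmetic (names are illustrative, not LLVM API):

  #include <cassert>
  #include <cstdint>

  static uint64_t sext16(uint16_t V) { return (uint64_t)(int64_t)(int16_t)V; }

  int main() {
    const uint64_t Addr = 0x123456789ABCDEF0ULL;
    uint16_t Lo = Addr & 0xffff;                              // %lo
    uint16_t Hi = ((Addr + 0x8000ULL) >> 16) & 0xffff;        // %hi
    uint16_t Higher = ((Addr + 0x80008000ULL) >> 32) & 0xffff;    // %higher
    uint16_t Highest = ((Addr + 0x800080008000ULL) >> 48) & 0xffff; // %highest

    // lui; daddiu; dsll 16; daddiu; dsll 16; then %lo folds into the access.
    uint64_t T = (uint64_t)(int64_t)(int32_t)((uint32_t)Highest << 16);
    T += sext16(Higher);
    T <<= 16;
    T += sext16(Hi);
    T <<= 16;
    T += sext16(Lo);
    assert(T == Addr);
  }
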
@@ -3762,6 +3773,64 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
llvm_unreachable("unexpected operand type");
}
+void MipsAsmParser::expandMem9Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI, bool IsLoad) {
+ unsigned NumOp = Inst.getNumOperands();
+  assert((NumOp == 3 || NumOp == 4) && "unexpected number of operands");
+ unsigned StartOp = NumOp == 3 ? 0 : 1;
+
+ const MCOperand &DstRegOp = Inst.getOperand(StartOp);
+ assert(DstRegOp.isReg() && "expected register operand kind");
+ const MCOperand &BaseRegOp = Inst.getOperand(StartOp + 1);
+ assert(BaseRegOp.isReg() && "expected register operand kind");
+ const MCOperand &OffsetOp = Inst.getOperand(StartOp + 2);
+
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned OpCode = Inst.getOpcode();
+ unsigned DstReg = DstRegOp.getReg();
+ unsigned BaseReg = BaseRegOp.getReg();
+ unsigned TmpReg = DstReg;
+
+ const MCInstrDesc &Desc = getInstDesc(OpCode);
+ int16_t DstRegClass = Desc.OpInfo[StartOp].RegClass;
+ unsigned DstRegClassID =
+ getContext().getRegisterInfo()->getRegClass(DstRegClass).getID();
+ bool IsGPR = (DstRegClassID == Mips::GPR32RegClassID) ||
+ (DstRegClassID == Mips::GPR64RegClassID);
+
+ if (!IsLoad || !IsGPR || (BaseReg == DstReg)) {
+ // At this point we need AT to perform the expansions
+ // and we exit if it is not available.
+ TmpReg = getATReg(IDLoc);
+ if (!TmpReg)
+ return;
+ }
+
+ auto emitInst = [&]() {
+ if (NumOp == 3)
+ TOut.emitRRX(OpCode, DstReg, TmpReg, MCOperand::createImm(0), IDLoc, STI);
+ else
+ TOut.emitRRRX(OpCode, DstReg, DstReg, TmpReg, MCOperand::createImm(0),
+ IDLoc, STI);
+ };
+
+ if (OffsetOp.isImm()) {
+ loadImmediate(OffsetOp.getImm(), TmpReg, BaseReg, !ABI.ArePtrs64bit(), true,
+ IDLoc, Out, STI);
+ emitInst();
+ return;
+ }
+
+ if (OffsetOp.isExpr()) {
+ loadAndAddSymbolAddress(OffsetOp.getExpr(), TmpReg, BaseReg,
+ !ABI.ArePtrs64bit(), IDLoc, Out, STI);
+ emitInst();
+ return;
+ }
+
+ llvm_unreachable("unexpected operand type");
+}
+
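
expandMem9Inst keeps the effective address intact while retreating to an offset of zero: the whole offset (or symbol address) is folded into a scratch register and the access is re-issued as `op dst, 0(tmp)`. A minimal model of that invariant:

  #include <cassert>
  #include <cstdint>

  // Model of the SIMM9 expansion; register names are illustrative.
  static int64_t effectiveAddress(int64_t Base, int64_t Offset) {
    if (Offset >= -256 && Offset <= 255)
      return Base + Offset;      // fits SIMM9: emit the access directly
    int64_t Tmp = Base + Offset; // loadImmediate(Offset, Tmp, Base, ...)
    return Tmp + 0;              // emitInst(): op Dst, 0(Tmp)
  }

  int main() { assert(effectiveAddress(0x1000, 512) == 0x1200); }
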
bool MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out,
const MCSubtargetInfo *STI) {
@@ -4063,8 +4132,8 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
// D(S)DivMacro.
bool MipsAsmParser::expandDivRem(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
- const MCSubtargetInfo *STI, const bool IsMips64,
- const bool Signed) {
+ const MCSubtargetInfo *STI,
+ const bool IsMips64, const bool Signed) {
MipsTargetStreamer &TOut = getTargetStreamer();
warnIfNoMacro(IDLoc);
@@ -6131,26 +6200,6 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return true;
}
-bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
- switch (Expr->getKind()) {
- case MCExpr::Constant:
- return true;
- case MCExpr::SymbolRef:
- return (cast<MCSymbolRefExpr>(Expr)->getKind() != MCSymbolRefExpr::VK_None);
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
- if (!isEvaluated(BE->getLHS()))
- return false;
- return isEvaluated(BE->getRHS());
- }
- case MCExpr::Unary:
- return isEvaluated(cast<MCUnaryExpr>(Expr)->getSubExpr());
- case MCExpr::Target:
- return true;
- }
- return false;
-}
-
bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
@@ -8596,7 +8645,7 @@ bool MipsAsmParser::parseInternalDirectiveReallowModule() {
return false;
}
-extern "C" void LLVMInitializeMipsAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsAsmParser() {
RegisterMCAsmParser<MipsAsmParser> X(getTheMipsTarget());
RegisterMCAsmParser<MipsAsmParser> Y(getTheMipselTarget());
RegisterMCAsmParser<MipsAsmParser> A(getTheMips64Target());
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 94d720d2e058..6f197e424561 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -72,7 +72,6 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
};
@@ -448,16 +447,16 @@ static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
template <typename InsnType>
-static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn,
+ uint64_t Address, const void *Decoder);
template <typename InsnType>
static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
template <typename InsnType>
-static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn,
+ uint64_t Address, const void *Decoder);
template <typename InsnType>
static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
@@ -564,7 +563,7 @@ static MCDisassembler *createMipselDisassembler(
return new MipsDisassembler(STI, Ctx, false);
}
-extern "C" void LLVMInitializeMipsDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheMipsTarget(),
createMipsDisassembler);
@@ -632,8 +631,8 @@ static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
}
template <typename InsnType>
-static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn,
+ uint64_t Address, const void *Decoder) {
InsnType Rs = fieldFromInstruction(insn, 16, 5);
InsnType Imm = fieldFromInstruction(insn, 0, 16);
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID,
@@ -1093,8 +1092,10 @@ static DecodeStatus DecodeDEXT(MCInst &MI, InsnType Insn, uint64_t Address,
InsnType Rs = fieldFromInstruction(Insn, 21, 5);
InsnType Rt = fieldFromInstruction(Insn, 16, 5);
- MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rt)));
- MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rs)));
+ MI.addOperand(
+ MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rt)));
+ MI.addOperand(
+ MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rs)));
MI.addOperand(MCOperand::createImm(Pos));
MI.addOperand(MCOperand::createImm(Size));
@@ -1134,8 +1135,10 @@ static DecodeStatus DecodeDINS(MCInst &MI, InsnType Insn, uint64_t Address,
InsnType Rt = fieldFromInstruction(Insn, 16, 5);
MI.setOpcode(Mips::DINS);
- MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rt)));
- MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rs)));
+ MI.addOperand(
+ MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rt)));
+ MI.addOperand(
+ MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rs)));
MI.addOperand(MCOperand::createImm(Pos));
MI.addOperand(MCOperand::createImm(Size));
@@ -1216,7 +1219,6 @@ static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const {
uint32_t Insn;
DecodeStatus Result;
@@ -1258,8 +1260,8 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
LLVM_DEBUG(
dbgs() << "Trying MicroMips32r632 table (32-bit instructions):\n");
// Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMicroMipsR632, Instr, Insn, Address,
- this, STI);
+ Result = decodeInstruction(DecoderTableMicroMipsR632, Instr, Insn,
+ Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index bdd190fc17c9..37e970f2f15b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 70f2a7bdf10f..94d338746a6c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -585,6 +585,8 @@ MCAsmBackend *llvm::createMipsAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
- MipsABIInfo ABI = MipsABIInfo::computeTargetABI(STI.getTargetTriple(), STI.getCPU(), Options);
- return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), ABI.IsN32());
+ MipsABIInfo ABI = MipsABIInfo::computeTargetABI(STI.getTargetTriple(),
+ STI.getCPU(), Options);
+ return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(),
+ ABI.IsN32());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 6d8cb264158f..02ab5ede2c1a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -16,6 +16,7 @@
#include "MipsFixupKinds.h"
#include "MipsMCTargetDesc.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
@@ -123,13 +124,16 @@ namespace MipsII {
IsCTI = 1 << 4,
/// HasForbiddenSlot - Instruction has a forbidden slot.
HasForbiddenSlot = 1 << 5,
- /// IsPCRelativeLoad - A Load instruction with implicit source register
- /// ($pc) with explicit offset and destination register
- IsPCRelativeLoad = 1 << 6,
/// HasFCCRegOperand - Instruction uses an $fcc<x> register.
- HasFCCRegOperand = 1 << 7
+ HasFCCRegOperand = 1 << 6
};
+
+ enum OperandType : unsigned {
+ OPERAND_FIRST_MIPS_MEM_IMM = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_MEM_SIMM9 = OPERAND_FIRST_MIPS_MEM_IMM,
+ OPERAND_LAST_MIPS_MEM_IMM = OPERAND_MEM_SIMM9
+ };
}
}
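
Renumbering HasFCCRegOperand from 1 << 7 to 1 << 6 is safe because
consumers read these bits through the named constants on an instruction's
TSFlags. A minimal sketch of such a query (assumes the backend-internal
MipsBaseInfo.h header is on the include path):

#include "MipsBaseInfo.h"
#include "llvm/MC/MCInstrDesc.h"

static bool hasForbiddenSlot(const llvm::MCInstrDesc &Desc) {
  // Test the named bit; a raw value such as 1 << 5 would break on renumbering.
  return Desc.TSFlags & llvm::MipsII::HasForbiddenSlot;
}
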
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
index fb290a8e3f26..649ba20324bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
@@ -75,8 +75,9 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << '$' << StringRef(getRegisterName(RegNo)).lower();
}
-void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void MipsInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
switch (MI->getOpcode()) {
default:
break;
@@ -109,7 +110,7 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// Try to print any aliases first.
if (!printAliasInstr(MI, O) && !printAlias(*MI, O))
- printInstruction(MI, O);
+ printInstruction(MI, Address, O);
printAnnotation(O, Annot);
switch (MI->getOpcode()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
index a34a5c1d6418..0b1ee800e440 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
@@ -79,12 +79,12 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
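
Threading the instruction address through printInst/printInstruction lets
TableGen-generated printers resolve PC-relative operands to absolute targets
when disassembling. A hedged illustration of the kind of printer this
enables (a hypothetical helper, not code from this patch):

#include "llvm/MC/MCInst.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

static void printBranchTarget(const llvm::MCInst *MI, uint64_t Address,
                              unsigned OpNo, llvm::raw_ostream &OS) {
  int64_t Off = MI->getOperand(OpNo).getImm();
  OS << llvm::format_hex(Address + Off, 10); // resolved absolute target
}
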
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index ec78158d387d..5182205edaea 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -11,25 +11,27 @@
//===----------------------------------------------------------------------===//
#include "MipsMCAsmInfo.h"
+#include "MipsABIInfo.h"
#include "llvm/ADT/Triple.h"
using namespace llvm;
void MipsMCAsmInfo::anchor() { }
-MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
+MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple,
+ const MCTargetOptions &Options) {
IsLittleEndian = TheTriple.isLittleEndian();
- if (TheTriple.isMIPS64() && TheTriple.getEnvironment() != Triple::GNUABIN32)
+ MipsABIInfo ABI = MipsABIInfo::computeTargetABI(TheTriple, "", Options);
+
+ if (TheTriple.isMIPS64() && !ABI.IsN32())
CodePointerSize = CalleeSaveStackSlotSize = 8;
- // FIXME: This condition isn't quite right but it's the best we can do until
- // this object can identify the ABI. It will misbehave when using O32
- // on a mips64*-* triple.
- if (TheTriple.isMIPS32()) {
+ if (ABI.IsO32())
PrivateGlobalPrefix = "$";
- PrivateLabelPrefix = "$";
- }
+ else if (ABI.IsN32() || ABI.IsN64())
+ PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = PrivateGlobalPrefix;
AlignmentIsInBytes = false;
Data16bitsDirective = "\t.2byte\t";
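
Net effect of the constructor change, summarized with a hypothetical probe
rather than real driver code: the private label prefix now follows the
computed ABI instead of the raw triple, so O32 code built for a mips64-*
triple gets the correct "$" prefix.

// MipsMCAsmInfo MAI(TheTriple, Options);
// O32 ABI        -> MAI.getPrivateGlobalPrefix() == "$"
// N32 or N64 ABI -> MAI.getPrivateGlobalPrefix() == ".L"
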
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 867f4d223de4..d8bfe58d24a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -22,7 +22,8 @@ class MipsMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
public:
- explicit MipsMCAsmInfo(const Triple &TheTriple);
+ explicit MipsMCAsmInfo(const Triple &TheTriple,
+ const MCTargetOptions &Options);
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 142e9cebb79e..846f508005f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -600,7 +600,8 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
}
if (Kind == MCExpr::Binary) {
- unsigned Res = getExprOpValue(cast<MCBinaryExpr>(Expr)->getLHS(), Fixups, STI);
+ unsigned Res =
+ getExprOpValue(cast<MCBinaryExpr>(Expr)->getLHS(), Fixups, STI);
Res += getExprOpValue(cast<MCBinaryExpr>(Expr)->getRHS(), Fixups, STI);
return Res;
}
@@ -729,7 +730,8 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
default: llvm_unreachable("Unknown fixup kind!");
break;
case MCSymbolRefExpr::VK_None:
- FixupKind = Mips::fixup_Mips_32; // FIXME: This is ok for O32/N32 but not N64.
+ // FIXME: This is ok for O32/N32 but not N64.
+ FixupKind = Mips::fixup_Mips_32;
break;
} // switch
@@ -768,7 +770,8 @@ unsigned MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
// Base register is encoded in bits 20-16, offset is encoded in bits 15-0.
assert(MI.getOperand(OpNo).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 16;
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI)
+ << 16;
unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI);
// Apply the scale factor if there is one.
@@ -857,7 +860,8 @@ getMemEncodingMMImm9(const MCInst &MI, unsigned OpNo,
assert(MI.getOperand(OpNo).isReg());
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups,
STI) << 16;
- unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI);
+ unsigned OffBits =
+ getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI);
return (OffBits & 0x1FF) | RegBits;
}
@@ -892,7 +896,8 @@ getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
// Base register is encoded in bits 20-16, offset is encoded in bits 11-0.
assert(MI.getOperand(OpNo).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 16;
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI)
+ << 16;
unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI);
return (OffBits & 0x0FFF) | RegBits;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 680806c4deb2..a25783b5cf04 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -211,7 +211,8 @@ MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
// The value of getKind() that is given to MCValue is only intended to aid
// debugging when inspecting MCValue objects. It shouldn't be relied upon
// for decision making.
- Res = MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind());
+ Res =
+ MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind());
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 79c47d1b6508..de582bd60cbf 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -12,6 +12,7 @@
#include "MipsMCTargetDesc.h"
#include "MipsAsmBackend.h"
+#include "MipsBaseInfo.h"
#include "MipsELFStreamer.h"
#include "MipsInstPrinter.h"
#include "MipsMCAsmInfo.h"
@@ -44,7 +45,6 @@ using namespace llvm;
#include "MipsGenRegisterInfo.inc"
/// Select the Mips CPU for the given triple and cpu name.
-/// FIXME: Merge with the copy in MipsSubtarget.cpp
StringRef MIPS_MC::selectMipsCPU(const Triple &TT, StringRef CPU) {
if (CPU.empty() || CPU == "generic") {
if (TT.getSubArch() == llvm::Triple::MipsSubArch_r6) {
@@ -81,8 +81,9 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(const Triple &TT,
}
static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TT) {
- MCAsmInfo *MAI = new MipsMCAsmInfo(TT);
+ const Triple &TT,
+ const MCTargetOptions &Options) {
+ MCAsmInfo *MAI = new MipsMCAsmInfo(TT, Options);
unsigned SP = MRI.getDwarfRegNum(Mips::SP, true);
MCCFIInstruction Inst = MCCFIInstruction::createDefCfaRegister(nullptr, SP);
@@ -165,7 +166,7 @@ static MCInstrAnalysis *createMipsMCInstrAnalysis(const MCInstrInfo *Info) {
return new MipsMCInstrAnalysis(Info);
}
-extern "C" void LLVMInitializeMipsTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTargetMC() {
for (Target *T : {&getTheMipsTarget(), &getTheMipselTarget(),
&getTheMips64Target(), &getTheMips64elTarget()}) {
// Register the MC asm info.
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 2d53750ad0ee..0544758f8a25 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -154,8 +154,8 @@ public:
}
// Sandbox loads, stores and SP changes.
- unsigned AddrIdx;
- bool IsStore;
+ unsigned AddrIdx = 0;
+ bool IsStore = false;
bool IsMemAccess = isBasePlusOffsetMemoryAccess(Inst.getOpcode(), &AddrIdx,
&IsStore);
bool IsSPFirstOperand = isStackPointerFirstOperand(Inst);
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index 3ff9c722484b..bdfb70aa9813 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -74,27 +74,23 @@ void MipsRegInfoRecord::SetPhysRegUsed(unsigned Reg,
const MCRegisterInfo *MCRegInfo) {
unsigned Value = 0;
- for (MCSubRegIterator SubRegIt(Reg, MCRegInfo, true); SubRegIt.isValid();
- ++SubRegIt) {
- unsigned CurrentSubReg = *SubRegIt;
-
- unsigned EncVal = MCRegInfo->getEncodingValue(CurrentSubReg);
+ for (const MCPhysReg &SubReg : MCRegInfo->subregs_inclusive(Reg)) {
+ unsigned EncVal = MCRegInfo->getEncodingValue(SubReg);
Value |= 1 << EncVal;
- if (GPR32RegClass->contains(CurrentSubReg) ||
- GPR64RegClass->contains(CurrentSubReg))
+ if (GPR32RegClass->contains(SubReg) || GPR64RegClass->contains(SubReg))
ri_gprmask |= Value;
- else if (COP0RegClass->contains(CurrentSubReg))
+ else if (COP0RegClass->contains(SubReg))
ri_cprmask[0] |= Value;
// MIPS COP1 is the FPU.
- else if (FGR32RegClass->contains(CurrentSubReg) ||
- FGR64RegClass->contains(CurrentSubReg) ||
- AFGR64RegClass->contains(CurrentSubReg) ||
- MSA128BRegClass->contains(CurrentSubReg))
+ else if (FGR32RegClass->contains(SubReg) ||
+ FGR64RegClass->contains(SubReg) ||
+ AFGR64RegClass->contains(SubReg) ||
+ MSA128BRegClass->contains(SubReg))
ri_cprmask[1] |= Value;
- else if (COP2RegClass->contains(CurrentSubReg))
+ else if (COP2RegClass->contains(SubReg))
ri_cprmask[2] |= Value;
- else if (COP3RegClass->contains(CurrentSubReg))
+ else if (COP3RegClass->contains(SubReg))
ri_cprmask[3] |= Value;
}
}
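
The loop above now uses the range-based helper instead of a manual
MCSubRegIterator constructed with IncludeSelf = true. A minimal standalone
sketch of the same idiom:

#include "llvm/MC/MCRegisterInfo.h"

static unsigned encodingMask(const llvm::MCRegisterInfo &MRI, unsigned Reg) {
  unsigned Mask = 0;
  // subregs_inclusive visits Reg itself plus all of its sub-registers.
  for (const llvm::MCPhysReg &SubReg : MRI.subregs_inclusive(Reg))
    Mask |= 1u << MRI.getEncodingValue(SubReg);
  return Mask;
}
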
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index b6dae9f6dea8..054dc79f4aa9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -34,11 +34,6 @@ static cl::opt<bool> RoundSectionSizes(
cl::desc("Round section sizes up to the section alignment"), cl::Hidden);
} // end anonymous namespace
-static bool isMipsR6(const MCSubtargetInfo *STI) {
- return STI->getFeatureBits()[Mips::FeatureMips32r6] ||
- STI->getFeatureBits()[Mips::FeatureMips64r6];
-}
-
static bool isMicroMips(const MCSubtargetInfo *STI) {
return STI->getFeatureBits()[Mips::FeatureMicroMips];
}
@@ -332,36 +327,6 @@ void MipsTargetStreamer::emitStoreWithImmOffset(
emitRRI(Opcode, SrcReg, ATReg, LoOffset, IDLoc, STI);
}
-/// Emit a store instruction with an symbol offset.
-void MipsTargetStreamer::emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg,
- unsigned BaseReg,
- MCOperand &HiOperand,
- MCOperand &LoOperand,
- unsigned ATReg, SMLoc IDLoc,
- const MCSubtargetInfo *STI) {
- // sc $8, sym => lui $at, %hi(sym)
- // sc $8, %lo(sym)($at)
-
- // Generate the base address in ATReg.
- emitRX(Mips::LUi, ATReg, HiOperand, IDLoc, STI);
- if (!isMicroMips(STI) && isMipsR6(STI)) {
- // For non-micromips r6 offset for 'sc' is not in the lower 16 bits so we
- // put it in 'at'.
- // sc $8, sym => lui $at, %hi(sym)
- // addiu $at, $at, %lo(sym)
- // sc $8, 0($at)
- emitRRX(Mips::ADDiu, ATReg, ATReg, LoOperand, IDLoc, STI);
- MCOperand Offset = MCOperand::createImm(0);
- // Emit the store with the adjusted base and offset.
- emitRRRX(Opcode, SrcReg, SrcReg, ATReg, Offset, IDLoc, STI);
- } else {
- if (BaseReg != Mips::ZERO)
- emitRRR(Mips::ADDu, ATReg, ATReg, BaseReg, IDLoc, STI);
- // Emit the store with the adjusted base and offset.
- emitRRRX(Opcode, SrcReg, SrcReg, ATReg, LoOperand, IDLoc, STI);
- }
-}
-
/// Emit a load instruction with an immediate offset. DstReg and TmpReg are
/// permitted to be the same register iff DstReg is distinct from BaseReg and
/// DstReg is a GPR. It is the caller's responsibility to identify such cases
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
index dbff0f6200f2..da8a06b0cff8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -234,7 +234,8 @@ class POOL32A_FM_MMR6<bits<10> funct> : MipsR6Inst {
let Inst{9-0} = funct;
}
-class POOL32A_PAUSE_FM_MMR6<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
+class POOL32A_PAUSE_FM_MMR6<string instr_asm, bits<5> op>
+ : MMR6Arch<instr_asm> {
bits<32> Inst;
let Inst{31-26} = 0;
@@ -512,7 +513,8 @@ class BARRIER_MMR6_ENC<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
class POOL32A_EIDI_MMR6_ENC<string instr_asm, bits<10> funct>
: MMR6Arch<instr_asm> {
bits<32> Inst;
- bits<5> rt; // Actually rs but we're sharing code with the standard encodings which call it rt
+ bits<5> rt; // Actually rs but we're sharing code with the standard encodings
+ // which call it rt
let Inst{31-26} = 0x00;
let Inst{25-21} = 0x00;
@@ -521,7 +523,8 @@ class POOL32A_EIDI_MMR6_ENC<string instr_asm, bits<10> funct>
let Inst{5-0} = 0x3c;
}
-class SHIFT_MMR6_ENC<string instr_asm, bits<10> funct, bit rotate> : MMR6Arch<instr_asm> {
+class SHIFT_MMR6_ENC<string instr_asm, bits<10> funct, bit rotate>
+ : MMR6Arch<instr_asm> {
bits<5> rd;
bits<5> rt;
bits<5> shamt;
@@ -809,8 +812,8 @@ class POOL32F_RECIP_ROUND_FM_MMR6<string instr_asm, bits<1> fmt, bits<8> funct>
let Inst{5-0} = 0b111011;
}
-class POOL32F_RINT_FM_MMR6<string instr_asm, bits<2> fmt>
- : MMR6Arch<instr_asm>, MipsR6Inst {
+class POOL32F_RINT_FM_MMR6<string instr_asm, bits<2> fmt> : MMR6Arch<instr_asm>,
+ MipsR6Inst {
bits<5> fs;
bits<5> fd;
@@ -1012,8 +1015,8 @@ class POOL32B_LDWC2_SDWC2_FM_MMR6<string instr_asm, bits<4> funct>
}
class POOL32C_LL_E_SC_E_FM_MMR6<string instr_asm, bits<4> majorFunc,
- bits<3> minorFunc>
- : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<3> minorFunc> : MMR6Arch<instr_asm>,
+ MipsR6Inst {
bits<5> rt;
bits<21> addr;
bits<5> base = addr{20-16};
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 425773dc57f1..832124cb3f57 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -341,7 +341,8 @@ class BNEC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bnec", brtarget_lsl2_mm,
GPR32Opnd>;
class ADD_MMR6_DESC : ArithLogicR<"add", GPR32Opnd, 1, II_ADD>;
-class ADDIU_MMR6_DESC : ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU, immSExt16, add>;
+class ADDIU_MMR6_DESC
+ : ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU, immSExt16, add>;
class ADDU_MMR6_DESC : ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU>;
class MUL_MMR6_DESC : ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>;
class MUH_MMR6_DESC : ArithLogicR<"muh", GPR32Opnd, 1, II_MUH, mulhs>;
@@ -1045,8 +1046,8 @@ class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd,
FGR64Opnd, II_TRUNC>;
class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd,
II_SQRT_S, fsqrt>;
-class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd,
- II_SQRT_D, fsqrt>;
+class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd,
+ AFGR64Opnd, II_SQRT_D, fsqrt>;
class ROUND_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.s", FGR64Opnd,
FGR32Opnd, II_ROUND>;
class ROUND_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.d", FGR64Opnd,
@@ -1101,14 +1102,16 @@ class SLL16_MMR6_DESC : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>,
MMR6Arch<"sll16">;
class SRL16_MMR6_DESC : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>,
MMR6Arch<"srl16">;
-class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16", II_BREAK>, MMR6Arch<"break16">;
+class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16", II_BREAK>,
+ MMR6Arch<"break16">;
class LI16_MMR6_DESC : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>,
MMR6Arch<"li16">, IsAsCheapAsAMove;
class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"move16">;
class MOVEP_MMR6_DESC : MovePMM16<"movep", GPRMM16OpndMovePPairFirst,
GPRMM16OpndMovePPairSecond, GPRMM16OpndMoveP>,
MMR6Arch<"movep">;
-class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, MMR6Arch<"sdbbp16">;
+class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16", II_SDBBP>,
+ MMR6Arch<"sdbbp16">;
class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
MMR6Arch<"subu16"> {
int AddedComplexity = 1;
@@ -1439,7 +1442,8 @@ def SYNCI_MMR6 : StdMMR6Rel, SYNCI_MMR6_DESC, SYNCI_MMR6_ENC, ISA_MICROMIPS32R6;
def RDPGPR_MMR6 : R6MMR6Rel, RDPGPR_MMR6_DESC, RDPGPR_MMR6_ENC,
ISA_MICROMIPS32R6;
def SDBBP_MMR6 : R6MMR6Rel, SDBBP_MMR6_DESC, SDBBP_MMR6_ENC, ISA_MICROMIPS32R6;
-def SIGRIE_MMR6 : R6MMR6Rel, SIGRIE_MMR6_DESC, SIGRIE_MMR6_ENC, ISA_MICROMIPS32R6;
+def SIGRIE_MMR6 : R6MMR6Rel, SIGRIE_MMR6_DESC, SIGRIE_MMR6_ENC,
+ ISA_MICROMIPS32R6;
def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6;
def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6;
let DecoderMethod = "DecodeMemMMImm16" in {
@@ -1557,7 +1561,8 @@ def INS_MMR6 : StdMMR6Rel, INS_MMR6_ENC, INS_MMR6_DESC, ISA_MICROMIPS32R6;
def JALRC_MMR6 : R6MMR6Rel, JALRC_MMR6_ENC, JALRC_MMR6_DESC, ISA_MICROMIPS32R6;
def RINT_S_MMR6 : StdMMR6Rel, RINT_S_MMR6_ENC, RINT_S_MMR6_DESC,
ISA_MICROMIPS32R6;
-def RINT_D_MMR6 : StdMMR6Rel, RINT_D_MMR6_ENC, RINT_D_MMR6_DESC, ISA_MICROMIPS32R6;
+def RINT_D_MMR6 : StdMMR6Rel, RINT_D_MMR6_ENC, RINT_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
def ROUND_L_S_MMR6 : StdMMR6Rel, ROUND_L_S_MMR6_ENC, ROUND_L_S_MMR6_DESC,
ISA_MICROMIPS32R6;
def ROUND_L_D_MMR6 : StdMMR6Rel, ROUND_L_D_MMR6_ENC, ROUND_L_D_MMR6_DESC,
@@ -1751,7 +1756,8 @@ defm S_MMR6 : Cmp_Pats<f32, NOR_MMR6, ZERO>, ISA_MICROMIPS32R6;
defm D_MMR6 : Cmp_Pats<f64, NOR_MMR6, ZERO>, ISA_MICROMIPS32R6;
def : MipsPat<(f32 fpimm0), (MTC1_MMR6 ZERO)>, ISA_MICROMIPS32R6;
-def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1_MMR6 ZERO))>, ISA_MICROMIPS32R6;
+def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6(MTC1_MMR6 ZERO))>,
+ ISA_MICROMIPS32R6;
def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
(TRUNC_W_D_MMR6 FGR64Opnd:$src)>, ISA_MICROMIPS32R6;
def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index 5d87068ff407..eea4d7746fa6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -230,7 +230,8 @@ let DecoderNamespace = "MicroMips" in {
def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S,
fsqrt>, ROUND_W_FM_MM<0, 0x28>, ISA_MICROMIPS;
- def MTHC1_D32_MM : MMRel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
+ def MTHC1_D32_MM : MMRel,
+ MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
MFC1_FM_MM<0xe0>, ISA_MICROMIPS, FGR_32;
def MFHC1_D32_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>,
MFC1_FM_MM<0xc0>, ISA_MICROMIPS, FGR_32;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index 8cc0029fc896..b707f1b96184 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -116,7 +116,7 @@ def mem_mm_9 : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops ptr_rc, simm9);
let EncoderMethod = "getMemEncodingMMImm9";
- let ParserMatchClass = MipsMemSimm9AsmOperand;
+ let ParserMatchClass = MipsMemSimmAsmOperand<9>;
let OperandType = "OPERAND_MEMORY";
}
@@ -124,7 +124,7 @@ def mem_mm_11 : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops GPR32, simm11);
let EncoderMethod = "getMemEncodingMMImm11";
- let ParserMatchClass = MipsMemSimm11AsmOperand;
+ let ParserMatchClass = MipsMemSimmAsmOperand<11>;
let OperandType = "OPERAND_MEMORY";
}
@@ -141,7 +141,7 @@ def mem_mm_16 : Operand<i32> {
let MIOperandInfo = (ops ptr_rc, simm16);
let EncoderMethod = "getMemEncodingMMImm16";
let DecoderMethod = "DecodeMemMMImm16";
- let ParserMatchClass = MipsMemSimm16AsmOperand;
+ let ParserMatchClass = MipsMemSimmAsmOperand<16>;
let OperandType = "OPERAND_MEMORY";
}
@@ -506,7 +506,8 @@ class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO,
class PrefetchIndexed<string opstr> :
InstSE<(outs), (ins PtrRC:$base, PtrRC:$index, uimm5:$hint),
- !strconcat(opstr, "\t$hint, ${index}(${base})"), [], II_PREF, FrmOther>;
+ !strconcat(opstr, "\t$hint, ${index}(${base})"),
+ [], II_PREF, FrmOther>;
class AddImmUPC<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$rs), (ins simm23_lsl2:$imm),
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td
index 8b9bbf09e985..b8a69815cc12 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td
@@ -192,9 +192,9 @@ def FeatureGINV : SubtargetFeature<"ginv", "HasGINV", "true",
def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true",
"microMips mode">;
-def FeatureCnMips : SubtargetFeature<"cnmips", "HasCnMips",
- "true", "Octeon cnMIPS Support",
- [FeatureMips64r2]>;
+def FeatureCnMips : SubtargetFeature<"cnmips", "HasCnMips",
+ "true", "Octeon cnMIPS Support",
+ [FeatureMips64r2]>;
def FeatureCnMipsP : SubtargetFeature<"cnmipsp", "HasCnMipsP",
"true", "Octeon+ cnMIPS Support",
@@ -205,8 +205,9 @@ def FeatureUseTCCInDIV : SubtargetFeature<
"UseTCCInDIV", "false",
"Force the assembler to use trapping">;
-def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true",
- "Disable 4-operand madd.fmt and related instructions">;
+def FeatureMadd4
+ : SubtargetFeature<"nomadd4", "DisableMadd4", "true",
+ "Disable 4-operand madd.fmt and related instructions">;
def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">;
@@ -231,6 +232,7 @@ def ImplP5600 : SubtargetFeature<"p5600", "ProcImpl",
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, MipsGenericModel, Features>;
+def : Proc<"generic", [FeatureMips32]>;
def : Proc<"mips1", [FeatureMips1]>;
def : Proc<"mips2", [FeatureMips2]>;
def : Proc<"mips32", [FeatureMips32]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 5a5b78c9d5f9..5425df77d9b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -252,9 +252,6 @@ void Mips16TargetLowering::setMips16HardFloatLibCalls() {
if (HardFloatLibCalls[I].Libcall != RTLIB::UNKNOWN_LIBCALL)
setLibcallName(HardFloatLibCalls[I].Libcall, HardFloatLibCalls[I].Name);
}
-
- setLibcallName(RTLIB::O_F64, "__mips16_unorddf2");
- setLibcallName(RTLIB::O_F32, "__mips16_unordsf2");
}
//
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index 0d735c20ec2f..d2a1ba39cb0e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -68,8 +68,8 @@ unsigned Mips16InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
unsigned Opc = 0;
if (Mips::CPU16RegsRegClass.contains(DestReg) &&
@@ -96,15 +96,11 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
}
-bool Mips16InstrInfo::isCopyInstrImpl(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
- if (MI.isMoveReg()) {
- Dest = &MI.getOperand(0);
- Src = &MI.getOperand(1);
- return true;
- }
- return false;
+Optional<DestSourcePair>
+Mips16InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
+ if (MI.isMoveReg())
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+ return None;
}
void Mips16InstrInfo::storeRegToStack(MachineBasicBlock &MBB,
@@ -418,8 +414,9 @@ unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
else
Available.reset(SpReg);
copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false);
- BuildMI(MBB, II, DL, get(Mips:: AdduRxRyRz16), Reg).addReg(SpReg, RegState::Kill)
- .addReg(Reg);
+ BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg)
+ .addReg(SpReg, RegState::Kill)
+ .addReg(Reg);
}
else
BuildMI(MBB, II, DL, get(Mips:: AdduRxRyRz16), Reg).addReg(FrameReg)
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.h
index dadcaa3055b3..2ff849cb2ca2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.h
@@ -49,7 +49,7 @@ public:
int &FrameIndex) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStack(MachineBasicBlock &MBB,
@@ -104,10 +104,9 @@ public:
protected:
  /// If the specific machine instruction is an instruction that moves/copies
- /// value from one register to another register return true along with
- /// @Source machine operand and @Destination machine operand.
- bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
- const MachineOperand *&Destination) const override;
+  /// a value from one register to another, return the destination and source
+  /// registers as machine operands.
+ Optional<DestSourcePair> isCopyInstrImpl(const MachineInstr &MI) const override;
private:
unsigned getAnalyzableBrOpc(unsigned Opc) const override;
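
Callers reach this hook through the public TargetInstrInfo::isCopyInstr
wrapper, which now yields the pair instead of filling out-parameters. A
hypothetical use, with TII (a const TargetInstrInfo *) and MI (a
MachineInstr) assumed in scope:

if (llvm::Optional<llvm::DestSourcePair> Copy = TII->isCopyInstr(MI))
  llvm::dbgs() << "copy: " << *Copy->Source << " -> " << *Copy->Destination
               << '\n';
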
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.td
index 36b6c73d1008..19ea50c89b96 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.td
@@ -67,7 +67,7 @@ class FI816_ins_base<bits<3> _func, string asmstr,
class FI816_ins<bits<3> _func, string asmstr,
InstrItinClass itin>:
FI816_ins_base<_func, asmstr, "\t$imm # 16 bit inst", itin>;
-
+
class FI816_SP_ins<bits<3> _func, string asmstr,
InstrItinClass itin>:
FI816_ins_base<_func, asmstr, "\t$$sp, $imm # 16 bit inst", itin>;
@@ -90,7 +90,7 @@ class FRI16_TCP_ins<bits<5> _op, string asmstr,
InstrItinClass itin>:
FRI16<_op, (outs CPU16Regs:$rx), (ins pcrel16:$imm, i32imm:$size),
!strconcat(asmstr, "\t$rx, $imm\t# 16 bit inst"), [], itin>;
-
+
class FRI16R_ins_base<bits<5> op, string asmstr, string asmstr2,
InstrItinClass itin>:
FRI16<op, (outs), (ins CPU16Regs:$rx, simm16:$imm),
@@ -983,7 +983,7 @@ def RestoreX16:
// To set up a stack frame on entry to a subroutine,
// saving return address and static registers, and adjusting stack
//
-def Save16:
+def Save16:
FI8_SVRS16<0b1, (outs), (ins variable_ops),
"", [], II_SAVE >, MayStore {
let isCodeGenOnly = 1;
@@ -1883,7 +1883,7 @@ def : Mips16Pat<(sext_inreg CPU16Regs:$val, i8),
def : Mips16Pat<(sext_inreg CPU16Regs:$val, i16),
(SehRx16 CPU16Regs:$val)>;
-def GotPrologue16:
+def GotPrologue16:
MipsPseudo16<
(outs CPU16Regs:$rh, CPU16Regs:$rl),
(ins simm16:$immHi, simm16:$immLo),
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index 2c3048411a5c..9607d008bc97 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -765,12 +765,12 @@ class LL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
InstrItinClass Itinerary = itin;
}
-class LL_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9, II_LL>;
+class LL_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9_exp, II_LL>;
class SC_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
InstrItinClass itin> {
dag OutOperandList = (outs GPROpnd:$dst);
- dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr);
+ dag InOperandList = (ins GPROpnd:$rt, mem_simm9_exp:$addr);
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
list<dag> Pattern = [];
bit mayStore = 1;
@@ -996,13 +996,15 @@ def : MipsInstAlias<"evp", (EVP ZERO), 0>, ISA_MIPS32R6;
let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6;
def : MipsInstAlias<"sigrie", (SIGRIE 0)>, ISA_MIPS32R6;
-def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6, GPR_32;
+def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>,
+ ISA_MIPS32R6, GPR_32;
}
def : MipsInstAlias<"jrc $rs", (JIC GPR32Opnd:$rs, 0), 1>, ISA_MIPS32R6, GPR_32;
let AdditionalPredicates = [NotInMicroMips] in {
-def : MipsInstAlias<"jalrc $rs", (JIALC GPR32Opnd:$rs, 0), 1>, ISA_MIPS32R6, GPR_32;
+def : MipsInstAlias<"jalrc $rs", (JIALC GPR32Opnd:$rs, 0), 1>,
+ ISA_MIPS32R6, GPR_32;
}
def : MipsInstAlias<"div $rs, $rt", (DIV GPR32Opnd:$rs, GPR32Opnd:$rs,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
index c01f1d6f9aaf..306289d56e4b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -83,6 +83,10 @@ let usesCustomInserter = 1 in {
def ATOMIC_LOAD_NAND_I64 : Atomic2Ops<atomic_load_nand_64, GPR64>;
def ATOMIC_SWAP_I64 : Atomic2Ops<atomic_swap_64, GPR64>;
def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
+ def ATOMIC_LOAD_MIN_I64 : Atomic2Ops<atomic_load_min_64, GPR64>;
+ def ATOMIC_LOAD_MAX_I64 : Atomic2Ops<atomic_load_max_64, GPR64>;
+ def ATOMIC_LOAD_UMIN_I64 : Atomic2Ops<atomic_load_umin_64, GPR64>;
+ def ATOMIC_LOAD_UMAX_I64 : Atomic2Ops<atomic_load_umax_64, GPR64>;
}
def ATOMIC_LOAD_ADD_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
@@ -96,6 +100,11 @@ def ATOMIC_SWAP_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
def ATOMIC_CMP_SWAP_I64_POSTRA : AtomicCmpSwapPostRA<GPR64>;
+def ATOMIC_LOAD_MIN_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_MAX_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_UMIN_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_UMAX_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+
/// Pseudo instructions for loading and storing accumulator registers.
let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
def LOAD_ACC128 : Load<"", ACC128>;
@@ -395,11 +404,11 @@ let AdditionalPredicates = [NotInMicroMips] in {
}
let isCodeGenOnly = 1, AdditionalPredicates = [NotInMicroMips] in {
- def DEXT64_32 : InstSE<(outs GPR64Opnd:$rt),
- (ins GPR32Opnd:$rs, uimm5_report_uimm6:$pos,
- uimm5_plus1:$size),
- "dext $rt, $rs, $pos, $size", [], II_EXT, FrmR, "dext">,
- EXT_FM<3>, ISA_MIPS64R2;
+ def DEXT64_32
+ : InstSE<(outs GPR64Opnd:$rt),
+ (ins GPR32Opnd:$rs, uimm5_report_uimm6:$pos, uimm5_plus1:$size),
+ "dext $rt, $rs, $pos, $size", [], II_EXT, FrmR, "dext">,
+ EXT_FM<3>, ISA_MIPS64R2;
}
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
@@ -1045,8 +1054,10 @@ let AdditionalPredicates = [NotInMicroMips] in {
(DMTGC0 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>,
ISA_MIPS64R5, ASE_VIRT;
}
-def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, COP2Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 COP2Opnd:$rd, GPR64Opnd:$rt, 0), 0>;
+def : MipsInstAlias<"dmfc2 $rt, $rd",
+ (DMFC2 GPR64Opnd:$rt, COP2Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"dmtc2 $rt, $rd",
+ (DMTC2 COP2Opnd:$rd, GPR64Opnd:$rt, 0), 0>;
def : MipsInstAlias<"synciobdma", (SYNC 0x2), 0>, ASE_MIPS64_CNMIPS;
def : MipsInstAlias<"syncs", (SYNC 0x6), 0>, ASE_MIPS64_CNMIPS;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64r6InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
index d746bb61f824..33132d9ede92 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -48,7 +48,8 @@ class CRC32CD_ENC : SPECIAL3_2R_SZ_CRC<3,1>;
//
//===----------------------------------------------------------------------===//
-class AHI_ATI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd, InstrItinClass itin> {
+class AHI_ATI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ InstrItinClass itin> {
dag OutOperandList = (outs GPROpnd:$rs);
dag InOperandList = (ins GPROpnd:$rt, uimm16_altrelaxed:$imm);
string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $imm");
@@ -74,7 +75,7 @@ class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd, II_DMUL, mul>;
class DMULU_DESC : MUL_R6_DESC_BASE<"dmulu", GPR64Opnd, II_DMUL>;
class LDPC_DESC : PCREL_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3, II_LDPC>;
class LWUPC_DESC : PCREL_DESC_BASE<"lwupc", GPR32Opnd, simm19_lsl2, II_LWUPC>;
-class LLD_R6_DESC : LL_R6_DESC_BASE<"lld", GPR64Opnd, mem_simmptr, II_LLD>;
+class LLD_R6_DESC : LL_R6_DESC_BASE<"lld", GPR64Opnd, mem_simm9_exp, II_LLD>;
class SCD_R6_DESC : SC_R6_DESC_BASE<"scd", GPR64Opnd, II_SCD>;
class SELEQZ64_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR64Opnd>;
class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>;
@@ -105,7 +106,7 @@ class JIC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR64Opnd,
list<Register> Defs = [AT];
}
-class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9, II_LL>;
+class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9_exp, II_LL>;
class SC64_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd, II_SC>;
class JR_HB64_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR64Opnd> {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 2201545adc94..8f75336dce5a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -257,6 +257,10 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (emitPseudoExpansionLowering(*OutStreamer, &*I))
continue;
+ // Skip the BUNDLE pseudo instruction and lower the contents
+ if (I->isBundle())
+ continue;
+
if (I->getOpcode() == Mips::PseudoReturn ||
I->getOpcode() == Mips::PseudoReturn64 ||
I->getOpcode() == Mips::PseudoIndirectBranch ||
@@ -623,8 +627,10 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands");
const MachineOperand &BaseMO = MI->getOperand(OpNum);
const MachineOperand &OffsetMO = MI->getOperand(OpNum + 1);
- assert(BaseMO.isReg() && "Unexpected base pointer for inline asm memory operand.");
- assert(OffsetMO.isImm() && "Unexpected offset for inline asm memory operand.");
+ assert(BaseMO.isReg() &&
+ "Unexpected base pointer for inline asm memory operand.");
+ assert(OffsetMO.isImm() &&
+ "Unexpected offset for inline asm memory operand.");
int Offset = OffsetMO.getImm();
// Currently we are expecting either no ExtraCode or 'D','M','L'.
@@ -1300,7 +1306,7 @@ bool MipsAsmPrinter::isLongBranchPseudo(int Opcode) const {
}
// Force static initialization.
-extern "C" void LLVMInitializeMipsAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsAsmPrinter() {
RegisterAsmPrinter<MipsAsmPrinter> X(getTheMipsTarget());
RegisterAsmPrinter<MipsAsmPrinter> Y(getTheMipselTarget());
RegisterAsmPrinter<MipsAsmPrinter> A(getTheMips64Target());
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
index cad82953af50..6ba15c232867 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -299,7 +299,7 @@ Register OutgoingValueHandler::getStackAddress(const CCValAssign &VA,
MIRBuilder.buildConstant(OffsetReg, Offset);
Register AddrReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
+ MIRBuilder.buildPtrAdd(AddrReg, SPReg, OffsetReg);
MachinePointerInfo MPO =
MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
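
buildGEP was renamed as part of the GlobalISel G_GEP -> G_PTR_ADD opcode
rename; the call is otherwise unchanged. Shape of the new call with
placeholder registers (a sketch, not code from this patch):

llvm::Register Addr =
    MRI.createGenericVirtualRegister(llvm::LLT::pointer(0, 32));
MIRBuilder.buildPtrAdd(Addr, SPReg, OffsetReg); // formerly buildGEP
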
@@ -655,7 +655,8 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
F.getContext());
- CCInfo.AnalyzeCallResult(Ins, TLI.CCAssignFnForReturn(), Info.OrigRet.Ty, Call);
+ CCInfo.AnalyzeCallResult(Ins, TLI.CCAssignFnForReturn(), Info.OrigRet.Ty,
+ Call);
setLocInfo(ArgLocs, Ins);
CallReturnHandler Handler(MIRBuilder, MF.getRegInfo(), MIB);
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallingConv.td
index 88236d8e9abd..204f11f1107c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -139,7 +139,8 @@ def CC_MipsN : CallingConv<[
CCIfType<[i8, i16, i32], CCIfOrigArgWasNotFloat<CCPromoteToType<i64>>>,
// The only i32's we have left are soft-float arguments.
- CCIfSubtarget<"useSoftFloat()", CCIfType<[i32], CCDelegateTo<CC_MipsN_SoftFloat>>>,
+ CCIfSubtarget<"useSoftFloat()", CCIfType<[i32],
+ CCDelegateTo<CC_MipsN_SoftFloat>>>,
// Integer arguments are passed in integer registers.
CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCondMov.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCondMov.td
index 5affbcbc2101..e9e09a188bf5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCondMov.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCondMov.td
@@ -116,8 +116,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
}
- def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, II_MOVN>,
- ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
+ def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, II_MOVN>,
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in {
def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, II_MOVN>,
@@ -226,8 +226,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
GPR_64;
defm : MovnPats<GPR64, GPR32, MOVN_I64_I, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
GPR_64;
- defm : MovnPats<GPR64, GPR64, MOVN_I64_I64, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
- GPR_64;
+ defm : MovnPats<GPR64, GPR64, MOVN_I64_I64, XOR64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
defm : MovzPats0<GPR32, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>,
INSN_MIPS4_32_NOT_32R6_64R6;
@@ -236,8 +236,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
defm : MovzPats0<GPR64, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64, SLTiu64>,
INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
- defm : MovzPats1<GPR64, FGR32, MOVZ_I64_S, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
- GPR_64;
+ defm : MovzPats1<GPR64, FGR32, MOVZ_I64_S, XOR64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
defm : MovnPats<GPR64, FGR32, MOVN_I64_S, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
GPR_64;
@@ -258,8 +258,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
defm : MovnPats<GPR32, FGR64, MOVN_I_D64, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
FGR_64;
- defm : MovnPats<GPR64, FGR64, MOVN_I64_D64, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
- FGR_64;
+ defm : MovnPats<GPR64, FGR64, MOVN_I64_D64, XOR64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
}
// For targets that don't have conditional-move instructions
// we have to match SELECT nodes with pseudo instructions.
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index f50640521738..1f1a1574443c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -127,9 +127,9 @@ static unsigned int longformBranchOpcode(unsigned int Opcode) {
llvm_unreachable("Unknown branch type");
}
-// FIXME: need to go through this whole constant islands port and check the math
-// for branch ranges and clean this up and make some functions to calculate things
-// that are done many times identically.
+// FIXME: need to go through this whole constant islands port and check
+// the math for branch ranges and clean this up and make some functions
+// to calculate things that are done many times identically.
// Need to refactor some of the code to call this routine.
static unsigned int branchMaxOffsets(unsigned int Opcode) {
unsigned Bits, Scale;
@@ -1519,8 +1519,8 @@ MipsConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
// we know that RA is saved because we always save it right now.
    // this requirement will be relaxed later, but we also have an alternate
    // way to implement this, not yet written, that does not need jal.
- // We should have a way to back out this alignment restriction if we "can" later.
- // but it is not harmful.
+ // We should have a way to back out this alignment restriction
+    // if we "can" later, but it is not harmful.
//
DestBB->setAlignment(Align(4));
Br.MaxDisp = ((1<<24)-1) * 2;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
index d3e68c014fb7..727d47d06ad4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -376,7 +376,7 @@ class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin, RegisterOperand ROD,
- RegisterOperand ROS = ROD, RegisterOperand ROT = ROD> {
+ RegisterOperand ROS = ROD, RegisterOperand ROT = ROD> {
dag OutOperandList = (outs ROD:$rd);
dag InOperandList = (ins ROS:$rs, ROT:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
@@ -386,7 +386,8 @@ class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
}
class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- Operand ImmOp, SDPatternOperator Imm, InstrItinClass itin> {
+ Operand ImmOp, SDPatternOperator Imm,
+ InstrItinClass itin> {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins GPR32Opnd:$rs, ImmOp:$sa, GPR32Opnd:$src);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
@@ -511,7 +512,8 @@ class MFHI_DESC_BASE<string instr_asm, RegisterOperand RO, SDNode OpNode,
bit isMoveReg = 1;
}
-class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO, InstrItinClass itin> {
+class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO,
+ InstrItinClass itin> {
dag OutOperandList = (outs RO:$ac);
dag InOperandList = (ins GPR32Opnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
@@ -1304,7 +1306,8 @@ let DecoderNamespace = "MipsDSP", Arch = "dsp",
// Pseudo CMP and PICK instructions.
class PseudoCMP<Instruction RealInst> :
PseudoDSP<(outs DSPCC:$cmp), (ins DSPROpnd:$rs, DSPROpnd:$rt), []>,
- PseudoInstExpansion<(RealInst DSPROpnd:$rs, DSPROpnd:$rt)>, NeverHasSideEffects;
+ PseudoInstExpansion<(RealInst DSPROpnd:$rs, DSPROpnd:$rt)>,
+ NeverHasSideEffects;
class PseudoPICK<Instruction RealInst> :
PseudoDSP<(outs DSPROpnd:$rd), (ins DSPCC:$cmp, DSPROpnd:$rs, DSPROpnd:$rt), []>,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index aa07dac86828..84ff674569cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -91,15 +91,14 @@ enum CompactBranchPolicy {
};
static cl::opt<CompactBranchPolicy> MipsCompactBranchPolicy(
- "mips-compact-branches",cl::Optional,
- cl::init(CB_Optimal),
- cl::desc("MIPS Specific: Compact branch policy."),
- cl::values(
- clEnumValN(CB_Never, "never", "Do not use compact branches if possible."),
- clEnumValN(CB_Optimal, "optimal", "Use compact branches where appropiate (default)."),
- clEnumValN(CB_Always, "always", "Always use compact branches if possible.")
- )
-);
+ "mips-compact-branches", cl::Optional, cl::init(CB_Optimal),
+ cl::desc("MIPS Specific: Compact branch policy."),
+ cl::values(clEnumValN(CB_Never, "never",
+ "Do not use compact branches if possible."),
+ clEnumValN(CB_Optimal, "optimal",
+ "Use compact branches where appropiate (default)."),
+ clEnumValN(CB_Always, "always",
+ "Always use compact branches if possible.")));
namespace {
@@ -417,8 +416,14 @@ bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
for (unsigned I = Begin; I != End; ++I) {
const MachineOperand &MO = MI.getOperand(I);
- if (MO.isReg() && MO.getReg())
- HasHazard |= checkRegDefsUses(NewDefs, NewUses, MO.getReg(), MO.isDef());
+ if (MO.isReg() && MO.getReg()) {
+ if (checkRegDefsUses(NewDefs, NewUses, MO.getReg(), MO.isDef())) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found register hazard for operand "
+ << I << ": ";
+ MO.dump());
+ HasHazard = true;
+ }
+ }
}
Defs |= NewDefs;
@@ -613,12 +618,18 @@ bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
if (MipsCompactBranchPolicy.getValue() != CB_Always ||
!TII->getEquivalentCompactForm(I)) {
if (searchBackward(MBB, *I)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot"
+ " in backwards search.\n");
Filled = true;
} else if (I->isTerminator()) {
if (searchSuccBBs(MBB, I)) {
Filled = true;
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot"
+ " in successor BB search.\n");
}
} else if (searchForward(MBB, I)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot"
+ " in forwards search.\n");
Filled = true;
}
}
@@ -663,6 +674,8 @@ bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
}
// Bundle the NOP to the instruction with the delay slot.
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": could not fill delay slot for ";
+ I->dump());
BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
MIBundleBuilder(MBB, I, std::next(I, 2));
++FilledSlots;
@@ -680,13 +693,27 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
for (IterTy I = Begin; I != End;) {
IterTy CurrI = I;
++I;
-
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE ": checking instruction: "; CurrI->dump());
// skip debug value
- if (CurrI->isDebugInstr())
+ if (CurrI->isDebugInstr()) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring debug instruction: ";
+ CurrI->dump());
continue;
+ }
+
+ if (CurrI->isBundle()) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring BUNDLE instruction: ";
+ CurrI->dump());
+ // However, we still need to update the register def-use information.
+ RegDU.update(*CurrI, 0, CurrI->getNumOperands());
+ continue;
+ }
- if (terminateSearch(*CurrI))
+ if (terminateSearch(*CurrI)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE ": should terminate search: ";
+ CurrI->dump());
break;
+ }
assert((!CurrI->isCall() && !CurrI->isReturn() && !CurrI->isBranch()) &&
"Cannot put calls, returns or branches in delay slot.");
@@ -732,6 +759,9 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
continue;
Filler = CurrI;
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot: ";
+ CurrI->dump());
+
return true;
}
@@ -752,8 +782,11 @@ bool MipsDelaySlotFiller::searchBackward(MachineBasicBlock &MBB,
MachineBasicBlock::iterator SlotI = Slot;
if (!searchRange(MBB, ++SlotI.getReverse(), MBB.rend(), RegDU, MemDU, Slot,
- Filler))
+ Filler)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay "
+ "slot using backwards search.\n");
return false;
+ }
MBB.splice(std::next(SlotI), &MBB, Filler.getReverse());
MIBundleBuilder(MBB, SlotI, std::next(SlotI, 2));
@@ -773,8 +806,11 @@ bool MipsDelaySlotFiller::searchForward(MachineBasicBlock &MBB,
RegDU.setCallerSaved(*Slot);
- if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Slot, Filler))
+ if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Slot, Filler)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay "
+ "slot using forwards search.\n");
return false;
+ }
MBB.splice(std::next(Slot), &MBB, Filler);
MIBundleBuilder(MBB, Slot, std::next(Slot, 2));
@@ -927,4 +963,6 @@ bool MipsDelaySlotFiller::terminateSearch(const MachineInstr &Candidate) const {
/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
/// slots in Mips MachineFunctions
-FunctionPass *llvm::createMipsDelaySlotFillerPass() { return new MipsDelaySlotFiller(); }
+FunctionPass *llvm::createMipsDelaySlotFillerPass() {
+ return new MipsDelaySlotFiller();
+}
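
A note on the diagnostics added throughout this file: LLVM_DEBUG (from llvm/Support/Debug.h) is compiled out entirely under NDEBUG, and in asserts builds its body runs only when llc is invoked with -debug or -debug-only=<debug type>. A minimal sketch of the idiom, assuming the pass's DEBUG_TYPE string is "mips-delay-slot-filler":

    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    #define DEBUG_TYPE "mips-delay-slot-filler" // must precede LLVM_DEBUG uses

    // Sketch only (needs an asserts build of LLVM): the stream statement
    // executes only under -debug or -debug-only=mips-delay-slot-filler.
    static void reportFilled(unsigned NumFilled) {
      LLVM_DEBUG(llvm::dbgs() << DEBUG_TYPE ": filled " << NumFilled
                              << " delay slot(s)\n");
    }
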
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
index 00cd284e7094..b1abf4a33717 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -308,7 +308,7 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword(
const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
DebugLoc DL = I->getDebugLoc();
- unsigned LL, SC;
+ unsigned LL, SC, SLT, SLTu, OR, MOVN, MOVZ, SELNEZ, SELEQZ;
unsigned BEQ = Mips::BEQ;
unsigned SEOp = Mips::SEH;
@@ -316,15 +316,32 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword(
LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
+ SLT = Mips::SLT_MM;
+ SLTu = Mips::SLTu_MM;
+ OR = STI->hasMips32r6() ? Mips::OR_MMR6 : Mips::OR_MM;
+ MOVN = Mips::MOVN_I_MM;
+ MOVZ = Mips::MOVZ_I_MM;
+ SELNEZ = STI->hasMips32r6() ? Mips::SELNEZ_MMR6 : Mips::SELNEZ;
+ SELEQZ = STI->hasMips32r6() ? Mips::SELEQZ_MMR6 : Mips::SELEQZ;
} else {
LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
: (ArePtrs64bit ? Mips::LL64 : Mips::LL);
SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
: (ArePtrs64bit ? Mips::SC64 : Mips::SC);
+ SLT = Mips::SLT;
+ SLTu = Mips::SLTu;
+ OR = Mips::OR;
+ MOVN = Mips::MOVN_I_I;
+ MOVZ = Mips::MOVZ_I_I;
+ SELNEZ = Mips::SELNEZ;
+ SELEQZ = Mips::SELEQZ;
}
bool IsSwap = false;
bool IsNand = false;
+ bool IsMin = false;
+ bool IsMax = false;
+ bool IsUnsigned = false;
unsigned Opcode = 0;
switch (I->getOpcode()) {
@@ -370,6 +387,22 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword(
case Mips::ATOMIC_LOAD_XOR_I16_POSTRA:
Opcode = Mips::XOR;
break;
+ case Mips::ATOMIC_LOAD_UMIN_I8_POSTRA:
+ case Mips::ATOMIC_LOAD_UMIN_I16_POSTRA:
+ IsUnsigned = true;
+ LLVM_FALLTHROUGH;
+ case Mips::ATOMIC_LOAD_MIN_I8_POSTRA:
+ case Mips::ATOMIC_LOAD_MIN_I16_POSTRA:
+ IsMin = true;
+ break;
+ case Mips::ATOMIC_LOAD_UMAX_I8_POSTRA:
+ case Mips::ATOMIC_LOAD_UMAX_I16_POSTRA:
+ IsUnsigned = true;
+ LLVM_FALLTHROUGH;
+ case Mips::ATOMIC_LOAD_MAX_I8_POSTRA:
+ case Mips::ATOMIC_LOAD_MAX_I16_POSTRA:
+ IsMax = true;
+ break;
default:
llvm_unreachable("Unknown subword atomic pseudo for expansion!");
}
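
The unsigned cases above set a flag and then deliberately fall through into the signed cases; LLVM_FALLTHROUGH expands to [[fallthrough]] (or a compiler-specific equivalent) to keep the implicit-fallthrough warning quiet. A standalone sketch of the same classification, with a hypothetical enum in place of the pseudo opcodes:

    #include <cstdio>

    enum Pseudo { MIN, UMIN, MAX, UMAX }; // hypothetical stand-ins

    void classify(Pseudo P, bool &IsMin, bool &IsMax, bool &IsUnsigned) {
      switch (P) {
      case UMIN:
        IsUnsigned = true;
        [[fallthrough]]; // unsigned variant is also a min
      case MIN:
        IsMin = true;
        break;
      case UMAX:
        IsUnsigned = true;
        [[fallthrough]]; // unsigned variant is also a max
      case MAX:
        IsMax = true;
        break;
      }
    }

    int main() {
      bool Mn = false, Mx = false, U = false;
      classify(UMAX, Mn, Mx, U);
      std::printf("min=%d max=%d unsigned=%d\n", Mn, Mx, U); // 0 1 1
    }
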
@@ -415,6 +448,68 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword(
BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
.addReg(BinOpRes)
.addReg(Mask);
+ } else if (IsMin || IsMax) {
+
+ assert(I->getNumOperands() == 10 &&
+ "Atomics min|max|umin|umax use an additional register");
+ Register Scratch4 = I->getOperand(9).getReg();
+
+ unsigned SLTScratch4 = IsUnsigned ? SLTu : SLT;
+ unsigned SELIncr = IsMax ? SELNEZ : SELEQZ;
+ unsigned SELOldVal = IsMax ? SELEQZ : SELNEZ;
+ unsigned MOVIncr = IsMax ? MOVN : MOVZ;
+
+    // For little endian we need to clear the uninteresting bits.
+ if (STI->isLittle()) {
+ // and OldVal, OldVal, Mask
+ // and Incr, Incr, Mask
+ BuildMI(loopMBB, DL, TII->get(Mips::AND), OldVal)
+ .addReg(OldVal)
+ .addReg(Mask);
+ BuildMI(loopMBB, DL, TII->get(Mips::AND), Incr).addReg(Incr).addReg(Mask);
+ }
+
+    // unsigned: sltu Scratch4, OldVal, Incr
+    // signed:   slt Scratch4, OldVal, Incr
+ BuildMI(loopMBB, DL, TII->get(SLTScratch4), Scratch4)
+ .addReg(OldVal)
+ .addReg(Incr);
+
+ if (STI->hasMips64r6() || STI->hasMips32r6()) {
+ // max: seleqz BinOpRes, OldVal, Scratch4
+ // selnez Scratch4, Incr, Scratch4
+ // or BinOpRes, BinOpRes, Scratch4
+    // min: selnez BinOpRes, OldVal, Scratch4
+ // seleqz Scratch4, Incr, Scratch4
+ // or BinOpRes, BinOpRes, Scratch4
+ BuildMI(loopMBB, DL, TII->get(SELOldVal), BinOpRes)
+ .addReg(OldVal)
+ .addReg(Scratch4);
+ BuildMI(loopMBB, DL, TII->get(SELIncr), Scratch4)
+ .addReg(Incr)
+ .addReg(Scratch4);
+ BuildMI(loopMBB, DL, TII->get(OR), BinOpRes)
+ .addReg(BinOpRes)
+ .addReg(Scratch4);
+ } else {
+ // max: move BinOpRes, OldVal
+ // movn BinOpRes, Incr, Scratch4, BinOpRes
+ // min: move BinOpRes, OldVal
+ // movz BinOpRes, Incr, Scratch4, BinOpRes
+ BuildMI(loopMBB, DL, TII->get(OR), BinOpRes)
+ .addReg(OldVal)
+ .addReg(Mips::ZERO);
+ BuildMI(loopMBB, DL, TII->get(MOVIncr), BinOpRes)
+ .addReg(Incr)
+ .addReg(Scratch4)
+ .addReg(BinOpRes);
+ }
+
+ // and BinOpRes, BinOpRes, Mask
+ BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
+ .addReg(BinOpRes)
+ .addReg(Mask);
+
} else if (!IsSwap) {
// <binop> binopres, oldval, incr2
// and newval, binopres, mask
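
Worth unpacking the R6 sequence documented in the comments above: seleqz d, v, c yields v when c is zero and 0 otherwise, selnez is the complement, so OR-ing the two partial results is a branchless select. A standalone emulation of one unsigned-max step (plain functions standing in for the instructions; this illustrates the semantics, it is not the emitted code):

    #include <cstdint>
    #include <cstdio>

    // seleqz d, v, c -> d = (c == 0) ? v : 0; selnez is the complement.
    static uint32_t seleqz(uint32_t V, uint32_t C) { return C == 0 ? V : 0; }
    static uint32_t selnez(uint32_t V, uint32_t C) { return C != 0 ? V : 0; }

    // One unsigned-max step, mirroring the instruction comments above.
    static uint32_t umaxStep(uint32_t OldVal, uint32_t Incr) {
      uint32_t Scratch4 = OldVal < Incr;            // sltu Scratch4, OldVal, Incr
      uint32_t BinOpRes = seleqz(OldVal, Scratch4); // keep OldVal if it wins
      Scratch4 = selnez(Incr, Scratch4);            // keep Incr if it wins
      return BinOpRes | Scratch4;                   // or BinOpRes, BinOpRes, Scratch4
    }

    int main() { std::printf("%u\n", umaxStep(3, 7)); } // prints 7
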
@@ -488,13 +583,20 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
DebugLoc DL = I->getDebugLoc();
- unsigned LL, SC, ZERO, BEQ;
+ unsigned LL, SC, ZERO, BEQ, SLT, SLTu, OR, MOVN, MOVZ, SELNEZ, SELEQZ;
if (Size == 4) {
if (STI->inMicroMipsMode()) {
LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
+ SLT = Mips::SLT_MM;
+ SLTu = Mips::SLTu_MM;
+ OR = STI->hasMips32r6() ? Mips::OR_MMR6 : Mips::OR_MM;
+ MOVN = Mips::MOVN_I_MM;
+ MOVZ = Mips::MOVZ_I_MM;
+ SELNEZ = STI->hasMips32r6() ? Mips::SELNEZ_MMR6 : Mips::SELNEZ;
+ SELEQZ = STI->hasMips32r6() ? Mips::SELEQZ_MMR6 : Mips::SELEQZ;
} else {
LL = STI->hasMips32r6()
? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
@@ -503,6 +605,13 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
: (ArePtrs64bit ? Mips::SC64 : Mips::SC);
BEQ = Mips::BEQ;
+ SLT = Mips::SLT;
+ SLTu = Mips::SLTu;
+ OR = Mips::OR;
+ MOVN = Mips::MOVN_I_I;
+ MOVZ = Mips::MOVZ_I_I;
+ SELNEZ = Mips::SELNEZ;
+ SELEQZ = Mips::SELEQZ;
}
ZERO = Mips::ZERO;
@@ -511,6 +620,13 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
SC = STI->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
ZERO = Mips::ZERO_64;
BEQ = Mips::BEQ64;
+ SLT = Mips::SLT64;
+ SLTu = Mips::SLTu64;
+ OR = Mips::OR64;
+ MOVN = Mips::MOVN_I64_I64;
+ MOVZ = Mips::MOVZ_I64_I64;
+ SELNEZ = Mips::SELNEZ64;
+ SELEQZ = Mips::SELEQZ64;
}
Register OldVal = I->getOperand(0).getReg();
@@ -519,10 +635,15 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
Register Scratch = I->getOperand(3).getReg();
unsigned Opcode = 0;
- unsigned OR = 0;
unsigned AND = 0;
unsigned NOR = 0;
+
+ bool IsOr = false;
bool IsNand = false;
+ bool IsMin = false;
+ bool IsMax = false;
+ bool IsUnsigned = false;
+
switch (I->getOpcode()) {
case Mips::ATOMIC_LOAD_ADD_I32_POSTRA:
Opcode = Mips::ADDu;
@@ -545,7 +666,7 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
NOR = Mips::NOR;
break;
case Mips::ATOMIC_SWAP_I32_POSTRA:
- OR = Mips::OR;
+ IsOr = true;
break;
case Mips::ATOMIC_LOAD_ADD_I64_POSTRA:
Opcode = Mips::DADDu;
@@ -568,7 +689,23 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
NOR = Mips::NOR64;
break;
case Mips::ATOMIC_SWAP_I64_POSTRA:
- OR = Mips::OR64;
+ IsOr = true;
+ break;
+ case Mips::ATOMIC_LOAD_UMIN_I32_POSTRA:
+ case Mips::ATOMIC_LOAD_UMIN_I64_POSTRA:
+ IsUnsigned = true;
+ LLVM_FALLTHROUGH;
+ case Mips::ATOMIC_LOAD_MIN_I32_POSTRA:
+ case Mips::ATOMIC_LOAD_MIN_I64_POSTRA:
+ IsMin = true;
+ break;
+ case Mips::ATOMIC_LOAD_UMAX_I32_POSTRA:
+ case Mips::ATOMIC_LOAD_UMAX_I64_POSTRA:
+ IsUnsigned = true;
+ LLVM_FALLTHROUGH;
+ case Mips::ATOMIC_LOAD_MAX_I32_POSTRA:
+ case Mips::ATOMIC_LOAD_MAX_I64_POSTRA:
+ IsMax = true;
break;
default:
llvm_unreachable("Unknown pseudo atomic!");
@@ -592,7 +729,59 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!");
assert((OldVal != Incr) && "Clobbered the wrong reg!");
- if (Opcode) {
+ if (IsMin || IsMax) {
+
+ assert(I->getNumOperands() == 5 &&
+ "Atomics min|max|umin|umax use an additional register");
+ Register Scratch2 = I->getOperand(4).getReg();
+
+    // On Mips64, the result of slt is a GPR32.
+ Register Scratch2_32 =
+ (Size == 8) ? STI->getRegisterInfo()->getSubReg(Scratch2, Mips::sub_32)
+ : Scratch2;
+
+ unsigned SLTScratch2 = IsUnsigned ? SLTu : SLT;
+ unsigned SELIncr = IsMax ? SELNEZ : SELEQZ;
+ unsigned SELOldVal = IsMax ? SELEQZ : SELNEZ;
+ unsigned MOVIncr = IsMax ? MOVN : MOVZ;
+
+    // unsigned: sltu Scratch2, OldVal, Incr
+    // signed:   slt Scratch2, OldVal, Incr
+ BuildMI(loopMBB, DL, TII->get(SLTScratch2), Scratch2_32)
+ .addReg(OldVal)
+ .addReg(Incr);
+
+ if (STI->hasMips64r6() || STI->hasMips32r6()) {
+ // max: seleqz Scratch, OldVal, Scratch2
+ // selnez Scratch2, Incr, Scratch2
+ // or Scratch, Scratch, Scratch2
+ // min: selnez Scratch, OldVal, Scratch2
+ // seleqz Scratch2, Incr, Scratch2
+ // or Scratch, Scratch, Scratch2
+ BuildMI(loopMBB, DL, TII->get(SELOldVal), Scratch)
+ .addReg(OldVal)
+ .addReg(Scratch2);
+ BuildMI(loopMBB, DL, TII->get(SELIncr), Scratch2)
+ .addReg(Incr)
+ .addReg(Scratch2);
+ BuildMI(loopMBB, DL, TII->get(OR), Scratch)
+ .addReg(Scratch)
+ .addReg(Scratch2);
+ } else {
+ // max: move Scratch, OldVal
+ // movn Scratch, Incr, Scratch2, Scratch
+ // min: move Scratch, OldVal
+ // movz Scratch, Incr, Scratch2, Scratch
+ BuildMI(loopMBB, DL, TII->get(OR), Scratch)
+ .addReg(OldVal)
+ .addReg(ZERO);
+ BuildMI(loopMBB, DL, TII->get(MOVIncr), Scratch)
+ .addReg(Incr)
+ .addReg(Scratch2)
+ .addReg(Scratch);
+ }
+
+ } else if (Opcode) {
BuildMI(loopMBB, DL, TII->get(Opcode), Scratch).addReg(OldVal).addReg(Incr);
} else if (IsNand) {
assert(AND && NOR &&
@@ -600,12 +789,19 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr);
BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch);
} else {
- assert(OR && "Unknown instruction for atomic pseudo expansion!");
+ assert(IsOr && OR && "Unknown instruction for atomic pseudo expansion!");
+ (void)IsOr;
BuildMI(loopMBB, DL, TII->get(OR), Scratch).addReg(Incr).addReg(ZERO);
}
- BuildMI(loopMBB, DL, TII->get(SC), Scratch).addReg(Scratch).addReg(Ptr).addImm(0);
- BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Scratch).addReg(ZERO).addMBB(loopMBB);
+ BuildMI(loopMBB, DL, TII->get(SC), Scratch)
+ .addReg(Scratch)
+ .addReg(Ptr)
+ .addImm(0);
+ BuildMI(loopMBB, DL, TII->get(BEQ))
+ .addReg(Scratch)
+ .addReg(ZERO)
+ .addMBB(loopMBB);
NMBBI = BB.end();
I->eraseFromParent();
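
The sc/beq pair that closes the loop is what makes the read-modify-write atomic: the store-conditional succeeds only if no other agent wrote the word since the ll, and on failure it writes 0 and the beq branches back to retry. A portable analogue of the retry semantics (a compare-exchange loop, not the emitted code) for an atomic signed max:

    #include <algorithm>
    #include <atomic>
    #include <cstdio>

    // Retry until the "store-conditional" (here: compare_exchange_weak)
    // succeeds, mirroring the ll/sc loop structure above.
    int atomicFetchMax(std::atomic<int> &Mem, int Incr) {
      int OldVal = Mem.load();
      while (!Mem.compare_exchange_weak(OldVal, std::max(OldVal, Incr)))
        ; // the word changed between load and store; OldVal reloaded, retry
      return OldVal;
    }

    int main() {
      std::atomic<int> Mem{3};
      int Old = atomicFetchMax(Mem, 7);
      std::printf("old=%d new=%d\n", Old, Mem.load()); // old=3 new=7
    }
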
@@ -644,6 +840,14 @@ bool MipsExpandPseudo::expandMI(MachineBasicBlock &MBB,
case Mips::ATOMIC_LOAD_OR_I16_POSTRA:
case Mips::ATOMIC_LOAD_XOR_I8_POSTRA:
case Mips::ATOMIC_LOAD_XOR_I16_POSTRA:
+ case Mips::ATOMIC_LOAD_MIN_I8_POSTRA:
+ case Mips::ATOMIC_LOAD_MIN_I16_POSTRA:
+ case Mips::ATOMIC_LOAD_MAX_I8_POSTRA:
+ case Mips::ATOMIC_LOAD_MAX_I16_POSTRA:
+ case Mips::ATOMIC_LOAD_UMIN_I8_POSTRA:
+ case Mips::ATOMIC_LOAD_UMIN_I16_POSTRA:
+ case Mips::ATOMIC_LOAD_UMAX_I8_POSTRA:
+ case Mips::ATOMIC_LOAD_UMAX_I16_POSTRA:
return expandAtomicBinOpSubword(MBB, MBBI, NMBB);
case Mips::ATOMIC_LOAD_ADD_I32_POSTRA:
case Mips::ATOMIC_LOAD_SUB_I32_POSTRA:
@@ -652,6 +856,10 @@ bool MipsExpandPseudo::expandMI(MachineBasicBlock &MBB,
case Mips::ATOMIC_LOAD_XOR_I32_POSTRA:
case Mips::ATOMIC_LOAD_NAND_I32_POSTRA:
case Mips::ATOMIC_SWAP_I32_POSTRA:
+ case Mips::ATOMIC_LOAD_MIN_I32_POSTRA:
+ case Mips::ATOMIC_LOAD_MAX_I32_POSTRA:
+ case Mips::ATOMIC_LOAD_UMIN_I32_POSTRA:
+ case Mips::ATOMIC_LOAD_UMAX_I32_POSTRA:
return expandAtomicBinOp(MBB, MBBI, NMBB, 4);
case Mips::ATOMIC_LOAD_ADD_I64_POSTRA:
case Mips::ATOMIC_LOAD_SUB_I64_POSTRA:
@@ -660,6 +868,10 @@ bool MipsExpandPseudo::expandMI(MachineBasicBlock &MBB,
case Mips::ATOMIC_LOAD_XOR_I64_POSTRA:
case Mips::ATOMIC_LOAD_NAND_I64_POSTRA:
case Mips::ATOMIC_SWAP_I64_POSTRA:
+ case Mips::ATOMIC_LOAD_MIN_I64_POSTRA:
+ case Mips::ATOMIC_LOAD_MAX_I64_POSTRA:
+ case Mips::ATOMIC_LOAD_UMIN_I64_POSTRA:
+ case Mips::ATOMIC_LOAD_UMAX_I64_POSTRA:
return expandAtomicBinOp(MBB, MBBI, NMBB, 8);
default:
return Modified;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index e5997af3bcc5..8c36bcd5c8f2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -314,7 +314,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
switch(ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_i:
case InlineAsm::Constraint_m:
case InlineAsm::Constraint_R:
case InlineAsm::Constraint_ZC:
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 6a5406357441..46b1f35a6fc7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -111,21 +111,19 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- if (VT.isVector()) {
- if (Subtarget.isABI_O32()) {
- return MVT::i32;
- } else {
- return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64;
- }
- }
- return MipsTargetLowering::getRegisterType(Context, VT);
+ if (!VT.isVector())
+ return getRegisterType(Context, VT);
+
+ return Subtarget.isABI_O32() || VT.getSizeInBits() == 32 ? MVT::i32
+ : MVT::i64;
}
unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
if (VT.isVector())
- return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)),
+ return std::max(((unsigned)VT.getSizeInBits() /
+ (Subtarget.isABI_O32() ? 32 : 64)),
1U);
return MipsTargetLowering::getNumRegisters(Context, VT);
}
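
A quick worked example of the register-count rule above, under the assumption that only the GPR width differs between the ABIs (32-bit pieces on O32, 64-bit on N32/N64), with a minimum of one register:

    #include <algorithm>
    #include <cstdio>

    unsigned numRegsForVector(unsigned SizeInBits, bool IsO32) {
      return std::max(SizeInBits / (IsO32 ? 32u : 64u), 1u);
    }

    int main() {
      std::printf("%u\n", numRegsForVector(128, true));  // v4i32, O32 -> 4
      std::printf("%u\n", numRegsForVector(128, false)); // v4i32, N64 -> 2
      std::printf("%u\n", numRegsForVector(32, false));  // v4i8,  N64 -> 1
    }
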
@@ -528,8 +526,9 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
isMicroMips = Subtarget.inMicroMipsMode();
}
-const MipsTargetLowering *MipsTargetLowering::create(const MipsTargetMachine &TM,
- const MipsSubtarget &STI) {
+const MipsTargetLowering *
+MipsTargetLowering::create(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI) {
if (STI.inMips16Mode())
return createMips16TargetLowering(TM, STI);
@@ -710,7 +709,8 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
SDValue True = N->getOperand(1);
SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
- SetCC.getOperand(1), ISD::getSetCCInverse(CC, true));
+ SetCC.getOperand(1),
+ ISD::getSetCCInverse(CC, SetCC.getValueType()));
return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
}
@@ -744,7 +744,8 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (Diff == -1) {
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
- SetCC.getOperand(1), ISD::getSetCCInverse(CC, true));
+ SetCC.getOperand(1),
+ ISD::getSetCCInverse(CC, SetCC.getValueType()));
return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, True);
}
@@ -1367,6 +1368,43 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitAtomicCmpSwap(MI, BB);
case Mips::ATOMIC_CMP_SWAP_I64:
return emitAtomicCmpSwap(MI, BB);
+
+ case Mips::ATOMIC_LOAD_MIN_I8:
+ return emitAtomicBinaryPartword(MI, BB, 1);
+ case Mips::ATOMIC_LOAD_MIN_I16:
+ return emitAtomicBinaryPartword(MI, BB, 2);
+ case Mips::ATOMIC_LOAD_MIN_I32:
+ return emitAtomicBinary(MI, BB);
+ case Mips::ATOMIC_LOAD_MIN_I64:
+ return emitAtomicBinary(MI, BB);
+
+ case Mips::ATOMIC_LOAD_MAX_I8:
+ return emitAtomicBinaryPartword(MI, BB, 1);
+ case Mips::ATOMIC_LOAD_MAX_I16:
+ return emitAtomicBinaryPartword(MI, BB, 2);
+ case Mips::ATOMIC_LOAD_MAX_I32:
+ return emitAtomicBinary(MI, BB);
+ case Mips::ATOMIC_LOAD_MAX_I64:
+ return emitAtomicBinary(MI, BB);
+
+ case Mips::ATOMIC_LOAD_UMIN_I8:
+ return emitAtomicBinaryPartword(MI, BB, 1);
+ case Mips::ATOMIC_LOAD_UMIN_I16:
+ return emitAtomicBinaryPartword(MI, BB, 2);
+ case Mips::ATOMIC_LOAD_UMIN_I32:
+ return emitAtomicBinary(MI, BB);
+ case Mips::ATOMIC_LOAD_UMIN_I64:
+ return emitAtomicBinary(MI, BB);
+
+ case Mips::ATOMIC_LOAD_UMAX_I8:
+ return emitAtomicBinaryPartword(MI, BB, 1);
+ case Mips::ATOMIC_LOAD_UMAX_I16:
+ return emitAtomicBinaryPartword(MI, BB, 2);
+ case Mips::ATOMIC_LOAD_UMAX_I32:
+ return emitAtomicBinary(MI, BB);
+ case Mips::ATOMIC_LOAD_UMAX_I64:
+ return emitAtomicBinary(MI, BB);
+
case Mips::PseudoSDIV:
case Mips::PseudoUDIV:
case Mips::DIV:
@@ -1428,6 +1466,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI,
DebugLoc DL = MI.getDebugLoc();
unsigned AtomicOp;
+ bool NeedsAdditionalReg = false;
switch (MI.getOpcode()) {
case Mips::ATOMIC_LOAD_ADD_I32:
AtomicOp = Mips::ATOMIC_LOAD_ADD_I32_POSTRA;
@@ -1471,6 +1510,38 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI,
case Mips::ATOMIC_SWAP_I64:
AtomicOp = Mips::ATOMIC_SWAP_I64_POSTRA;
break;
+ case Mips::ATOMIC_LOAD_MIN_I32:
+ AtomicOp = Mips::ATOMIC_LOAD_MIN_I32_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_MAX_I32:
+ AtomicOp = Mips::ATOMIC_LOAD_MAX_I32_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_UMIN_I32:
+ AtomicOp = Mips::ATOMIC_LOAD_UMIN_I32_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_UMAX_I32:
+ AtomicOp = Mips::ATOMIC_LOAD_UMAX_I32_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_MIN_I64:
+ AtomicOp = Mips::ATOMIC_LOAD_MIN_I64_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_MAX_I64:
+ AtomicOp = Mips::ATOMIC_LOAD_MAX_I64_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_UMIN_I64:
+ AtomicOp = Mips::ATOMIC_LOAD_UMIN_I64_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_UMAX_I64:
+ AtomicOp = Mips::ATOMIC_LOAD_UMAX_I64_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
default:
llvm_unreachable("Unknown pseudo atomic for replacement!");
}
@@ -1523,12 +1594,19 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI,
BuildMI(*BB, II, DL, TII->get(Mips::COPY), IncrCopy).addReg(Incr);
BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr);
- BuildMI(*BB, II, DL, TII->get(AtomicOp))
- .addReg(OldVal, RegState::Define | RegState::EarlyClobber)
- .addReg(PtrCopy)
- .addReg(IncrCopy)
- .addReg(Scratch, RegState::Define | RegState::EarlyClobber |
- RegState::Implicit | RegState::Dead);
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, II, DL, TII->get(AtomicOp))
+ .addReg(OldVal, RegState::Define | RegState::EarlyClobber)
+ .addReg(PtrCopy)
+ .addReg(IncrCopy)
+ .addReg(Scratch, RegState::Define | RegState::EarlyClobber |
+ RegState::Implicit | RegState::Dead);
+ if (NeedsAdditionalReg) {
+ Register Scratch2 =
+ RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal));
+ MIB.addReg(Scratch2, RegState::Define | RegState::EarlyClobber |
+ RegState::Implicit | RegState::Dead);
+ }
MI.eraseFromParent();
@@ -1596,6 +1674,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
Register Scratch3 = RegInfo.createVirtualRegister(RC);
unsigned AtomicOp = 0;
+ bool NeedsAdditionalReg = false;
switch (MI.getOpcode()) {
case Mips::ATOMIC_LOAD_NAND_I8:
AtomicOp = Mips::ATOMIC_LOAD_NAND_I8_POSTRA;
@@ -1639,6 +1718,38 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
case Mips::ATOMIC_LOAD_XOR_I16:
AtomicOp = Mips::ATOMIC_LOAD_XOR_I16_POSTRA;
break;
+ case Mips::ATOMIC_LOAD_MIN_I8:
+ AtomicOp = Mips::ATOMIC_LOAD_MIN_I8_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_MIN_I16:
+ AtomicOp = Mips::ATOMIC_LOAD_MIN_I16_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_MAX_I8:
+ AtomicOp = Mips::ATOMIC_LOAD_MAX_I8_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_MAX_I16:
+ AtomicOp = Mips::ATOMIC_LOAD_MAX_I16_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_UMIN_I8:
+ AtomicOp = Mips::ATOMIC_LOAD_UMIN_I8_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_UMIN_I16:
+ AtomicOp = Mips::ATOMIC_LOAD_UMIN_I16_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_UMAX_I8:
+ AtomicOp = Mips::ATOMIC_LOAD_UMAX_I8_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
+ case Mips::ATOMIC_LOAD_UMAX_I16:
+ AtomicOp = Mips::ATOMIC_LOAD_UMAX_I16_POSTRA;
+ NeedsAdditionalReg = true;
+ break;
default:
llvm_unreachable("Unknown subword atomic pseudo for expansion!");
}
@@ -1693,19 +1804,25 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
// emitAtomicBinary. In summary, we need a scratch register which is going to
// be undef, that is unique among registers chosen for the instruction.
- BuildMI(BB, DL, TII->get(AtomicOp))
- .addReg(Dest, RegState::Define | RegState::EarlyClobber)
- .addReg(AlignedAddr)
- .addReg(Incr2)
- .addReg(Mask)
- .addReg(Mask2)
- .addReg(ShiftAmt)
- .addReg(Scratch, RegState::EarlyClobber | RegState::Define |
- RegState::Dead | RegState::Implicit)
- .addReg(Scratch2, RegState::EarlyClobber | RegState::Define |
- RegState::Dead | RegState::Implicit)
- .addReg(Scratch3, RegState::EarlyClobber | RegState::Define |
- RegState::Dead | RegState::Implicit);
+ MachineInstrBuilder MIB =
+ BuildMI(BB, DL, TII->get(AtomicOp))
+ .addReg(Dest, RegState::Define | RegState::EarlyClobber)
+ .addReg(AlignedAddr)
+ .addReg(Incr2)
+ .addReg(Mask)
+ .addReg(Mask2)
+ .addReg(ShiftAmt)
+ .addReg(Scratch, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit)
+ .addReg(Scratch2, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit)
+ .addReg(Scratch3, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit);
+ if (NeedsAdditionalReg) {
+ Register Scratch4 = RegInfo.createVirtualRegister(RC);
+ MIB.addReg(Scratch4, RegState::EarlyClobber | RegState::Define |
+ RegState::Dead | RegState::Implicit);
+ }
MI.eraseFromParent(); // The instruction is gone now.
@@ -2804,7 +2921,8 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
// allocate a register directly.
Reg = State.AllocateReg(IntRegs);
}
- } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+ } else if (ValVT == MVT::i32 ||
+ (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
Reg = State.AllocateReg(IntRegs);
// If this is the first part of an i64 arg,
// the allocated register must be either A0 or A2.
@@ -3634,8 +3752,8 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
-bool
-MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
+bool MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
+ bool IsSigned) const {
if ((ABI.IsN32() || ABI.IsN64()) && Type == MVT::i32)
return true;
@@ -4015,11 +4133,13 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
}
- std::pair<unsigned, const TargetRegisterClass *> R;
- R = parseRegForInlineAsmConstraint(Constraint, VT);
+ if (!Constraint.empty()) {
+ std::pair<unsigned, const TargetRegisterClass *> R;
+ R = parseRegForInlineAsmConstraint(Constraint, VT);
- if (R.second)
- return R;
+ if (R.second)
+ return R;
+ }
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
@@ -4122,7 +4242,8 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS, Instruction *I) const {
+ unsigned AS,
+ Instruction *I) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
@@ -4498,8 +4619,9 @@ MachineBasicBlock *MipsTargetLowering::emitPseudoSELECT(MachineInstr &MI,
return BB;
}
-MachineBasicBlock *MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,
- MachineBasicBlock *BB) const {
+MachineBasicBlock *
+MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
assert(!(Subtarget.hasMips4() || Subtarget.hasMips32()) &&
"Subtarget already supports SELECT nodes with the use of"
"conditional-move instructions.");
@@ -4575,8 +4697,9 @@ MachineBasicBlock *MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT,
- const MachineFunction &MF) const {
+Register
+MipsTargetLowering::getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const {
// Named registers is expected to be fairly rare. For now, just support $28
// since the linux kernel uses it.
if (Subtarget.isGP64bit()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
index 0a5cddd45afb..92cbe1d54c5b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -348,7 +348,7 @@ class TargetRegisterClass;
void HandleByVal(CCState *, unsigned &, unsigned) const override;
- Register getRegisterByName(const char* RegName, EVT VT,
+ Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
/// If a physical register, this returns the register that receives the
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td
index e94e107e64c2..79776998463f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -129,8 +129,9 @@ class ABSS_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
HARDFLOAT,
NeverHasSideEffects;
-class CVT_PS_S_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC, InstrItinClass Itin, bit IsComm,
- SDPatternOperator OpNode= null_frag> :
+class CVT_PS_S_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
+ InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode = null_frag> :
InstSE<(outs DstRC:$fd), (ins SrcRC:$fs, SrcRC:$ft),
!strconcat(opstr, "\t$fd, $fs, $ft"),
[(set DstRC:$fd, (OpNode SrcRC:$fs, SrcRC:$ft))], Itin, FrmFR, opstr>,
@@ -142,7 +143,8 @@ multiclass ABSS_M<string opstr, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> {
def _D32 : MMRel, ABSS_FT<opstr, AFGR64Opnd, AFGR64Opnd, Itin, OpNode>,
FGR_32;
- def _D64 : StdMMR6Rel, ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>, FGR_64 {
+ def _D64 : StdMMR6Rel, ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>,
+ FGR_64 {
string DecoderNamespace = "MipsFP64";
}
}
@@ -362,17 +364,21 @@ defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
// Floating Point Instructions
//===----------------------------------------------------------------------===//
let AdditionalPredicates = [NotInMicroMips] in {
- def ROUND_W_S : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>,
- ABSS_FM<0xc, 16>, ISA_MIPS2;
- defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2;
- def TRUNC_W_S : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>,
- ABSS_FM<0xd, 16>, ISA_MIPS2;
- def CEIL_W_S : MMRel, StdMMR6Rel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
- ABSS_FM<0xe, 16>, ISA_MIPS2;
- def FLOOR_W_S : MMRel, StdMMR6Rel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>,
- ABSS_FM<0xf, 16>, ISA_MIPS2;
- def CVT_W_S : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>,
- ABSS_FM<0x24, 16>, ISA_MIPS1;
+ def ROUND_W_S : MMRel, StdMMR6Rel,
+ ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>,
+ ABSS_FM<0xc, 16>, ISA_MIPS2;
+ defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2;
+ def TRUNC_W_S : MMRel, StdMMR6Rel,
+ ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>,
+ ABSS_FM<0xd, 16>, ISA_MIPS2;
+ def CEIL_W_S : MMRel, StdMMR6Rel,
+ ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
+ ABSS_FM<0xe, 16>, ISA_MIPS2;
+ def FLOOR_W_S : MMRel, StdMMR6Rel,
+ ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>,
+ ABSS_FM<0xf, 16>, ISA_MIPS2;
+ def CVT_W_S : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>,
+ ABSS_FM<0x24, 16>, ISA_MIPS1;
defm TRUNC_W : ROUND_M<"trunc.w.d", II_TRUNC>, ABSS_FM<0xd, 17>, ISA_MIPS2;
defm CEIL_W : ROUND_M<"ceil.w.d", II_CEIL>, ABSS_FM<0xe, 17>, ISA_MIPS2;
@@ -536,7 +542,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
let DecoderNamespace = "MipsFP64";
}
- def MTHC1_D32 : MMRel, StdMMR6Rel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
+ def MTHC1_D32 : MMRel, StdMMR6Rel,
+ MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
MFC1_FM<7>, ISA_MIPS32R2, FGR_32;
def MTHC1_D64 : MTC1_64_FT<"mthc1", FGR64Opnd, GPR32Opnd, II_MTHC1>,
MFC1_FM<7>, ISA_MIPS32R2, FGR_64 {
@@ -621,7 +628,7 @@ let AdditionalPredicates = [IsNotNaCl, NotInMicroMips],
INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_64;
}
-/// Floating-point Aritmetic
+/// Floating-point Arithmetic
let AdditionalPredicates = [NotInMicroMips] in {
def FADD_S : MMRel, ADDS_FT<"add.s", FGR32Opnd, II_ADD_S, 1, fadd>,
ADDS_FM<0x00, 16>, ISA_MIPS1;
@@ -950,9 +957,12 @@ multiclass NMADD_NMSUB<Instruction Nmadd, Instruction Nmsub, RegisterOperand RC>
}
let AdditionalPredicates = [NoNaNsFPMath, HasMadd4, NotInMicroMips] in {
- defm : NMADD_NMSUB<NMADD_S, NMSUB_S, FGR32Opnd>, INSN_MIPS4_32R2_NOT_32R6_64R6;
- defm : NMADD_NMSUB<NMADD_D32, NMSUB_D32, AFGR64Opnd>, FGR_32, INSN_MIPS4_32R2_NOT_32R6_64R6;
- defm : NMADD_NMSUB<NMADD_D64, NMSUB_D64, FGR64Opnd>, FGR_64, INSN_MIPS4_32R2_NOT_32R6_64R6;
+ defm : NMADD_NMSUB<NMADD_S, NMSUB_S, FGR32Opnd>,
+ INSN_MIPS4_32R2_NOT_32R6_64R6;
+ defm : NMADD_NMSUB<NMADD_D32, NMSUB_D32, AFGR64Opnd>,
+ FGR_32, INSN_MIPS4_32R2_NOT_32R6_64R6;
+ defm : NMADD_NMSUB<NMADD_D64, NMSUB_D64, FGR64Opnd>,
+ FGR_64, INSN_MIPS4_32R2_NOT_32R6_64R6;
}
// Patterns for loads/stores with a reg+imm operand.
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFormats.td
index af7cdbe5d609..4624c1f2d04a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -97,9 +97,6 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
bit isCTI = 0; // Any form of Control Transfer Instruction.
// Required for MIPSR6
bit hasForbiddenSlot = 0; // Instruction has a forbidden slot.
- bit IsPCRelativeLoad = 0; // Load instruction with implicit source register
- // ($pc) and with explicit offset and destination
- // register
bit hasFCCRegOperand = 0; // Instruction uses $fcc<X> register and is
// present in MIPS-I to MIPS-III.
@@ -107,8 +104,7 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
let TSFlags{3-0} = FormBits;
let TSFlags{4} = isCTI;
let TSFlags{5} = hasForbiddenSlot;
- let TSFlags{6} = IsPCRelativeLoad;
- let TSFlags{7} = hasFCCRegOperand;
+ let TSFlags{6} = hasFCCRegOperand;
let DecoderNamespace = "Mips";
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 6bb25ee5754d..25bbe5990827 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -275,7 +275,8 @@ MipsInstrInfo::BranchType MipsInstrInfo::analyzeBranch(
return BT_CondUncond;
}
-bool MipsInstrInfo::isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const {
+bool MipsInstrInfo::isBranchOffsetInRange(unsigned BranchOpc,
+ int64_t BrOffset) const {
switch (BranchOpc) {
case Mips::B:
case Mips::BAL:
@@ -433,7 +434,6 @@ bool MipsInstrInfo::isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset)
llvm_unreachable("Unknown branch instruction!");
}
-
/// Return the corresponding compact (no delay slot) form of a branch.
unsigned MipsInstrInfo::getEquivalentCompactForm(
const MachineBasicBlock::iterator I) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
index 5fae4d681c13..d9a3ff802708 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -959,8 +959,7 @@ foreach I = {16} in
// Like uimm16_64 but coerces simm16 to uimm16.
def uimm16_relaxed : Operand<i32> {
let PrintMethod = "printUImm<16>";
- let ParserMatchClass =
- !cast<AsmOperandClass>("UImm16RelaxedAsmOperandClass");
+ let ParserMatchClass = UImm16RelaxedAsmOperandClass;
}
foreach I = {5} in
@@ -980,14 +979,12 @@ foreach I = {16} in
// Like uimm16_64 but coerces simm16 to uimm16.
def uimm16_64_relaxed : Operand<i64> {
let PrintMethod = "printUImm<16>";
- let ParserMatchClass =
- !cast<AsmOperandClass>("UImm16RelaxedAsmOperandClass");
+ let ParserMatchClass = UImm16RelaxedAsmOperandClass;
}
def uimm16_altrelaxed : Operand<i32> {
let PrintMethod = "printUImm<16>";
- let ParserMatchClass =
- !cast<AsmOperandClass>("UImm16AltRelaxedAsmOperandClass");
+ let ParserMatchClass = UImm16AltRelaxedAsmOperandClass;
}
// Like uimm5 but reports a less confusing error for 32-63 when
// an instruction alias permits that.
@@ -1060,22 +1057,22 @@ foreach I = {16, 32} in
// Like simm16 but coerces uimm16 to simm16.
def simm16_relaxed : Operand<i32> {
let DecoderMethod = "DecodeSImmWithOffsetAndScale<16>";
- let ParserMatchClass = !cast<AsmOperandClass>("SImm16RelaxedAsmOperandClass");
+ let ParserMatchClass = SImm16RelaxedAsmOperandClass;
}
def simm16_64 : Operand<i64> {
let DecoderMethod = "DecodeSImmWithOffsetAndScale<16>";
- let ParserMatchClass = !cast<AsmOperandClass>("SImm16AsmOperandClass");
+ let ParserMatchClass = SImm16AsmOperandClass;
}
// like simm32 but coerces simm32 to uimm32.
def uimm32_coerced : Operand<i32> {
- let ParserMatchClass = !cast<AsmOperandClass>("UImm32CoercedAsmOperandClass");
+ let ParserMatchClass = UImm32CoercedAsmOperandClass;
}
// Like simm32 but coerces uimm32 to simm32.
def simm32_relaxed : Operand<i32> {
let DecoderMethod = "DecodeSImmWithOffsetAndScale<32>";
- let ParserMatchClass = !cast<AsmOperandClass>("SImm32RelaxedAsmOperandClass");
+ let ParserMatchClass = SImm32RelaxedAsmOperandClass;
}
// This is almost the same as a uimm7 but 0x7f is interpreted as -1.
@@ -1089,59 +1086,14 @@ def MipsMemAsmOperand : AsmOperandClass {
let ParserMethod = "parseMemOperand";
}
-def MipsMemSimm9AsmOperand : AsmOperandClass {
- let Name = "MemOffsetSimm9";
+class MipsMemSimmAsmOperand<int Width, int Shift = 0> : AsmOperandClass {
+ let Name = "MemOffsetSimm" # Width # "_" # Shift;
let SuperClasses = [MipsMemAsmOperand];
let RenderMethod = "addMemOperands";
let ParserMethod = "parseMemOperand";
- let PredicateMethod = "isMemWithSimmOffset<9>";
- let DiagnosticType = "MemSImm9";
-}
-
-def MipsMemSimm10AsmOperand : AsmOperandClass {
- let Name = "MemOffsetSimm10";
- let SuperClasses = [MipsMemAsmOperand];
- let RenderMethod = "addMemOperands";
- let ParserMethod = "parseMemOperand";
- let PredicateMethod = "isMemWithSimmOffset<10>";
- let DiagnosticType = "MemSImm10";
-}
-
-def MipsMemSimm12AsmOperand : AsmOperandClass {
- let Name = "MemOffsetSimm12";
- let SuperClasses = [MipsMemAsmOperand];
- let RenderMethod = "addMemOperands";
- let ParserMethod = "parseMemOperand";
- let PredicateMethod = "isMemWithSimmOffset<12>";
- let DiagnosticType = "MemSImm12";
-}
-
-foreach I = {1, 2, 3} in
- def MipsMemSimm10Lsl # I # AsmOperand : AsmOperandClass {
- let Name = "MemOffsetSimm10_" # I;
- let SuperClasses = [MipsMemAsmOperand];
- let RenderMethod = "addMemOperands";
- let ParserMethod = "parseMemOperand";
- let PredicateMethod = "isMemWithSimmOffset<10, " # I # ">";
- let DiagnosticType = "MemSImm10Lsl" # I;
- }
-
-def MipsMemSimm11AsmOperand : AsmOperandClass {
- let Name = "MemOffsetSimm11";
- let SuperClasses = [MipsMemAsmOperand];
- let RenderMethod = "addMemOperands";
- let ParserMethod = "parseMemOperand";
- let PredicateMethod = "isMemWithSimmOffset<11>";
- let DiagnosticType = "MemSImm11";
-}
-
-def MipsMemSimm16AsmOperand : AsmOperandClass {
- let Name = "MemOffsetSimm16";
- let SuperClasses = [MipsMemAsmOperand];
- let RenderMethod = "addMemOperands";
- let ParserMethod = "parseMemOperand";
- let PredicateMethod = "isMemWithSimmOffset<16>";
- let DiagnosticType = "MemSImm16";
+ let PredicateMethod = "isMemWithSimmOffset<" # Width # ", " # Shift # ">";
+ let DiagnosticType = !if(!eq(Shift, 0), "MemSImm" # Width,
+ "MemSImm" # Width # "Lsl" # Shift);
}
def MipsMemSimmPtrAsmOperand : AsmOperandClass {
@@ -1188,44 +1140,26 @@ def simm12 : Operand<i32> {
let DecoderMethod = "DecodeSimm12";
}
-def mem_simm9 : mem_generic {
+def mem_simm9_exp : mem_generic {
let MIOperandInfo = (ops ptr_rc, simm9);
- let EncoderMethod = "getMemEncoding";
- let ParserMatchClass = MipsMemSimm9AsmOperand;
+ let ParserMatchClass = MipsMemSimmPtrAsmOperand;
+ let OperandNamespace = "MipsII";
+ let OperandType = "OPERAND_MEM_SIMM9";
}
-def mem_simm10 : mem_generic {
- let MIOperandInfo = (ops ptr_rc, simm10);
- let EncoderMethod = "getMemEncoding";
- let ParserMatchClass = MipsMemSimm10AsmOperand;
-}
+foreach I = {9, 10, 11, 12, 16} in
+ def mem_simm # I : mem_generic {
+ let MIOperandInfo = (ops ptr_rc, !cast<Operand>("simm" # I));
+ let ParserMatchClass = MipsMemSimmAsmOperand<I>;
+ }
foreach I = {1, 2, 3} in
def mem_simm10_lsl # I : mem_generic {
let MIOperandInfo = (ops ptr_rc, !cast<Operand>("simm10_lsl" # I));
let EncoderMethod = "getMemEncoding<" # I # ">";
- let ParserMatchClass =
- !cast<AsmOperandClass>("MipsMemSimm10Lsl" # I # "AsmOperand");
+ let ParserMatchClass = MipsMemSimmAsmOperand<10, I>;
}
-def mem_simm11 : mem_generic {
- let MIOperandInfo = (ops ptr_rc, simm11);
- let EncoderMethod = "getMemEncoding";
- let ParserMatchClass = MipsMemSimm11AsmOperand;
-}
-
-def mem_simm12 : mem_generic {
- let MIOperandInfo = (ops ptr_rc, simm12);
- let EncoderMethod = "getMemEncoding";
- let ParserMatchClass = MipsMemSimm12AsmOperand;
-}
-
-def mem_simm16 : mem_generic {
- let MIOperandInfo = (ops ptr_rc, simm16);
- let EncoderMethod = "getMemEncoding";
- let ParserMatchClass = MipsMemSimm16AsmOperand;
-}
-
def mem_simmptr : mem_generic {
let ParserMatchClass = MipsMemSimmPtrAsmOperand;
}
@@ -1272,6 +1206,7 @@ def immSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>;
// Node immediate fits as 16-bit sign extended on target immediate.
// e.g. addi, andi
def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
+def imm32SExt16 : IntImmLeaf<i32, [{ return isInt<16>(Imm.getSExtValue()); }]>;
// Node immediate fits as 7-bit zero extended on target immediate.
def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>;
@@ -1287,6 +1222,9 @@ def immZExt16 : PatLeaf<(imm), [{
else
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
}], LO16>;
+def imm32ZExt16 : IntImmLeaf<i32, [{
+ return (uint32_t)Imm.getZExtValue() == (unsigned short)Imm.getZExtValue();
+}]>;
// Immediate can be loaded with LUi (32-bit int with lower 16-bit cleared).
def immSExt32Low16Zero : PatLeaf<(imm), [{
@@ -1987,6 +1925,18 @@ let usesCustomInserter = 1 in {
def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
+ def ATOMIC_LOAD_MIN_I8 : Atomic2Ops<atomic_load_min_8, GPR32>;
+ def ATOMIC_LOAD_MIN_I16 : Atomic2Ops<atomic_load_min_16, GPR32>;
+ def ATOMIC_LOAD_MIN_I32 : Atomic2Ops<atomic_load_min_32, GPR32>;
+ def ATOMIC_LOAD_MAX_I8 : Atomic2Ops<atomic_load_max_8, GPR32>;
+ def ATOMIC_LOAD_MAX_I16 : Atomic2Ops<atomic_load_max_16, GPR32>;
+ def ATOMIC_LOAD_MAX_I32 : Atomic2Ops<atomic_load_max_32, GPR32>;
+ def ATOMIC_LOAD_UMIN_I8 : Atomic2Ops<atomic_load_umin_8, GPR32>;
+ def ATOMIC_LOAD_UMIN_I16 : Atomic2Ops<atomic_load_umin_16, GPR32>;
+ def ATOMIC_LOAD_UMIN_I32 : Atomic2Ops<atomic_load_umin_32, GPR32>;
+ def ATOMIC_LOAD_UMAX_I8 : Atomic2Ops<atomic_load_umax_8, GPR32>;
+ def ATOMIC_LOAD_UMAX_I16 : Atomic2Ops<atomic_load_umax_16, GPR32>;
+ def ATOMIC_LOAD_UMAX_I32 : Atomic2Ops<atomic_load_umax_32, GPR32>;
}
def ATOMIC_LOAD_ADD_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
@@ -2016,6 +1966,19 @@ def ATOMIC_CMP_SWAP_I8_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
def ATOMIC_CMP_SWAP_I16_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
def ATOMIC_CMP_SWAP_I32_POSTRA : AtomicCmpSwapPostRA<GPR32>;
+def ATOMIC_LOAD_MIN_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_MIN_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_MIN_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_MAX_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_MAX_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_MAX_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_UMIN_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_UMIN_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_UMIN_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_UMAX_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_UMAX_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_UMAX_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+
/// Pseudo instructions for loading and storing accumulator registers.
let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
def LOAD_ACC64 : Load<"", ACC64>;
@@ -2058,17 +2021,17 @@ def LONG_BRANCH_ADDiu2Op : PseudoSE<(outs GPR32Opnd:$dst),
/// Arithmetic Instructions (ALU Immediate)
let AdditionalPredicates = [NotInMicroMips] in {
def ADDiu : MMRel, StdMMR6Rel, ArithLogicI<"addiu", simm16_relaxed, GPR32Opnd,
- II_ADDIU, immSExt16, add>,
+ II_ADDIU, imm32SExt16, add>,
ADDI_FM<0x9>, IsAsCheapAsAMove, ISA_MIPS1;
def ANDi : MMRel, StdMMR6Rel,
- ArithLogicI<"andi", uimm16, GPR32Opnd, II_ANDI, immZExt16, and>,
+ ArithLogicI<"andi", uimm16, GPR32Opnd, II_ANDI, imm32ZExt16, and>,
ADDI_FM<0xc>, ISA_MIPS1;
def ORi : MMRel, StdMMR6Rel,
- ArithLogicI<"ori", uimm16, GPR32Opnd, II_ORI, immZExt16, or>,
+ ArithLogicI<"ori", uimm16, GPR32Opnd, II_ORI, imm32ZExt16, or>,
ADDI_FM<0xd>, ISA_MIPS1;
def XORi : MMRel, StdMMR6Rel,
- ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI, immZExt16, xor>,
+ ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI, imm32ZExt16, xor>,
ADDI_FM<0xe>, ISA_MIPS1;
def ADDi : MMRel, ArithLogicI<"addi", simm16_relaxed, GPR32Opnd, II_ADDI>,
ADDI_FM<0x8>, ISA_MIPS1_NOT_32R6_64R6;
@@ -2081,10 +2044,10 @@ let AdditionalPredicates = [NotInMicroMips] in {
ISA_MIPS1;
/// Arithmetic Instructions (3-Operand, R-Type)
- def ADDu : MMRel, StdMMR6Rel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>,
- ADD_FM<0, 0x21>, ISA_MIPS1;
- def SUBu : MMRel, StdMMR6Rel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
- ADD_FM<0, 0x23>, ISA_MIPS1;
+ def ADDu : MMRel, StdMMR6Rel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>,
+ ADD_FM<0, 0x21>, ISA_MIPS1;
+ def SUBu : MMRel, StdMMR6Rel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
+ ADD_FM<0, 0x23>, ISA_MIPS1;
let Defs = [HI0, LO0] in
def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>,
@@ -2149,7 +2112,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
LW_FM<0x28>, ISA_MIPS1;
def SH : Store<"sh", GPR32Opnd, truncstorei16, II_SH>, MMRel, LW_FM<0x29>,
ISA_MIPS1;
- def SW : StdMMR6Rel, Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>, ISA_MIPS1;
+ def SW : StdMMR6Rel, Store<"sw", GPR32Opnd, store, II_SW>,
+ MMRel, LW_FM<0x2b>, ISA_MIPS1;
}
/// load/store left/right
@@ -2250,7 +2214,8 @@ def J : MMRel, JumpFJ<jmptarget, "j", br, bb, "j">, FJ<2>,
IsBranch, ISA_MIPS1;
let AdditionalPredicates = [NotInMicroMips] in {
-def JR : MMRel, IndirectBranch<"jr", GPR32Opnd>, MTLO_FM<8>, ISA_MIPS1_NOT_32R6_64R6;
+def JR : MMRel, IndirectBranch<"jr", GPR32Opnd>, MTLO_FM<8>,
+ ISA_MIPS1_NOT_32R6_64R6;
def BEQ : MMRel, CBranch<"beq", brtarget, seteq, GPR32Opnd>, BEQ_FM<4>,
ISA_MIPS1;
def BEQL : MMRel, CBranchLikely<"beql", brtarget, GPR32Opnd>,
@@ -2408,7 +2373,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi
let AdditionalPredicates = [NotInMicroMips] in
- def LEA_ADDiu : MMRel, EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>, ISA_MIPS1;
+ def LEA_ADDiu : MMRel, EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>,
+ ISA_MIPS1;
// MADD*/MSUB*
def MADD : MMRel, MArithR<"madd", II_MADD, 1>, MULT_FM<0x1c, 0>,
@@ -2584,9 +2550,11 @@ def DROLImm : MipsAsmPseudoInst<(outs),
(ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
"drol\t$rs, $rt, $imm">, ISA_MIPS64;
def : MipsInstAlias<"drol $rd, $rs",
- (DROL GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>, ISA_MIPS64;
+ (DROL GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>,
+ ISA_MIPS64;
def : MipsInstAlias<"drol $rd, $imm",
- (DROLImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>, ISA_MIPS64;
+ (DROLImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>,
+ ISA_MIPS64;
def DROR : MipsAsmPseudoInst<(outs),
(ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd),
@@ -2595,9 +2563,11 @@ def DRORImm : MipsAsmPseudoInst<(outs),
(ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
"dror\t$rs, $rt, $imm">, ISA_MIPS64;
def : MipsInstAlias<"dror $rd, $rs",
- (DROR GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>, ISA_MIPS64;
+ (DROR GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>,
+ ISA_MIPS64;
def : MipsInstAlias<"dror $rd, $imm",
- (DRORImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>, ISA_MIPS64;
+ (DRORImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>,
+ ISA_MIPS64;
def ABSMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rs),
"abs\t$rd, $rs">;
@@ -2774,7 +2744,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>, ISA_MIPS1;
- defm : OneOrTwoOperandMacroImmediateAlias<"add", ADDi>, ISA_MIPS1_NOT_32R6_64R6;
+ defm : OneOrTwoOperandMacroImmediateAlias<"add", ADDi>,
+ ISA_MIPS1_NOT_32R6_64R6;
defm : OneOrTwoOperandMacroImmediateAlias<"addu", ADDiu>, ISA_MIPS1;
@@ -3101,7 +3072,8 @@ multiclass MaterializeImms<ValueType VT, Register ZEROReg,
// observed.
// Arbitrary immediates
-def : MipsPat<(VT LUiORiPred:$imm), (ORiOp (LUiOp (HI16 imm:$imm)), (LO16 imm:$imm))>;
+def : MipsPat<(VT LUiORiPred:$imm),
+ (ORiOp (LUiOp (HI16 imm:$imm)), (LO16 imm:$imm))>;
// Bits 32-16 set, sign/zero extended.
def : MipsPat<(VT LUiPred:$imm), (LUiOp (HI16 imm:$imm))>;
@@ -3181,8 +3153,6 @@ multiclass MipsHiLoRelocs<Instruction Lui, Instruction Addiu,
(Addiu GPROpnd:$hi, tglobaltlsaddr:$lo)>;
def : MipsPat<(add GPROpnd:$hi, (MipsLo texternalsym:$lo)),
(Addiu GPROpnd:$hi, texternalsym:$lo)>;
- def : MipsPat<(add GPROpnd:$hi, (MipsLo texternalsym:$lo)),
- (Addiu GPROpnd:$hi, texternalsym:$lo)>;
}
// wrapper_pic
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index f8fc7cb0898b..2f4c9d74262e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/IR/IntrinsicsMips.h"
#define DEBUG_TYPE "mips-isel"
@@ -39,12 +40,13 @@ public:
private:
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+ bool isRegInGprb(Register Reg, MachineRegisterInfo &MRI) const;
+ bool isRegInFprb(Register Reg, MachineRegisterInfo &MRI) const;
bool materialize32BitImm(Register DestReg, APInt Imm,
MachineIRBuilder &B) const;
bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
const TargetRegisterClass *
- getRegClassForTypeOnBank(unsigned OpSize, const RegisterBank &RB,
- const RegisterBankInfo &RBI) const;
+ getRegClassForTypeOnBank(Register Reg, MachineRegisterInfo &MRI) const;
unsigned selectLoadStoreOpCode(MachineInstr &I,
MachineRegisterInfo &MRI) const;
@@ -84,24 +86,23 @@ MipsInstructionSelector::MipsInstructionSelector(
{
}
+bool MipsInstructionSelector::isRegInGprb(Register Reg,
+ MachineRegisterInfo &MRI) const {
+ return RBI.getRegBank(Reg, MRI, TRI)->getID() == Mips::GPRBRegBankID;
+}
+
+bool MipsInstructionSelector::isRegInFprb(Register Reg,
+ MachineRegisterInfo &MRI) const {
+ return RBI.getRegBank(Reg, MRI, TRI)->getID() == Mips::FPRBRegBankID;
+}
+
bool MipsInstructionSelector::selectCopy(MachineInstr &I,
MachineRegisterInfo &MRI) const {
Register DstReg = I.getOperand(0).getReg();
if (Register::isPhysicalRegister(DstReg))
return true;
- const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI);
- const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
-
- const TargetRegisterClass *RC = &Mips::GPR32RegClass;
- if (RegBank->getID() == Mips::FPRBRegBankID) {
- if (DstSize == 32)
- RC = &Mips::FGR32RegClass;
- else if (DstSize == 64)
- RC = STI.isFP64bit() ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
- else
- llvm_unreachable("Unsupported destination size");
- }
+ const TargetRegisterClass *RC = getRegClassForTypeOnBank(DstReg, MRI);
if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
<< " operand\n");
@@ -111,19 +112,27 @@ bool MipsInstructionSelector::selectCopy(MachineInstr &I,
}
const TargetRegisterClass *MipsInstructionSelector::getRegClassForTypeOnBank(
- unsigned OpSize, const RegisterBank &RB,
- const RegisterBankInfo &RBI) const {
- if (RB.getID() == Mips::GPRBRegBankID)
+ Register Reg, MachineRegisterInfo &MRI) const {
+ const LLT Ty = MRI.getType(Reg);
+ const unsigned TySize = Ty.getSizeInBits();
+
+ if (isRegInGprb(Reg, MRI)) {
+ assert((Ty.isScalar() || Ty.isPointer()) && TySize == 32 &&
+ "Register class not available for LLT, register bank combination");
return &Mips::GPR32RegClass;
+ }
- if (RB.getID() == Mips::FPRBRegBankID)
- return OpSize == 32
- ? &Mips::FGR32RegClass
- : STI.hasMips32r6() || STI.isFP64bit() ? &Mips::FGR64RegClass
- : &Mips::AFGR64RegClass;
+ if (isRegInFprb(Reg, MRI)) {
+ if (Ty.isScalar()) {
+ assert((TySize == 32 || TySize == 64) &&
+ "Register class not available for LLT, register bank combination");
+ if (TySize == 32)
+ return &Mips::FGR32RegClass;
+ return STI.isFP64bit() ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
+ }
+ }
- llvm_unreachable("getRegClassForTypeOnBank can't find register class.");
- return nullptr;
+ llvm_unreachable("Unsupported register bank.");
}
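
The rewritten helper derives the class from the register's bank and LLT instead of taking a size and bank explicitly. A name-level standalone rendering of the mapping (hypothetical helper; the asserts in the real code reject all other combinations):

    #include <cstdio>

    // 32-bit scalars/pointers in the GPR bank map to GPR32; FPR-bank scalars
    // map to FGR32, or for 64 bits to FGR64/AFGR64 depending on FP64 mode.
    const char *regClassFor(bool InGprb, bool IsFP64bit, unsigned SizeInBits) {
      if (InGprb)
        return "GPR32";
      if (SizeInBits == 32)
        return "FGR32";
      return IsFP64bit ? "FGR64" : "AFGR64"; // single wide reg vs paired regs
    }

    int main() { std::printf("%s\n", regClassFor(false, false, 64)); } // AFGR64
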
bool MipsInstructionSelector::materialize32BitImm(Register DestReg, APInt Imm,
@@ -131,8 +140,9 @@ bool MipsInstructionSelector::materialize32BitImm(Register DestReg, APInt Imm,
assert(Imm.getBitWidth() == 32 && "Unsupported immediate size.");
// Ori zero extends immediate. Used for values with zeros in high 16 bits.
if (Imm.getHiBits(16).isNullValue()) {
- MachineInstr *Inst = B.buildInstr(Mips::ORi, {DestReg}, {Register(Mips::ZERO)})
- .addImm(Imm.getLoBits(16).getLimitedValue());
+ MachineInstr *Inst =
+ B.buildInstr(Mips::ORi, {DestReg}, {Register(Mips::ZERO)})
+ .addImm(Imm.getLoBits(16).getLimitedValue());
return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
}
// Lui places immediate in high 16 bits and sets low 16 bits to zero.
@@ -143,8 +153,9 @@ bool MipsInstructionSelector::materialize32BitImm(Register DestReg, APInt Imm,
}
// ADDiu sign extends immediate. Used for values with 1s in high 17 bits.
if (Imm.isSignedIntN(16)) {
- MachineInstr *Inst = B.buildInstr(Mips::ADDiu, {DestReg}, {Register(Mips::ZERO)})
- .addImm(Imm.getLoBits(16).getLimitedValue());
+ MachineInstr *Inst =
+ B.buildInstr(Mips::ADDiu, {DestReg}, {Register(Mips::ZERO)})
+ .addImm(Imm.getLoBits(16).getLimitedValue());
return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
}
// Values that cannot be materialized with single immediate instruction.
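
Pulling the three single-instruction cases together with the lui/ori fallback this comment refers to, here is a standalone sketch that prints the assembly the selector would pick (hypothetical helper; register choice and the MIR plumbing are out of scope):

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    void materialize32BitImm(uint32_t Imm) {
      if ((Imm >> 16) == 0)         // high 16 bits clear: ori zero-extends
        std::printf("ori   $r, $zero, 0x%04" PRIx32 "\n", Imm & 0xffff);
      else if ((Imm & 0xffff) == 0) // low 16 bits clear: lui alone suffices
        std::printf("lui   $r, 0x%04" PRIx32 "\n", Imm >> 16);
      else if ((int32_t)Imm >= INT16_MIN && (int32_t)Imm <= INT16_MAX)
        std::printf("addiu $r, $zero, %" PRId32 "\n", (int32_t)Imm); // sign-extends
      else {                        // general case: lui + ori pair
        std::printf("lui   $r, 0x%04" PRIx32 "\n", Imm >> 16);
        std::printf("ori   $r, $r, 0x%04" PRIx32 "\n", Imm & 0xffff);
      }
    }

    int main() { materialize32BitImm(0x12345678); } // lui + ori pair
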
@@ -160,17 +171,22 @@ bool MipsInstructionSelector::materialize32BitImm(Register DestReg, APInt Imm,
return true;
}
-/// Returning Opc indicates that we failed to select MIPS instruction opcode.
+/// When I.getOpcode() is returned, we failed to select a MIPS instruction opcode.
unsigned
MipsInstructionSelector::selectLoadStoreOpCode(MachineInstr &I,
MachineRegisterInfo &MRI) const {
- STI.getRegisterInfo();
- const Register DestReg = I.getOperand(0).getReg();
- const unsigned RegBank = RBI.getRegBank(DestReg, MRI, TRI)->getID();
+ const Register ValueReg = I.getOperand(0).getReg();
+ const LLT Ty = MRI.getType(ValueReg);
+ const unsigned TySize = Ty.getSizeInBits();
const unsigned MemSizeInBytes = (*I.memoperands_begin())->getSize();
unsigned Opc = I.getOpcode();
const bool isStore = Opc == TargetOpcode::G_STORE;
- if (RegBank == Mips::GPRBRegBankID) {
+
+ if (isRegInGprb(ValueReg, MRI)) {
+ assert(((Ty.isScalar() && TySize == 32) ||
+ (Ty.isPointer() && TySize == 32 && MemSizeInBytes == 4)) &&
+ "Unsupported register bank, LLT, MemSizeInBytes combination");
+ (void)TySize;
if (isStore)
switch (MemSizeInBytes) {
case 4:
@@ -196,33 +212,39 @@ MipsInstructionSelector::selectLoadStoreOpCode(MachineInstr &I,
}
}
- if (RegBank == Mips::FPRBRegBankID) {
- switch (MemSizeInBytes) {
- case 4:
- return isStore ? Mips::SWC1 : Mips::LWC1;
- case 8:
+ if (isRegInFprb(ValueReg, MRI)) {
+ if (Ty.isScalar()) {
+ assert(((TySize == 32 && MemSizeInBytes == 4) ||
+ (TySize == 64 && MemSizeInBytes == 8)) &&
+ "Unsupported register bank, LLT, MemSizeInBytes combination");
+
+ if (MemSizeInBytes == 4)
+ return isStore ? Mips::SWC1 : Mips::LWC1;
+
if (STI.isFP64bit())
return isStore ? Mips::SDC164 : Mips::LDC164;
- else
- return isStore ? Mips::SDC1 : Mips::LDC1;
- case 16: {
+ return isStore ? Mips::SDC1 : Mips::LDC1;
+ }
+
+ if (Ty.isVector()) {
assert(STI.hasMSA() && "Vector instructions require target with MSA.");
- const unsigned VectorElementSizeInBytes =
- MRI.getType(DestReg).getElementType().getSizeInBytes();
- if (VectorElementSizeInBytes == 1)
+ assert((TySize == 128 && MemSizeInBytes == 16) &&
+ "Unsupported register bank, LLT, MemSizeInBytes combination");
+ switch (Ty.getElementType().getSizeInBits()) {
+ case 8:
return isStore ? Mips::ST_B : Mips::LD_B;
- if (VectorElementSizeInBytes == 2)
+ case 16:
return isStore ? Mips::ST_H : Mips::LD_H;
- if (VectorElementSizeInBytes == 4)
+ case 32:
return isStore ? Mips::ST_W : Mips::LD_W;
- if (VectorElementSizeInBytes == 8)
+ case 64:
return isStore ? Mips::ST_D : Mips::LD_D;
- return Opc;
- }
- default:
- return Opc;
+ default:
+ return Opc;
+ }
}
}
+
return Opc;
}
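
For the FPR-bank scalar branch above, the opcode choice reduces to a small table. A name-level standalone sketch, assuming only the 4- and 8-byte cases the asserts permit:

    #include <cstdio>

    // 32-bit scalars use the single-precision load/store; 64-bit scalars
    // pick the FP64 (FR=1) or paired-register (FR=0) variant.
    const char *fprScalarOpcode(bool IsStore, unsigned MemSizeInBytes,
                                bool FP64) {
      if (MemSizeInBytes == 4)
        return IsStore ? "SWC1" : "LWC1";
      return FP64 ? (IsStore ? "SDC164" : "LDC164")
                  : (IsStore ? "SDC1" : "LDC1");
    }

    int main() { std::printf("%s\n", fprScalarOpcode(false, 8, false)); } // LDC1
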
@@ -239,7 +261,8 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
return true;
}
- if (I.getOpcode() == Mips::G_MUL) {
+ if (I.getOpcode() == Mips::G_MUL &&
+ isRegInGprb(I.getOperand(0).getReg(), MRI)) {
MachineInstr *Mul = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::MUL))
.add(I.getOperand(0))
.add(I.getOperand(1))
@@ -280,7 +303,7 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
I.eraseFromParent();
return true;
}
- case G_GEP: {
+ case G_PTR_ADD: {
MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDu))
.add(I.getOperand(0))
.add(I.getOperand(1))
@@ -367,14 +390,12 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
}
case G_PHI: {
const Register DestReg = I.getOperand(0).getReg();
- const unsigned OpSize = MRI.getType(DestReg).getSizeInBits();
const TargetRegisterClass *DefRC = nullptr;
if (Register::isPhysicalRegister(DestReg))
DefRC = TRI.getRegClass(DestReg);
else
- DefRC = getRegClassForTypeOnBank(OpSize,
- *RBI.getRegBank(DestReg, MRI, TRI), RBI);
+ DefRC = getRegClassForTypeOnBank(DestReg, MRI);
I.setDesc(TII.get(TargetOpcode::PHI));
return RBI.constrainGenericRegister(DestReg, *DefRC, MRI);
@@ -389,15 +410,15 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
MachineOperand BaseAddr = I.getOperand(1);
int64_t SignedOffset = 0;
- // Try to fold load/store + G_GEP + G_CONSTANT
+ // Try to fold load/store + G_PTR_ADD + G_CONSTANT
// %SignedOffset:(s32) = G_CONSTANT i32 16_bit_signed_immediate
- // %Addr:(p0) = G_GEP %BaseAddr, %SignedOffset
+ // %Addr:(p0) = G_PTR_ADD %BaseAddr, %SignedOffset
// %LoadResult/%StoreSrc = load/store %Addr(p0)
// into:
// %LoadResult/%StoreSrc = NewOpc %BaseAddr(p0), 16_bit_signed_immediate
MachineInstr *Addr = MRI.getVRegDef(I.getOperand(1).getReg());
- if (Addr->getOpcode() == G_GEP) {
+ if (Addr->getOpcode() == G_PTR_ADD) {
MachineInstr *Offset = MRI.getVRegDef(Addr->getOperand(2).getReg());
if (Offset->getOpcode() == G_CONSTANT) {
APInt OffsetValue = Offset->getOperand(1).getCImm()->getValue();
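// Standalone sketch (not part of the diff) of the guard this fold needs
// before the G_CONSTANT can be folded into the addressing mode: the selected
// MIPS load/store encodings carry a signed 16-bit offset, so the immediate
// must fit that range. The in-tree code performs the equivalent check on the
// APInt above (APInt::isSignedIntN).
static bool fitsInSigned16Bits(long long Offset) {
  return Offset >= -32768 && Offset <= 32767;
}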
@@ -452,15 +473,12 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
break;
}
case G_IMPLICIT_DEF: {
+ Register Dst = I.getOperand(0).getReg();
MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::IMPLICIT_DEF))
- .add(I.getOperand(0));
+ .addDef(Dst);
// Set class based on register bank; there can be fpr and gpr implicit def.
- MRI.setRegClass(MI->getOperand(0).getReg(),
- getRegClassForTypeOnBank(
- MRI.getType(I.getOperand(0).getReg()).getSizeInBits(),
- *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI),
- RBI));
+ MRI.setRegClass(Dst, getRegClassForTypeOnBank(Dst, MRI));
break;
}
case G_CONSTANT: {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index bb4a1d902d75..9645aa24dc05 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -13,6 +13,7 @@
#include "MipsLegalizerInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/IR/IntrinsicsMips.h"
using namespace llvm;
@@ -61,11 +62,7 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
const LLT v2s64 = LLT::vector(2, 64);
const LLT p0 = LLT::pointer(0, 32);
- getActionDefinitionsBuilder({G_SUB, G_MUL})
- .legalFor({s32})
- .clampScalar(0, s32, s32);
-
- getActionDefinitionsBuilder(G_ADD)
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalIf([=, &ST](const LegalityQuery &Query) {
if (CheckTyN(0, Query, {s32}))
return true;
@@ -145,8 +142,14 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
.legalFor({s32})
.clampScalar(0, s32, s32);
- getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UREM, G_UDIV})
- .legalFor({s32})
+ getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM})
+ .legalIf([=, &ST](const LegalityQuery &Query) {
+ if (CheckTyN(0, Query, {s32}))
+ return true;
+ if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
+ return true;
+ return false;
+ })
.minScalar(0, s32)
.libcallFor({s64});
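// CheckTyN is defined near the top of MipsLegalizerInfo.cpp, outside the
// hunks shown. A hedged sketch consistent with its use in these legalIf
// predicates: report whether operand N of the query has one of the listed
// low-level types.
static bool CheckTyN(unsigned N, const LegalityQuery &Query,
                     std::initializer_list<LLT> SupportedValues) {
  for (const LLT &Val : SupportedValues)
    if (Val == Query.Types[N])
      return true;
  return false;
}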
@@ -164,7 +167,7 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
.legalFor({s32})
.clampScalar(0, s32, s32);
- getActionDefinitionsBuilder({G_GEP, G_INTTOPTR})
+ getActionDefinitionsBuilder({G_PTR_ADD, G_INTTOPTR})
.legalFor({{p0, s32}});
getActionDefinitionsBuilder(G_PTRTOINT)
@@ -182,12 +185,35 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
getActionDefinitionsBuilder(G_VASTART)
.legalFor({p0});
+ getActionDefinitionsBuilder(G_BSWAP)
+ .legalIf([=, &ST](const LegalityQuery &Query) {
+ if (ST.hasMips32r2() && CheckTyN(0, Query, {s32}))
+ return true;
+ return false;
+ })
+ .lowerIf([=, &ST](const LegalityQuery &Query) {
+ if (!ST.hasMips32r2() && CheckTyN(0, Query, {s32}))
+ return true;
+ return false;
+ })
+ .maxScalar(0, s32);
+
+ getActionDefinitionsBuilder(G_BITREVERSE)
+ .lowerFor({s32})
+ .maxScalar(0, s32);
+
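// Standalone sketch (assumption, not part of the diff) of the expansion the
// G_BSWAP lower action produces on pre-MIPS32r2 subtargets, which lack a
// single byte-swap instruction: the classic shift-and-mask form.
static unsigned bswap32(unsigned X) { // assumes 32-bit unsigned
  return (X << 24) | ((X & 0xff00u) << 8) | ((X >> 8) & 0xff00u) | (X >> 24);
}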
// FP instructions
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({s32, s64});
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS, G_FSQRT})
- .legalFor({s32, s64});
+ .legalIf([=, &ST](const LegalityQuery &Query) {
+ if (CheckTyN(0, Query, {s32, s64}))
+ return true;
+ if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
+ return true;
+ return false;
+ });
getActionDefinitionsBuilder(G_FCMP)
.legalFor({{s32, s32}, {s32, s64}})
@@ -315,6 +341,17 @@ static bool MSA3OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
return true;
}
+static bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
+ MachineIRBuilder &MIRBuilder,
+ const MipsSubtarget &ST) {
+ assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA.");
+ MIRBuilder.buildInstr(Opcode)
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(2));
+ MI.eraseFromParent();
+ return true;
+}
+
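// Hedged note (grounded in the operand copies above): for a G_INTRINSIC such
// as %dst = G_INTRINSIC intrinsic(@llvm.mips.fsqrt.w), %src, operand 0 is the
// result, operand 1 the intrinsic ID, and operand 2 the single source, which
// is why this two-operand helper forwards exactly operands 0 and 2 onto the
// new generic instruction (e.g. G_FSQRT).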
bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
@@ -364,6 +401,64 @@ bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_W, MIRBuilder, ST);
case Intrinsic::mips_addvi_d:
return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_D, MIRBuilder, ST);
+ case Intrinsic::mips_subv_b:
+ case Intrinsic::mips_subv_h:
+ case Intrinsic::mips_subv_w:
+ case Intrinsic::mips_subv_d:
+ return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SUB, MIRBuilder, ST);
+ case Intrinsic::mips_subvi_b:
+ return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_B, MIRBuilder, ST);
+ case Intrinsic::mips_subvi_h:
+ return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_H, MIRBuilder, ST);
+ case Intrinsic::mips_subvi_w:
+ return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_W, MIRBuilder, ST);
+ case Intrinsic::mips_subvi_d:
+ return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_D, MIRBuilder, ST);
+ case Intrinsic::mips_mulv_b:
+ case Intrinsic::mips_mulv_h:
+ case Intrinsic::mips_mulv_w:
+ case Intrinsic::mips_mulv_d:
+ return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_MUL, MIRBuilder, ST);
+ case Intrinsic::mips_div_s_b:
+ case Intrinsic::mips_div_s_h:
+ case Intrinsic::mips_div_s_w:
+ case Intrinsic::mips_div_s_d:
+ return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SDIV, MIRBuilder, ST);
+ case Intrinsic::mips_mod_s_b:
+ case Intrinsic::mips_mod_s_h:
+ case Intrinsic::mips_mod_s_w:
+ case Intrinsic::mips_mod_s_d:
+ return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SREM, MIRBuilder, ST);
+ case Intrinsic::mips_div_u_b:
+ case Intrinsic::mips_div_u_h:
+ case Intrinsic::mips_div_u_w:
+ case Intrinsic::mips_div_u_d:
+ return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_UDIV, MIRBuilder, ST);
+ case Intrinsic::mips_mod_u_b:
+ case Intrinsic::mips_mod_u_h:
+ case Intrinsic::mips_mod_u_w:
+ case Intrinsic::mips_mod_u_d:
+ return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_UREM, MIRBuilder, ST);
+ case Intrinsic::mips_fadd_w:
+ case Intrinsic::mips_fadd_d:
+ return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FADD, MIRBuilder, ST);
+ case Intrinsic::mips_fsub_w:
+ case Intrinsic::mips_fsub_d:
+ return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FSUB, MIRBuilder, ST);
+ case Intrinsic::mips_fmul_w:
+ case Intrinsic::mips_fmul_d:
+ return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FMUL, MIRBuilder, ST);
+ case Intrinsic::mips_fdiv_w:
+ case Intrinsic::mips_fdiv_d:
+ return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FDIV, MIRBuilder, ST);
+ case Intrinsic::mips_fmax_a_w:
+ return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_W, MIRBuilder, ST);
+ case Intrinsic::mips_fmax_a_d:
+ return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_D, MIRBuilder, ST);
+ case Intrinsic::mips_fsqrt_w:
+ return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST);
+ case Intrinsic::mips_fsqrt_d:
+ return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST);
default:
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp
index fd984058a2bf..66e04bda2af3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -34,7 +34,7 @@ void MipsMCInstLower::Initialize(MCContext *C) {
MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy,
- unsigned Offset) const {
+ int64_t Offset) const {
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
MipsMCExpr::MipsExprKind TargetKind = MipsMCExpr::MEK_None;
bool IsGpOff = false;
@@ -161,9 +161,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, *Ctx);
if (Offset) {
- // Assume offset is never negative.
- assert(Offset > 0);
-
+ // Note: Offset can also be negative
Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, *Ctx),
*Ctx);
}
@@ -177,7 +175,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
}
MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO,
- unsigned offset) const {
+ int64_t offset) const {
MachineOperandType MOTy = MO.getType();
switch (MOTy) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.h
index 29af6f21de82..605a124bf102 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.h
@@ -35,11 +35,11 @@ public:
void Initialize(MCContext *C);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
- MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
+ MCOperand LowerOperand(const MachineOperand &MO, int64_t offset = 0) const;
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
- MachineOperandType MOTy, unsigned Offset) const;
+ MachineOperandType MOTy, int64_t Offset) const;
MCOperand createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2,
MipsMCExpr::MipsExprKind Kind) const;
void lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index f585d9c1a148..0fef518c240e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -1287,6 +1287,7 @@ class MSA_I10_LDI_DESC_BASE<string instr_asm, RegisterOperand ROWD,
// LDI is matched using custom matching code in MipsSEISelDAGToDAG.cpp
list<dag> Pattern = [];
bit hasSideEffects = 0;
+ bit isReMaterializable = 1;
InstrItinClass Itinerary = itin;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
index ace0735652bd..f9d93ca29658 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "mips-prelegalizer-combiner"
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index d334366e727c..2a3f5a05dfe0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -76,8 +76,9 @@ using namespace llvm;
MipsRegisterBankInfo::MipsRegisterBankInfo(const TargetRegisterInfo &TRI)
: MipsGenRegisterBankInfo() {}
-const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass(
- const TargetRegisterClass &RC) const {
+const RegisterBank &
+MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const {
using namespace Mips;
switch (RC.getID()) {
@@ -437,12 +438,10 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (Opc) {
case G_TRUNC:
- case G_SUB:
- case G_MUL:
case G_UMULH:
case G_ZEXTLOAD:
case G_SEXTLOAD:
- case G_GEP:
+ case G_PTR_ADD:
case G_INTTOPTR:
case G_PTRTOINT:
case G_AND:
@@ -451,15 +450,18 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_SHL:
case G_ASHR:
case G_LSHR:
- case G_SDIV:
- case G_UDIV:
- case G_SREM:
- case G_UREM:
case G_BRINDIRECT:
case G_VASTART:
+ case G_BSWAP:
OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
break;
case G_ADD:
+ case G_SUB:
+ case G_MUL:
+ case G_SDIV:
+ case G_SREM:
+ case G_UDIV:
+ case G_UREM:
OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
if (Op0Size == 128)
OperandsMapping = getMSAMapping(MF);
@@ -546,6 +548,8 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_FABS:
case G_FSQRT:
OperandsMapping = getFprbMapping(Op0Size);
+ if (Op0Size == 128)
+ OperandsMapping = getMSAMapping(MF);
break;
case G_FCONSTANT:
OperandsMapping = getOperandsMapping({getFprbMapping(Op0Size), nullptr});
@@ -636,7 +640,7 @@ void MipsRegisterBankInfo::setRegBank(MachineInstr &MI,
MRI.setRegBank(Dest, getRegBank(Mips::GPRBRegBankID));
break;
}
- case TargetOpcode::G_GEP: {
+ case TargetOpcode::G_PTR_ADD: {
assert(MRI.getType(Dest).isPointer() && "Unexpected operand type.");
MRI.setRegBank(Dest, getRegBank(Mips::GPRBRegBankID));
break;
@@ -649,8 +653,9 @@ void MipsRegisterBankInfo::setRegBank(MachineInstr &MI,
static void
combineAwayG_UNMERGE_VALUES(LegalizationArtifactCombiner &ArtCombiner,
MachineInstr &MI) {
+ SmallVector<Register, 4> UpdatedDefs;
SmallVector<MachineInstr *, 2> DeadInstrs;
- ArtCombiner.tryCombineMerges(MI, DeadInstrs);
+ ArtCombiner.tryCombineMerges(MI, DeadInstrs, UpdatedDefs);
for (MachineInstr *DeadMI : DeadInstrs)
DeadMI->eraseFromParent();
}
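// Hedged note (not in the diff): tryCombineMerges gained an out-parameter
// collecting registers whose definitions were rewritten, so callers can feed
// them back into further combining; this caller only wants the dead
// instructions erased and ignores UpdatedDefs.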
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
index fa0f1c7bc941..66267f8d794d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
@@ -32,8 +32,8 @@ class MipsRegisterBankInfo final : public MipsGenRegisterBankInfo {
public:
MipsRegisterBankInfo(const TargetRegisterInfo &TRI);
- const RegisterBank &
- getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+ const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const override;
const InstructionMapping &
getInstrMapping(const MachineInstr &MI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index c8313240a678..bef1a3657ea5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -234,8 +235,8 @@ void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const {
SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32);
- SDNode *DSPCtrlField =
- CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag);
+ SDNode *DSPCtrlField = CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32,
+ MVT::Glue, CstOne, InFlag);
SDNode *Carry = CurDAG->getMachineNode(
Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne);
@@ -253,7 +254,8 @@ void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const {
SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32);
SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)};
- SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps);
+ SDNode *DSPCtrlFinal =
+ CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps);
SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue,
SDValue(DSPCtrlFinal, 0), CstOne);
@@ -1074,7 +1076,8 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!");
- Res = CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32, SDValue(Res, 0));
+ Res =
+ CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32, SDValue(Res, 0));
} else if (SplatValue.isSignedIntN(32) && SplatBitSize == 64 &&
(ABI.IsN32() || ABI.IsN64())) {
@@ -1117,8 +1120,8 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
// $res4 = insert.w $res3[1], $res fill.d $res
// splat.d $res4, 0
//
- // The ability to use dinsu is guaranteed as MSA requires MIPSR5. This saves
- // having to materialize the value by shifts and ors.
+ // The ability to use dinsu is guaranteed as MSA requires MIPSR5.
+ // This saves having to materialize the value by shifts and ors.
//
// FIXME: Implement the preferred sequence for MIPS64R6:
//
@@ -1239,7 +1242,8 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
llvm_unreachable(
"Zero splat value handled by non-zero 64bit splat synthesis!");
- Res = CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64, SDValue(Res, 0));
+ Res = CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64,
+ SDValue(Res, 0));
} else
llvm_unreachable("Unknown ABI in MipsISelDAGToDAG!");
@@ -1278,10 +1282,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
default:
llvm_unreachable("Unexpected asm memory constraint");
// All memory constraints can at least accept raw pointers.
- case InlineAsm::Constraint_i:
- OutOps.push_back(Op);
- OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
- return false;
case InlineAsm::Constraint_m:
case InlineAsm::Constraint_o:
if (selectAddrRegImm16(Op, Base, Offset)) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 5bd234f955ba..798e8784405f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index 2126a1bda493..d4f09a2f3586 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -82,8 +82,8 @@ unsigned MipsSEInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
unsigned Opc = 0, ZeroReg = 0;
bool isMicroMips = Subtarget.inMicroMipsMode();
@@ -221,29 +221,24 @@ static bool isReadOrWriteToDSPReg(const MachineInstr &MI, bool &isWrite) {
/// We check for the common case of 'or', as it's MIPS' preferred instruction
/// for GPRs but we have to check the operands to ensure that is the case.
/// Other move instructions for MIPS are directly identifiable.
-bool MipsSEInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+Optional<DestSourcePair>
+MipsSEInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
bool isDSPControlWrite = false;
// Condition is made to match the creation of WRDSP/RDDSP copy instruction
// from copyPhysReg function.
if (isReadOrWriteToDSPReg(MI, isDSPControlWrite)) {
- if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != (1<<4))
- return false;
+ if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != (1 << 4))
+ return None;
else if (isDSPControlWrite) {
- Src = &MI.getOperand(0);
- Dest = &MI.getOperand(2);
+ return DestSourcePair{MI.getOperand(2), MI.getOperand(0)};
+
} else {
- Dest = &MI.getOperand(0);
- Src = &MI.getOperand(2);
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
}
- return true;
} else if (MI.isMoveReg() || isORCopyInst(MI)) {
- Dest = &MI.getOperand(0);
- Src = &MI.getOperand(1);
- return true;
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
}
- return false;
+ return None;
}
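// Hedged usage sketch (not part of the diff): with the Optional<DestSourcePair>
// form, a caller tests and unpacks the copy in one step through the public
// TargetInstrInfo::isCopyInstr wrapper instead of threading two out-pointers:
//   if (auto DestSrc = TII.isCopyInstr(MI)) {
//     const MachineOperand *Dst = DestSrc->Destination;
//     const MachineOperand *Src = DestSrc->Source;
//     // ... treat MI as a register-to-register copy ...
//   }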
void MipsSEInstrInfo::
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.h
index 3111d1c21a0a..08c00ec8ccef 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -43,7 +43,7 @@ public:
int &FrameIndex) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStack(MachineBasicBlock &MBB,
@@ -77,10 +77,10 @@ public:
protected:
/// If the specific machine instruction is an instruction that moves/copies
- /// value from one register to another register return true along with
- /// @Source machine operand and @Destination machine operand.
- bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
- const MachineOperand *&Destination) const override;
+ /// a value from one register to another, return the destination and source
+ /// registers as machine operands.
+ Optional<DestSourcePair>
+ isCopyInstrImpl(const MachineInstr &MI) const override;
private:
unsigned getAnalyzableBrOpc(unsigned Opc) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
index 08c6d3e79023..faccb37c2361 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
@@ -1615,5 +1615,6 @@ def : InstRW<[GenericWriteAtomic],
def : InstRW<[GenericWriteAtomic],
(instregex "^ATOMIC_CMP_SWAP_I(8|16|32|64)_POSTRA$")>;
def : InstRW<[GenericWriteAtomic],
- (instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND)_I(8|16|32|64)_POSTRA$")>;
+ (instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND|MIN|MAX|UMIN|UMAX)"
+ "_I(8|16|32|64)_POSTRA$")>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
index f68adc3496ab..7331917baa25 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
@@ -632,5 +632,6 @@ def : InstRW<[P5600WriteAtomic],
def : InstRW<[P5600WriteAtomic],
(instregex "^ATOMIC_CMP_SWAP_I(8|16|32|64)_POSTRA$")>;
def : InstRW<[P5600WriteAtomic],
- (instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND)_I(8|16|32|64)_POSTRA$")>;
+ (instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND|MIN|MAX|UMIN|UMAX)"
+ "_I(8|16|32|64)_POSTRA$")>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
index b9245c9fc0eb..133b818114c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -74,16 +74,17 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false),
NoABICalls(false), Abs2008(false), IsFP64bit(false), UseOddSPReg(true),
IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), HasCnMips(false),
- HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
- HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
- InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
- HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
- Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
- HasEVA(false), DisableMadd4(false), HasMT(false), HasCRC(false),
- HasVirt(false), HasGINV(false), UseIndirectJumpsHazard(false),
- StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT),
- TSInfo(), InstrInfo(MipsInstrInfo::create(
- initializeSubtargetDependencies(CPU, FS, TM))),
+ HasCnMipsP(false), HasMips3_32(false), HasMips3_32r2(false),
+ HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false),
+ InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
+ InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), HasDSPR3(false),
+ AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false),
+ UseTCCInDIV(false), HasSym32(false), HasEVA(false), DisableMadd4(false),
+ HasMT(false), HasCRC(false), HasVirt(false), HasGINV(false),
+ UseIndirectJumpsHazard(false), StackAlignOverride(StackAlignOverride),
+ TM(TM), TargetTriple(TT), TSInfo(),
+ InstrInfo(
+ MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
TLInfo(MipsTargetLowering::create(TM, *this)) {
@@ -255,6 +256,10 @@ MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
stackAlignment = Align(8);
}
+ if ((isABI_N32() || isABI_N64()) && !isGP64bit())
+ report_fatal_error("64-bit code requested on a subtarget that doesn't "
+ "support it!");
+
return *this;
}
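// Hedged note (not in the diff): the N32 and N64 ABIs both presuppose 64-bit
// general-purpose registers, so requesting either on a subtarget without
// GP64 can never produce correct code; the new check fails fast instead of
// miscompiling silently.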
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index e58f316791ba..8fec6db00cb9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -23,16 +23,17 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
-#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
@@ -44,7 +45,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips"
-extern "C" void LLVMInitializeMipsTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() {
// Register the target.
RegisterTargetMachine<MipsebTargetMachine> X(getTheMipsTarget());
RegisterTargetMachine<MipselTargetMachine> Y(getTheMipselTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetStreamer.h
index 298d056ce2c3..b389ba8938c4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetStreamer.h
@@ -156,10 +156,6 @@ public:
unsigned BaseReg, int64_t Offset,
function_ref<unsigned()> GetATReg, SMLoc IDLoc,
const MCSubtargetInfo *STI);
- void emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg, unsigned BaseReg,
- MCOperand &HiOperand, MCOperand &LoOperand,
- unsigned ATReg, SMLoc IDLoc,
- const MCSubtargetInfo *STI);
void emitLoadWithImmOffset(unsigned Opcode, unsigned DstReg, unsigned BaseReg,
int64_t Offset, unsigned TmpReg, SMLoc IDLoc,
const MCSubtargetInfo *STI);
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index 0082ca34cdbd..44041987ec76 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -27,7 +27,7 @@ Target &llvm::getTheMips64elTarget() {
return TheMips64elTarget;
}
-extern "C" void LLVMInitializeMipsTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTargetInfo() {
RegisterTarget<Triple::mips,
/*HasJIT=*/true>
X(getTheMipsTarget(), "mips", "MIPS (32-bit big endian)", "Mips");
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index b6eefe206268..82d332ab3f08 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -72,9 +72,10 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << VReg;
}
-void NVPTXInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot, const MCSubtargetInfo &STI) {
- printInstruction(MI, OS);
+void NVPTXInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &OS) {
+ printInstruction(MI, Address, OS);
// Next always print the annotation.
printAnnotation(OS, Annot);
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index c38472925a29..cee0e7eec54a 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -25,11 +25,11 @@ public:
const MCRegisterInfo &MRI);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &OS) override;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
// End
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 556745825a15..7e1da9b7a94b 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -17,7 +17,8 @@ using namespace llvm;
void NVPTXMCAsmInfo::anchor() {}
-NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) {
+NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple,
+ const MCTargetOptions &Options) {
if (TheTriple.getArch() == Triple::nvptx64) {
CodePointerSize = CalleeSaveStackSlotSize = 8;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index e888526da898..ce5ca99c5397 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -23,7 +23,8 @@ class NVPTXMCAsmInfo : public MCAsmInfo {
virtual void anchor();
public:
- explicit NVPTXMCAsmInfo(const Triple &TheTriple);
+ explicit NVPTXMCAsmInfo(const Triple &TheTriple,
+ const MCTargetOptions &Options);
/// Return true if the .section directive should be omitted when
/// emitting \p SectionName. For example:
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index c8b85b2718a6..d758c2c86959 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -66,7 +66,7 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
}
// Force static initialization.
-extern "C" void LLVMInitializeNVPTXTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTargetMC() {
for (Target *T : {&getTheNVPTXTarget32(), &getTheNVPTXTarget64()}) {
// Register the MC asm info.
RegisterMCAsmInfo<NVPTXMCAsmInfo> X(*T);
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/ManagedStringPool.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/ManagedStringPool.h
index bbcbb4598040..fb838d1fcd7f 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/ManagedStringPool.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/ManagedStringPool.h
@@ -32,7 +32,7 @@ public:
SmallVectorImpl<std::string *>::iterator Current = Pool.begin();
while (Current != Pool.end()) {
delete *Current;
- Current++;
+ ++Current;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 307f4d58c3ab..7117438dc503 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -2258,7 +2258,7 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
}
// Force static initialization.
-extern "C" void LLVMInitializeNVPTXAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter() {
RegisterAsmPrinter<NVPTXAsmPrinter> X(getTheNVPTXTarget32());
RegisterAsmPrinter<NVPTXAsmPrinter> Y(getTheNVPTXTarget64());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 3d2447d75c77..4296eca6a8df 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -11,11 +11,12 @@
//===----------------------------------------------------------------------===//
#include "NVPTXISelDAGToDAG.h"
-#include "NVPTXUtilities.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -611,7 +612,7 @@ bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
// Find and record all uses of this vector that extract element 0 or 1.
SmallVector<SDNode *, 4> E0, E1;
- for (const auto &U : Vector.getNode()->uses()) {
+ for (auto U : Vector.getNode()->uses()) {
if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
continue;
if (U->getOperand(0) != Vector)
@@ -885,7 +886,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
MVT SimpleVT = LoadedVT.getSimpleVT();
MVT ScalarVT = SimpleVT.getScalarType();
// Read at least 8 bits (predicates are stored as 8-bit values)
- unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
+ unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
unsigned int fromType;
// Vector Setting
@@ -1030,7 +1031,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
MVT ScalarVT = SimpleVT.getScalarType();
// Read at least 8 bits (predicates are stored as 8-bit values)
- unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
+ unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
unsigned int FromType;
// The last operand holds the original LoadSDNode::getExtensionType() value
unsigned ExtensionType = cast<ConstantSDNode>(
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index e4e5069b7a80..ce5136016f96 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -46,7 +46,7 @@ public:
return "NVPTX DAG->DAG Pattern Instruction Selection";
}
bool runOnMachineFunction(MachineFunction &MF) override;
- const NVPTXSubtarget *Subtarget;
+ const NVPTXSubtarget *Subtarget = nullptr;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 9acd0bea66fd..92f71c687c46 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index ef645fc1e541..546fe49808e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -538,7 +538,10 @@ public:
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
bool allowUnsafeFPMath(MachineFunction &MF) const;
- bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT) const override {
+ return true;
+ }
bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index 74ab2f7b8453..a846c2fada26 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index f928b44c91e0..cff230289e60 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -31,8 +31,8 @@ NVPTXInstrInfo::NVPTXInstrInfo() : NVPTXGenInstrInfo(), RegInfo() {}
void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg);
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index 7c0912808f7b..40f6adc964d7 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -49,7 +49,7 @@ public:
*/
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
// Branch analysis.
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index c52195fb0449..76a4a1d4030a 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -7400,7 +7400,9 @@ class WMMA_REGINFO<WMMA_REGS r>
// u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
!or(!eq(geom,"m8n8k128"),
- !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63]);
+ !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],
+
+ !eq(geom, "m8n8k4") : [hasSM70, hasPTX64]);
// template DAGs for instruction inputs/output.
dag Outs = !dag(outs, ptx_regs, reg_names);
@@ -7546,25 +7548,37 @@ class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
let OutOperandList = FragD.Outs;
let InOperandList = !con(Args, (ins MmaCode:$ptx));
string TypeList = !cond(
+ !eq(FragD.geom, "m8n8k4") : "." # FragD.ptx_elt_type
+ # ".f16.f16."
+ # FragC.ptx_elt_type,
!eq(FragD.ptx_elt_type, "s32") : ".s32"
# "." # FragA.ptx_elt_type
# "." # FragB.ptx_elt_type
# ".s32",
1: "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type,
);
- let AsmString = "wmma.mma"
- # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "")
- # ".sync"
- # "${ptx:aligned}"
- # "." # ALayout
- # "." # BLayout
- # "." # FragA.geom
- # TypeList
- # !if(Satfinite, ".satfinite", "") # "\n\t\t"
- # FragD.regstring # ",\n\t\t"
- # FragA.regstring # ",\n\t\t"
- # FragB.regstring # ",\n\t\t"
- # FragC.regstring # ";";
+ let AsmString = !if(!eq(FragA.geom, "m8n8k4"),
+ "mma.sync.aligned.m8n8k4"
+ # "." # ALayout
+ # "." # BLayout
+ # TypeList # "\n\t\t"
+ # FragD.regstring # ",\n\t\t"
+ # FragA.regstring # ",\n\t\t"
+ # FragB.regstring # ",\n\t\t"
+ # FragC.regstring # ";",
+ "wmma.mma"
+ # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "")
+ # ".sync"
+ # "${ptx:aligned}"
+ # "." # ALayout
+ # "." # BLayout
+ # "." # FragA.geom
+ # TypeList
+ # !if(Satfinite, ".satfinite", "") # "\n\t\t"
+ # FragD.regstring # ",\n\t\t"
+ # FragA.regstring # ",\n\t\t"
+ # FragB.regstring # ",\n\t\t"
+ # FragC.regstring # ";");
}
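// Hedged example (assumption, not from the diff) of the string the new
// m8n8k4 branch yields for f32 accumulate with row/col layouts:
//   mma.sync.aligned.m8n8k4.row.col.f32.f16.f16.f32
//       {$d0,...}, {$a0,...}, {$b0,...}, {$c0,...};
// Every other geometry keeps the wmma.mma...sync spelling built by the second
// arm of the !if.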
defset list<WMMA_INSTR> MMAs = {
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 945b7286b03c..cf488b06f06c 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -70,8 +70,7 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
for (auto &I : BB) {
if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
Changed = true;
- auto PTy = dyn_cast<PointerType>(allocaInst->getType());
- auto ETy = PTy->getElementType();
+ auto ETy = cast<PointerType>(allocaInst->getType())->getElementType();
auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL);
auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, "");
auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC);
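// Hedged note (not in the diff): an AllocaInst always has pointer type, so
// the old dyn_cast (whose null result was never checked) is replaced with
// cast<>, which asserts on a type mismatch instead of silently returning
// null.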
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index f58fb5717773..0778706d936a 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -72,7 +72,7 @@ void initializeNVPTXProxyRegErasurePass(PassRegistry &);
} // end namespace llvm
-extern "C" void LLVMInitializeNVPTXTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
// Register the target.
RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index be0416f90fca..afc40a7abed0 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -113,7 +114,8 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
int NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index b179a28fa713..864d8b91a89a 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -91,7 +91,8 @@ public:
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
index 5cf7b6691e63..baaedc7ac87c 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -16,6 +16,8 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 634a052e2ee7..ae166dc5a8d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index 2c71ec58ec42..235be9c0dbbb 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -19,7 +19,7 @@ Target &llvm::getTheNVPTXTarget64() {
return TheNVPTXTarget64;
}
-extern "C" void LLVMInitializeNVPTXTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTargetInfo() {
RegisterTarget<Triple::nvptx> X(getTheNVPTXTarget32(), "nvptx",
"NVIDIA PTX 32-bit", "NVPTX");
RegisterTarget<Triple::nvptx64> Y(getTheNVPTXTarget64(), "nvptx64",
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index aedf5b713c3f..7e7902c27a81 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -800,9 +800,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
- case PPC::SUBICo: {
+ case PPC::SUBIC_rec: {
MCInst TmpInst;
- TmpInst.setOpcode(PPC::ADDICo);
+ TmpInst.setOpcode(PPC::ADDIC_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
addNegOperand(TmpInst, Inst.getOperand(2), getContext());
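// Hedged note (not in the diff): the PPC opcode renames in this file are
// mechanical, replacing the historical "o" suffix on record forms
// (instructions that also set CR0) with the more readable "_rec". The
// rewrites themselves are unchanged, e.g. subic is still assembled as addic
// with the immediate negated via addNegOperand.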
@@ -810,11 +810,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::EXTLWI:
- case PPC::EXTLWIo: {
+ case PPC::EXTLWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::EXTLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::EXTLWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(B));
@@ -824,11 +824,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::EXTRWI:
- case PPC::EXTRWIo: {
+ case PPC::EXTRWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::EXTRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::EXTRWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(B + N));
@@ -838,11 +838,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::INSLWI:
- case PPC::INSLWIo: {
+ case PPC::INSLWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::INSLWI? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.setOpcode(Opcode == PPC::INSLWI ? PPC::RLWIMI : PPC::RLWIMI_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
@@ -853,11 +853,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::INSRWI:
- case PPC::INSRWIo: {
+ case PPC::INSRWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::INSRWI? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.setOpcode(Opcode == PPC::INSRWI ? PPC::RLWIMI : PPC::RLWIMI_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
@@ -868,10 +868,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::ROTRWI:
- case PPC::ROTRWIo: {
+ case PPC::ROTRWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::ROTRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::ROTRWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(32 - N));
@@ -881,10 +881,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::SLWI:
- case PPC::SLWIo: {
+ case PPC::SLWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::SLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::SLWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(N));
@@ -894,10 +894,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::SRWI:
- case PPC::SRWIo: {
+ case PPC::SRWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::SRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::SRWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(32 - N));
@@ -907,10 +907,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::CLRRWI:
- case PPC::CLRRWIo: {
+ case PPC::CLRRWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::CLRRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::CLRRWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(0));
@@ -920,11 +920,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::CLRLSLWI:
- case PPC::CLRLSLWIo: {
+ case PPC::CLRLSLWI_rec: {
MCInst TmpInst;
int64_t B = Inst.getOperand(2).getImm();
int64_t N = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::CLRLSLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::CLRLSLWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(N));
@@ -934,11 +934,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::EXTLDI:
- case PPC::EXTLDIo: {
+ case PPC::EXTLDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::EXTLDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.setOpcode(Opcode == PPC::EXTLDI ? PPC::RLDICR : PPC::RLDICR_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(B));
@@ -947,11 +947,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::EXTRDI:
- case PPC::EXTRDIo: {
+ case PPC::EXTRDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::EXTRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.setOpcode(Opcode == PPC::EXTRDI ? PPC::RLDICL : PPC::RLDICL_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(B + N));
@@ -960,11 +960,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::INSRDI:
- case PPC::INSRDIo: {
+ case PPC::INSRDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::INSRDI? PPC::RLDIMI : PPC::RLDIMIo);
+ TmpInst.setOpcode(Opcode == PPC::INSRDI ? PPC::RLDIMI : PPC::RLDIMI_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
@@ -974,10 +974,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::ROTRDI:
- case PPC::ROTRDIo: {
+ case PPC::ROTRDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::ROTRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.setOpcode(Opcode == PPC::ROTRDI ? PPC::RLDICL : PPC::RLDICL_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(64 - N));
@@ -986,10 +986,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::SLDI:
- case PPC::SLDIo: {
+ case PPC::SLDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::SLDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.setOpcode(Opcode == PPC::SLDI ? PPC::RLDICR : PPC::RLDICR_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(N));
@@ -1007,10 +1007,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::SRDI:
- case PPC::SRDIo: {
+ case PPC::SRDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::SRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.setOpcode(Opcode == PPC::SRDI ? PPC::RLDICL : PPC::RLDICL_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(64 - N));
@@ -1019,10 +1019,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::CLRRDI:
- case PPC::CLRRDIo: {
+ case PPC::CLRRDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::CLRRDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.setOpcode(Opcode == PPC::CLRRDI ? PPC::RLDICR : PPC::RLDICR_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(0));
@@ -1031,11 +1031,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::CLRLSLDI:
- case PPC::CLRLSLDIo: {
+ case PPC::CLRLSLDI_rec: {
MCInst TmpInst;
int64_t B = Inst.getOperand(2).getImm();
int64_t N = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::CLRLSLDI? PPC::RLDIC : PPC::RLDICo);
+ TmpInst.setOpcode(Opcode == PPC::CLRLSLDI ? PPC::RLDIC : PPC::RLDIC_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(N));
@@ -1044,14 +1044,14 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::RLWINMbm:
- case PPC::RLWINMobm: {
+ case PPC::RLWINMbm_rec: {
unsigned MB, ME;
int64_t BM = Inst.getOperand(3).getImm();
if (!isRunOfOnes(BM, MB, ME))
break;
MCInst TmpInst;
- TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(Inst.getOperand(2));
@@ -1061,14 +1061,14 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::RLWIMIbm:
- case PPC::RLWIMIobm: {
+ case PPC::RLWIMIbm_rec: {
unsigned MB, ME;
int64_t BM = Inst.getOperand(3).getImm();
if (!isRunOfOnes(BM, MB, ME))
break;
MCInst TmpInst;
- TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMI_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0)); // The tied operand.
TmpInst.addOperand(Inst.getOperand(1));
@@ -1079,14 +1079,14 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::RLWNMbm:
- case PPC::RLWNMobm: {
+ case PPC::RLWNMbm_rec: {
unsigned MB, ME;
int64_t BM = Inst.getOperand(3).getImm();
if (!isRunOfOnes(BM, MB, ME))
break;
MCInst TmpInst;
- TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo);
+ TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(Inst.getOperand(2));
@@ -1116,8 +1116,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
case PPC::CP_PASTEx :
case PPC::CP_PASTE_LAST: {
MCInst TmpInst;
- TmpInst.setOpcode(Opcode == PPC::CP_PASTEx ?
- PPC::CP_PASTE : PPC::CP_PASTEo);
+ TmpInst.setOpcode(Opcode == PPC::CP_PASTEx ? PPC::CP_PASTE
+ : PPC::CP_PASTE_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(Opcode == PPC::CP_PASTEx ? 0 : 1));
@@ -1786,7 +1786,7 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
/// Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmParser() {
RegisterMCAsmParser<PPCAsmParser> A(getThePPC32Target());
RegisterMCAsmParser<PPCAsmParser> B(getThePPC64Target());
RegisterMCAsmParser<PPCAsmParser> C(getThePPC64LETarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 3597fd15eeb1..e3c0f958c7ed 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -34,7 +34,6 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
};
} // end anonymous namespace
@@ -51,7 +50,7 @@ static MCDisassembler *createPPCLEDisassembler(const Target &T,
return new PPCDisassembler(STI, Ctx, /*IsLittleEndian=*/true);
}
-extern "C" void LLVMInitializePowerPCDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCDisassembler() {
// Register the disassembler for each target.
TargetRegistry::RegisterMCDisassembler(getThePPC32Target(),
createPPCDisassembler);
@@ -323,7 +322,7 @@ static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
- uint64_t Address, raw_ostream &OS,
+ uint64_t Address,
raw_ostream &CS) const {
// Get the four bytes of the instruction.
Size = 4;
@@ -350,4 +349,3 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
}
-
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 7fc231618fa9..9cc1c539e24a 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -64,8 +64,9 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << RegName;
}
-void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Customize printing of the addis instruction on AIX. When an operand is a
// symbol reference, the instruction syntax is changed to look like a load
// operation, i.e.:
@@ -193,11 +194,10 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
if (!printAliasInstr(MI, O))
- printInstruction(MI, O);
+ printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}
-
void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O,
const char *Modifier) {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
index 725ae2a7081b..a3ec41aa348d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -32,11 +32,11 @@ public:
: MCInstPrinter(MAI, MII, MRI), TT(T) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 1216cd727289..dc2c216a3efd 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -15,33 +15,6 @@
using namespace llvm;
-void PPCMCAsmInfoDarwin::anchor() { }
-
-PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
- if (is64Bit) {
- CodePointerSize = CalleeSaveStackSlotSize = 8;
- }
- IsLittleEndian = false;
-
- SeparatorString = "@";
- CommentString = ";";
- ExceptionsType = ExceptionHandling::DwarfCFI;
-
- if (!is64Bit)
- Data64bitsDirective = nullptr; // We can't emit a 64-bit unit in PPC32 mode.
-
- AssemblerDialect = 1; // New-Style mnemonics.
- SupportsDebugInformation= true; // Debug information.
-
- // The installed assembler for OSX < 10.6 lacks some directives.
- // FIXME: this should really be a check on the assembler characteristics
- // rather than OS version
- if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
- HasWeakDefCanBeHiddenDirective = false;
-
- UseIntegratedAssembler = true;
-}
-
void PPCELFMCAsmInfo::anchor() { }
PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
@@ -87,4 +60,5 @@ PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
assert(!IsLittleEndian && "Little-endian XCOFF not supported.");
CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
ZeroDirective = "\t.space\t";
+ SymbolsHaveSMC = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 42cb62ad26a4..8c52bbbd8a56 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -20,13 +20,6 @@
namespace llvm {
class Triple;
-class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
- virtual void anchor();
-
-public:
- explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple &);
-};
-
class PPCELFMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 90c3c8d20edb..cbfb8e2ff282 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -30,6 +30,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
@@ -76,14 +77,13 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT,
}
static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TheTriple) {
+ const Triple &TheTriple,
+ const MCTargetOptions &Options) {
bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
TheTriple.getArch() == Triple::ppc64le);
MCAsmInfo *MAI;
- if (TheTriple.isOSDarwin())
- MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
- else if (TheTriple.isOSBinFormatXCOFF())
+ if (TheTriple.isOSBinFormatXCOFF())
MAI = new PPCXCOFFMCAsmInfo(isPPC64, TheTriple);
else
MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple);
@@ -107,8 +107,11 @@ public:
: PPCTargetStreamer(S), OS(OS) {}
void emitTCEntry(const MCSymbol &S) override {
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
OS << "\t.tc ";
- OS << S.getName();
+ OS << (MAI->getSymbolsHaveSMC()
+ ? cast<MCSymbolXCOFF>(S).getUnqualifiedName()
+ : S.getName());
OS << "[TC],";
OS << S.getName();
OS << '\n';
@@ -196,7 +199,8 @@ public:
void finish() override {
for (auto *Sym : UpdateOther)
- copyLocalEntry(Sym, Sym->getVariableValue());
+ if (Sym->isVariable())
+ copyLocalEntry(Sym, Sym->getVariableValue());
}
private:
@@ -242,7 +246,10 @@ public:
PPCTargetXCOFFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
void emitTCEntry(const MCSymbol &S) override {
- report_fatal_error("TOC entries not supported yet.");
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+ const unsigned PointerSize = MAI->getCodePointerSize();
+ Streamer.EmitValueToAlignment(PointerSize);
+ Streamer.EmitSymbolValue(&S, PointerSize);
}
void emitMachine(StringRef CPU) override {
@@ -285,7 +292,7 @@ static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
return new PPCInstPrinter(MAI, MII, MRI, T);
}
-extern "C" void LLVMInitializePowerPCTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() {
for (Target *T :
{&getThePPC32Target(), &getThePPC64Target(), &getThePPC64LETarget()}) {
// Register the MC asm info.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 74b67bd2e928..49443679bb31 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -82,6 +82,30 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
return false;
}
+static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
+ if (!Val)
+ return false;
+
+ if (isShiftedMask_64(Val)) {
+ // Look for the first non-zero bit.
+ MB = countLeadingZeros(Val);
+ // Look for the first zero bit after the run of ones.
+ ME = countLeadingZeros((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // Invert the mask to check for a wrapped run of ones.
+ if (isShiftedMask_64(Val)) {
+ // Effectively look for the first zero bit.
+ ME = countLeadingZeros(Val) - 1;
+ // Effectively look for the first one bit after the run of zeros.
+ MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // No run present.
+ return false;
+}
+
} // end namespace llvm
// Generated files will use "namespace PPC". To avoid symbol clash,
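// Editorial sketch (not part of the patch): a minimal standalone check of the
// isRunOfOnes64 decomposition added above. The real code uses
// countLeadingZeros/isShiftedMask_64 from llvm/Support/MathExtras.h; the
// helpers below merely approximate them for illustration.
#include <cassert>
#include <cstdint>

static unsigned clz64(uint64_t V) { return V ? __builtin_clzll(V) : 64; }
static bool isMask64(uint64_t V) { return V && ((V + 1) & V) == 0; }
static bool isShiftedMask64(uint64_t V) { return V && isMask64((V - 1) | V); }

static bool runOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
  if (!Val)
    return false;
  if (isShiftedMask64(Val)) {
    MB = clz64(Val);             // First one bit, in big-endian bit numbering.
    ME = clz64((Val - 1) ^ Val); // Last one bit of the run.
    return true;
  }
  Val = ~Val;                    // Invert and retry for a wrapped run.
  if (isShiftedMask64(Val)) {
    ME = clz64(Val) - 1;
    MB = clz64((Val - 1) ^ Val) + 1;
    return true;
  }
  return false;
}

int main() {
  unsigned MB, ME;
  // Contiguous run 0x00FF0000'00000000: big-endian bits 8..15.
  assert(runOfOnes64(0x00FF000000000000ULL, MB, ME) && MB == 8 && ME == 15);
  // Wrapped run 0xFF000000'000000FF: starts at bit 56, wraps around to bit 7.
  assert(runOfOnes64(0xFF000000000000FFULL, MB, ME) && MB == 56 && ME == 7);
  return 0;
}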
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
index 7d3780f0eb7e..9b3d13989ee2 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -107,7 +107,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
(instregex "XSMAX(C|J)?DP$"),
(instregex "XSMIN(C|J)?DP$"),
(instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
- (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
+ (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
(instregex "POPCNT(D|W)$"),
(instregex "CMPB(8)?$"),
(instregex "SETB(8)?$"),
@@ -130,23 +130,23 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
(instregex "CMP(WI|LWI|W|LW)(8)?$"),
(instregex "CMP(L)?D(I)?$"),
(instregex "SUBF(I)?C(8)?(O)?$"),
- (instregex "ANDI(S)?o(8)?$"),
+ (instregex "ANDI(S)?(8)?(_rec)?$"),
(instregex "ADDC(8)?(O)?$"),
- (instregex "ADDIC(8)?(o)?$"),
- (instregex "ADD(8|4)(O)?(o)?$"),
- (instregex "ADD(E|ME|ZE)(8)?(O)?(o)?$"),
- (instregex "SUBF(E|ME|ZE)?(8)?(O)?(o)?$"),
- (instregex "NEG(8)?(O)?(o)?$"),
+ (instregex "ADDIC(8)?(_rec)?$"),
+ (instregex "ADD(8|4)(O)?(_rec)?$"),
+ (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
+ (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
+ (instregex "NEG(8)?(O)?(_rec)?$"),
(instregex "POPCNTB$"),
(instregex "ADD(I|IS)?(8)?$"),
(instregex "LI(S)?(8)?$"),
- (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
- (instregex "NAND(8)?(o)?$"),
- (instregex "AND(C)?(8)?(o)?$"),
- (instregex "NOR(8)?(o)?$"),
- (instregex "OR(C)?(8)?(o)?$"),
- (instregex "EQV(8)?(o)?$"),
- (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
+ (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
+ (instregex "NAND(8)?(_rec)?$"),
+ (instregex "AND(C)?(8)?(_rec)?$"),
+ (instregex "NOR(8)?(_rec)?$"),
+ (instregex "OR(C)?(8)?(_rec)?$"),
+ (instregex "EQV(8)?(_rec)?$"),
+ (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
(instregex "ADD(4|8)(TLS)?(_)?$"),
(instregex "NEG(8)?(O)?$"),
(instregex "ADDI(S)?toc(HA|L)(8)?$"),
@@ -211,8 +211,8 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instregex "VABSDU(B|H|W)$"),
(instregex "VADDU(B|H|W)S$"),
(instregex "VAVG(S|U)(B|H|W)$"),
- (instregex "VCMP(EQ|GE|GT)FP(o)?$"),
- (instregex "VCMPBFP(o)?$"),
+ (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
+ (instregex "VCMPBFP(_rec)?$"),
(instregex "VC(L|T)Z(B|H|W|D)$"),
(instregex "VADDS(B|H|W)S$"),
(instregex "V(MIN|MAX)FP$"),
@@ -233,43 +233,43 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
VSUBUWS,
VSUBCUW,
VCMPGTSB,
- VCMPGTSBo,
+ VCMPGTSB_rec,
VCMPGTSD,
- VCMPGTSDo,
+ VCMPGTSD_rec,
VCMPGTSH,
- VCMPGTSHo,
+ VCMPGTSH_rec,
VCMPGTSW,
- VCMPGTSWo,
+ VCMPGTSW_rec,
VCMPGTUB,
- VCMPGTUBo,
+ VCMPGTUB_rec,
VCMPGTUD,
- VCMPGTUDo,
+ VCMPGTUD_rec,
VCMPGTUH,
- VCMPGTUHo,
+ VCMPGTUH_rec,
VCMPGTUW,
- VCMPGTUWo,
- VCMPNEBo,
- VCMPNEHo,
- VCMPNEWo,
- VCMPNEZBo,
- VCMPNEZHo,
- VCMPNEZWo,
- VCMPEQUBo,
- VCMPEQUDo,
- VCMPEQUHo,
- VCMPEQUWo,
+ VCMPGTUW_rec,
+ VCMPNEB_rec,
+ VCMPNEH_rec,
+ VCMPNEW_rec,
+ VCMPNEZB_rec,
+ VCMPNEZH_rec,
+ VCMPNEZW_rec,
+ VCMPEQUB_rec,
+ VCMPEQUD_rec,
+ VCMPEQUH_rec,
+ VCMPEQUW_rec,
XVCMPEQDP,
- XVCMPEQDPo,
+ XVCMPEQDP_rec,
XVCMPEQSP,
- XVCMPEQSPo,
+ XVCMPEQSP_rec,
XVCMPGEDP,
- XVCMPGEDPo,
+ XVCMPGEDP_rec,
XVCMPGESP,
- XVCMPGESPo,
+ XVCMPGESP_rec,
XVCMPGTDP,
- XVCMPGTDPo,
+ XVCMPGTDP_rec,
XVCMPGTSP,
- XVCMPGTSPo,
+ XVCMPGTSP_rec,
XVMAXDP,
XVMAXSP,
XVMINDP,
@@ -451,14 +451,14 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- (instregex "FSEL(D|S)o$")
+ (instregex "FSEL(D|S)_rec$")
)>;
// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- (instregex "MUL(H|L)(D|W)(U)?(O)?o$")
+ (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
)>;
// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
@@ -467,18 +467,18 @@ def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- (instregex "FRI(N|P|Z|M)(D|S)o$"),
- (instregex "FRE(S)?o$"),
- (instregex "FADD(S)?o$"),
- (instregex "FSUB(S)?o$"),
- (instregex "F(N)?MSUB(S)?o$"),
- (instregex "F(N)?MADD(S)?o$"),
- (instregex "FCFID(U)?(S)?o$"),
- (instregex "FCTID(U)?(Z)?o$"),
- (instregex "FCTIW(U)?(Z)?o$"),
- (instregex "FMUL(S)?o$"),
- (instregex "FRSQRTE(S)?o$"),
- FRSPo
+ (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
+ (instregex "FRE(S)?_rec$"),
+ (instregex "FADD(S)?_rec$"),
+ (instregex "FSUB(S)?_rec$"),
+ (instregex "F(N)?MSUB(S)?_rec$"),
+ (instregex "F(N)?MADD(S)?_rec$"),
+ (instregex "FCFID(U)?(S)?_rec$"),
+ (instregex "FCTID(U)?(Z)?_rec$"),
+ (instregex "FCTIW(U)?(Z)?_rec$"),
+ (instregex "FMUL(S)?_rec$"),
+ (instregex "FRSQRTE(S)?_rec$"),
+ FRSP_rec
)>;
// 7 cycle DP operation. One DP unit, one EXEC pipeline and one dispatch unit.
@@ -613,16 +613,16 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
XSCMPUQP,
XSTSTDCQP,
XSXSIGQP,
- BCDCFNo,
- BCDCFZo,
- BCDCPSGNo,
- BCDCTNo,
- BCDCTZo,
- BCDSETSGNo,
- BCDSo,
- BCDTRUNCo,
- BCDUSo,
- BCDUTRUNCo
+ BCDCFN_rec,
+ BCDCFZ_rec,
+ BCDCPSGN_rec,
+ BCDCTN_rec,
+ BCDCTZ_rec,
+ BCDSETSGN_rec,
+ BCDS_rec,
+ BCDTRUNC_rec,
+ BCDUS_rec,
+ BCDUTRUNC_rec
)>;
// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
@@ -630,7 +630,7 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
// dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
- BCDSRo,
+ BCDSR_rec,
XSADDQP,
XSADDQPO,
XSCVDPQP,
@@ -654,7 +654,7 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
- BCDCTSQo
+ BCDCTSQ_rec
)>;
// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
@@ -679,7 +679,7 @@ def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
- BCDCFSQo
+ BCDCFSQ_rec
)>;
// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
@@ -819,7 +819,7 @@ def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
DISP_1C, DISP_1C],
(instrs
(instregex "LHA(X)?(8)?$"),
- (instregex "CP_PASTE(8)?o$"),
+ (instregex "CP_PASTE(8)?_rec$"),
(instregex "LWA(X)?(_32)?$"),
TCHECK
)>;
@@ -987,7 +987,7 @@ def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
DISP_EVEN_1C, DISP_1C],
(instrs
- (instregex "DIVW(U)?(O)?o$")
+ (instregex "DIVW(U)?(O)?_rec$")
)>;
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
@@ -996,14 +996,14 @@ def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
DISP_EVEN_1C, DISP_1C],
(instrs
- DIVDo,
- DIVDOo,
- DIVDUo,
- DIVDUOo,
- DIVWEo,
- DIVWEOo,
- DIVWEUo,
- DIVWEUOo
+ DIVD_rec,
+ DIVDO_rec,
+ DIVDU_rec,
+ DIVDUO_rec,
+ DIVWE_rec,
+ DIVWEO_rec,
+ DIVWEU_rec,
+ DIVWEUO_rec
)>;
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
@@ -1012,10 +1012,10 @@ def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
DISP_EVEN_1C, DISP_1C],
(instrs
- DIVDEo,
- DIVDEOo,
- DIVDEUo,
- DIVDEUOo
+ DIVDE_rec,
+ DIVDEO_rec,
+ DIVDEU_rec,
+ DIVDEUO_rec
)>;
// CR access instructions in _BrMCR, IIC_BrMCRX.
@@ -1040,8 +1040,8 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
DISP_1C, DISP_1C],
(instrs
- (instregex "ADDC(8)?(O)?o$"),
- (instregex "SUBFC(8)?(O)?o$")
+ (instregex "ADDC(8)?(O)?_rec$"),
+ (instregex "SUBFC(8)?(O)?_rec$")
)>;
// Cracked ALU operations.
@@ -1052,10 +1052,10 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- (instregex "F(N)?ABS(D|S)o$"),
- (instregex "FCPSGN(D|S)o$"),
- (instregex "FNEG(D|S)o$"),
- FMRo
+ (instregex "F(N)?ABS(D|S)_rec$"),
+ (instregex "FCPSGN(D|S)_rec$"),
+ (instregex "FNEG(D|S)_rec$"),
+ FMR_rec
)>;
// Cracked ALU operations.
@@ -1077,8 +1077,8 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
- (instregex "MTFSF(b|o)?$"),
- (instregex "MTFSFI(o)?$")
+ (instregex "MTFSF(b|_rec)?$"),
+ (instregex "MTFSFI(_rec)?$")
)>;
// Cracked instruction made of two ALU ops.
@@ -1087,13 +1087,13 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- (instregex "RLD(I)?C(R|L)o$"),
- (instregex "RLW(IMI|INM|NM)(8)?o$"),
- (instregex "SLW(8)?o$"),
- (instregex "SRAW(I)?o$"),
- (instregex "SRW(8)?o$"),
- RLDICL_32o,
- RLDIMIo
+ (instregex "RLD(I)?C(R|L)_rec$"),
+ (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
+ (instregex "SLW(8)?_rec$"),
+ (instregex "SRAW(I)?_rec$"),
+ (instregex "SRW(8)?_rec$"),
+ RLDICL_32_rec,
+ RLDIMI_rec
)>;
// Cracked instruction made of two ALU ops.
@@ -1102,7 +1102,7 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
- (instregex "MFFS(L|CE|o)?$")
+ (instregex "MFFS(L|CE|_rec)?$")
)>;
// Cracked ALU instruction composed of three consecutive 2 cycle loads for a
@@ -1118,12 +1118,12 @@ def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
// The two ops cannot be done in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instrs
- (instregex "EXTSWSLI_32_64o$"),
- (instregex "SRAD(I)?o$"),
- EXTSWSLIo,
- SLDo,
- SRDo,
- RLDICo
+ (instregex "EXTSWSLI_32_64_rec$"),
+ (instregex "SRAD(I)?_rec$"),
+ EXTSWSLI_rec,
+ SLD_rec,
+ SRD_rec,
+ RLDIC_rec
)>;
// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
@@ -1136,7 +1136,7 @@ def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- FDIVo
+ FDIV_rec
)>;
// 36 Cycle DP Instruction.
@@ -1170,7 +1170,7 @@ def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- FSQRTo
+ FSQRT_rec
)>;
// 26 Cycle DP Instruction.
@@ -1189,7 +1189,7 @@ def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- FSQRTSo
+ FSQRTS_rec
)>;
// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
@@ -1208,7 +1208,7 @@ def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- FDIVSo
+ FDIVS_rec
)>;
// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
@@ -1318,6 +1318,7 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
BCLalways,
BCLn,
BCTRL8_LDinto_toc,
+ BCTRL_LWZinto_toc,
BCn,
CTRL_DEP
)>;
@@ -1414,7 +1415,7 @@ def : InstRW<[],
MBAR,
MSYNC,
SLBSYNC,
- SLBFEEo,
+ SLBFEE_rec,
NAP,
STOP,
TRAP,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
index 0534773c4c9e..a83509f0e687 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
@@ -28,12 +28,13 @@ namespace llvm {
class AsmPrinter;
class MCInst;
class MCOperand;
-
+ class ModulePass;
+
FunctionPass *createPPCCTRLoops();
#ifndef NDEBUG
FunctionPass *createPPCCTRLoopsVerify();
#endif
- FunctionPass *createPPCLoopPreIncPrepPass(PPCTargetMachine &TM);
+ FunctionPass *createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM);
FunctionPass *createPPCTOCRegDepsPass();
FunctionPass *createPPCEarlyReturnPass();
FunctionPass *createPPCVSXCopyPass();
@@ -59,7 +60,7 @@ namespace llvm {
#ifndef NDEBUG
void initializePPCCTRLoopsVerifyPass(PassRegistry&);
#endif
- void initializePPCLoopPreIncPrepPass(PassRegistry&);
+ void initializePPCLoopInstrFormPrepPass(PassRegistry&);
void initializePPCTOCRegDepsPass(PassRegistry&);
void initializePPCEarlyReturnPass(PassRegistry&);
void initializePPCVSXCopyPass(PassRegistry&);
@@ -77,6 +78,10 @@ namespace llvm {
extern char &PPCVSXFMAMutateID;
+ ModulePass *createPPCLowerMASSVEntriesPass();
+ void initializePPCLowerMASSVEntriesPass(PassRegistry &);
+ extern char &PPCLowerMASSVEntriesID;
+
namespace PPCII {
/// Target Operand Flag enum.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
index 1724969a1ccc..bef0a81ee3ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
@@ -22,35 +22,37 @@ include "llvm/Target/Target.td"
// CPU Directives //
//===----------------------------------------------------------------------===//
-def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">;
-def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
-def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
-def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
-def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
-def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
-def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
-def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
-def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
-def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
-def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
-def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
-def DirectiveE500 : SubtargetFeature<"", "DarwinDirective",
+def Directive440 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_440", "">;
+def Directive601 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_601", "">;
+def Directive602 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_602", "">;
+def Directive603 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_603", "">;
+def Directive604 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_603", "">;
+def Directive620 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_603", "">;
+def Directive7400: SubtargetFeature<"", "CPUDirective", "PPC::DIR_7400", "">;
+def Directive750 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_750", "">;
+def Directive970 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_970", "">;
+def Directive32 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_32", "">;
+def Directive64 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_64", "">;
+def DirectiveA2 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_A2", "">;
+def DirectiveE500 : SubtargetFeature<"", "CPUDirective",
"PPC::DIR_E500", "">;
-def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
+def DirectiveE500mc : SubtargetFeature<"", "CPUDirective",
"PPC::DIR_E500mc", "">;
-def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
+def DirectiveE5500 : SubtargetFeature<"", "CPUDirective",
"PPC::DIR_E5500", "">;
-def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
-def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
-def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
+def DirectivePwr3: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR3", "">;
+def DirectivePwr4: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR4", "">;
+def DirectivePwr5: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR5", "">;
def DirectivePwr5x
- : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
-def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+ : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR5X", "">;
+def DirectivePwr6: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR6", "">;
def DirectivePwr6x
- : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
-def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
-def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
-def DirectivePwr9: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR9", "">;
+ : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR6X", "">;
+def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">;
+def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">;
+def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">;
+def DirectivePwrFuture
+ : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
@@ -164,6 +166,9 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
+def FeatureUnalignedFloats :
+ SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
+ "true", "CPU does not trap on unaligned FP access">;
def FeaturePPCPreRASched:
SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true",
"Use PowerPC pre-RA scheduling strategy">;
@@ -209,36 +214,95 @@ def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units",
// came before them, the idea is to make implementations of new processors
// less error prone and easier to read.
// Namely:
-// list<SubtargetFeature> Power8FeatureList = ...
-// list<SubtargetFeature> FutureProcessorSpecificFeatureList =
-// [ features that Power8 does not support ]
-// list<SubtargetFeature> FutureProcessorFeatureList =
-// !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)
+// list<SubtargetFeature> P8InheritableFeatures = ...
+// list<SubtargetFeature> FutureProcessorAdditionalFeatures =
+// [ features that Power8 does not support but that are inheritable ]
+// list<SubtargetFeature> FutureProcessorSpecificFeatures =
+// [ features that Power8 does not support and that are not inheritable ]
+// list<SubtargetFeature> FutureProcessorInheritableFeatures =
+// !listconcat(P8InheritableFeatures, FutureProcessorAdditionalFeatures)
+// list<SubtargetFeature> FutureProcessorFeatures =
+// !listconcat(FutureProcessorInheritableFeatures,
+// FutureProcessorSpecificFeatures)
// Makes it explicit and obvious what is new in FutureProcessor vs. Power8 as
// well as providing a single point of definition if the feature set will be
// used elsewhere.
def ProcessorFeatures {
- list<SubtargetFeature> Power7FeatureList =
- [DirectivePwr7, FeatureAltivec, FeatureVSX,
- FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
- FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */,
- FeatureBPERMD, FeatureExtDiv,
- FeatureMFTB, DeprecatedDST, FeatureTwoConstNR];
- list<SubtargetFeature> Power8SpecificFeatures =
- [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
- FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
- list<SubtargetFeature> Power8FeatureList =
- !listconcat(Power7FeatureList, Power8SpecificFeatures);
- list<SubtargetFeature> Power9SpecificFeatures =
- [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0,
- FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched];
- list<SubtargetFeature> Power9FeatureList =
- !listconcat(Power8FeatureList, Power9SpecificFeatures);
+ // Power7
+ list<SubtargetFeature> P7InheritableFeatures = [DirectivePwr7,
+ FeatureAltivec,
+ FeatureVSX,
+ FeatureMFOCRF,
+ FeatureFCPSGN,
+ FeatureFSqrt,
+ FeatureFRE,
+ FeatureFRES,
+ FeatureFRSQRTE,
+ FeatureFRSQRTES,
+ FeatureRecipPrec,
+ FeatureSTFIWX,
+ FeatureLFIWAX,
+ FeatureFPRND,
+ FeatureFPCVT,
+ FeatureISEL,
+ FeaturePOPCNTD,
+ FeatureCMPB,
+ FeatureLDBRX,
+ Feature64Bit,
+ /* Feature64BitRegs, */
+ FeatureBPERMD,
+ FeatureExtDiv,
+ FeatureMFTB,
+ DeprecatedDST,
+ FeatureTwoConstNR,
+ FeatureUnalignedFloats];
+ list<SubtargetFeature> P7SpecificFeatures = [];
+ list<SubtargetFeature> P7Features =
+ !listconcat(P7InheritableFeatures, P7SpecificFeatures);
+
+ // Power8
+ list<SubtargetFeature> P8AdditionalFeatures = [DirectivePwr8,
+ FeatureP8Altivec,
+ FeatureP8Vector,
+ FeatureP8Crypto,
+ FeatureHTM,
+ FeatureDirectMove,
+ FeatureICBT,
+ FeaturePartwordAtomic];
+ list<SubtargetFeature> P8SpecificFeatures = [];
+ list<SubtargetFeature> P8InheritableFeatures =
+ !listconcat(P7InheritableFeatures, P8AdditionalFeatures);
+ list<SubtargetFeature> P8Features =
+ !listconcat(P8InheritableFeatures, P8SpecificFeatures);
+
+ // Power9
+ list<SubtargetFeature> P9AdditionalFeatures = [DirectivePwr9,
+ FeatureP9Altivec,
+ FeatureP9Vector,
+ FeatureISA3_0];
+ // Some features are unique to Power9 and there is no reason to assume
+ // they will be part of any future CPUs. One example is the narrower
+ // dispatch for vector operations than for scalar ones. For the time being,
+ // this list also includes scheduling-related features since we do not have
+ // enough info to create custom scheduling strategies for future CPUs.
+ list<SubtargetFeature> P9SpecificFeatures = [FeatureVectorsUseTwoUnits,
+ FeaturePPCPreRASched,
+ FeaturePPCPostRASched];
+ list<SubtargetFeature> P9InheritableFeatures =
+ !listconcat(P8InheritableFeatures, P9AdditionalFeatures);
+ list<SubtargetFeature> P9Features =
+ !listconcat(P9InheritableFeatures, P9SpecificFeatures);
+
+ // Future
+ // For the future CPU we assume that all of the existing features from Power9
+ // still exist, with the exception of those we know are Power9 specific.
+ list<SubtargetFeature> FutureAdditionalFeatures = [];
+ list<SubtargetFeature> FutureSpecificFeatures = [];
+ list<SubtargetFeature> FutureInheritableFeatures =
+ !listconcat(P9InheritableFeatures, FutureAdditionalFeatures);
+ list<SubtargetFeature> FutureFeatures =
+ !listconcat(FutureInheritableFeatures, FutureSpecificFeatures);
}
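// Editorial sketch (not part of the patch): the inheritance scheme described
// in the comment above is plain list concatenation. A rough, hypothetical C++
// analogue of what !listconcat builds up; the vector names mirror the
// TableGen lists but are not part of the backend.
#include <cstdio>
#include <string>
#include <vector>

using FeatureList = std::vector<std::string>;

static FeatureList concat(FeatureList A, const FeatureList &B) {
  A.insert(A.end(), B.begin(), B.end()); // TableGen's !listconcat
  return A;
}

int main() {
  const FeatureList P8Inheritable = {"DirectivePwr8", "FeatureP8Vector"};
  const FeatureList P9Additional  = {"DirectivePwr9", "FeatureISA3_0"};
  const FeatureList P9Specific    = {"FeatureVectorsUseTwoUnits"};

  // P9Features = inheritable base + P9 additions + P9-only entries;
  // the "future" CPU reuses only the inheritable part.
  FeatureList P9Inheritable  = concat(P8Inheritable, P9Additional);
  FeatureList P9Features     = concat(P9Inheritable, P9Specific);
  FeatureList FutureFeatures = P9Inheritable; // FutureAdditionalFeatures is empty

  std::printf("pwr9: %zu features, future: %zu features\n",
              P9Features.size(), FutureFeatures.size());
  return 0;
}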
// Note: Future features to add when support is extended to more
@@ -438,9 +502,12 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit,
FeatureMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
-def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
-def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>;
+def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
+def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
+def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
+// No scheduler model for future CPU.
+def : ProcessorModel<"future", NoSchedModel,
+ ProcessorFeatures.FutureFeatures>;
def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat,
FeatureMFTB]>;
def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat,
@@ -451,7 +518,7 @@ def : ProcessorModel<"ppc64", G5Model,
FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */,
FeatureMFTB]>;
-def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.Power8FeatureList>;
+def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.P8Features>;
//===----------------------------------------------------------------------===//
// Calling Conventions
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 66236b72a1a3..4311df5dbeb8 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -43,6 +43,7 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -79,9 +80,11 @@ namespace {
class PPCAsmPrinter : public AsmPrinter {
protected:
MapVector<const MCSymbol *, MCSymbol *> TOC;
- const PPCSubtarget *Subtarget;
+ const PPCSubtarget *Subtarget = nullptr;
StackMaps SM;
+ virtual MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO);
+
public:
explicit PPCAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
@@ -144,23 +147,12 @@ public:
void EmitInstruction(const MachineInstr *MI) override;
};
-/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
-/// OS X
-class PPCDarwinAsmPrinter : public PPCAsmPrinter {
-public:
- explicit PPCDarwinAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : PPCAsmPrinter(TM, std::move(Streamer)) {}
-
- StringRef getPassName() const override {
- return "Darwin PPC Assembly Printer";
- }
-
- bool doFinalization(Module &M) override;
- void EmitStartOfAsmFile(Module &M) override;
-};
-
class PPCAIXAsmPrinter : public PPCAsmPrinter {
+private:
+ static void ValidateGV(const GlobalVariable *GV);
+protected:
+ MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO) override;
+
public:
PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: PPCAsmPrinter(TM, std::move(Streamer)) {}
@@ -169,6 +161,8 @@ public:
void SetupMachineFunction(MachineFunction &MF) override;
+ const MCExpr *lowerConstant(const Constant *CV) override;
+
void EmitGlobalVariable(const GlobalVariable *GV) override;
void EmitFunctionDescriptor() override;
@@ -351,8 +345,12 @@ void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
unsigned NumNOPBytes = MI.getOperand(1).getImm();
+
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
- SM.recordStackMap(MI);
+ SM.recordStackMap(*MILabel, MI);
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
// Scan ahead to trim the shadow.
@@ -377,7 +375,11 @@ void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>
void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
- SM.recordPatchPoint(MI);
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
+
+ SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers Opers(&MI);
unsigned EncodedBytes = 0;
@@ -490,9 +492,9 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
(!Subtarget->isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) &&
"GETtls[ld]ADDR[32] must read GPR3");
- if (!Subtarget->isPPC64() && !Subtarget->isDarwin() &&
- isPositionIndependent())
+ if (Subtarget->is32BitELFABI() && isPositionIndependent())
Kind = MCSymbolRefExpr::VK_PLT;
+
const MCExpr *TlsRef =
MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext);
@@ -514,17 +516,16 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
/// Map a machine operand for a TOC pseudo-machine instruction to its
/// corresponding MCSymbol.
-static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
- AsmPrinter &AP) {
+MCSymbol *PPCAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) {
switch (MO.getType()) {
case MachineOperand::MO_GlobalAddress:
- return AP.getSymbol(MO.getGlobal());
+ return getSymbol(MO.getGlobal());
case MachineOperand::MO_ConstantPoolIndex:
- return AP.GetCPISymbol(MO.getIndex());
+ return GetCPISymbol(MO.getIndex());
case MachineOperand::MO_JumpTableIndex:
- return AP.GetJTISymbol(MO.getIndex());
+ return GetJTISymbol(MO.getIndex());
case MachineOperand::MO_BlockAddress:
- return AP.GetBlockAddressSymbol(MO.getBlockAddress());
+ return GetBlockAddressSymbol(MO.getBlockAddress());
default:
llvm_unreachable("Unexpected operand type to get symbol.");
}
@@ -688,7 +689,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for LWZtoc.");
// Map the operand to its corresponding MCSymbol.
- const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO);
// Create a reference to the GOT entry for the symbol. The GOT entry will be
// synthesized later.
@@ -749,7 +750,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// global address operand to be a reference to the TOC entry we will
// synthesize later.
MCSymbol *TOCEntry =
- lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this));
+ lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO));
const MCSymbolRefExpr::VariantKind VK =
IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC;
@@ -775,7 +776,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for ADDIStocHA.");
// Map the machine operand to its corresponding MCSymbol.
- MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
// Always use TOC on AIX. Map the global address operand to be a reference
// to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
@@ -805,7 +806,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for LWZtocL.");
// Map the machine operand to its corresponding MCSymbol.
- MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
// Always use TOC on AIX. Map the global address operand to be a reference
// to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
@@ -835,7 +836,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
"Invalid operand for ADDIStocHA8!");
- const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
const bool GlobalToc =
MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
@@ -881,7 +882,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"LDtocL used on symbol that could be accessed directly is "
"invalid. Must match ADDIStocHA8."));
- const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
@@ -911,7 +912,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Interposable definitions must use indirect access."));
const MCExpr *Exp =
- MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this),
+ MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO),
MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext);
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
@@ -1578,151 +1579,6 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
}
}
-void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
- static const char *const CPUDirectives[] = {
- "",
- "ppc",
- "ppc440",
- "ppc601",
- "ppc602",
- "ppc603",
- "ppc7400",
- "ppc750",
- "ppc970",
- "ppcA2",
- "ppce500",
- "ppce500mc",
- "ppce5500",
- "power3",
- "power4",
- "power5",
- "power5x",
- "power6",
- "power6x",
- "power7",
- // FIXME: why is power8 missing here?
- "ppc64",
- "ppc64le",
- "power9"
- };
-
- // Get the numerically largest directive.
- // FIXME: How should we merge darwin directives?
- unsigned Directive = PPC::DIR_NONE;
- for (const Function &F : M) {
- const PPCSubtarget &STI = TM.getSubtarget<PPCSubtarget>(F);
- unsigned FDir = STI.getDarwinDirective();
- Directive = Directive > FDir ? FDir : STI.getDarwinDirective();
- if (STI.hasMFOCRF() && Directive < PPC::DIR_970)
- Directive = PPC::DIR_970;
- if (STI.hasAltivec() && Directive < PPC::DIR_7400)
- Directive = PPC::DIR_7400;
- if (STI.isPPC64() && Directive < PPC::DIR_64)
- Directive = PPC::DIR_64;
- }
-
- assert(Directive <= PPC::DIR_64 && "Directive out of range.");
-
- assert(Directive < array_lengthof(CPUDirectives) &&
- "CPUDirectives[] might not be up-to-date!");
- PPCTargetStreamer &TStreamer =
- *static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
- TStreamer.emitMachine(CPUDirectives[Directive]);
-
- // Prime text sections so they are adjacent. This reduces the likelihood a
- // large data or debug section causes a branch to exceed 16M limit.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
- OutStreamer->SwitchSection(TLOFMacho.getTextCoalSection());
- if (TM.getRelocationModel() == Reloc::PIC_) {
- OutStreamer->SwitchSection(
- OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
- MachO::S_SYMBOL_STUBS |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- 32, SectionKind::getText()));
- } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
- OutStreamer->SwitchSection(
- OutContext.getMachOSection("__TEXT","__symbol_stub1",
- MachO::S_SYMBOL_STUBS |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- 16, SectionKind::getText()));
- }
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
-}
-
-bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
- bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64;
-
- // Darwin/PPC always uses mach-o.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
- if (MMI) {
- MachineModuleInfoMachO &MMIMacho =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
- if (MAI->doesSupportExceptionHandling()) {
- // Add the (possibly multiple) personalities to the set of global values.
- // Only referenced functions get into the Personalities list.
- for (const Function *Personality : MMI->getPersonalities()) {
- if (Personality) {
- MCSymbol *NLPSym =
- getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMIMacho.getGVStubEntry(NLPSym);
- StubSym =
- MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true);
- }
- }
- }
-
- // Output stubs for dynamically-linked functions.
- MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList();
-
- // Output macho stubs for external and common global variables.
- if (!Stubs.empty()) {
- // Switch with ".non_lazy_symbol_pointer" directive.
- OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
- EmitAlignment(isPPC64 ? Align(8) : Align(4));
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer->EmitLabel(Stubs[i].first);
- // .indirect_symbol _foo
- MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
- OutStreamer->EmitSymbolAttribute(MCSym.getPointer(),
- MCSA_IndirectSymbol);
-
- if (MCSym.getInt())
- // External to current translation unit.
- OutStreamer->EmitIntValue(0, isPPC64 ? 8 : 4 /*size*/);
- else
- // Internal to current translation unit.
- //
- // When we place the LSDA into the TEXT section, the type info
- // pointers
- // need to be indirect and pc-rel. We accomplish this by using NLPs.
- // However, sometimes the types are local to the file. So we need to
- // fill in the value for the NLP in those cases.
- OutStreamer->EmitValue(
- MCSymbolRefExpr::create(MCSym.getPointer(), OutContext),
- isPPC64 ? 8 : 4 /*size*/);
- }
-
- Stubs.clear();
- OutStreamer->AddBlankLine();
- }
- }
-
- // Funny Darwin hack: This flag tells the linker that no global symbols
- // contain code that falls through to other global symbols (e.g. the obvious
- // implementation of multiple entry points). If this doesn't occur, the
- // linker can safely perform dead code stripping. Since LLVM never generates
- // code that does this, it is always safe to set.
- OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
-
- return AsmPrinter::doFinalization(M);
-}
-
void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
// Get the function descriptor symbol.
CurrentFnDescSym = getSymbol(&MF.getFunction());
@@ -1735,7 +1591,7 @@ void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
return AsmPrinter::SetupMachineFunction(MF);
}
-void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) {
// Early error checking limiting what is supported.
if (GV->isThreadLocal())
report_fatal_error("Thread local not yet supported on AIX.");
@@ -1745,22 +1601,51 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GV->hasComdat())
report_fatal_error("COMDAT not yet supported by AIX.");
+}
+
+const MCExpr *PPCAIXAsmPrinter::lowerConstant(const Constant *CV) {
+ if (const Function *F = dyn_cast<Function>(CV)) {
+ MCSymbolXCOFF *FSym = cast<MCSymbolXCOFF>(getSymbol(F));
+ if (!FSym->hasContainingCsect()) {
+ const XCOFF::StorageClass SC =
+ F->isDeclaration()
+ ? TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F)
+ : XCOFF::C_HIDEXT;
+ MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection(
+ FSym->getName(), XCOFF::XMC_DS,
+ F->isDeclaration() ? XCOFF::XTY_ER : XCOFF::XTY_SD, SC,
+ SectionKind::getData());
+ FSym->setContainingCsect(Csect);
+ }
+ return MCSymbolRefExpr::create(
+ FSym->getContainingCsect()->getQualNameSymbol(), OutContext);
+ }
+ return PPCAsmPrinter::lowerConstant(CV);
+}
+
+void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ ValidateGV(GV);
+
+ // External global variables are already handled.
+ if (!GV->hasInitializer())
+ return;
+
+ // Create the symbol, set its storage class.
+ MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV));
+ GVSym->setStorageClass(
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM);
- if (!GVKind.isCommon() && !GVKind.isBSSLocal() && !GVKind.isData())
+ if ((!GVKind.isGlobalWriteableData() && !GVKind.isReadOnly()) ||
+ GVKind.isMergeable2ByteCString() || GVKind.isMergeable4ByteCString())
report_fatal_error("Encountered a global variable kind that is "
"not supported yet.");
// Create the containing csect and switch to it.
- MCSectionXCOFF *CSect = cast<MCSectionXCOFF>(
+ MCSectionXCOFF *Csect = cast<MCSectionXCOFF>(
getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
- OutStreamer->SwitchSection(CSect);
-
- // Create the symbol, set its storage class, and emit it.
- MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV));
- GVSym->setStorageClass(
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
- GVSym->setContainingCsect(CSect);
+ OutStreamer->SwitchSection(Csect);
+ GVSym->setContainingCsect(Csect);
const DataLayout &DL = GV->getParent()->getDataLayout();
@@ -1771,9 +1656,10 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
if (GVKind.isBSSLocal())
- OutStreamer->EmitXCOFFLocalCommonSymbol(GVSym, Size, Align);
+ OutStreamer->EmitXCOFFLocalCommonSymbol(
+ GVSym, Size, Csect->getQualNameSymbol(), Align);
else
- OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+ OutStreamer->EmitCommonSymbol(Csect->getQualNameSymbol(), Size, Align);
return;
}
@@ -1797,7 +1683,10 @@ void PPCAIXAsmPrinter::EmitFunctionDescriptor() {
OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
PointerSize);
// Emit TOC base address.
- MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
+ const MCSectionXCOFF *TOCBaseSec = OutStreamer->getContext().getXCOFFSection(
+ StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
+ SectionKind::getData());
+ const MCSymbol *TOCBaseSym = TOCBaseSec->getQualNameSymbol();
OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
PointerSize);
// Emit a null environment pointer.
@@ -1813,15 +1702,90 @@ void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) {
return;
// Emit TOC base.
- MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
MCSectionXCOFF *TOCBaseSection = OutStreamer->getContext().getXCOFFSection(
StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
SectionKind::getData());
- cast<MCSymbolXCOFF>(TOCBaseSym)->setContainingCsect(TOCBaseSection);
+ // The TOC base always has zero size but 4-byte alignment.
+ TOCBaseSection->setAlignment(Align(4));
// Switch to section to emit TOC base.
OutStreamer->SwitchSection(TOCBaseSection);
+
+ PPCTargetStreamer &TS =
+ static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+ for (auto &I : TOC) {
+ // Setup the csect for the current TC entry.
+ MCSectionXCOFF *TCEntry = OutStreamer->getContext().getXCOFFSection(
+ cast<MCSymbolXCOFF>(I.first)->getUnqualifiedName(), XCOFF::XMC_TC,
+ XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData());
+ cast<MCSymbolXCOFF>(I.second)->setContainingCsect(TCEntry);
+ OutStreamer->SwitchSection(TCEntry);
+
+ OutStreamer->EmitLabel(I.second);
+ TS.emitTCEntry(*I.first);
+ }
}
+MCSymbol *
+PPCAIXAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) {
+ const GlobalObject *GO = nullptr;
+
+ // If the MO is a function or certain kinds of globals, we want to make sure
+ // to refer to the csect symbol; otherwise we can just do the default handling.
+ if (MO.getType() != MachineOperand::MO_GlobalAddress ||
+ !(GO = dyn_cast<const GlobalObject>(MO.getGlobal())))
+ return PPCAsmPrinter::getMCSymbolForTOCPseudoMO(MO);
+
+ // Do an early error check for globals we don't support. This will go away
+ // eventually.
+ const auto *GV = dyn_cast<const GlobalVariable>(GO);
+ if (GV) {
+ ValidateGV(GV);
+ }
+
+ MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(getSymbol(GO));
+
+ // If the global object is a global variable without an initializer or is a
+ // declaration of a function, then XSym is an externally referenced symbol.
+ // Hence we may need to explicitly create an MCSectionXCOFF for it so that we
+ // can return its symbol later.
+ if (GO->isDeclaration()) {
+ if (!XSym->hasContainingCsect()) {
+ // Make sure the storage class is set.
+ const XCOFF::StorageClass SC =
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
+ XSym->setStorageClass(SC);
+
+ MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection(
+ XSym->getName(), isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA,
+ XCOFF::XTY_ER, SC, SectionKind::getMetadata());
+ XSym->setContainingCsect(Csect);
+ }
+
+ return XSym->getContainingCsect()->getQualNameSymbol();
+ }
+
+ // Handle initialized global variables and defined functions.
+ SectionKind GOKind = getObjFileLowering().getKindForGlobal(GO, TM);
+
+ if (GOKind.isText()) {
+ // If the MO is a function, we want to make sure to refer to the function
+ // descriptor csect.
+ return OutStreamer->getContext()
+ .getXCOFFSection(XSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD,
+ XCOFF::C_HIDEXT, SectionKind::getData())
+ ->getQualNameSymbol();
+ } else if (GOKind.isCommon() || GOKind.isBSSLocal()) {
+ // If the operand is a common then we should refer to the csect symbol.
+ return cast<MCSectionXCOFF>(
+ getObjFileLowering().SectionForGlobal(GO, GOKind, TM))
+ ->getQualNameSymbol();
+ }
+
+ // Other global variables are referred to by labels inside a single csect,
+ // so refer to the label directly.
+ return getSymbol(GV);
+}
/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
/// for a MachineFunction to the given output stream, in a format that the
@@ -1830,8 +1794,6 @@ void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) {
static AsmPrinter *
createPPCAsmPrinterPass(TargetMachine &tm,
std::unique_ptr<MCStreamer> &&Streamer) {
- if (tm.getTargetTriple().isMacOSX())
- return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
if (tm.getTargetTriple().isOSAIX())
return new PPCAIXAsmPrinter(tm, std::move(Streamer));
@@ -1839,7 +1801,7 @@ createPPCAsmPrinterPass(TargetMachine &tm,
}
// Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(getThePPC32Target(),
createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(getThePPC64Target(),
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index d325b078979f..109b665e0d57 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 2b8d9b87724f..4ce705300e1b 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -33,10 +33,8 @@
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Constants.h"
@@ -47,6 +45,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -54,6 +53,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#ifndef NDEBUG
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 06a4d183e781..4c608520e265 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -782,8 +782,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
DebugLoc dl;
- bool needsCFI = MMI.hasDebugInfo() ||
- MF.getFunction().needsUnwindTableEntry();
+ bool needsCFI = MF.needsFrameMoves();
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
@@ -2442,10 +2441,6 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
}
unsigned PPCFrameLowering::getTOCSaveOffset() const {
- if (Subtarget.isAIXABI())
- // TOC save/restore is normally handled by the linker.
- // Indirect calls should hit this limitation.
- report_fatal_error("TOC save is not implemented on AIX yet.");
return TOCSaveOffset;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 391ebcc1a143..ffaa3e05c847 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -158,7 +158,7 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
// new group.
if (isLoadAfterStore(SU) && CurSlots < 6) {
unsigned Directive =
- DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
// If we're using a special group-terminating nop, then we need only one.
// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
@@ -218,7 +218,7 @@ void PPCDispatchGroupSBHazardRecognizer::Reset() {
void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
unsigned Directive =
- DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
// If the group has now filled all of its slots, or if we're using a special
// group-terminating nop, the group is complete.
// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 4ad6c88233fe..776ec52e2604 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -138,9 +138,9 @@ namespace {
///
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
- const PPCSubtarget *PPCSubTarget;
- const PPCTargetLowering *PPCLowering;
- unsigned GlobalBaseReg;
+ const PPCSubtarget *PPCSubTarget = nullptr;
+ const PPCTargetLowering *PPCLowering = nullptr;
+ unsigned GlobalBaseReg = 0;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
@@ -204,6 +204,7 @@ namespace {
bool tryBitfieldInsert(SDNode *N);
bool tryBitPermutation(SDNode *N);
bool tryIntCompareInGPR(SDNode *N);
+ bool tryAndWithMask(SDNode *N);
// tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
// an X-Form load instruction with the offset being a relocation coming from
@@ -309,7 +310,6 @@ namespace {
errs() << "ConstraintID: " << ConstraintID << "\n";
llvm_unreachable("Unexpected asm memory constraint");
case InlineAsm::Constraint_es:
- case InlineAsm::Constraint_i:
case InlineAsm::Constraint_m:
case InlineAsm::Constraint_o:
case InlineAsm::Constraint_Q:
@@ -512,13 +512,14 @@ static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
return isInt64Immediate(N.getNode(), Imm);
}
-static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
+static unsigned getBranchHint(unsigned PCC,
+ const FunctionLoweringInfo &FuncInfo,
const SDValue &DestMBB) {
assert(isa<BasicBlockSDNode>(DestMBB));
- if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
+ if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
- const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
+ const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
const Instruction *BBTerm = BB->getTerminator();
if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
@@ -526,8 +527,8 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
const BasicBlock *TBB = BBTerm->getSuccessor(0);
const BasicBlock *FBB = BBTerm->getSuccessor(1);
- auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
- auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
+ auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
+ auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
// We only want to handle cases which are easy to predict at static time, e.g.
// C++ throw statement, that is very likely not taken, or calling never
@@ -547,7 +548,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
return PPC::BR_NO_HINT;
- LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName()
+ LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
<< "::" << BB->getName() << "'\n"
<< " -> " << TBB->getName() << ": " << TProb << "\n"
<< " -> " << FBB->getName() << ": " << FProb << "\n");
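The probability test above only emits a hint when one edge dominates the other by the configured ratio. A minimal standalone sketch of the same predicate, with plain doubles standing in for BranchProbability and a hypothetical threshold value, might look like this:

    #include <algorithm>
    #include <cstdio>

    // Returns true when the branch is lopsided enough to be worth hinting.
    static bool worthHinting(double TProb, double FProb, double Threshold) {
      return std::max(TProb, FProb) / Threshold >= std::min(TProb, FProb);
    }

    int main() {
      std::printf("%d\n", worthHinting(0.99, 0.01, 10.0)); // 1: strongly biased
      std::printf("%d\n", worthHinting(0.60, 0.40, 10.0)); // 0: too balanced
    }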
@@ -1044,7 +1045,7 @@ static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
if (Use->isMachineOpcode())
return 0;
MaxTruncation =
- std::max(MaxTruncation, Use->getValueType(0).getSizeInBits());
+ std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
continue;
case ISD::STORE: {
if (Use->isMachineOpcode())
@@ -1399,11 +1400,14 @@ class BitPermutationSelector {
for (unsigned i = 0; i < NumValidBits; ++i)
Bits[i] = (*LHSBits)[i];
- // These bits are known to be zero.
+ // These bits are known to be zero but the AssertZext may be from a value
+ // that already has some constant zero bits (i.e. from a masking and).
for (unsigned i = NumValidBits; i < NumBits; ++i)
- Bits[i] = ValueBit((*LHSBits)[i].getValue(),
- (*LHSBits)[i].getValueBitIndex(),
- ValueBit::VariableKnownToBeZero);
+ Bits[i] = (*LHSBits)[i].hasValue()
+ ? ValueBit((*LHSBits)[i].getValue(),
+ (*LHSBits)[i].getValueBitIndex(),
+ ValueBit::VariableKnownToBeZero)
+ : ValueBit(ValueBit::ConstZero);
return std::make_pair(Interesting, &Bits);
}
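The fix above matters because a bit above NumValidBits may have no underlying variable at all (e.g. it was already a constant zero produced by a masking and). A small stand-in model of the corrected classification, with a hypothetical ValueBitModel type in place of the selector's ValueBit:

    // Classify one bit at an index >= NumValidBits of an AssertZext input.
    struct ValueBitModel {
      enum Kind { ConstZero, Variable, VariableKnownToBeZero } K;
      bool hasValue() const { return K != ConstZero; } // tied to a real bit?
    };

    static ValueBitModel classifyHighBit(const ValueBitModel &LHSBit) {
      if (LHSBit.hasValue())
        return {ValueBitModel::VariableKnownToBeZero};
      // Already a constant zero: there is no variable to tag, so keep it
      // a plain ConstZero instead of fabricating one.
      return {ValueBitModel::ConstZero};
    }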
@@ -1811,11 +1815,14 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
- ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
- VRot, getI32Imm(ANDIMask, dl)), 0);
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
+ VRot, getI32Imm(ANDIMask, dl)),
+ 0);
if (ANDISMask != 0)
- ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
- VRot, getI32Imm(ANDISMask, dl)), 0);
+ ANDISVal =
+ SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
+ getI32Imm(ANDISMask, dl)),
+ 0);
SDValue TotalVal;
if (!ANDIVal)
@@ -1904,11 +1911,14 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
- ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
- Res, getI32Imm(ANDIMask, dl)), 0);
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
+ Res, getI32Imm(ANDIMask, dl)),
+ 0);
if (ANDISMask != 0)
- ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
- Res, getI32Imm(ANDISMask, dl)), 0);
+ ANDISVal =
+ SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
+ getI32Imm(ANDISMask, dl)),
+ 0);
if (!ANDIVal)
Res = ANDISVal;
@@ -2181,15 +2191,16 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
- ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
ExtendToInt64(VRot, dl),
getI32Imm(ANDIMask, dl)),
0);
if (ANDISMask != 0)
- ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
- ExtendToInt64(VRot, dl),
- getI32Imm(ANDISMask, dl)),
- 0);
+ ANDISVal =
+ SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
+ ExtendToInt64(VRot, dl),
+ getI32Imm(ANDISMask, dl)),
+ 0);
if (!ANDIVal)
TotalVal = ANDISVal;
@@ -2330,11 +2341,16 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
- ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
- ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0);
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
+ ExtendToInt64(Res, dl),
+ getI32Imm(ANDIMask, dl)),
+ 0);
if (ANDISMask != 0)
- ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
- ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0);
+ ANDISVal =
+ SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
+ ExtendToInt64(Res, dl),
+ getI32Imm(ANDISMask, dl)),
+ 0);
if (!ANDIVal)
Res = ANDISVal;
@@ -2385,7 +2401,7 @@ class BitPermutationSelector {
SmallVector<ValueBit, 64> Bits;
- bool NeedMask;
+ bool NeedMask = false;
SmallVector<unsigned, 64> RLAmt;
SmallVector<BitGroup, 16> BitGroups;
@@ -2393,7 +2409,7 @@ class BitPermutationSelector {
DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
SmallVector<ValueRotInfo, 16> ValueRotsVec;
- SelectionDAG *CurDAG;
+ SelectionDAG *CurDAG = nullptr;
public:
BitPermutationSelector(SelectionDAG *DAG)
@@ -2623,8 +2639,9 @@ SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
assert((NewOpc != -1 || !IsBitwiseNegate) &&
"No record form available for AND8/OR8/XOR8?");
WideOp =
- SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
- MVT::i64, MVT::Glue, LHS, RHS), 0);
+ SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
+ dl, MVT::i64, MVT::Glue, LHS, RHS),
+ 0);
}
// Select this node to a single bit from CR0 set by the record-form node
@@ -3597,7 +3614,7 @@ SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
if (ConvOpts == SetccInGPROpts::ZExtInvert ||
ConvOpts == SetccInGPROpts::SExtInvert)
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, InputVT);
bool Inputs32Bit = InputVT == MVT::i32;
@@ -3832,7 +3849,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
-static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
+static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
+ const PPCSubtarget *Subtarget) {
+ // For SPE instructions, the result is in the GT bit of the CR.
+ bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
+
switch (CC) {
case ISD::SETUEQ:
case ISD::SETONE:
@@ -3841,17 +3862,23 @@ static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
llvm_unreachable("Should be lowered by legalize!");
default: llvm_unreachable("Unknown condition!");
case ISD::SETOEQ:
- case ISD::SETEQ: return PPC::PRED_EQ;
+ case ISD::SETEQ:
+ return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
case ISD::SETUNE:
- case ISD::SETNE: return PPC::PRED_NE;
+ case ISD::SETNE:
+ return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
case ISD::SETOLT:
- case ISD::SETLT: return PPC::PRED_LT;
+ case ISD::SETLT:
+ return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
case ISD::SETULE:
- case ISD::SETLE: return PPC::PRED_LE;
+ case ISD::SETLE:
+ return UseSPE ? PPC::PRED_LE : PPC::PRED_LE;
case ISD::SETOGT:
- case ISD::SETGT: return PPC::PRED_GT;
+ case ISD::SETGT:
+ return UseSPE ? PPC::PRED_GT : PPC::PRED_GT;
case ISD::SETUGE:
- case ISD::SETGE: return PPC::PRED_GE;
+ case ISD::SETGE:
+ return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
case ISD::SETO: return PPC::PRED_NU;
case ISD::SETUO: return PPC::PRED_UN;
// These two are invalid for floating point. Assume we have int.
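Because the SPE floating-point compare instructions report their result only through the GT bit of the CR field, the mapping above collapses every ordered relation to a "true"/"not true" test; the compare instruction chosen elsewhere encodes the relation itself. An illustrative summary (assuming GT-set means the compare held):

    //   SETOEQ / SETEQ  -> PRED_GT   // branch if the FP compare held
    //   SETUNE / SETNE  -> PRED_LE   // branch if it did not hold
    //   SETOLT / SETLT  -> PRED_GT   // relation baked into the compare,
    //   SETOGT / SETGT  -> PRED_GT   // predicate only tests true...
    //   SETULE / SETLE  -> PRED_LE   // ...or not true
    //   SETUGE / SETGE  -> PRED_LE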
@@ -4344,6 +4371,142 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
return true;
}
+bool PPCDAGToDAGISel::tryAndWithMask(SDNode *N) {
+ if (N->getOpcode() != ISD::AND)
+ return false;
+
+ SDLoc dl(N);
+ SDValue Val = N->getOperand(0);
+ unsigned Imm, Imm2, SH, MB, ME;
+ uint64_t Imm64;
+
+ // If this is an and of a value rotated between 0 and 31 bits and then and'd
+ // with a mask, emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ return true;
+ }
+
+ // If this is just a masked value where the input is not handled, and
+ // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ if (isRunOfOnes(Imm, MB, ME) &&
+ N->getOperand(0).getOpcode() != ISD::ROTL) {
+ SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ return true;
+ }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (Imm == 0) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return true;
+ }
+
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
+ // bitfield insert.
+ if (N->getOperand(0).getOpcode() == ISD::OR &&
+ isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+ // The idea here is to check whether this is equivalent to:
+ // (c1 & m) | (x & ~m)
+ // where m is a run-of-ones mask. The logic here is that, for each bit in
+ // c1 and c2:
+ // - if both are 1, then the output will be 1.
+ // - if both are 0, then the output will be 0.
+ // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
+ // come from x.
+ // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
+ // be 0.
+ // If that last condition is never the case, then we can form m from the
+ // bits that are the same between c1 and c2.
+ unsigned MB, ME;
+ if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ N->getOperand(0).getOperand(1),
+ getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
+ return true;
+ }
+ }
+ } else if (isInt64Immediate(N->getOperand(1).getNode(), Imm64)) {
+ // If this is a 64-bit zero-extension mask, emit rldicl.
+ if (isMask_64(Imm64)) {
+ MB = 64 - countTrailingOnes(Imm64);
+ SH = 0;
+
+ if (Val.getOpcode() == ISD::ANY_EXTEND) {
+ auto Op0 = Val.getOperand(0);
+ if (Op0.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
+
+ auto ResultType = Val.getNode()->getValueType(0);
+ auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
+ ResultType);
+ SDValue IDVal(ImDef, 0);
+
+ Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
+ ResultType, IDVal, Op0.getOperand(0),
+ getI32Imm(1, dl)), 0);
+ SH = 64 - Imm;
+ }
+ }
+
+ // If the operand is a logical right shift, we can fold it into this
+ // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
+ // for n <= mb. The right shift is really a left rotate followed by a
+ // mask, and this mask is a more-restrictive sub-mask of the mask implied
+ // by the shift.
+ if (Val.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
+ assert(Imm < 64 && "Illegal shift amount");
+ Val = Val.getOperand(0);
+ SH = 64 - Imm;
+ }
+
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+ return true;
+ } else if (isMask_64(~Imm64)) {
+ // If this is a negated 64-bit zero-extension mask,
+ // i.e. the immediate is a sequence of ones from the most significant side
+ // and all zeros for the remainder, we should use rldicr.
+ MB = 63 - countTrailingOnes(~Imm64);
+ SH = 0;
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
+ return true;
+ }
+
+ // The immediate does not fit in 16 bits, so an "and" would take at least
+ // two instructions. Try to exploit it with rotate mask instructions.
+ if (isRunOfOnes64(Imm64, MB, ME)) {
+ if (MB >= 32 && MB <= ME) {
+ // MB ME
+ // +----------------------+
+ // |xxxxxxxxxxx00011111000|
+ // +----------------------+
+ // 0 32 64
+ // We can only do this if MB >= 32 and MB <= ME, because RLWINM will
+ // replace the contents of bits [0, 32) with a copy of bits [32, 64) even
+ // when the rotation amount is zero.
+ SDValue Ops[] = { Val, getI64Imm(0, dl), getI64Imm(MB - 32, dl),
+ getI64Imm(ME - 32, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
+ return true;
+ }
+ // TODO - handle it with rldicl + rldicl
+ }
+ }
+
+ return false;
+}
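Most of the selections above hinge on recognizing a run-of-ones mask and converting it to (MB, ME) rotate-mask bounds. A standalone sketch of that test for the simple 32-bit, non-wrapping case (the real isRunOfOnes also accepts masks that wrap around bit 0):

    #include <cstdint>

    static bool isRunOfOnes32(uint32_t V, unsigned &MB, unsigned &ME) {
      if (V == 0)
        return false;
      uint32_t Lsb = V & -V;        // isolate the lowest set bit
      if (((V + Lsb) & V) != 0)     // adding Lsb must clear the entire run
        return false;
      MB = __builtin_clz(V);        // mask begin, IBM (MSB-first) numbering
      ME = 31 - __builtin_ctz(V);   // mask end
      return true;
    }
    // Example: V = 0x00FF0000 yields MB = 8, ME = 15, selecting the single
    // instruction `rlwinm Rd, Rs, 0, 8, 15`.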
+
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) {
@@ -4565,121 +4728,13 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
}
- case ISD::AND: {
- unsigned Imm, Imm2, SH, MB, ME;
- uint64_t Imm64;
-
- // If this is an and of a value rotated between 0 and 31 bits and then and'd
- // with a mask, emit rlwinm
- if (isInt32Immediate(N->getOperand(1), Imm) &&
- isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
- SDValue Val = N->getOperand(0).getOperand(0);
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
- return;
- }
- // If this is just a masked value where the input is not handled above, and
- // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
- if (isInt32Immediate(N->getOperand(1), Imm) &&
- isRunOfOnes(Imm, MB, ME) &&
- N->getOperand(0).getOpcode() != ISD::ROTL) {
- SDValue Val = N->getOperand(0);
- SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
- return;
- }
- // If this is a 64-bit zero-extension mask, emit rldicl.
- if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
- isMask_64(Imm64)) {
- SDValue Val = N->getOperand(0);
- MB = 64 - countTrailingOnes(Imm64);
- SH = 0;
-
- if (Val.getOpcode() == ISD::ANY_EXTEND) {
- auto Op0 = Val.getOperand(0);
- if ( Op0.getOpcode() == ISD::SRL &&
- isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
-
- auto ResultType = Val.getNode()->getValueType(0);
- auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
- ResultType);
- SDValue IDVal (ImDef, 0);
-
- Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
- ResultType, IDVal, Op0.getOperand(0),
- getI32Imm(1, dl)), 0);
- SH = 64 - Imm;
- }
- }
-
- // If the operand is a logical right shift, we can fold it into this
- // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
- // for n <= mb. The right shift is really a left rotate followed by a
- // mask, and this mask is a more-restrictive sub-mask of the mask implied
- // by the shift.
- if (Val.getOpcode() == ISD::SRL &&
- isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
- assert(Imm < 64 && "Illegal shift amount");
- Val = Val.getOperand(0);
- SH = 64 - Imm;
- }
-
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
- return;
- }
- // If this is a negated 64-bit zero-extension mask,
- // i.e. the immediate is a sequence of ones from most significant side
- // and all zero for reminder, we should use rldicr.
- if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
- isMask_64(~Imm64)) {
- SDValue Val = N->getOperand(0);
- MB = 63 - countTrailingOnes(~Imm64);
- SH = 0;
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
- return;
- }
-
- // AND X, 0 -> 0, not "rlwinm 32".
- if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
- ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ case ISD::AND:
+ // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
+ if (tryAndWithMask(N))
return;
- }
- // ISD::OR doesn't get all the bitfield insertion fun.
- // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
- // bitfield insert.
- if (isInt32Immediate(N->getOperand(1), Imm) &&
- N->getOperand(0).getOpcode() == ISD::OR &&
- isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
- // The idea here is to check whether this is equivalent to:
- // (c1 & m) | (x & ~m)
- // where m is a run-of-ones mask. The logic here is that, for each bit in
- // c1 and c2:
- // - if both are 1, then the output will be 1.
- // - if both are 0, then the output will be 0.
- // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
- // come from x.
- // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
- // be 0.
- // If that last condition is never the case, then we can form m from the
- // bits that are the same between c1 and c2.
- unsigned MB, ME;
- if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
- SDValue Ops[] = { N->getOperand(0).getOperand(0),
- N->getOperand(0).getOperand(1),
- getI32Imm(0, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
- ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
- return;
- }
- }
// Other cases are autogenerated.
break;
- }
case ISD::OR: {
if (N->getValueType(0) == MVT::i32)
if (tryBitfieldInsert(N))
@@ -4781,24 +4836,24 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
break;
}
// FIXME: Remove this once the ANDI glue bug is fixed:
- case PPCISD::ANDIo_1_EQ_BIT:
- case PPCISD::ANDIo_1_GT_BIT: {
+ case PPCISD::ANDI_rec_1_EQ_BIT:
+ case PPCISD::ANDI_rec_1_GT_BIT: {
if (!ANDIGlueBug)
break;
EVT InVT = N->getOperand(0).getValueType();
assert((InVT == MVT::i64 || InVT == MVT::i32) &&
- "Invalid input type for ANDIo_1_EQ_BIT");
+ "Invalid input type for ANDI_rec_1_EQ_BIT");
- unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
+ unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
N->getOperand(0),
CurDAG->getTargetConstant(1, dl, InVT)),
0);
SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
- SDValue SRIdxVal =
- CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
- PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
+ SDValue SRIdxVal = CurDAG->getTargetConstant(
+ N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
+ dl, MVT::i32);
CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
@@ -4889,7 +4944,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
- unsigned BROpc = getPredicateForSetCC(CC);
+ unsigned BROpc =
+ getPredicateForSetCC(CC, N->getOperand(0).getValueType(), PPCSubTarget);
unsigned SelectCCOp;
if (N->getValueType(0) == MVT::i32)
@@ -5002,7 +5058,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// Prevent PPC::PRED_* from being selected into LI.
unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
if (EnableBranchHint)
- PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
+ PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
SDValue Pred = getI32Imm(PCC, dl);
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
@@ -5012,7 +5068,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
case ISD::BR_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
- unsigned PCC = getPredicateForSetCC(CC);
+ unsigned PCC =
+ getPredicateForSetCC(CC, N->getOperand(2).getValueType(), PPCSubTarget);
if (N->getOperand(2).getValueType() == MVT::i1) {
unsigned Opc;
@@ -5045,7 +5102,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
if (EnableBranchHint)
- PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
+ PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
@@ -6181,8 +6238,8 @@ static bool PeepholePPC64ZExtGather(SDValue Op32,
// For ANDI and ANDIS, the higher-order bits are zero if either that is true
// of the first operand, or if the second operand is positive (so that it is
// not sign extended).
- if (Op32.getMachineOpcode() == PPC::ANDIo ||
- Op32.getMachineOpcode() == PPC::ANDISo) {
+ if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
+ Op32.getMachineOpcode() == PPC::ANDIS_rec) {
SmallPtrSet<SDNode *, 16> ToPromote1;
bool Op0OK =
PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
@@ -6304,8 +6361,12 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() {
case PPC::ORI: NewOpcode = PPC::ORI8; break;
case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
case PPC::AND: NewOpcode = PPC::AND8; break;
- case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break;
- case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break;
+ case PPC::ANDI_rec:
+ NewOpcode = PPC::ANDI8_rec;
+ break;
+ case PPC::ANDIS_rec:
+ NewOpcode = PPC::ANDIS8_rec;
+ break;
}
// Note: During the replacement process, the nodes will be in an
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d2a926bb7bae..60ed72e1018b 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -52,6 +52,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
@@ -66,6 +67,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -119,6 +121,9 @@ cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);
+static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
+cl::desc("use absolute jump tables on ppc"), cl::Hidden);
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
@@ -132,10 +137,6 @@ extern cl::opt<bool> ANDIGlueBug;
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
const PPCSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
- // Use _setjmp/_longjmp instead of setjmp/longjmp.
- setUseUnderscoreSetJmp(true);
- setUseUnderscoreLongJmp(true);
-
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
bool isPPC64 = Subtarget.isPPC64();
@@ -389,6 +390,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i32, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Legal);
setOperationAction(ISD::BITCAST, MVT::f64, Legal);
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::LRINT, MVT::f64, Legal);
+ setOperationAction(ISD::LRINT, MVT::f32, Legal);
+ setOperationAction(ISD::LLRINT, MVT::f64, Legal);
+ setOperationAction(ISD::LLRINT, MVT::f32, Legal);
+ setOperationAction(ISD::LROUND, MVT::f64, Legal);
+ setOperationAction(ISD::LROUND, MVT::f32, Legal);
+ setOperationAction(ISD::LLROUND, MVT::f64, Legal);
+ setOperationAction(ISD::LLROUND, MVT::f32, Legal);
+ }
} else {
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
@@ -548,6 +559,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+ }
+
if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
@@ -702,6 +720,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (!Subtarget.hasP8Altivec())
setOperationAction(ISD::ABS, MVT::v2i64, Expand);
+ // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
+ if (Subtarget.hasAltivec())
+ for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
+ setOperationAction(ISD::ROTL, VT, Legal);
+ // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
+ if (Subtarget.hasP8Altivec())
+ setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
+
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
@@ -756,13 +782,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+ // The nearbyint variants are not allowed to raise the inexact exception
+ // so we can only code-gen them with unsafe math.
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ }
+
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
@@ -910,12 +946,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FREM, MVT::f128, Expand);
}
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
-
+ setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
+ setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
+ setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
+ setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
}
if (Subtarget.hasP9Altivec()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
}
}
@@ -1183,7 +1230,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.isDarwin())
setPrefFunctionAlignment(Align(16));
- switch (Subtarget.getDarwinDirective()) {
+ switch (Subtarget.getCPUDirective()) {
default: break;
case PPC::DIR_970:
case PPC::DIR_A2:
@@ -1198,6 +1245,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR_FUTURE:
setPrefLoopAlignment(Align(16));
setPrefFunctionAlignment(Align(16));
break;
@@ -1212,15 +1260,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses same threshold of 128 bytes (= 32 word stores).
- if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
- Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
+ if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
+ Subtarget.getCPUDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
- } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+ } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
// The A2 also benefits from (very) aggressive inlining of memcpy and
// friends. The overhead of a function call, even when warm, can be
// over one hundred cycles.
@@ -1294,6 +1342,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
+ case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
@@ -1314,7 +1364,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
- case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
@@ -1345,8 +1394,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
- case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
- case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
+ case PPCISD::ANDI_rec_1_EQ_BIT:
+ return "PPCISD::ANDI_rec_1_EQ_BIT";
+ case PPCISD::ANDI_rec_1_GT_BIT:
+ return "PPCISD::ANDI_rec_1_GT_BIT";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
@@ -2699,9 +2750,9 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
const Constant *C = CP->getConstVal();
- // 64-bit SVR4 ABI code is always position-independent.
+ // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.is64BitELFABI()) {
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
return getTOCEntry(DAG, SDLoc(CP), GA);
@@ -2735,14 +2786,16 @@ unsigned PPCTargetLowering::getJumpTableEncoding() const {
}
bool PPCTargetLowering::isJumpTableRelative() const {
- if (Subtarget.isPPC64())
+ if (UseAbsoluteJumpTables)
+ return false;
+ if (Subtarget.isPPC64() || Subtarget.isAIXABI())
return true;
return TargetLowering::isJumpTableRelative();
}
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
- if (!Subtarget.isPPC64())
+ if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
switch (getTargetMachine().getCodeModel()) {
@@ -2759,7 +2812,7 @@ const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI,
MCContext &Ctx) const {
- if (!Subtarget.isPPC64())
+ if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
switch (getTargetMachine().getCodeModel()) {
@@ -2775,9 +2828,9 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- // 64-bit SVR4 ABI code is always position-independent.
+ // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.is64BitELFABI()) {
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return getTOCEntry(DAG, SDLoc(JT), GA);
@@ -2804,9 +2857,9 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
const BlockAddress *BA = BASDN->getBlockAddress();
- // 64-bit SVR4 ABI code is always position-independent.
+ // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual BlockAddress is stored in the TOC.
- if (Subtarget.is64BitELFABI()) {
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
return getTOCEntry(DAG, SDLoc(BASDN), GA);
@@ -3129,11 +3182,17 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
+
return Op.getOperand(0);
}
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
+
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -3394,15 +3453,16 @@ SDValue PPCTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ if (Subtarget.isAIXABI())
+ return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
+ InVals);
if (Subtarget.is64BitELFABI())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
- else if (Subtarget.is32BitELFABI())
+ if (Subtarget.is32BitELFABI())
return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
- // FIXME: We are using this for both AIX and Darwin. We should add appropriate
- // AIX testing, and rename it appropriately.
return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
}
@@ -4934,218 +4994,6 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
return false;
}
-static unsigned
-PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
- SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
- bool isPatchPoint, bool hasNest,
- SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
- SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
- ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
- bool isPPC64 = Subtarget.isPPC64();
- bool isSVR4ABI = Subtarget.isSVR4ABI();
- bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI();
- bool isAIXABI = Subtarget.isAIXABI();
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
- NodeTys.push_back(MVT::Other); // Returns a chain
- NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
-
- unsigned CallOpc = PPCISD::CALL;
-
- bool needIndirectCall = true;
- if (!isSVR4ABI || !isPPC64)
- if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
- // If this is an absolute destination address, use the munged value.
- Callee = SDValue(Dest, 0);
- needIndirectCall = false;
- }
-
- // PC-relative references to external symbols should go through $stub, unless
- // we're building with the leopard linker or later, which automatically
- // synthesizes these stubs.
- const TargetMachine &TM = DAG.getTarget();
- const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
- const GlobalValue *GV = nullptr;
- if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
- GV = G->getGlobal();
- bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
- // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
- // a static relocation model causes some versions of GNU LD (2.17.50, at
- // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
- // built with secure-PLT.
- bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64 &&
- Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
-
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
- // every direct call is) turn it into a TargetGlobalAddress /
- // TargetExternalSymbol node so that legalize doesn't hack it.
- if (isFunctionGlobalAddress(Callee)) {
- GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
-
- // A call to a TLS address is actually an indirect call to a
- // thread-specific pointer.
- unsigned OpFlags = 0;
- if (UsePlt)
- OpFlags = PPCII::MO_PLT;
-
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- Callee.getValueType(), 0, OpFlags);
- needIndirectCall = false;
- }
-
- if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- unsigned char OpFlags = 0;
-
- if (UsePlt)
- OpFlags = PPCII::MO_PLT;
-
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
- OpFlags);
- needIndirectCall = false;
- }
-
- if (isPatchPoint) {
- // We'll form an invalid direct call when lowering a patchpoint; the full
- // sequence for an indirect call is complicated, and many of the
- // instructions introduced might have side effects (and, thus, can't be
- // removed later). The call itself will be removed as soon as the
- // argument/return lowering is complete, so the fact that it has the wrong
- // kind of operands should not really matter.
- needIndirectCall = false;
- }
-
- if (needIndirectCall) {
- // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
- // to do the call, we can't use PPCISD::CALL.
- SDValue MTCTROps[] = {Chain, Callee, InFlag};
-
- if (is64BitELFv1ABI) {
- // Function pointers in the 64-bit SVR4 ABI do not point to the function
- // entry point, but to the function descriptor (the function entry point
- // address is part of the function descriptor though).
- // The function descriptor is a three doubleword structure with the
- // following fields: function entry point, TOC base address and
- // environment pointer.
- // Thus for a call through a function pointer, the following actions need
- // to be performed:
- // 1. Save the TOC of the caller in the TOC save area of its stack
- // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
- // 2. Load the address of the function entry point from the function
- // descriptor.
- // 3. Load the TOC of the callee from the function descriptor into r2.
- // 4. Load the environment pointer from the function descriptor into
- // r11.
- // 5. Branch to the function entry point address.
- // 6. On return of the callee, the TOC of the caller needs to be
- // restored (this is done in FinishCall()).
- //
- // The loads are scheduled at the beginning of the call sequence, and the
- // register copies are flagged together to ensure that no other
- // operations can be scheduled in between. E.g. without flagging the
- // copies together, a TOC access in the caller could be scheduled between
- // the assignment of the callee TOC and the branch to the callee, which
- // results in the TOC access going through the TOC of the callee instead
- // of going through the TOC of the caller, which leads to incorrect code.
-
- // Load the address of the function entry point from the function
- // descriptor.
- SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
- if (LDChain.getValueType() == MVT::Glue)
- LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
-
- auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
- ? (MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant)
- : MachineMemOperand::MONone;
-
- MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
- SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
- /* Alignment = */ 8, MMOFlags);
-
- // Load environment pointer into r11.
- SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
- SDValue LoadEnvPtr =
- DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
- /* Alignment = */ 8, MMOFlags);
-
- SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
- SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
- SDValue TOCPtr =
- DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
- /* Alignment = */ 8, MMOFlags);
-
- setUsesTOCBasePtr(DAG);
- SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
- InFlag);
- Chain = TOCVal.getValue(0);
- InFlag = TOCVal.getValue(1);
-
- // If the function call has an explicit 'nest' parameter, it takes the
- // place of the environment pointer.
- if (!hasNest) {
- SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
- InFlag);
-
- Chain = EnvVal.getValue(0);
- InFlag = EnvVal.getValue(1);
- }
-
- MTCTROps[0] = Chain;
- MTCTROps[1] = LoadFuncPtr;
- MTCTROps[2] = InFlag;
- }
-
- Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
- makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
- InFlag = Chain.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Other);
- NodeTys.push_back(MVT::Glue);
- Ops.push_back(Chain);
- CallOpc = PPCISD::BCTRL;
- Callee.setNode(nullptr);
- // Add use of X11 (holding environment pointer)
- if (is64BitELFv1ABI && !hasNest)
- Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
- // Add CTR register as callee so a bctr can be emitted later.
- if (isTailCall)
- Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
- }
-
- // If this is a direct call, pass the chain and the callee.
- if (Callee.getNode()) {
- Ops.push_back(Chain);
- Ops.push_back(Callee);
- }
- // If this is a tail call add stack pointer delta.
- if (isTailCall)
- Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
- // live into the call.
- // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
- if ((isSVR4ABI && isPPC64) || isAIXABI) {
- setUsesTOCBasePtr(DAG);
-
- // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
- // no way to mark dependencies as implicit here.
- // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
- if (!isPatchPoint)
- Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
- : PPC::R2, PtrVT));
- }
-
- return CallOpc;
-}
-
SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -5210,30 +5058,357 @@ SDValue PPCTargetLowering::LowerCallResult(
return Chain;
}
-SDValue PPCTargetLowering::FinishCall(
- CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
- bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
- SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
- SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
- unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
- std::vector<EVT> NodeTys;
- SmallVector<SDValue, 8> Ops;
- unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
- SPDiff, isTailCall, isPatchPoint, hasNest,
- RegsToPass, Ops, NodeTys, CS, Subtarget);
+static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget, bool isPatchPoint) {
+ // PatchPoint calls are not indirect.
+ if (isPatchPoint)
+ return false;
+
+ if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
+ return false;
+
+ // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
+ // because the immediate function pointer points to a descriptor instead of
+ // a function entry point. The ELFv2 ABI cannot use a BLA because the function
+ // pointer immediate points to the global entry point, while the BLA would
+ // need to jump to the local entry point (see rL211174).
+ if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
+ isBLACompatibleAddress(Callee, DAG))
+ return false;
+
+ return true;
+}
+
+static unsigned getCallOpcode(bool isIndirectCall, bool isPatchPoint,
+ bool isTailCall, const Function &Caller,
+ const SDValue &Callee,
+ const PPCSubtarget &Subtarget,
+ const TargetMachine &TM) {
+ if (isTailCall)
+ return PPCISD::TC_RETURN;
+
+ // This is a call through a function pointer.
+ if (isIndirectCall) {
+ // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
+ // indirect calls. The save of the caller's TOC pointer to the stack will be
+ // inserted into the DAG as part of call lowering. The restore of the TOC
+ // pointer is modeled by using a pseudo instruction for the call opcode that
+ // represents the two-instruction sequence of an indirect branch and link,
+ // immediately followed by a load of the TOC pointer from the stack save
+ // slot into gpr2.
+ if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+ return PPCISD::BCTRL_LOAD_TOC;
+
+ // An indirect call that does not need a TOC restore.
+ return PPCISD::BCTRL;
+ }
+
+ // The ABIs that maintain a TOC pointer across calls need to have a nop
+ // immediately following the call instruction if the caller and callee may
+ // have different TOC bases. At link time, if the linker determines the calls
+ // may not share a TOC base, the call is redirected to a trampoline inserted
+ // by the linker. The trampoline will (among other things) save the caller's
+ // TOC pointer at an ABI-designated offset in the linkage area and the linker
+ // will rewrite the nop to be a load of the TOC pointer from the linkage area
+ // into gpr2.
+ if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+ return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
+ : PPCISD::CALL_NOP;
+
+ return PPCISD::CALL;
+}
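A hedged illustration of the instruction sequences these opcodes ultimately stand for, shown for 64-bit ELFv2 where the TOC save slot is at 24(r1) (ELFv1 and AIX place it at a different linkage-area offset):

    //   bl callee              // PPCISD::CALL: caller and callee share a TOC
    //
    //   bl callee              // PPCISD::CALL_NOP: TOC bases may differ; the
    //   nop                    // linker may rewrite the nop to `ld r2, 24(r1)`
    //
    //   mtctr r12              // PPCISD::BCTRL_LOAD_TOC: indirect call with
    //   bctrl                  // a TOC restore folded into the pseudo...
    //   ld r2, 24(r1)          // ...reloading the caller's TOC afterwards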
+
+static bool isValidAIXExternalSymSDNode(StringRef SymName) {
+ return StringSwitch<bool>(SymName)
+ .Cases("__divdi3", "__fixunsdfdi", "__floatundidf", "__floatundisf",
+ "__moddi3", "__udivdi3", "__umoddi3", true)
+ .Cases("ceil", "floor", "memcpy", "memmove", "memset", "round", true)
+ .Default(false);
+}
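These names are runtime helpers and libm/string entry points that the compiler itself may introduce during lowering, so they can appear as ExternalSymbolSDNodes with no Function in the module. A hedged example of how one arises:

    // On 32-bit PowerPC there is no 64-bit divide instruction, so this
    // division is lowered to a call to the runtime helper __divdi3,
    // reaching the allowlist above as an ExternalSymbolSDNode.
    long long div64(long long a, long long b) {
      return a / b;
    }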
+
+static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
+ const SDLoc &dl, const PPCSubtarget &Subtarget) {
+ if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+ return SDValue(Dest, 0);
+
+ // Returns true if the callee is local, and false otherwise.
+ auto isLocalCallee = [&]() {
+ const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
+ const GlobalValue *GV = G ? G->getGlobal() : nullptr;
+
+ return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
+ !dyn_cast_or_null<GlobalIFunc>(GV);
+ };
+
+ // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
+ // a static relocation model causes some versions of GNU LD (2.17.50, at
+ // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
+ // built with secure-PLT.
+ bool UsePlt =
+ Subtarget.is32BitELFABI() && !isLocalCallee() &&
+ Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
+
+ // On AIX, direct function calls reference the symbol for the function's
+ // entry point, which is named by prepending a "." before the function's
+ // C-linkage name.
+ const auto getAIXFuncEntryPointSymbolSDNode =
+ [&](StringRef FuncName, bool IsDeclaration,
+ const XCOFF::StorageClass &SC) {
+ auto &Context = DAG.getMachineFunction().getMMI().getContext();
+
+ MCSymbolXCOFF *S = cast<MCSymbolXCOFF>(
+ Context.getOrCreateSymbol(Twine(".") + Twine(FuncName)));
+
+ if (IsDeclaration && !S->hasContainingCsect()) {
+ // On AIX, an undefined symbol needs to be associated with an
+ // MCSectionXCOFF to get the correct storage mapping class.
+ // In this case, XCOFF::XMC_PR.
+ MCSectionXCOFF *Sec = Context.getXCOFFSection(
+ S->getName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC,
+ SectionKind::getMetadata());
+ S->setContainingCsect(Sec);
+ }
+
+ MVT PtrVT =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ return DAG.getMCSymbol(S, PtrVT);
+ };
+
+ if (isFunctionGlobalAddress(Callee)) {
+ const GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+ const GlobalValue *GV = G->getGlobal();
+
+ if (!Subtarget.isAIXABI())
+ return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
+ UsePlt ? PPCII::MO_PLT : 0);
+
+ assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
+ const GlobalObject *GO = cast<GlobalObject>(GV);
+ const XCOFF::StorageClass SC =
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
+ return getAIXFuncEntryPointSymbolSDNode(GO->getName(), GO->isDeclaration(),
+ SC);
+ }
+
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *SymName = S->getSymbol();
+ if (!Subtarget.isAIXABI())
+ return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
+ UsePlt ? PPCII::MO_PLT : 0);
+
+ // If there exists a user-declared function whose name is the same as the
+ // ExternalSymbol's, then we pick up the user-declared version.
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
+ if (const Function *F =
+ dyn_cast_or_null<Function>(Mod->getNamedValue(SymName))) {
+ const XCOFF::StorageClass SC =
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F);
+ return getAIXFuncEntryPointSymbolSDNode(F->getName(), F->isDeclaration(),
+ SC);
+ }
+
+ // TODO: Remove this when the support for ExternalSymbolSDNode is complete.
+ if (isValidAIXExternalSymSDNode(SymName)) {
+ return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT);
+ }
+
+ report_fatal_error("Unexpected ExternalSymbolSDNode: " + Twine(SymName));
+ }
+
+ // No transformation needed.
+ assert(Callee.getNode() && "What no callee?");
+ return Callee;
+}
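A sketch of the AIX naming convention this routine implements: every function is represented by two symbols, the descriptor csect named after the function and the entry point carrying a "." prefix, and a direct call must reference the latter.

    //   extern "C" int foo(int);               // C-linkage name: foo
    //   int call_it(int x) { return foo(x); }
    //
    //   // AIX direct call emitted for call_it (illustrative):
    //   //   bl .foo        // entry point symbol, not the descriptor `foo`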
+
+static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
+ assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
+ "Expected a CALLSEQ_STARTSDNode.");
+
+ // The last operand is the chain, except when the node has glue. If the node
+ // has glue, then the last operand is the glue, and the chain is the second
+ // last operand.
+ SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
+ if (LastValue.getValueType() != MVT::Glue)
+ return LastValue;
+
+ return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
+}
+
+// Creates the node that moves a function's address into the count register
+// to prepare for an indirect call instruction.
+static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
+ SDValue &Glue, SDValue &Chain,
+ const SDLoc &dl) {
+ SDValue MTCTROps[] = {Chain, Callee, Glue};
+ EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
+ makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
+ // The glue is the second value produced.
+ Glue = Chain.getValue(1);
+}
+
+static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
+ SDValue &Glue, SDValue &Chain,
+ SDValue CallSeqStart,
+ ImmutableCallSite CS, const SDLoc &dl,
+ bool hasNest,
+ const PPCSubtarget &Subtarget) {
+ // Function pointers in the 64-bit SVR4 ABI do not point to the function
+ // entry point, but to the function descriptor (the function entry point
+ // address is part of the function descriptor though).
+ // The function descriptor is a three doubleword structure with the
+ // following fields: function entry point, TOC base address and
+ // environment pointer.
+ // Thus for a call through a function pointer, the following actions need
+ // to be performed:
+ // 1. Save the TOC of the caller in the TOC save area of its stack
+ // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
+ // 2. Load the address of the function entry point from the function
+ // descriptor.
+ // 3. Load the TOC of the callee from the function descriptor into r2.
+ // 4. Load the environment pointer from the function descriptor into
+ // r11.
+ // 5. Branch to the function entry point address.
+ // 6. On return of the callee, the TOC of the caller needs to be
+ // restored (this is done in FinishCall()).
+ //
+ // The loads are scheduled at the beginning of the call sequence, and the
+ // register copies are flagged together to ensure that no other
+ // operations can be scheduled in between. E.g. without flagging the
+ // copies together, a TOC access in the caller could be scheduled between
+ // the assignment of the callee TOC and the branch to the callee, which leads
+ // to incorrect code.
+
+ // Start by loading the function address from the descriptor.
+ SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
+ auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
+ ? (MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant)
+ : MachineMemOperand::MONone;
+
+ MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
+
+ // Registers used in building the DAG.
+ const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
+ const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
+
+ // Offsets of descriptor members.
+ const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
+ const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
+
+ const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
+ const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
+
+ // One load for the function's entry point address.
+ SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
+ Alignment, MMOFlags);
+
+ // One for loading the TOC anchor for the module that contains the called
+ // function.
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
+ SDValue TOCPtr =
+ DAG.getLoad(RegVT, dl, LDChain, AddTOC,
+ MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
+
+ // One for loading the environment pointer.
+ SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
+ SDValue LoadEnvPtr =
+ DAG.getLoad(RegVT, dl, LDChain, AddPtr,
+ MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
+
+ // Then copy the newly loaded TOC anchor to the TOC pointer.
+ SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
+ Chain = TOCVal.getValue(0);
+ Glue = TOCVal.getValue(1);
+
+ // If the function call has an explicit 'nest' parameter, it takes the
+ // place of the environment pointer.
+ assert((!hasNest || !Subtarget.isAIXABI()) &&
+ "Nest parameter is not supported on AIX.");
+ if (!hasNest) {
+ SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
+ Chain = EnvVal.getValue(0);
+ Glue = EnvVal.getValue(1);
+ }
+
+ // The rest of the indirect call sequence is the same as the non-descriptor
+ // DAG.
+ prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
+}
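
As a standalone sketch of the descriptor layout the three loads above assume (64-bit layout with 8-byte slots; the 32-bit variant uses 4-byte slots, and the real offsets come from the PPCSubtarget accessors named in the code):

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Hypothetical model of a 64-bit function descriptor: three pointer-sized
// fields, matching the loads at offsets 0, TOCAnchorOffset and EnvPtrOffset.
struct FunctionDescriptor {
  uint64_t EntryPoint; // moved to CTR and branched to
  uint64_t TOCBase;    // copied into the TOC register (r2)
  uint64_t EnvPtr;     // copied into r11, unless a 'nest' parameter takes it
};

int main() {
  printf("entry=%zu toc=%zu env=%zu\n",
         offsetof(FunctionDescriptor, EntryPoint),
         offsetof(FunctionDescriptor, TOCBase),
         offsetof(FunctionDescriptor, EnvPtr)); // entry=0 toc=8 env=16
}
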
+
+static void
+buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
+ const SDLoc &dl, bool isTailCall, bool isVarArg,
+ bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
+ SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
+ const PPCSubtarget &Subtarget, bool isIndirect) {
+ const bool IsPPC64 = Subtarget.isPPC64();
+ // MVT for a general purpose register.
+ const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+
+ // First operand is always the chain.
+ Ops.push_back(Chain);
+
+ // If it's a direct call pass the callee as the second operand.
+ if (!isIndirect)
+ Ops.push_back(Callee);
+ else {
+ assert(!isPatchPoint && "Patch point calls are not indirect.");
+
+ // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
+ // on the stack (this would have been done in `LowerCall_64SVR4` or
+ // `LowerCall_AIX`). The call instruction is a pseudo instruction that
+ // represents both the indirect branch and a load that restores the TOC
+ // pointer from the linkage area. The operand for the TOC restore is an add
+ // of the TOC save offset to the stack pointer. This must be the second
+ // operand: after the chain input but before any other variadic arguments.
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
+ const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
+
+ SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
+ Ops.push_back(AddTOC);
+ }
+
+ // Add the register used for the environment pointer.
+ if (Subtarget.usesFunctionDescriptors() && !hasNest)
+ Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
+ RegVT));
+
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
+ }
+
+ // If this is a tail call add stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
+ // no way to mark dependencies as implicit here.
+ // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
+ if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint)
+ Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
- if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
+ if (isVarArg && Subtarget.is32BitELFABI())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
- // When performing tail call optimization the callee pops its arguments off
- // the stack. Account for this here so these bytes can be pushed back on in
- // PPCFrameLowering::eliminateCallFramePseudoInstr.
- int BytesCalleePops =
- (CallConv == CallingConv::Fast &&
- getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
-
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask =
@@ -5241,8 +5416,40 @@ SDValue PPCTargetLowering::FinishCall(
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ // If the glue is valid, it is the last operand.
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+}
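
For reference, a sketch of the operand order built above for an indirect, non-tail call on a TOC-based ABI; the strings are illustrative labels only, not LLVM API:

#include <cstdio>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Ops = {
      "chain",                     // always first
      "TOC-restore address",       // SP + TOC save offset (64-bit ELF and AIX)
      "environment pointer (r11)", // descriptor ABIs, unless 'nest' is used
      "argument registers...",     // marked live into the call
      "TOC pointer (r2/x2)",       // omitted for patch points
      "CR1EQ",                     // 32-bit ELF vararg calls only
      "register mask",             // call-preserved registers
      "glue",                      // last, when present
  };
  for (const std::string &Op : Ops)
    printf("%s\n", Op.c_str());
}
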
+
+SDValue PPCTargetLowering::FinishCall(
+ CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
+ bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
+ SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
+ unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
+
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI())
+ setUsesTOCBasePtr(DAG);
+
+ const bool isIndirect = isIndirectCall(Callee, DAG, Subtarget, isPatchPoint);
+ unsigned CallOpc = getCallOpcode(isIndirect, isPatchPoint, isTailCall,
+ DAG.getMachineFunction().getFunction(),
+ Callee, Subtarget, DAG.getTarget());
+
+ if (!isIndirect)
+ Callee = transformCallee(Callee, DAG, dl, Subtarget);
+ else if (Subtarget.usesFunctionDescriptors())
+ prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CS,
+ dl, hasNest, Subtarget);
+ else
+ prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
+
+ // Build the operand list for the call instruction.
+ SmallVector<SDValue, 8> Ops;
+ buildCallOperands(Ops, CallConv, dl, isTailCall, isVarArg, isPatchPoint,
+ hasNest, DAG, RegsToPass, Glue, Chain, Callee, SPDiff,
+ Subtarget, isIndirect);
// Emit tail call.
if (isTailCall) {
@@ -5251,81 +5458,32 @@ SDValue PPCTargetLowering::FinishCall(
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress ||
isa<ConstantSDNode>(Callee)) &&
- "Expecting an global address, external symbol, absolute value or register");
-
+ "Expecting a global address, external symbol, absolute value or "
+ "register");
+ assert(CallOpc == PPCISD::TC_RETURN &&
+ "Unexpected call opcode for a tail call.");
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
- return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
+ return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
}
- // Add a NOP immediately after the branch instruction when using the 64-bit
- // SVR4 or the AIX ABI.
- // At link time, if caller and callee are in a different module and
- // thus have a different TOC, the call will be replaced with a call to a stub
- // function which saves the current TOC, loads the TOC of the callee and
- // branches to the callee. The NOP will be replaced with a load instruction
- // which restores the TOC of the caller from the TOC save slot of the current
- // stack frame. If caller and callee belong to the same module (and have the
- // same TOC), the NOP will remain unchanged, or become some other NOP.
-
- MachineFunction &MF = DAG.getMachineFunction();
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- if (!isTailCall && !isPatchPoint &&
- ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
- Subtarget.isAIXABI())) {
- if (CallOpc == PPCISD::BCTRL) {
- if (Subtarget.isAIXABI())
- report_fatal_error("Indirect call on AIX is not implemented.");
-
- // This is a call through a function pointer.
- // Restore the caller TOC from the save area into R2.
- // See PrepareCall() for more information about calls through function
- // pointers in the 64-bit SVR4 ABI.
- // We are using a target-specific load with r2 hard coded, because the
- // result of a target-independent load would never go directly into r2,
- // since r2 is a reserved register (which prevents the register allocator
- // from allocating it), resulting in an additional register being
- // allocated and an unnecessary move instruction being generated.
- CallOpc = PPCISD::BCTRL_LOAD_TOC;
-
- SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
- unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
- SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
- SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
-
- // The address needs to go after the chain input but before the flag (or
- // any other variadic arguments).
- Ops.insert(std::next(Ops.begin()), AddTOC);
- } else if (CallOpc == PPCISD::CALL &&
- !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
- // Otherwise insert NOP for non-local calls.
- CallOpc = PPCISD::CALL_NOP;
- }
- }
-
- if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
- // On AIX, direct function calls reference the symbol for the function's
- // entry point, which is named by inserting a "." before the function's
- // C-linkage name.
- GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
- auto &Context = DAG.getMachineFunction().getMMI().getContext();
- MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
- Twine(G->getGlobal()->getName()));
- Callee = DAG.getMCSymbol(S, PtrVT);
- // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
- Ops[1] = Callee;
- }
+ std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
+ Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
+ Glue = Chain.getValue(1);
- Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ // When performing tail call optimization the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
+ int BytesCalleePops = (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt)
+ ? NumBytes
+ : 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
DAG.getIntPtrConstant(BytesCalleePops, dl, true),
- InFlag, dl);
- if (!Ins.empty())
- InFlag = Chain.getValue(1);
+ Glue, dl);
+ Glue = Chain.getValue(1);
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
- Ins, dl, DAG, InVals);
+ return LowerCallResult(Chain, Glue, CallConv, isVarArg, Ins, dl, DAG, InVals);
}
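
The callee-pops accounting above reduces to one rule; a minimal sketch:

#include <cstdio>

// Only fastcc calls compiled with guaranteed tail-call optimization make the
// callee pop its own argument area; every other call pops nothing.
static int bytesCalleePops(bool IsFastCC, bool GuaranteedTailCallOpt,
                           unsigned NumBytes) {
  return (IsFastCC && GuaranteedTailCallOpt) ? static_cast<int>(NumBytes) : 0;
}

int main() {
  printf("%d\n", bytesCalleePops(true, true, 112));  // 112
  printf("%d\n", bytesCalleePops(true, false, 112)); // 0
}
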
SDValue
@@ -6278,8 +6436,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Check if this is an indirect call (MTCTR/BCTRL).
- // See PrepareCall() for more information about calls through function
- // pointers in the 64-bit SVR4 ABI.
+ // See prepareDescriptorIndirectCall and buildCallOperands for more
+ // information about calls through function pointers in the 64-bit SVR4 ABI.
if (!isTailCall && !isPatchPoint &&
!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) {
@@ -6700,6 +6858,205 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
NumBytes, Ins, InVals, CS);
}
+static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State) {
+
+ if (ValVT == MVT::f128)
+ report_fatal_error("f128 is unimplemented on AIX.");
+
+ if (ArgFlags.isByVal())
+ report_fatal_error("Passing structure by value is unimplemented.");
+
+ if (ArgFlags.isNest())
+ report_fatal_error("Nest arguments are unimplemented.");
+
+ if (ValVT.isVector() || LocVT.isVector())
+ report_fatal_error("Vector arguments are unimplemented on AIX.");
+
+ const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
+ State.getMachineFunction().getSubtarget());
+ const bool IsPPC64 = Subtarget.isPPC64();
+ const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+
+ static const MCPhysReg GPR_32[] = {// 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10};
+ static const MCPhysReg GPR_64[] = {// 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+
+ // Arguments always reserve space in the parameter save area.
+ switch (ValVT.SimpleTy) {
+ default:
+ report_fatal_error("Unhandled value type for argument.");
+ case MVT::i64:
+ // i64 arguments should have been split to i32 for PPC32.
+ assert(IsPPC64 && "PPC32 should have split i64 values.");
+ LLVM_FALLTHROUGH;
+ case MVT::i1:
+ case MVT::i32:
+ State.AllocateStack(PtrByteSize, PtrByteSize);
+ if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+ MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+ // Promote integers if needed.
+ if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
+ LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
+ : CCValAssign::LocInfo::ZExt;
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+ } else
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ return false;
+
+ case MVT::f32:
+ case MVT::f64: {
+ // Parameter save area (PSA) is reserved even if the float passes in fpr.
+ const unsigned StoreSize = LocVT.getStoreSize();
+ // Floats are always 4-byte aligned in the PSA on AIX.
+ // This includes f64 in 64-bit mode for ABI compatibility.
+ State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4);
+ if (unsigned Reg = State.AllocateReg(FPR))
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+
+ // AIX requires that GPRs are reserved for float arguments.
+ // Successfully reserved GPRs are only initialized for vararg calls.
+ MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+ for (unsigned I = 0; I < StoreSize; I += PtrByteSize) {
+ if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+ if (State.isVarArg()) {
+ // Custom handling is required for:
+ // f64 in PPC32 needs to be split into 2 GPRs.
+ // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
+ State.addLoc(
+ CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+ }
+ } else if (State.isVarArg()) {
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ }
+ }
+
+ return false;
+ }
+ }
+ return true;
+}
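
A compact model of the integer-argument bookkeeping above, assuming PPC64 AIX: every argument shadows parameter save area (PSA) space even when it is assigned a register, and the stack case is the part the patch still reports as unimplemented:

#include <cstdio>

int main() {
  const unsigned PtrByteSize = 8; // PPC64
  const char *GPRs[] = {"r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"};
  unsigned NextGPR = 0, PSAOffset = 0;

  for (int Arg = 0; Arg < 10; ++Arg) {
    const unsigned MyOffset = PSAOffset;
    PSAOffset += PtrByteSize; // PSA space is reserved unconditionally
    if (NextGPR < 8)
      printf("arg %d -> %s (shadows PSA offset %u)\n", Arg, GPRs[NextGPR++],
             MyOffset);
    else
      printf("arg %d -> stack at PSA offset %u\n", Arg, MyOffset);
  }
}
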
+
+static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
+ bool IsPPC64) {
+ assert((IsPPC64 || SVT != MVT::i64) &&
+ "i64 should have been split for 32-bit codegen.");
+
+ switch (SVT) {
+ default:
+ report_fatal_error("Unexpected value type for formal argument");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ case MVT::f32:
+ return &PPC::F4RCRegClass;
+ case MVT::f64:
+ return &PPC::F8RCRegClass;
+ }
+}
+
+static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
+ SelectionDAG &DAG, SDValue ArgValue,
+ MVT LocVT, const SDLoc &dl) {
+ assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
+ assert(ValVT.getSizeInBits() < LocVT.getSizeInBits());
+
+ if (Flags.isSExt())
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
+ DAG.getValueType(ValVT));
+ else if (Flags.isZExt())
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
+ DAG.getValueType(ValVT));
+
+ return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
+}
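
In scalar terms: the value arrives widened in a full GPR, the extension attribute says what the high bits hold, and the truncate recovers the original; a tiny sketch:

#include <cstdint>
#include <cstdio>

int main() {
  int32_t Loc = -5;                      // GPR contents: sign-extended i8 -5
  int8_t Val = static_cast<int8_t>(Loc); // the TRUNCATE back to ValVT
  printf("%d\n", Val);                   // -5; no information is lost
}
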
+
+SDValue PPCTargetLowering::LowerFormalArguments_AIX(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+
+ assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
+ CallConv == CallingConv::Fast) &&
+ "Unexpected calling convention!");
+
+ if (isVarArg)
+ report_fatal_error("This call type is unimplemented on AIX.");
+
+ if (getTargetMachine().Options.GuaranteedTailCallOpt)
+ report_fatal_error("Tail call support is unimplemented on AIX.");
+
+ if (useSoftFloat())
+ report_fatal_error("Soft float support is unimplemented on AIX.");
+
+ const PPCSubtarget &Subtarget =
+ static_cast<const PPCSubtarget &>(DAG.getSubtarget());
+ if (Subtarget.hasQPX())
+ report_fatal_error("QPX support is not supported on AIX.");
+
+ const bool IsPPC64 = Subtarget.isPPC64();
+ const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ MachineFunction &MF = DAG.getMachineFunction();
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ // On AIX, a minimum of 8 words is reserved for the parameter save area.
+ const unsigned MinParameterSaveArea = 8 * PtrByteSize;
+ CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue ArgValue;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (VA.isRegLoc()) {
+ EVT ValVT = VA.getValVT();
+ MVT LocVT = VA.getLocVT();
+ MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
+ unsigned VReg =
+ MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
+ ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+ if (ValVT.isScalarInteger() &&
+ (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
+ ArgValue =
+ truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
+ }
+ InVals.push_back(ArgValue);
+ } else {
+ report_fatal_error("Handling of formal arguments on the stack is "
+ "unimplemented!");
+ }
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = CCInfo.getNextStackOffset();
+
+ // Set the size that is at least reserved in the caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so
+ // that taking the difference between two stack areas will result in an
+ // aligned stack.
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setMinReservedArea(MinReservedArea);
+
+ return Chain;
+}
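
Plugging in the sizes stated above gives the smallest possible reservation; a sketch:

#include <cstdio>

int main() {
  for (bool IsPPC64 : {false, true}) {
    const unsigned PtrByteSize = IsPPC64 ? 8u : 4u;
    const unsigned LinkageSize = 6 * PtrByteSize; // [SP][CR][LR][2 x rsvd][TOC]
    const unsigned MinParameterSaveArea = 8 * PtrByteSize; // 8 argument words
    printf("AIX PPC%s: %u + %u = %u bytes\n", IsPPC64 ? "64" : "32",
           LinkageSize, MinParameterSaveArea,
           LinkageSize + MinParameterSaveArea);
  } // prints 24 + 32 = 56 and 48 + 64 = 112
}
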
SDValue PPCTargetLowering::LowerCall_AIX(
SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
@@ -6710,22 +7067,33 @@ SDValue PPCTargetLowering::LowerCall_AIX(
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite CS) const {
- assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
- "Unimplemented calling convention!");
- if (isVarArg || isPatchPoint)
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Cold ||
+ CallConv == CallingConv::Fast) && "Unexpected calling convention!");
+
+ if (isPatchPoint)
report_fatal_error("This call type is unimplemented on AIX.");
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- bool isPPC64 = PtrVT == MVT::i64;
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned NumOps = Outs.size();
+ const PPCSubtarget& Subtarget =
+ static_cast<const PPCSubtarget&>(DAG.getSubtarget());
+ if (Subtarget.hasQPX())
+ report_fatal_error("QPX is not supported on AIX.");
+ if (Subtarget.hasAltivec())
+ report_fatal_error("Altivec support is unimplemented on AIX.");
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
- // Count how many bytes are to be pushed on the stack, including the linkage
- // area, parameter list area.
- // On XCOFF, we start with 24/48, which is reserved space for
- // [SP][CR][LR][2 x reserved][TOC].
- unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ // Reserve space for the linkage save area (LSA) on the stack.
+ // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
+ // [SP][CR][LR][2 x reserved][TOC].
+ // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
+ const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ const bool IsPPC64 = Subtarget.isPPC64();
+ const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+ CCInfo.AllocateStack(LinkageSize, PtrByteSize);
+ CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
// The prolog code of the callee may store up to 8 GPR argument registers to
// the stack, allowing va_start to index over them in memory if the callee
@@ -6733,98 +7101,101 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
- unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
+ const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
+ const unsigned NumBytes = LinkageSize + MinParameterSaveAreaSize;
// Adjust the stack pointer for the new arguments...
- // These operations are automatically eliminated by the prolog/epilog
- // inserter pass.
+ // These operations are automatically eliminated by the prolog/epilog pass.
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
- static const MCPhysReg GPR_32[] = { // 32-bit registers.
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10
- };
- static const MCPhysReg GPR_64[] = { // 64-bit registers.
- PPC::X3, PPC::X4, PPC::X5, PPC::X6,
- PPC::X7, PPC::X8, PPC::X9, PPC::X10
- };
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
- const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
- : array_lengthof(GPR_32);
- const unsigned NumFPRs = array_lengthof(FPR);
- assert(NumFPRs == 13 && "Only FPR 1-13 could be used for parameter passing "
- "on AIX");
+ for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
+ CCValAssign &VA = ArgLocs[I++];
- const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
- unsigned GPR_idx = 0, FPR_idx = 0;
+ if (VA.isMemLoc())
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ if (!VA.isRegLoc())
+ report_fatal_error(
+ "Unexpected non-register location for function call argument.");
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
-
- if (isTailCall)
- report_fatal_error("Handling of tail call is unimplemented!");
- int SPDiff = 0;
+ SDValue Arg = OutVals[VA.getValNo()];
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (!VA.needsCustom()) {
+ switch (VA.getLocInfo()) {
+ default:
+ report_fatal_error("Unexpected argument extension type.");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- // Promote integers if needed.
- if (Arg.getValueType() == MVT::i1 ||
- (isPPC64 && Arg.getValueType() == MVT::i32)) {
- unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
+ continue;
}
- // Note: "by value" is code for passing a structure by value, not
- // basic types.
- if (Flags.isByVal())
- report_fatal_error("Passing structure by value is unimplemented!");
+ // Custom handling is used for GPR initializations for vararg float
+ // arguments.
+ assert(isVarArg && VA.getValVT().isFloatingPoint() &&
+ VA.getLocVT().isInteger() &&
+ "Unexpected custom register handling for calling convention.");
- switch (Arg.getSimpleValueType().SimpleTy) {
- default: llvm_unreachable("Unexpected ValueType for argument!");
- case MVT::i1:
- case MVT::i32:
- case MVT::i64:
- if (GPR_idx != NumGPRs)
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
- else
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
- break;
- case MVT::f32:
- case MVT::f64:
- if (FPR_idx != NumFPRs) {
- RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+ SDValue ArgAsInt =
+ DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);
- // If we have any FPRs remaining, we may also have GPRs remaining.
- // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available
- // GPRs.
- if (GPR_idx != NumGPRs)
- ++GPR_idx;
- if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64)
- ++GPR_idx;
- } else
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
- break;
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- case MVT::v2f64:
- case MVT::v2i64:
- case MVT::v1i128:
- case MVT::f128:
- case MVT::v4f64:
- case MVT::v4i1:
- report_fatal_error("Handling of this parameter type is unimplemented!");
- }
- }
+ if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize())
+ // f32 in 32-bit GPR
+ // f64 in 64-bit GPR
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
+ else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits())
+ // f32 in 64-bit GPR.
+ RegsToPass.push_back(std::make_pair(
+ VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT())));
+ else {
+ // f64 in two 32-bit GPRs
+ // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
+ assert(Arg.getValueType() == MVT::f64 && isVarArg && !IsPPC64 &&
+ "Unexpected custom register for argument!");
+ CCValAssign &GPR1 = VA;
+ SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
+ DAG.getConstant(32, dl, MVT::i8));
+ RegsToPass.push_back(std::make_pair(
+ GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
+ assert(I != E && "A second custom GPR is expected!");
+ CCValAssign &GPR2 = ArgLocs[I++];
+ assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() &&
+ GPR2.needsCustom() && "A second custom GPR is expected!");
+ RegsToPass.push_back(std::make_pair(
+ GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
+ }
+ }
+
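The vararg f64 split above is plain integer bit surgery; a host-side sketch of the same computation:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double Arg = 3.14; // a vararg f64 on 32-bit AIX
  uint64_t Bits;
  memcpy(&Bits, &Arg, sizeof(Bits));                // the bitcast above
  uint32_t MSW = static_cast<uint32_t>(Bits >> 32); // first (custom) GPR
  uint32_t LSW = static_cast<uint32_t>(Bits);       // second (custom) GPR
  printf("msw=0x%08x lsw=0x%08x\n", MSW, LSW);
}
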
+ // For indirect calls, we need to save the TOC base to the stack for
+ // restoration after the call.
+ if (!isTailCall && !isPatchPoint &&
+ !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) {
+ const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
+ const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
+ const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
+ const unsigned TOCSaveOffset =
+ Subtarget.getFrameLowering()->getTOCSaveOffset();
- if (!isFunctionGlobalAddress(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee))
- report_fatal_error("Handling of indirect call is unimplemented!");
+ setUsesTOCBasePtr(DAG);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+ SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(
+ Val.getValue(1), dl, Val, AddPtr,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
+ }
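
Assuming the six-slot linkage-area layout described earlier (the authoritative value comes from getFrameLowering()->getTOCSaveOffset()), the TOC save slot the store above targets is the last slot; a sketch:

#include <cstdio>

int main() {
  for (bool IsPPC64 : {false, true}) {
    const unsigned PtrByteSize = IsPPC64 ? 8u : 4u;
    // [SP][CR][LR][2 x reserved][TOC] -> the TOC slot is index 5.
    printf("AIX PPC%s TOC save offset: %u\n", IsPPC64 ? "64" : "32",
           5 * PtrByteSize); // 20 and 40
  }
}
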
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
@@ -6834,10 +7205,11 @@ SDValue PPCTargetLowering::LowerCall_AIX(
InFlag = Chain.getValue(1);
}
+ const int SPDiff = 0;
return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
- /* unused except on PPC64 ELFv1 */ false, DAG,
- RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
- NumBytes, Ins, InVals, CS);
+ /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass,
+ InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins,
+ InVals, CS);
}
bool
@@ -7126,8 +7498,7 @@ SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
"Custom lowering only for i1 results");
SDLoc DL(Op);
- return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
- Op.getOperand(0));
+ return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
}
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
@@ -7193,17 +7564,15 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
!Op.getOperand(2).getValueType().isFloatingPoint())
return Op;
+ bool HasNoInfs = DAG.getTarget().Options.NoInfsFPMath;
+ bool HasNoNaNs = DAG.getTarget().Options.NoNaNsFPMath;
// We might be able to do better than this under some circumstances, but in
// general, fsel-based lowering of select is a finite-math-only optimization.
// For more information, see section F.3 of the 2.06 ISA specification.
- if (!DAG.getTarget().Options.NoInfsFPMath ||
- !DAG.getTarget().Options.NoNaNsFPMath)
+ // With ISA 3.0, we have xsmaxcdp/xsmincdp which are OK to emit even in the
+ // presence of infinities.
+ if (!Subtarget.hasP9Vector() && (!HasNoInfs || !HasNoNaNs))
return Op;
- // TODO: Propagate flags from the select rather than global settings.
- SDNodeFlags Flags;
- Flags.setNoInfs(true);
- Flags.setNoNaNs(true);
-
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
EVT ResVT = Op.getValueType();
@@ -7212,6 +7581,27 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
SDLoc dl(Op);
+ if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
+ switch (CC) {
+ default:
+ // Not a min/max but with finite math, we may still be able to use fsel.
+ if (HasNoInfs && HasNoNaNs)
+ break;
+ return Op;
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
+ }
+ }
+
+ // TODO: Propagate flags from the select rather than global settings.
+ SDNodeFlags Flags;
+ Flags.setNoInfs(true);
+ Flags.setNoNaNs(true);
+
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
SDValue Sel1;
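
The pattern matched above is the one where the select repeats the compare operands, which is exactly a C conditional; a sketch of why xsmaxcdp/xsmincdp need no NaN/Inf assumptions to implement it:

#include <cstdio>

// select_cc(a, b, a, b, SETGT) == (a > b ? a : b): whatever the comparison
// does with NaNs, picking a or b accordingly is exactly the semantics asked
// for, so no finite-math assumption is required.
static double cMax(double A, double B) { return A > B ? A : B; }
static double cMin(double A, double B) { return A < B ? A : B; }

int main() {
  printf("%g %g\n", cMax(1.0, 2.0), cMin(1.0, 2.0)); // 2 1
}
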
@@ -8060,8 +8450,6 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
/// SplatSize. Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, const SDLoc &dl) {
- assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
-
static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
@@ -8381,29 +8769,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
// We have XXSPLTIB for constant splats one byte wide
- if (Subtarget.hasP9Vector() && SplatSize == 1) {
- // This is a splat of 1-byte elements with some elements potentially undef.
- // Rather than trying to match undef in the SDAG patterns, ensure that all
- // elements are the same constant.
- if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
- SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
- dl, MVT::i32));
- SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
- if (Op.getValueType() != MVT::v16i8)
- return DAG.getBitcast(Op.getValueType(), NewBV);
- return NewBV;
- }
-
- // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
- // detect that constant splats like v8i16: 0xABAB are really just splats
- // of a 1-byte constant. In this case, we need to convert the node to a
- // splat of v16i8 and a bitcast.
- if (Op.getValueType() != MVT::v16i8)
- return DAG.getBitcast(Op.getValueType(),
- DAG.getConstant(SplatBits, dl, MVT::v16i8));
-
- return Op;
- }
+ // FIXME: SplatBits is an unsigned int being cast to an int when it is passed
+ // as an argument to BuildSplatI. Given SplatSize == 1 it is okay here.
+ if (Subtarget.hasP9Vector() && SplatSize == 1)
+ return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl);
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
@@ -8935,19 +9304,19 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (Subtarget.hasP9Vector()) {
if (PPC::isXXBRHShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
- SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
+ SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
} else if (PPC::isXXBRWShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
- SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
+ SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
} else if (PPC::isXXBRDShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
- SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
+ SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
} else if (PPC::isXXBRQShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
- SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
+ SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
}
}
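
The XXBR* operations are per-element byte reversals, which is why plain ISD::BSWAP on a suitably typed vector models them; a scalar sketch of one v8i16 element:

#include <cstdint>
#include <cstdio>

static uint16_t bswap16(uint16_t V) {
  return static_cast<uint16_t>((V >> 8) | (V << 8)); // per-halfword xxbrh effect
}

int main() {
  printf("0x%04x\n", bswap16(0x1234)); // 0x3412
}
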
@@ -9508,7 +9877,7 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
Op.getOperand(0));
// XXBRD
- Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
+ Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
// MFVSRD
int VectorIndex = 0;
if (Subtarget.isLittleEndian())
@@ -10850,9 +11219,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
DebugLoc dl = MI.getDebugLoc();
TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
- } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
- MI.getOpcode() == PPC::SELECT_CC_I8 ||
- MI.getOpcode() == PPC::SELECT_CC_F4 ||
+ } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
MI.getOpcode() == PPC::SELECT_CC_F8 ||
MI.getOpcode() == PPC::SELECT_CC_F16 ||
MI.getOpcode() == PPC::SELECT_CC_QFRC ||
@@ -10864,8 +11231,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::SELECT_CC_VSRC ||
MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
MI.getOpcode() == PPC::SELECT_CC_SPE ||
- MI.getOpcode() == PPC::SELECT_I4 ||
- MI.getOpcode() == PPC::SELECT_I8 ||
MI.getOpcode() == PPC::SELECT_F4 ||
MI.getOpcode() == PPC::SELECT_F8 ||
MI.getOpcode() == PPC::SELECT_F16 ||
@@ -11402,28 +11767,28 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
- } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
- MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
- MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
- MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
- unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
- MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
- ? PPC::ANDIo8
- : PPC::ANDIo;
- bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
- MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
+ } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
+ MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
+ MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
+ MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
+ unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
+ MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
+ ? PPC::ANDI8_rec
+ : PPC::ANDI_rec;
+ bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
+ MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
MachineRegisterInfo &RegInfo = F->getRegInfo();
Register Dest = RegInfo.createVirtualRegister(
- Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
+ Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
- DebugLoc dl = MI.getDebugLoc();
- BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+ DebugLoc Dl = MI.getDebugLoc();
+ BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
.addReg(MI.getOperand(1).getReg())
.addImm(1);
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+ BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
MI.getOperand(0).getReg())
- .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+ .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
} else if (MI.getOpcode() == PPC::TCHECK_RET) {
DebugLoc Dl = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -11643,7 +12008,7 @@ unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are two or more FDIVs (for embedded cores with only
// one FP pipeline) for three or more FDIVs (for generic OOO cores).
- switch (Subtarget.getDarwinDirective()) {
+ switch (Subtarget.getCPUDirective()) {
default:
return 3;
case PPC::DIR_440:
@@ -14116,7 +14481,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
- switch (Subtarget.getDarwinDirective()) {
+ switch (Subtarget.getCPUDirective()) {
default: break;
case PPC::DIR_970:
case PPC::DIR_PWR4:
@@ -14126,7 +14491,8 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
case PPC::DIR_PWR6X:
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
- case PPC::DIR_PWR9: {
+ case PPC::DIR_PWR9:
+ case PPC::DIR_PWR_FUTURE: {
if (!ML)
break;
@@ -14314,6 +14680,17 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &PPC::VSFRCRegClass);
}
+ // If we name a VSX register, we can't defer to the base class because it
+ // will not recognize the correct register (their names will be VSL{0-31}
+ // and V{0-31} so they won't match). So we match them here.
+ if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
+ int VSNum = atoi(Constraint.data() + 3);
+ assert(VSNum >= 0 && VSNum <= 63 &&
+ "Attempted to access a vsr out of range");
+ if (VSNum < 32)
+ return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
+ return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
+ }
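
The register-name arithmetic above, as a standalone sketch; the constraint string is a made-up example of the "{vsNN}" form being parsed:

#include <cstdio>
#include <cstdlib>

int main() {
  const char *Constraint = "{vs35}"; // illustrative inline-asm constraint
  int VSNum = atoi(Constraint + 3);  // skip "{vs", as the code above does
  if (VSNum < 32)
    printf("vs%d -> VSL%d\n", VSNum, VSNum);
  else
    printf("vs%d -> V%d\n", VSNum, VSNum - 32); // vs35 -> V3
}
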
std::pair<unsigned, const TargetRegisterClass *> R =
TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
@@ -14518,16 +14895,15 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
bool isPPC64 = Subtarget.isPPC64();
bool IsDarwinABI = Subtarget.isDarwinABI();
- if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
- (!isPPC64 && VT != MVT::i32))
+ bool is64Bit = isPPC64 && VT == LLT::scalar(64);
+ if (!is64Bit && VT != LLT::scalar(32))
report_fatal_error("Invalid register global variable type");
- bool is64Bit = isPPC64 && VT == MVT::i64;
Register Reg = StringSwitch<Register>(RegName)
.Case("r1", is64Bit ? PPC::X1 : PPC::R1)
.Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2)
@@ -14875,6 +15251,9 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
+ if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
+ return false;
+
if (VT.getSimpleVT().isVector()) {
if (Subtarget.hasVSX()) {
if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
@@ -14894,7 +15273,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return true;
}
-bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
@@ -15283,7 +15663,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
return SDValue();
auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
- switch (this->Subtarget.getDarwinDirective()) {
+ switch (this->Subtarget.getCPUDirective()) {
default:
// TODO: enhance the condition for subtarget before pwr8
return false;
@@ -15293,6 +15673,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
// vector 7 2 2
return true;
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR_FUTURE:
// type mul add shl
// scalar 5 2 2
// vector 7 2 2
@@ -15362,12 +15743,6 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!CI->isTailCall())
return false;
- // If tail calls are disabled for the caller then we are done.
- const Function *Caller = CI->getParent()->getParent();
- auto Attr = Caller->getFnAttribute("disable-tail-calls");
- if (Attr.getValueAsString() == "true")
- return false;
-
// If sibling calls have been disabled and tail-calls aren't guaranteed
// there is no reason to duplicate.
auto &TM = getTargetMachine();
@@ -15380,6 +15755,7 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return false;
// Make sure the callee and caller calling conventions are eligible for tco.
+ const Function *Caller = CI->getParent()->getParent();
if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
CI->getCallingConv()))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 62922ea2d4c4..e0c381827b87 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -43,460 +43,475 @@ namespace llvm {
// that come before it. For example, ADD or MUL should be placed before
// the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
// after it.
- enum NodeType : unsigned {
- // Start the numbering where the builtin ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- /// FSEL - Traditional three-operand fsel node.
- ///
- FSEL,
-
- /// FCFID - The FCFID instruction, taking an f64 operand and producing
- /// and f64 value containing the FP representation of the integer that
- /// was temporarily in the f64 operand.
- FCFID,
-
- /// Newer FCFID[US] integer-to-floating-point conversion instructions for
- /// unsigned integers and single-precision outputs.
- FCFIDU, FCFIDS, FCFIDUS,
-
- /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
- /// operand, producing an f64 value containing the integer representation
- /// of that FP value.
- FCTIDZ, FCTIWZ,
-
- /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
- /// unsigned integers with round toward zero.
- FCTIDUZ, FCTIWUZ,
-
- /// Floating-point-to-integer conversion instructions
- FP_TO_UINT_IN_VSR, FP_TO_SINT_IN_VSR,
-
- /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
- /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer.
- VEXTS,
-
- /// SExtVElems, takes an input vector of a smaller type and sign
- /// extends to an output vector of a larger type.
- SExtVElems,
-
- /// Reciprocal estimate instructions (unary FP ops).
- FRE, FRSQRTE,
-
- // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
- // three v4f32 operands and producing a v4f32 result.
- VMADDFP, VNMSUBFP,
-
- /// VPERM - The PPC VPERM Instruction.
- ///
- VPERM,
-
- /// XXSPLT - The PPC VSX splat instructions
- ///
- XXSPLT,
-
- /// VECINSERT - The PPC vector insert instruction
- ///
- VECINSERT,
-
- /// XXREVERSE - The PPC VSX reverse instruction
- ///
- XXREVERSE,
-
- /// VECSHL - The PPC vector shift left instruction
- ///
- VECSHL,
-
- /// XXPERMDI - The PPC XXPERMDI instruction
- ///
- XXPERMDI,
-
- /// The CMPB instruction (takes two operands of i32 or i64).
- CMPB,
-
- /// Hi/Lo - These represent the high and low 16-bit parts of a global
- /// address respectively. These nodes have two operands, the first of
- /// which must be a TargetGlobalAddress, and the second of which must be a
- /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
- /// though these are usually folded into other nodes.
- Hi, Lo,
-
- /// The following two target-specific nodes are used for calls through
- /// function pointers in the 64-bit SVR4 ABI.
-
- /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
- /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
- /// compute an allocation on the stack.
- DYNALLOC,
-
- /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
- /// compute an offset from native SP to the address of the most recent
- /// dynamic alloca.
- DYNAREAOFFSET,
-
- /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
- /// at function entry, used for PIC code.
- GlobalBaseReg,
-
- /// These nodes represent PPC shifts.
- ///
- /// For scalar types, only the last `n + 1` bits of the shift amounts
- /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
- /// for exact behaviors.
- ///
- /// For vector types, only the last n bits are used. See vsld.
- SRL, SRA, SHL,
-
- /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
- /// word and shift left immediate.
- EXTSWSLI,
-
- /// The combination of sra[wd]i and addze used to implement signed
- /// integer division by a power of 2. The first operand is the dividend,
- /// and the second is the constant shift amount (representing the
- /// divisor).
- SRA_ADDZE,
-
- /// CALL - A direct function call.
- /// CALL_NOP is a call with the special NOP which follows 64-bit
- /// SVR4 calls and 32-bit/64-bit AIX calls.
- CALL, CALL_NOP,
-
- /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
- /// MTCTR instruction.
- MTCTR,
-
- /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
- /// BCTRL instruction.
- BCTRL,
-
- /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
- /// instruction and the TOC reload required on SVR4 PPC64.
- BCTRL_LOAD_TOC,
-
- /// Return with a flag operand, matched by 'blr'
- RET_FLAG,
-
- /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
- /// This copies the bits corresponding to the specified CRREG into the
- /// resultant GPR. Bits corresponding to other CR regs are undefined.
- MFOCRF,
-
- /// Direct move from a VSX register to a GPR
- MFVSR,
-
- /// Direct move from a GPR to a VSX register (algebraic)
- MTVSRA,
-
- /// Direct move from a GPR to a VSX register (zero)
- MTVSRZ,
-
- /// Direct move of 2 consecutive GPR to a VSX register.
- BUILD_FP128,
-
- /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
- /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
- /// unsupported for this target.
- /// Merge 2 GPRs to a single SPE register.
- BUILD_SPE64,
-
- /// Extract SPE register component, second argument is high or low.
- EXTRACT_SPE,
-
- /// Extract a subvector from signed integer vector and convert to FP.
- /// It is primarily used to convert a (widened) illegal integer vector
- /// type to a legal floating point vector type.
- /// For example v2i32 -> widened to v4i32 -> v2f64
- SINT_VEC_TO_FP,
-
- /// Extract a subvector from unsigned integer vector and convert to FP.
- /// As with SINT_VEC_TO_FP, used for converting illegal types.
- UINT_VEC_TO_FP,
-
- // FIXME: Remove these once the ANDI glue bug is fixed:
- /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
- /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
- /// implement truncation of i32 or i64 to i1.
- ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,
-
- // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
- // target (returns (Lo, Hi)). It takes a chain operand.
- READ_TIME_BASE,
-
- // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
- EH_SJLJ_SETJMP,
-
- // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
- EH_SJLJ_LONGJMP,
-
- /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
- /// instructions. For lack of better number, we use the opcode number
- /// encoding for the OPC field to identify the compare. For example, 838
- /// is VCMPGTSH.
- VCMP,
-
- /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
- /// altivec VCMP*o instructions. For lack of better number, we use the
- /// opcode number encoding for the OPC field to identify the compare. For
- /// example, 838 is VCMPGTSH.
- VCMPo,
-
- /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
- /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
- /// condition register to branch on, OPC is the branch opcode to use (e.g.
- /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
- /// an optional input flag argument.
- COND_BRANCH,
-
- /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
- /// loops.
- BDNZ, BDZ,
-
- /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
- /// towards zero. Used only as part of the long double-to-int
- /// conversion sequence.
- FADDRTZ,
-
- /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
- MFFS,
-
- /// TC_RETURN - A tail call return.
- /// operand #0 chain
- /// operand #1 callee (register or absolute)
- /// operand #2 stack adjustment
- /// operand #3 optional in flag
- TC_RETURN,
-
- /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
- CR6SET,
- CR6UNSET,
-
- /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
- /// for non-position independent code on PPC32.
- PPC32_GOT,
-
- /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
- /// local dynamic TLS and position indendepent code on PPC32.
- PPC32_PICGOT,
-
- /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
- /// TLS model, produces an ADDIS8 instruction that adds the GOT
- /// base to sym\@got\@tprel\@ha.
- ADDIS_GOT_TPREL_HA,
-
- /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
- /// TLS model, produces a LD instruction with base register G8RReg
- /// and offset sym\@got\@tprel\@l. This completes the addition that
- /// finds the offset of "sym" relative to the thread pointer.
- LD_GOT_TPREL_L,
-
- /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
- /// model, produces an ADD instruction that adds the contents of
- /// G8RReg to the thread pointer. Symbol contains a relocation
- /// sym\@tls which is to be replaced by the thread pointer and
- /// identifies to the linker that the instruction is part of a
- /// TLS sequence.
- ADD_TLS,
-
- /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
- /// model, produces an ADDIS8 instruction that adds the GOT base
- /// register to sym\@got\@tlsgd\@ha.
- ADDIS_TLSGD_HA,
-
- /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
- /// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
- /// ADDIS_TLSGD_L_ADDR until after register assignment.
- ADDI_TLSGD_L,
-
- /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
- /// ADDIS_TLSGD_L_ADDR until after register assignment.
- GET_TLS_ADDR,
-
- /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
- /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
- /// register assignment.
- ADDI_TLSGD_L_ADDR,
-
- /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
- /// model, produces an ADDIS8 instruction that adds the GOT base
- /// register to sym\@got\@tlsld\@ha.
- ADDIS_TLSLD_HA,
-
- /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
- /// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
- /// ADDIS_TLSLD_L_ADDR until after register assignment.
- ADDI_TLSLD_L,
-
- /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
- /// ADDIS_TLSLD_L_ADDR until after register assignment.
- GET_TLSLD_ADDR,
-
- /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
- /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
- /// following register assignment.
- ADDI_TLSLD_L_ADDR,
-
- /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
- /// model, produces an ADDIS8 instruction that adds X3 to
- /// sym\@dtprel\@ha.
- ADDIS_DTPREL_HA,
-
- /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
- /// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@dtprel\@l.
- ADDI_DTPREL_L,
-
- /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
- /// during instruction selection to optimize a BUILD_VECTOR into
- /// operations on splats. This is necessary to avoid losing these
- /// optimizations due to constant folding.
- VADD_SPLAT,
-
- /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned
- /// operand identifies the operating system entry point.
- SC,
-
- /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
- CLRBHRB,
-
- /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
- /// history rolling buffer entry.
- MFBHRBE,
-
- /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
- RFEBB,
-
- /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
- /// endian. Maps to an xxswapd instruction that corrects an lxvd2x
- /// or stxvd2x instruction. The chain is necessary because the
- /// sequence replaces a load and needs to provide the same number
- /// of outputs.
- XXSWAPD,
-
- /// An SDNode for swaps that are not associated with any loads/stores
- /// and thereby have no chain.
- SWAP_NO_CHAIN,
-
- /// An SDNode for Power9 vector absolute value difference.
- /// operand #0 vector
- /// operand #1 vector
- /// operand #2 constant i32 0 or 1, to indicate whether needs to patch
- /// the most significant bit for signed i32
- ///
- /// Power9 VABSD* instructions are designed to support unsigned integer
- /// vectors (byte/halfword/word), if we want to make use of them for signed
- /// integer vectors, we have to flip their sign bits first. To flip sign bit
- /// for byte/halfword integer vector would become inefficient, but for word
- /// integer vector, we can leverage XVNEGSP to make it efficiently. eg:
- /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
- /// => VABSDUW((XVNEGSP a), (XVNEGSP b))
- VABSD,
-
- /// QVFPERM = This corresponds to the QPX qvfperm instruction.
- QVFPERM,
-
- /// QVGPCI = This corresponds to the QPX qvgpci instruction.
- QVGPCI,
-
- /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
- QVALIGNI,
-
- /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
- QVESPLATI,
-
- /// QBFLT = Access the underlying QPX floating-point boolean
- /// representation.
- QBFLT,
-
- /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
- /// lower (IDX=1) half of v4f32 to v2f64.
- FP_EXTEND_HALF,
-
- /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
- /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
- /// the GPRC input, then stores it through Ptr. Type can be either i16 or
- /// i32.
- STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
-
- /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
- /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
- /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
- /// or i32.
- LBRX,
-
- /// STFIWX - The STFIWX instruction. The first operand is an input token
- /// chain, then an f64 value to store, then an address to store it to.
- STFIWX,
-
- /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
- /// load which sign-extends from a 32-bit integer value into the
- /// destination 64-bit register.
- LFIWAX,
-
- /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
- /// load which zero-extends from a 32-bit integer value into the
- /// destination 64-bit register.
- LFIWZX,
-
- /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
- /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
- /// This can be used for converting loaded integers to floating point.
- LXSIZX,
-
- /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
- /// chain, then an f64 value to store, then an address to store it to,
- /// followed by a byte-width for the store.
- STXSIX,
-
- /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
- /// Maps directly to an lxvd2x instruction that will be followed by
- /// an xxswapd.
- LXVD2X,
-
- /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
- /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
- /// the vector type to load vector in big-endian element order.
- LOAD_VEC_BE,
-
- /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
- /// v2f32 value into the lower half of a VSR register.
- LD_VSX_LH,
-
- /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory
- /// instructions such as LXVDSX, LXVWSX.
- LD_SPLAT,
-
- /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
- /// Maps directly to an stxvd2x instruction that will be preceded by
- /// an xxswapd.
- STXVD2X,
-
- /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
- /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on
- /// the vector type to store vector in big-endian element order.
- STORE_VEC_BE,
-
- /// Store scalar integers from VSR.
- ST_VSR_SCAL_INT,
-
- /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
- /// The 4xf32 load used for v4i1 constants.
- QVLFSb,
-
- /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
- /// except they ensure that the compare input is zero-extended for
- /// sub-word versions because the atomic loads zero-extend.
- ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
-
- /// GPRC = TOC_ENTRY GA, TOC
- /// Loads the entry for GA from the TOC, where the TOC base is given by
- /// the last operand.
- TOC_ENTRY
- };
+ enum NodeType : unsigned {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// FSEL - Traditional three-operand fsel node.
+ ///
+ FSEL,
+
+ /// XSMAXCDP, XSMINCDP - C-type min/max instructions.
+ XSMAXCDP,
+ XSMINCDP,
+
+ /// FCFID - The FCFID instruction, taking an f64 operand and producing
+ /// an f64 value containing the FP representation of the integer that
+ /// was temporarily in the f64 operand.
+ FCFID,
+
+ /// Newer FCFID[US] integer-to-floating-point conversion instructions for
+ /// unsigned integers and single-precision outputs.
+ FCFIDU,
+ FCFIDS,
+ FCFIDUS,
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+ /// operand, producing an f64 value containing the integer representation
+ /// of that FP value.
+ FCTIDZ,
+ FCTIWZ,
+
+ /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
+ /// unsigned integers with round toward zero.
+ FCTIDUZ,
+ FCTIWUZ,
+
+ /// Floating-point-to-integer conversion instructions.
+ FP_TO_UINT_IN_VSR,
+ FP_TO_SINT_IN_VSR,
+
+ /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
+ /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
+ VEXTS,
+
+ /// SExtVElems, takes an input vector of a smaller type and sign
+ /// extends to an output vector of a larger type.
+ SExtVElems,
+
+ /// Reciprocal estimate instructions (unary FP ops).
+ FRE,
+ FRSQRTE,
+
+ // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+ // three v4f32 operands and producing a v4f32 result.
+ VMADDFP,
+ VNMSUBFP,
+
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
+ /// XXSPLT - The PPC VSX splat instructions
+ ///
+ XXSPLT,
+
+ /// VECINSERT - The PPC vector insert instruction
+ ///
+ VECINSERT,
+
+ /// VECSHL - The PPC vector shift left instruction
+ ///
+ VECSHL,
+
+ /// XXPERMDI - The PPC XXPERMDI instruction
+ ///
+ XXPERMDI,
+
+ /// The CMPB instruction (takes two operands of i32 or i64).
+ CMPB,
+
+ /// Hi/Lo - These represent the high and low 16-bit parts of a global
+ /// address respectively. These nodes have two operands, the first of
+ /// which must be a TargetGlobalAddress, and the second of which must be a
+ /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
+ /// though these are usually folded into other nodes.
+ Hi,
+ Lo,
+
+ /// The following two target-specific nodes are used for calls through
+ /// function pointers in the 64-bit SVR4 ABI.
+
+ /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an allocation on the stack.
+ DYNALLOC,
+
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an offset from native SP to the address of the most recent
+ /// dynamic alloca.
+ DYNAREAOFFSET,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// These nodes represent PPC shifts.
+ ///
+ /// For scalar types, only the last `n + 1` bits of the shift amounts
+ /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
+ /// for exact behaviors.
+ ///
+ /// For vector types, only the last n bits are used. See vsld.
+ SRL,
+ SRA,
+ SHL,
+
+ /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
+ /// word and shift left immediate.
+ EXTSWSLI,
+
+ /// The combination of sra[wd]i and addze used to implement signed
+ /// integer division by a power of 2. The first operand is the dividend,
+ /// and the second is the constant shift amount (representing the
+ /// divisor).
+ SRA_ADDZE,
+
+ /// CALL - A direct function call.
+ /// CALL_NOP is a call with the special NOP which follows 64-bit
+ /// SVR4 calls and 32-bit/64-bit AIX calls.
+ CALL,
+ CALL_NOP,
+
+ /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+ /// MTCTR instruction.
+ MTCTR,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+ /// BCTRL instruction.
+ BCTRL,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
+ /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX
+ /// and 64-bit AIX.
+ BCTRL_LOAD_TOC,
+
+ /// Return with a flag operand, matched by 'blr'
+ RET_FLAG,
+
+ /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
+ /// This copies the bits corresponding to the specified CRREG into the
+ /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ MFOCRF,
+
+ /// Direct move from a VSX register to a GPR
+ MFVSR,
+
+ /// Direct move from a GPR to a VSX register (algebraic)
+ MTVSRA,
+
+ /// Direct move from a GPR to a VSX register (zero)
+ MTVSRZ,
+
+ /// Direct move of 2 consecutive GPRs to a VSX register.
+ BUILD_FP128,
+
+ /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
+ /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
+ /// unsupported for this target.
+ /// Merge 2 GPRs to a single SPE register.
+ BUILD_SPE64,
+
+ /// Extract SPE register component, second argument is high or low.
+ EXTRACT_SPE,
+
+ /// Extract a subvector from signed integer vector and convert to FP.
+ /// It is primarily used to convert a (widened) illegal integer vector
+ /// type to a legal floating point vector type.
+ /// For example v2i32 -> widened to v4i32 -> v2f64
+ SINT_VEC_TO_FP,
+
+ /// Extract a subvector from unsigned integer vector and convert to FP.
+ /// As with SINT_VEC_TO_FP, used for converting illegal types.
+ UINT_VEC_TO_FP,
+
+ // FIXME: Remove these once the ANDI glue bug is fixed:
+ /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
+ /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
+ /// implement truncation of i32 or i64 to i1.
+ ANDI_rec_1_EQ_BIT,
+ ANDI_rec_1_GT_BIT,
+
+ // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
+ // target (returns (Lo, Hi)). It takes a chain operand.
+ READ_TIME_BASE,
+
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
+ /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+ /// instructions. For lack of a better number, we use the opcode number
+ /// encoding for the OPC field to identify the compare. For example, 838
+ /// is VCMPGTSH.
+ VCMP,
+
+ /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*o instructions. For lack of a better number, we use the
+ /// opcode number encoding for the OPC field to identify the compare. For
+ /// example, 838 is VCMPGTSH.
+ VCMPo,
+
+ /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
+ /// condition register to branch on, OPC is the branch opcode to use (e.g.
+ /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH,
+
+ /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
+ /// loops.
+ BDNZ,
+ BDZ,
+
+ /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+ /// towards zero. Used only as part of the long double-to-int
+ /// conversion sequence.
+ FADDRTZ,
+
+ /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+ MFFS,
+
+ /// TC_RETURN - A tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
+ CR6SET,
+ CR6UNSET,
+
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
+ /// for non-position-independent code on PPC32.
+ PPC32_GOT,
+
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
+ /// local dynamic TLS and position-independent code on PPC32.
+ PPC32_PICGOT,
+
+ /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
+ /// TLS model, produces an ADDIS8 instruction that adds the GOT
+ /// base to sym\@got\@tprel\@ha.
+ ADDIS_GOT_TPREL_HA,
+
+ /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
+ /// TLS model, produces a LD instruction with base register G8RReg
+ /// and offset sym\@got\@tprel\@l. This completes the addition that
+ /// finds the offset of "sym" relative to the thread pointer.
+ LD_GOT_TPREL_L,
+
+ /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+ /// model, produces an ADD instruction that adds the contents of
+ /// G8RReg to the thread pointer. Symbol contains a relocation
+ /// sym\@tls which is to be replaced by the thread pointer and
+ /// identifies to the linker that the instruction is part of a
+ /// TLS sequence.
+ ADD_TLS,
+
+ /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym\@got\@tlsgd\@ha.
+ ADDIS_TLSGD_HA,
+
+ /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
+ /// ADDIS_TLSGD_L_ADDR until after register assignment.
+ ADDI_TLSGD_L,
+
+ /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
+ /// ADDIS_TLSGD_L_ADDR until after register assignment.
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
+ /// register assignment.
+ ADDI_TLSGD_L_ADDR,
+
+ /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym\@got\@tlsld\@ha.
+ ADDIS_TLSLD_HA,
+
+ /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
+ /// ADDIS_TLSLD_L_ADDR until after register assignment.
+ ADDI_TLSLD_L,
+
+ /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
+ /// ADDIS_TLSLD_L_ADDR until after register assignment.
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
+ /// following register assignment.
+ ADDI_TLSLD_L_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds X3 to
+ /// sym\@dtprel\@ha.
+ ADDIS_DTPREL_HA,
+
+ /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@dtprel\@l.
+ ADDI_DTPREL_L,
+
+ /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
+ VADD_SPLAT,
+
+ /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned
+ /// operand identifies the operating system entry point.
+ SC,
+
+ /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
+ CLRBHRB,
+
+ /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
+ /// history rolling buffer entry.
+ MFBHRBE,
+
+ /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
+ RFEBB,
+
+ /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
+ /// endian. Maps to an xxswapd instruction that corrects an lxvd2x
+ /// or stxvd2x instruction. The chain is necessary because the
+ /// sequence replaces a load and needs to provide the same number
+ /// of outputs.
+ XXSWAPD,
+
+ /// An SDNode for swaps that are not associated with any loads/stores
+ /// and therefore have no chain.
+ SWAP_NO_CHAIN,
+
+ /// An SDNode for Power9 vector absolute value difference.
+ /// operand #0 vector
+ /// operand #1 vector
+ /// operand #2 constant i32 0 or 1, indicating whether the most
+ /// significant bit needs to be patched for signed i32
+ ///
+ /// Power9 VABSD* instructions are designed for unsigned integer
+ /// vectors (byte/halfword/word); to use them for signed integer
+ /// vectors, we have to flip their sign bits first. Flipping the sign bit
+ /// of a byte/halfword integer vector would be inefficient, but for a word
+ /// integer vector we can leverage XVNEGSP to do it efficiently, e.g.:
+ /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
+ /// => VABSDUW((XVNEGSP a), (XVNEGSP b))
+ VABSD,
+
+ /// QVFPERM = This corresponds to the QPX qvfperm instruction.
+ QVFPERM,
+
+ /// QVGPCI = This corresponds to the QPX qvgpci instruction.
+ QVGPCI,
+
+ /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
+ QVALIGNI,
+
+ /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
+ QVESPLATI,
+
+ /// QBFLT = Access the underlying QPX floating-point boolean
+ /// representation.
+ QBFLT,
+
+ /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
+ /// lower (IDX=1) half of v4f32 to v2f64.
+ FP_EXTEND_HALF,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+ /// byte-swapping load instruction. It loads "Type" bits, byte-swaps them,
+ /// then puts them in the bottom bits of the GPRC. Type can be either i16
+ /// or i32.
+ LBRX,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+ /// load which sign-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWAX,
+
+ /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
+ /// load which zero-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWZX,
+
+ /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
+ /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
+ /// This can be used for converting loaded integers to floating point.
+ LXSIZX,
+
+ /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
+ /// chain, then an f64 value to store, then an address to store it to,
+ /// followed by a byte-width for the store.
+ STXSIX,
+
+ /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
+ /// Maps directly to an lxvd2x instruction that will be followed by
+ /// an xxswapd.
+ LXVD2X,
+
+ /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
+ /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x, depending on
+ /// the vector type, to load the vector in big-endian element order.
+ LOAD_VEC_BE,
+
+ /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
+ /// v2f32 value into the lower half of a VSR register.
+ LD_VSX_LH,
+
+ /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory
+ /// instruction, such as LXVDSX or LXVWSX.
+ LD_SPLAT,
+
+ /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
+ /// Maps directly to an stxvd2x instruction that will be preceded by
+ /// an xxswapd.
+ STXVD2X,
+
+ /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
+ /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x, depending on
+ /// the vector type, to store the vector in big-endian element order.
+ STORE_VEC_BE,
+
+ /// Store scalar integers from VSR.
+ ST_VSR_SCAL_INT,
+
+ /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
+ /// The 4xf32 load used for v4i1 constants.
+ QVLFSb,
+
+ /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
+ /// except they ensure that the compare input is zero-extended for
+ /// sub-word versions because the atomic loads zero-extend.
+ ATOMIC_CMP_SWAP_8,
+ ATOMIC_CMP_SWAP_16,
+
+ /// GPRC = TOC_ENTRY GA, TOC
+ /// Loads the entry for GA from the TOC, where the TOC base is given by
+ /// the last operand.
+ TOC_ENTRY
+ };
} // end namespace PPCISD
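For orientation: these PPCISD opcodes are emitted during custom lowering and later matched by the .td patterns touched in this patch. A minimal sketch of how such a node is typically built, assuming the usual SelectionDAG lowering context; the helper name is hypothetical and not part of this change:

    #include "llvm/CodeGen/SelectionDAG.h"  // SelectionDAG, SDValue, SDLoc

    // Sketch only: build a PPCISD::VPERM node that permutes the concatenation
    // of V1 and V2 according to PermMask; instruction selection then matches
    // it to a vperm. Assumes PPCISD is visible (PPC backend context).
    static llvm::SDValue lowerToVPERM(llvm::SDValue V1, llvm::SDValue V2,
                                      llvm::SDValue PermMask,
                                      const llvm::SDLoc &dl,
                                      llvm::SelectionDAG &DAG) {
      return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2,
                         PermMask);
    }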
@@ -647,6 +662,10 @@ namespace llvm {
return true;
}
+ bool isEqualityCmpFoldedWithSignedCmp() const override {
+ return false;
+ }
+
bool hasAndNotCompare(SDValue) const override {
return true;
}
@@ -733,7 +752,7 @@ namespace llvm {
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
- Register getRegisterByName(const char* RegName, EVT VT,
+ Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
void computeKnownBitsForTargetNode(const SDValue Op,
@@ -900,7 +919,8 @@ namespace llvm {
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
/// expanded to fmul + fadd.
- bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
@@ -1113,6 +1133,10 @@ namespace llvm {
SelectionDAG &DAG, SDValue ArgVal,
const SDLoc &dl) const;
+ SDValue LowerFormalArguments_AIX(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerFormalArguments_Darwin(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 414b0540db7e..43431a1e0069 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -71,13 +71,14 @@ def SRL64 : SDNodeXForm<imm, [{
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
- let isReturn = 1, Uses = [LR8, RM] in
+ let isReturn = 1, isPredicable = 1, Uses = [LR8, RM] in
def BLR8 : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
[(retflag)]>, Requires<[In64BitMode]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
- def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
- []>,
- Requires<[In64BitMode]>;
+ let isPredicable = 1 in
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
"b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
[]>,
@@ -141,9 +142,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
[(PPCcall_nop (i64 imm:$func))]>;
}
let Uses = [CTR8, RM] in {
- def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
- "bctrl", IIC_BrB, [(PPCbctrl)]>,
- Requires<[In64BitMode]>;
+ let isPredicable = 1 in
+ def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
+ Requires<[In64BitMode]>;
let isCodeGenOnly = 1 in {
def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
@@ -253,7 +255,7 @@ def LDARX : XForm_1_memOp<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
// Instruction to support lock versions of atomics
// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
- "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT;
+ "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isRecordForm;
let hasExtraDefRegAllocReq = 1 in
def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC),
@@ -263,7 +265,7 @@ def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC),
let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STDCX : XForm_1_memOp<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
+ "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isRecordForm;
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
@@ -410,6 +412,7 @@ def DYNALLOC8 : PPCEmitTimePseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri
def DYNAREAOFFSET8 : PPCEmitTimePseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
[(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
+let hasSideEffects = 0 in {
let Defs = [LR8] in {
def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
"mtlr $rS", IIC_SprMTSPR>,
@@ -421,6 +424,7 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
} // Interpretation64Bit
+}
//===----------------------------------------------------------------------===//
// Fixed point instructions.
@@ -474,14 +478,14 @@ defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
// Logical ops with immediate.
let Defs = [CR0] in {
-def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+def ANDI8_rec : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
"andi. $dst, $src1, $src2", IIC_IntGeneral,
[(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
- isDOT;
-def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ isRecordForm;
+def ANDIS8_rec : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
"andis. $dst, $src1, $src2", IIC_IntGeneral,
[(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
- isDOT;
+ isRecordForm;
}
def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
"ori $dst, $src1, $src2", IIC_IntSimple,
@@ -933,7 +937,7 @@ def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
[(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// For fast-isel:
-let isCodeGenOnly = 1, mayLoad = 1 in {
+let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in {
def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
"lwa $rD, $src", IIC_LdStLWA, []>, isPPC64,
PPC970_DGroup_Cracked;
@@ -1459,7 +1463,7 @@ class X_L1_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand ty,
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def CP_COPY8 : X_L1_RA5_RB5<31, 774, "copy" , g8rc, IIC_LdStCOPY, []>;
def CP_PASTE8 : X_L1_RA5_RB5<31, 902, "paste" , g8rc, IIC_LdStPASTE, []>;
-def CP_PASTE8o : X_L1_RA5_RB5<31, 902, "paste.", g8rc, IIC_LdStPASTE, []>,isDOT;
+def CP_PASTE8_rec : X_L1_RA5_RB5<31, 902, "paste.", g8rc, IIC_LdStPASTE, []>,isRecordForm;
}
// SLB Invalidate Entry Global
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index fd3fc2af2327..f94816a35f79 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -261,6 +261,11 @@ def vecspltisw : PatLeaf<(build_vector), [{
return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != nullptr;
}], VSPLTISW_get_imm>;
+def immEQOneV : PatLeaf<(build_vector), [{
+ if (ConstantSDNode *C = cast<BuildVectorSDNode>(N)->getConstantSplatNode())
+ return C->isOne();
+ return false;
+}]>;
//===----------------------------------------------------------------------===//
// Helpers for defining instructions that directly correspond to intrinsics.
@@ -518,19 +523,19 @@ def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vcfsx $vD, $vB, $UIMM", IIC_VecFP,
[(set v4f32:$vD,
- (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
+ (int_ppc_altivec_vcfsx v4i32:$vB, timm:$UIMM))]>;
def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vcfux $vD, $vB, $UIMM", IIC_VecFP,
[(set v4f32:$vD,
- (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
+ (int_ppc_altivec_vcfux v4i32:$vB, timm:$UIMM))]>;
def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vctsxs $vD, $vB, $UIMM", IIC_VecFP,
[(set v4i32:$vD,
- (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
+ (int_ppc_altivec_vctsxs v4f32:$vB, timm:$UIMM))]>;
def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vctuxs $vD, $vB, $UIMM", IIC_VecFP,
[(set v4i32:$vD,
- (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
+ (int_ppc_altivec_vctuxs v4f32:$vB, timm:$UIMM))]>;
// Defines with the UIM field set to 0 for floating-point
// to integer (fp_to_sint/fp_to_uint) conversions and integer
@@ -706,7 +711,7 @@ def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vspltw $vD, $vB, $UIMM", IIC_VecPerm,
[(set v16i8:$vD,
(vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def VSPLTBs : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
"vspltb $vD, $vB, $UIMM", IIC_VecPerm, []>;
def VSPLTHs : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
@@ -789,37 +794,37 @@ class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
// f32 element comparisons.
def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
-def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPBFP_rec : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
-def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPEQFP_rec : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
-def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP_rec : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
-def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP_rec : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
// i8 element comparisons.
def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
-def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPEQUB_rec : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
-def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB_rec : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
-def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB_rec : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
// i16 element comparisons.
def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
-def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPEQUH_rec : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
-def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH_rec : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
-def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH_rec : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
// i32 element comparisons.
def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
-def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPEQUW_rec : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
-def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW_rec : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
-def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW_rec : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isReMaterializable = 1 in {
@@ -856,6 +861,14 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
def : InstAlias<"vmr $vD, $vA", (VOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
def : InstAlias<"vnot $vD, $vA", (VNOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
+// Rotates.
+def : Pat<(v16i8 (rotl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VRLB v16i8:$vA, v16i8:$vB))>;
+def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VRLH v8i16:$vA, v8i16:$vB))>;
+def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
+
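These rotate patterns map the generic rotl node straight onto the element-wise AltiVec rotates: each lane is rotated left by the low-order bits of the corresponding lane of $vB. A scalar model of one vrlw lane, purely illustrative (the helper name is made up):

    #include <cstdint>

    // Per-lane model of vrlw: rotate a 32-bit word left by N mod 32.
    static uint32_t rotl32Lane(uint32_t X, uint32_t N) {
      N &= 31;  // the hardware only looks at the low 5 bits
      return (X << N) | (X >> ((32 - N) & 31));  // mask avoids UB when N == 0
    }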
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
@@ -1092,6 +1105,20 @@ def : Pat<(v4f32 (vselect v4i32:$vA, v4f32:$vB, v4f32:$vC)),
def : Pat<(v2f64 (vselect v2i64:$vA, v2f64:$vB, v2f64:$vC)),
(VSEL $vC, $vB, $vA)>;
+// Vector Integer Average Instructions
+def : Pat<(v4i32 (sra (sub v4i32:$vA, (vnot_ppc v4i32:$vB)),
+ (v4i32 (immEQOneV)))), (v4i32 (VAVGSW $vA, $vB))>;
+def : Pat<(v8i16 (sra (sub v8i16:$vA, (v8i16 (bitconvert(vnot_ppc v4i32:$vB)))),
+ (v8i16 (immEQOneV)))), (v8i16 (VAVGSH $vA, $vB))>;
+def : Pat<(v16i8 (sra (sub v16i8:$vA, (v16i8 (bitconvert(vnot_ppc v4i32:$vB)))),
+ (v16i8 (immEQOneV)))), (v16i8 (VAVGSB $vA, $vB))>;
+def : Pat<(v4i32 (srl (sub v4i32:$vA, (vnot_ppc v4i32:$vB)),
+ (v4i32 (immEQOneV)))), (v4i32 (VAVGUW $vA, $vB))>;
+def : Pat<(v8i16 (srl (sub v8i16:$vA, (v8i16 (bitconvert(vnot_ppc v4i32:$vB)))),
+ (v8i16 (immEQOneV)))), (v8i16 (VAVGUH $vA, $vB))>;
+def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot_ppc v4i32:$vB)))),
+ (v16i8 (immEQOneV)))), (v16i8 (VAVGUB $vA, $vB))>;
+
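The average patterns above rest on a two's-complement identity: ~b == -b - 1, so a - ~b == a + b + 1, and the (sub, shift-right-by-one) DAG shape is exactly the rounding-up average that vavgs*/vavgu* compute (the hardware uses wider intermediate precision, so it cannot overflow). A throwaway scalar check of the identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      // ~b == -b - 1, hence a - ~b == a + b + 1; shifting right by one gives
      // the rounding-up average (assuming a + b + 1 itself does not overflow).
      int32_t a = 7, b = 4;
      assert(((a - ~b) >> 1) == ((a + b + 1) >> 1));
      return 0;
    }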
} // end HasAltivec
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
@@ -1140,9 +1167,13 @@ def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB),
def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB),
(VMRGOW $vB, $vA)>;
+// Vector rotates.
+def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
+
+def : Pat<(v2i64 (rotl v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VRLD v2i64:$vA, v2i64:$vB))>;
// Vector shifts
-def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vsld $vD, $vA, $vB", IIC_VecGeneral, []>;
def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
@@ -1245,11 +1276,11 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
// i64 element comparisons.
def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
-def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPEQUD_rec : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
-def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD_rec : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
-def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD_rec : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
// The cryptography instructions that do not require Category:Vector.Crypto
def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
@@ -1313,21 +1344,21 @@ let Predicates = [HasP9Altivec] in {
// i8 element comparisons.
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
-def VCMPNEBo : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
+def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
def VCMPNEZB : VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>;
-def VCMPNEZBo : VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
+def VCMPNEZB_rec : VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
// i16 element comparisons.
def VCMPNEH : VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>;
-def VCMPNEHo : VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
+def VCMPNEH_rec : VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
def VCMPNEZH : VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>;
-def VCMPNEZHo : VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
+def VCMPNEZH_rec : VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
// i32 element comparisons.
def VCMPNEW : VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>;
-def VCMPNEWo : VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
+def VCMPNEW_rec : VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
def VCMPNEZW : VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>;
-def VCMPNEZWo : VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
+def VCMPNEZW_rec : VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
// VX-Form: [PO VRT / UIM VRB XO].
// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
@@ -1347,12 +1378,14 @@ def VEXTRACTUW : VX1_VT5_UIM5_VB5<653, "vextractuw", []>;
def VEXTRACTD : VX1_VT5_UIM5_VB5<717, "vextractd" , []>;
// Vector Extract Unsigned Byte/Halfword/Word Left/Right-Indexed
+let hasSideEffects = 0 in {
def VEXTUBLX : VX1_RT5_RA5_VB5<1549, "vextublx", []>;
def VEXTUBRX : VX1_RT5_RA5_VB5<1805, "vextubrx", []>;
def VEXTUHLX : VX1_RT5_RA5_VB5<1613, "vextuhlx", []>;
def VEXTUHRX : VX1_RT5_RA5_VB5<1869, "vextuhrx", []>;
def VEXTUWLX : VX1_RT5_RA5_VB5<1677, "vextuwlx", []>;
def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>;
+}
// Vector Insert Element Instructions
def VINSERTB : VXForm_1<781, (outs vrrc:$vD),
@@ -1410,6 +1443,12 @@ let isCodeGenOnly = 1 in {
def VEXTSW2Ds : VX_VT5_EO5_VB5s<1538, 26, "vextsw2d", []>;
}
+def : Pat<(v4i32 (sext_inreg v4i32:$VRB, v4i8)), (v4i32 (VEXTSB2W $VRB))>;
+def : Pat<(v4i32 (sext_inreg v4i32:$VRB, v4i16)), (v4i32 (VEXTSH2W $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i8)), (v2i64 (VEXTSB2D $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i16)), (v2i64 (VEXTSH2D $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i32)), (v2i64 (VEXTSW2D $VRB))>;
+
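The sext_inreg patterns above fold in-lane sign extension into the Power9 vector extend-sign instructions: each lane already holds the narrow value, and VEXTSB2W and friends sign-extend it in place to the full lane width. A scalar model of one VEXTSB2W lane, for illustration only:

    #include <cstdint>

    // Model of VEXTSB2W per 32-bit lane: sign-extend the low byte in place.
    static int32_t extsb2wLane(int32_t Lane) {
      return static_cast<int32_t>(static_cast<int8_t>(Lane & 0xff));
    }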
// Vector Integer Negate
def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw",
[(set v4i32:$vD,
@@ -1496,18 +1535,18 @@ class VX_VT5_EO5_VB5_XO9_o<bits<5> eo, bits<9> xo, string opc,
}
// Decimal Convert From/to National/Zoned/Signed-QWord
-def BCDCFNo : VX_VT5_EO5_VB5_PS1_XO9_o<7, 385, "bcdcfn." , []>;
-def BCDCFZo : VX_VT5_EO5_VB5_PS1_XO9_o<6, 385, "bcdcfz." , []>;
-def BCDCTNo : VX_VT5_EO5_VB5_XO9_o <5, 385, "bcdctn." , []>;
-def BCDCTZo : VX_VT5_EO5_VB5_PS1_XO9_o<4, 385, "bcdctz." , []>;
-def BCDCFSQo : VX_VT5_EO5_VB5_PS1_XO9_o<2, 385, "bcdcfsq.", []>;
-def BCDCTSQo : VX_VT5_EO5_VB5_XO9_o <0, 385, "bcdctsq.", []>;
+def BCDCFN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<7, 385, "bcdcfn." , []>;
+def BCDCFZ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<6, 385, "bcdcfz." , []>;
+def BCDCTN_rec : VX_VT5_EO5_VB5_XO9_o <5, 385, "bcdctn." , []>;
+def BCDCTZ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<4, 385, "bcdctz." , []>;
+def BCDCFSQ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<2, 385, "bcdcfsq.", []>;
+def BCDCTSQ_rec : VX_VT5_EO5_VB5_XO9_o <0, 385, "bcdctsq.", []>;
// Decimal Copy-Sign/Set-Sign
let Defs = [CR6] in
-def BCDCPSGNo : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>;
+def BCDCPSGN_rec : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>;
-def BCDSETSGNo : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>;
+def BCDSETSGN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>;
// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
@@ -1526,13 +1565,13 @@ class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
}
// Decimal Shift/Unsigned-Shift/Shift-and-Round
-def BCDSo : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>;
-def BCDUSo : VX_VT5_VA5_VB5_XO9_o <129, "bcdus.", []>;
-def BCDSRo : VX_VT5_VA5_VB5_PS1_XO9_o<449, "bcdsr.", []>;
+def BCDS_rec : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>;
+def BCDUS_rec : VX_VT5_VA5_VB5_XO9_o <129, "bcdus.", []>;
+def BCDSR_rec : VX_VT5_VA5_VB5_PS1_XO9_o<449, "bcdsr.", []>;
// Decimal (Unsigned) Truncate
-def BCDTRUNCo : VX_VT5_VA5_VB5_PS1_XO9_o<257, "bcdtrunc." , []>;
-def BCDUTRUNCo : VX_VT5_VA5_VB5_XO9_o <321, "bcdutrunc.", []>;
+def BCDTRUNC_rec : VX_VT5_VA5_VB5_PS1_XO9_o<257, "bcdtrunc." , []>;
+def BCDUTRUNC_rec : VX_VT5_VA5_VB5_XO9_o <321, "bcdutrunc.", []>;
// Absolute Difference
def VABSDUB : VXForm_1<1027, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 96b9c9a119c0..115bd44ea202 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -262,8 +262,8 @@ class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> {
- // Even though ADDICo does not really have an RC bit, provide
- // the declaration of one here so that isDOT has something to set.
+ // Even though ADDIC_rec does not really have an RC bit, provide
+ // the declaration of one here so that isRecordForm has something to set.
bit RC = 0;
}
@@ -428,7 +428,7 @@ class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asms
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RST;
let Inst{11-15} = A;
@@ -463,7 +463,7 @@ class XForm_base_r3xo_swapped
bits<5> RST;
bits<5> B;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RST;
let Inst{11-15} = A;
@@ -744,7 +744,7 @@ class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RST;
let Inst{11-20} = 0;
@@ -757,7 +757,7 @@ class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
bits<5> FM;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = FM;
let Inst{11-20} = 0;
@@ -902,7 +902,7 @@ class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
: I<opcode, OOL, IOL, asmstr, itin> {
bit L;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{7-9} = 0;
let Inst{10} = L;
@@ -1265,7 +1265,7 @@ class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = XT{4-0};
let Inst{11-15} = XA{4-0};
@@ -1529,6 +1529,29 @@ class XLForm_2_ext_and_DSForm_1<bits<6> opcode1, bits<10> xo1,
let BH = 0;
}
+class XLForm_2_ext_and_DForm_1<bits<6> opcode1, bits<10> xo1, bits<5> bo,
+ bits<5> bi, bit lk, bits<6> opcode2, dag OOL,
+ dag IOL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+
+ bits<5> RST;
+ bits<21> D_RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = bi;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = 0; // Unused (BH)
+ let Inst{21-30} = xo1;
+ let Inst{31} = lk;
+
+ let Inst{38-42} = RST;
+ let Inst{43-47} = D_RA{20-16}; // Base Register
+ let Inst{48-63} = D_RA{15-0}; // Displacement
+}
+
// 1.7.8 XFX-Form
class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
@@ -1628,7 +1651,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
bits<8> FM;
bits<5> rT;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Pattern = pattern;
let Inst{6} = 0;
@@ -1647,7 +1670,7 @@ class XFLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
bit W;
bits<5> FRB;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Pattern = pattern;
let Inst{6} = L;
@@ -1666,7 +1689,7 @@ class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
bits<5> RS;
bits<6> SH;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Pattern = pattern;
let Inst{6-10} = RS;
@@ -1687,7 +1710,7 @@ class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asms
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RT;
let Inst{11-15} = RA;
@@ -1714,7 +1737,7 @@ class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = FRT;
let Inst{11-15} = FRA;
@@ -1774,7 +1797,7 @@ class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RS;
let Inst{11-15} = RA;
@@ -1800,7 +1823,7 @@ class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RS;
let Inst{11-15} = RA;
@@ -1821,7 +1844,7 @@ class MDSForm_1<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RS;
let Inst{11-15} = RA;
@@ -2083,7 +2106,7 @@ class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = FRT;
let Inst{11-15} = FRA;
@@ -2107,7 +2130,7 @@ class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = FRT;
let Inst{11-22} = idx;
@@ -2125,7 +2148,7 @@ class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = VRT;
let Inst{11-14} = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 104b57a70a2e..6cbf999ca73d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -36,7 +36,7 @@ def TEND : XForm_htm1 <31, 686,
def TABORT : XForm_base_r3xo <31, 910,
(outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
- []>, isDOT {
+ []>, isRecordForm {
let RST = 0;
let B = 0;
}
@@ -44,38 +44,38 @@ def TABORT : XForm_base_r3xo <31, 910,
def TABORTWC : XForm_base_r3xo <31, 782,
(outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
"tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
- isDOT;
+ isRecordForm;
def TABORTWCI : XForm_base_r3xo <31, 846,
(outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
"tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
- isDOT;
+ isRecordForm;
def TABORTDC : XForm_base_r3xo <31, 814,
(outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
"tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
- isDOT;
+ isRecordForm;
def TABORTDCI : XForm_base_r3xo <31, 878,
(outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
"tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
- isDOT;
+ isRecordForm;
def TSR : XForm_htm2 <31, 750,
(outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
- isDOT;
+ isRecordForm;
def TRECLAIM : XForm_base_r3xo <31, 942,
(outs), (ins gprc:$A), "treclaim. $A",
IIC_SprMTSPR, []>,
- isDOT {
+ isRecordForm {
let RST = 0;
let B = 0;
}
def TRECHKPT : XForm_base_r3xo <31, 1006,
(outs), (ins), "trechkpt.", IIC_SprMTSPR, []>,
- isDOT {
+ isRecordForm {
let RST = 0;
let A = 0;
let B = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 311ca220def4..30906a32b00c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -108,7 +108,7 @@ ScheduleHazardRecognizer *
PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const {
unsigned Directive =
- static_cast<const PPCSubtarget *>(STI)->getDarwinDirective();
+ static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
const InstrItineraryData *II =
@@ -125,7 +125,7 @@ ScheduleHazardRecognizer *
PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
unsigned Directive =
- DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
// FIXME: Leaving this as-is until we have POWER9 scheduling info
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
@@ -202,7 +202,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// On some cores, there is an additional delay between writing to a condition
// register, and using it from a branch.
- unsigned Directive = Subtarget.getDarwinDirective();
+ unsigned Directive = Subtarget.getCPUDirective();
switch (Directive) {
default: break;
case PPC::DIR_7400:
@@ -371,7 +371,7 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
MachineFunction &MF = *MI.getParent()->getParent();
// Normal instructions can be commuted the obvious way.
- if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMIo)
+ if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
// Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
// 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
@@ -391,7 +391,7 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// Swap op1/op2
assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
- "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo.");
+ "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
Register Reg0 = MI.getOperand(0).getReg();
Register Reg1 = MI.getOperand(1).getReg();
Register Reg2 = MI.getOperand(2).getReg();
@@ -469,7 +469,7 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
// This function is used for scheduling, and the nop wanted here is the type
// that terminates dispatch groups on the POWER cores.
- unsigned Directive = Subtarget.getDarwinDirective();
+ unsigned Directive = Subtarget.getCPUDirective();
unsigned Opcode;
switch (Directive) {
default: Opcode = PPC::NOP; break;
@@ -903,15 +903,15 @@ static unsigned getCRBitValue(unsigned CRBit) {
void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
// We can end up with self copies and similar things as a result of VSX copy
// legalization. Promote them here.
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (PPC::F8RCRegClass.contains(DestReg) &&
PPC::VSRCRegClass.contains(SrcReg)) {
- unsigned SuperReg =
- TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
+ MCRegister SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
if (VSXSelfCopyCrash && SrcReg == SuperReg)
llvm_unreachable("nop VSX copy");
@@ -919,8 +919,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
DestReg = SuperReg;
} else if (PPC::F8RCRegClass.contains(SrcReg) &&
PPC::VSRCRegClass.contains(DestReg)) {
- unsigned SuperReg =
- TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
+ MCRegister SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
if (VSXSelfCopyCrash && DestReg == SuperReg)
llvm_unreachable("nop VSX copy");
@@ -931,7 +931,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Different class register copy
if (PPC::CRBITRCRegClass.contains(SrcReg) &&
PPC::GPRCRegClass.contains(DestReg)) {
- unsigned CRReg = getCRFromCRBit(SrcReg);
+ MCRegister CRReg = getCRFromCRBit(SrcReg);
BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
getKillRegState(KillSrc);
// Rotate the CR bit in the CR fields to be the least significant bit and
@@ -1587,22 +1587,6 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI,
return Found;
}
-bool PPCInstrInfo::isPredicable(const MachineInstr &MI) const {
- unsigned OpC = MI.getOpcode();
- switch (OpC) {
- default:
- return false;
- case PPC::B:
- case PPC::BLR:
- case PPC::BLR8:
- case PPC::BCTR:
- case PPC::BCTR8:
- case PPC::BCTRL:
- case PPC::BCTRL8:
- return true;
- }
-}
-
bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned &SrcReg2, int &Mask,
int &Value) const {
@@ -1836,8 +1820,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
int NewOpC = -1;
int MIOpC = MI->getOpcode();
- if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8 ||
- MIOpC == PPC::ANDISo || MIOpC == PPC::ANDISo8)
+ if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
+ MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
NewOpC = MIOpC;
else {
NewOpC = PPC::getRecordFormOpcode(MIOpC);
@@ -1943,9 +1927,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
// The mask value needs to shift right 16 if we're emitting andis.
Mask >>= MBInLoHWord ? 0 : 16;
- NewOpC = MIOpC == PPC::RLWINM ?
- (MBInLoHWord ? PPC::ANDIo : PPC::ANDISo) :
- (MBInLoHWord ? PPC::ANDIo8 :PPC::ANDISo8);
+ NewOpC = MIOpC == PPC::RLWINM
+ ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
+ : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
} else if (MRI->use_empty(GPRRes) && (ME == 31) &&
(ME - MB + 1 == SH) && (MB >= 16)) {
// If we are rotating by the exact number of bits as are in the mask
@@ -1953,7 +1937,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// that's just an andis. (as long as the GPR result has no uses).
Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
Mask >>= 16;
- NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDISo :PPC::ANDISo8;
+ NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
}
// If we've set the mask, we can transform.
if (Mask != ~0LLU) {
@@ -1966,7 +1950,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
int64_t MB = MI->getOperand(3).getImm();
if (MB >= 48) {
uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
- NewOpC = PPC::ANDIo8;
+ NewOpC = PPC::ANDI8_rec;
MI->RemoveOperand(3);
MI->getOperand(2).setImm(Mask);
NumRcRotatesConvertedToRcAnd++;
@@ -2306,7 +2290,7 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
// Replace the instruction.
if (LII.SetCR) {
- MI.setDesc(get(LII.Is64Bit ? PPC::ANDIo8 : PPC::ANDIo));
+ MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
// Set the immediate.
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
@@ -2370,15 +2354,13 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
ImmInstrInfo III;
unsigned Opc = MI.getOpcode();
bool ConvertibleImmForm =
- Opc == PPC::CMPWI || Opc == PPC::CMPLWI ||
- Opc == PPC::CMPDI || Opc == PPC::CMPLDI ||
- Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
- Opc == PPC::ORI || Opc == PPC::ORI8 ||
- Opc == PPC::XORI || Opc == PPC::XORI8 ||
- Opc == PPC::RLDICL || Opc == PPC::RLDICLo ||
- Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
- Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
- Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
+ Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
+ Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
+ Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
+ Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
+ Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
+ Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
+ Opc == PPC::RLWINM8_rec;
bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
? isVFRegister(MI.getOperand(0).getReg())
: false;
@@ -2527,6 +2509,225 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
"RegNo should be killed or dead");
}
+// This opt tries to convert the following imm form to an index form to save an
+// add for stack variables.
+// Return false if no such pattern found.
+//
+// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
+// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
+// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
+//
+// can be converted to:
+//
+// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
+// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
+//
+// In order to eliminate ADD instr, make sure that:
+// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
+// new ADDI instr and ADDI can only take int16 Imm.
+// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
+// between ADDI and ADD instr since its original def in ADDI will be changed
+ // in the new ADDI instr. There should also be no new def for it between
+ // ADD and Imm instr, as ToBeChangedReg will be used in Index instr.
+// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
+// between ADD and Imm instr since ADD instr will be eliminated.
+// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
+// moved to Index instr.
+bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ bool PostRA = !MRI->isSSA();
+ // Do this opt after PEI, which runs after RA. The reason is that stack slot
+ // expansion in PEI may expose such opportunities, since stack slot offsets
+ // to the frame base (OffsetAddi) are only determined in PEI.
+ if (!PostRA)
+ return false;
+ unsigned ToBeDeletedReg = 0;
+ int64_t OffsetImm = 0;
+ unsigned XFormOpcode = 0;
+ ImmInstrInfo III;
+
+ // Check if the Imm instr meets the requirements.
+ if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
+ III))
+ return false;
+
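+ // Look backwards from the Imm instr for the def of ToBeDeletedReg, noting
+ // whether the register is used anywhere in between.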
+ bool OtherIntermediateUse = false;
+ MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
+
+ // Exit if there is any other use between the ADD and Imm instrs, or if no
+ // def was found.
+ if (OtherIntermediateUse || !ADDMI)
+ return false;
+
+ // Check if the ADD instr meets the requirements.
+ if (!isADDInstrEligibleForFolding(*ADDMI))
+ return false;
+
+ unsigned ScaleRegIdx = 0;
+ int64_t OffsetAddi = 0;
+ MachineInstr *ADDIMI = nullptr;
+
+ // Check if there is a valid ToBeChangedReg in ADDMI.
+ // 1: It must be killed.
+ // 2: Its definition must be a valid ADDIMI.
+ // 3: It must satisfy the int16 offset requirement.
+ if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
+ ScaleRegIdx = 2;
+ else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
+ ScaleRegIdx = 1;
+ else
+ return false;
+
+ assert(ADDIMI && "There should be an ADDI defining a valid ToBeChangedReg.");
+ unsigned ToBeChangedReg = ADDIMI->getOperand(0).getReg();
+ unsigned ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
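+ // Return true if any instruction in (Start, End) redefines Reg.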
+ auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
+ MachineBasicBlock::iterator End) {
+ for (auto It = ++Start; It != End; It++)
+ if (It->modifiesRegister(Reg, &getRegisterInfo()))
+ return true;
+ return false;
+ };
+ // Make sure there is no other def of ToBeChangedReg or ScaleReg between the
+ // ADD instr and the Imm instr.
+ if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
+ return false;
+
+ // Now perform the transformation.
+ LLVM_DEBUG(dbgs() << "Replace instruction: "
+ << "\n");
+ LLVM_DEBUG(ADDIMI->dump());
+ LLVM_DEBUG(ADDMI->dump());
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "with: "
+ << "\n");
+
+ // Update ADDI instr.
+ ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
+
+ // Update Imm instr.
+ MI.setDesc(get(XFormOpcode));
+ MI.getOperand(III.ImmOpNo)
+ .ChangeToRegister(ScaleReg, false, false,
+ ADDMI->getOperand(ScaleRegIdx).isKill());
+
+ MI.getOperand(III.OpNoForForwarding)
+ .ChangeToRegister(ToBeChangedReg, false, false, true);
+
+ // Eliminate ADD instr.
+ ADDMI->eraseFromParent();
+
+ LLVM_DEBUG(ADDIMI->dump());
+ LLVM_DEBUG(MI.dump());
+
+ return true;
+}
+
+bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
+ int64_t &Imm) const {
+ unsigned Opc = ADDIMI.getOpcode();
+
+ // Exit if the instruction is not ADDI.
+ if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
+ return false;
+
+ Imm = ADDIMI.getOperand(2).getImm();
+
+ return true;
+}
+
+bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const {
+ unsigned Opc = ADDMI.getOpcode();
+
+ // Only a plain ADD (ADD4 or ADD8) is eligible for this folding.
+ return Opc == PPC::ADD4 || Opc == PPC::ADD8;
+}
+
+bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI,
+ unsigned &ToBeDeletedReg,
+ unsigned &XFormOpcode,
+ int64_t &OffsetImm,
+ ImmInstrInfo &III) const {
+ // Only handle load/store.
+ if (!MI.mayLoadOrStore())
+ return false;
+
+ unsigned Opc = MI.getOpcode();
+
+ XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
+
+ // Exit if the instruction has no index form.
+ if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
+ return false;
+
+ // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
+ if (!instrHasImmForm(XFormOpcode, isVFRegister(MI.getOperand(0).getReg()),
+ III, true))
+ return false;
+
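+ // Only forms that sum their operands (base + displacement) can be rewritten
+ // into the base + index X-form.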
+ if (!III.IsSummingOperands)
+ return false;
+
+ MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
+ MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
+ // Only support imm operands, not relocation slots or others.
+ if (!ImmOperand.isImm())
+ return false;
+
+ assert(RegOperand.isReg() && "Instruction format is not right");
+
+ // If there is any other use of ToBeDeletedReg after the Imm instr, we
+ // cannot delete it.
+ if (!RegOperand.isKill())
+ return false;
+
+ ToBeDeletedReg = RegOperand.getReg();
+ OffsetImm = ImmOperand.getImm();
+
+ return true;
+}
+
+bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
+ MachineInstr *&ADDIMI,
+ int64_t &OffsetAddi,
+ int64_t OffsetImm) const {
+ assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
+ MachineOperand &MO = ADDMI->getOperand(Index);
+
+ if (!MO.isKill())
+ return false;
+
+ bool OtherIntermediateUse = false;
+
+ ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
+ // Currently we handle only the single "add + Imm instr" pair case; exit if
+ // any other intermediate use of ToBeChangedReg is found.
+ // TODO: handle cases where there are other "add + Imm instr" pairs with the
+ // same offset in the Imm instr, such as:
+ //
+ // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
+ // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
+ // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
+ // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
+ // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
+ //
+ // can be converted to:
+ //
+ // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
+ // (OffsetAddi + OffsetImm)
+ // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
+ // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
+
+ if (OtherIntermediateUse || !ADDIMI)
+ return false;
+ // Check if the ADDI instr meets the requirements.
+ if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
+ return false;
+
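+ // The combined offset becomes the displacement of the new ADDI, so it must
+ // fit in a signed 16-bit immediate (isInt<16>, i.e. [-32768, 32767]).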
+ return isInt<16>(OffsetAddi + OffsetImm);
+}
+
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
@@ -2660,34 +2861,34 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
case PPC::RLDICL:
- case PPC::RLDICLo:
+ case PPC::RLDICL_rec:
case PPC::RLDICL_32:
case PPC::RLDICL_32_64: {
// Use APInt's rotate function.
int64_t SH = MI.getOperand(2).getImm();
int64_t MB = MI.getOperand(3).getImm();
- APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICLo) ?
- 64 : 32, SExtImm, true);
+ APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
+ SExtImm, true);
InVal = InVal.rotl(SH);
uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
InVal &= Mask;
// Can't replace negative values with an LI as that will sign-extend
// and not clear the left bits. If we're setting the CR bit, we will use
- // ANDIo which won't sign extend, so that's safe.
+ // ANDI_rec which won't sign extend, so that's safe.
if (isUInt<15>(InVal.getSExtValue()) ||
- (Opc == PPC::RLDICLo && isUInt<16>(InVal.getSExtValue()))) {
+ (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
ReplaceWithLI = true;
Is64BitLI = Opc != PPC::RLDICL_32;
NewImm = InVal.getSExtValue();
- SetCR = Opc == PPC::RLDICLo;
+ SetCR = Opc == PPC::RLDICL_rec;
break;
}
return false;
}
case PPC::RLWINM:
case PPC::RLWINM8:
- case PPC::RLWINMo:
- case PPC::RLWINM8o: {
+ case PPC::RLWINM_rec:
+ case PPC::RLWINM8_rec: {
int64_t SH = MI.getOperand(2).getImm();
int64_t MB = MI.getOperand(3).getImm();
int64_t ME = MI.getOperand(4).getImm();
@@ -2698,15 +2899,15 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
InVal &= Mask;
// Can't replace negative values with an LI as that will sign-extend
// and not clear the left bits. If we're setting the CR bit, we will use
- // ANDIo which won't sign extend, so that's safe.
+ // ANDI_rec which won't sign extend, so that's safe.
bool ValueFits = isUInt<15>(InVal.getSExtValue());
- ValueFits |= ((Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o) &&
+ ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
isUInt<16>(InVal.getSExtValue()));
if (ValueFits) {
ReplaceWithLI = true;
- Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
+ Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
NewImm = InVal.getSExtValue();
- SetCR = Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o;
+ SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
break;
}
return false;
@@ -2768,7 +2969,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
LII.Is64Bit = Is64BitLI;
LII.SetCR = SetCR;
// If we're setting the CR, the original load-immediate must be kept (as an
- // operand to ANDIo/ANDI8o).
+ // operand to ANDI_rec/ANDI8_rec).
if (KilledDef && SetCR)
*KilledDef = nullptr;
replaceInstrWithLI(MI, LII);
@@ -2819,13 +3020,13 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
III.IsSummingOperands = true;
III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
break;
- case PPC::ADDCo:
+ case PPC::ADDC_rec:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
III.IsSummingOperands = true;
- III.ImmOpcode = PPC::ADDICo;
+ III.ImmOpcode = PPC::ADDIC_rec;
break;
case PPC::SUBFC:
case PPC::SUBFC8:
@@ -2851,8 +3052,8 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
III.IsCommutative = false;
III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
break;
- case PPC::ANDo:
- case PPC::AND8o:
+ case PPC::AND_rec:
+ case PPC::AND8_rec:
case PPC::OR:
case PPC::OR8:
case PPC::XOR:
@@ -2863,8 +3064,12 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
III.IsCommutative = true;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
- case PPC::ANDo: III.ImmOpcode = PPC::ANDIo; break;
- case PPC::AND8o: III.ImmOpcode = PPC::ANDIo8; break;
+ case PPC::AND_rec:
+ III.ImmOpcode = PPC::ANDI_rec;
+ break;
+ case PPC::AND8_rec:
+ III.ImmOpcode = PPC::ANDI8_rec;
+ break;
case PPC::OR: III.ImmOpcode = PPC::ORI; break;
case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
@@ -2873,18 +3078,18 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
break;
case PPC::RLWNM:
case PPC::RLWNM8:
- case PPC::RLWNMo:
- case PPC::RLWNM8o:
+ case PPC::RLWNM_rec:
+ case PPC::RLWNM8_rec:
case PPC::SLW:
case PPC::SLW8:
- case PPC::SLWo:
- case PPC::SLW8o:
+ case PPC::SLW_rec:
+ case PPC::SLW8_rec:
case PPC::SRW:
case PPC::SRW8:
- case PPC::SRWo:
- case PPC::SRW8o:
+ case PPC::SRW_rec:
+ case PPC::SRW8_rec:
case PPC::SRAW:
- case PPC::SRAWo:
+ case PPC::SRAW_rec:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
@@ -2894,8 +3099,8 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
// This does not apply to shift right algebraic because a value
// out of range will produce a -1/0.
III.ImmWidth = 16;
- if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 ||
- Opc == PPC::RLWNMo || Opc == PPC::RLWNM8o)
+ if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
+ Opc == PPC::RLWNM8_rec)
III.TruncateImmTo = 5;
else
III.TruncateImmTo = 6;
@@ -2903,38 +3108,50 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
default: llvm_unreachable("Unknown opcode");
case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
- case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break;
- case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break;
+ case PPC::RLWNM_rec:
+ III.ImmOpcode = PPC::RLWINM_rec;
+ break;
+ case PPC::RLWNM8_rec:
+ III.ImmOpcode = PPC::RLWINM8_rec;
+ break;
case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
- case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break;
- case PPC::SLW8o: III.ImmOpcode = PPC::RLWINM8o; break;
+ case PPC::SLW_rec:
+ III.ImmOpcode = PPC::RLWINM_rec;
+ break;
+ case PPC::SLW8_rec:
+ III.ImmOpcode = PPC::RLWINM8_rec;
+ break;
case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
- case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break;
- case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break;
+ case PPC::SRW_rec:
+ III.ImmOpcode = PPC::RLWINM_rec;
+ break;
+ case PPC::SRW8_rec:
+ III.ImmOpcode = PPC::RLWINM8_rec;
+ break;
case PPC::SRAW:
III.ImmWidth = 5;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRAWI;
break;
- case PPC::SRAWo:
+ case PPC::SRAW_rec:
III.ImmWidth = 5;
III.TruncateImmTo = 0;
- III.ImmOpcode = PPC::SRAWIo;
+ III.ImmOpcode = PPC::SRAWI_rec;
break;
}
break;
case PPC::RLDCL:
- case PPC::RLDCLo:
+ case PPC::RLDCL_rec:
case PPC::RLDCR:
- case PPC::RLDCRo:
+ case PPC::RLDCR_rec:
case PPC::SLD:
- case PPC::SLDo:
+ case PPC::SLD_rec:
case PPC::SRD:
- case PPC::SRDo:
+ case PPC::SRD_rec:
case PPC::SRAD:
- case PPC::SRADo:
+ case PPC::SRAD_rec:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
@@ -2944,30 +3161,38 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
// This does not apply to shift right algebraic because a value
// out of range will produce a -1/0.
III.ImmWidth = 16;
- if (Opc == PPC::RLDCL || Opc == PPC::RLDCLo ||
- Opc == PPC::RLDCR || Opc == PPC::RLDCRo)
+ if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
+ Opc == PPC::RLDCR_rec)
III.TruncateImmTo = 6;
else
III.TruncateImmTo = 7;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
- case PPC::RLDCLo: III.ImmOpcode = PPC::RLDICLo; break;
+ case PPC::RLDCL_rec:
+ III.ImmOpcode = PPC::RLDICL_rec;
+ break;
case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
- case PPC::RLDCRo: III.ImmOpcode = PPC::RLDICRo; break;
+ case PPC::RLDCR_rec:
+ III.ImmOpcode = PPC::RLDICR_rec;
+ break;
case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
- case PPC::SLDo: III.ImmOpcode = PPC::RLDICRo; break;
+ case PPC::SLD_rec:
+ III.ImmOpcode = PPC::RLDICR_rec;
+ break;
case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
- case PPC::SRDo: III.ImmOpcode = PPC::RLDICLo; break;
+ case PPC::SRD_rec:
+ III.ImmOpcode = PPC::RLDICL_rec;
+ break;
case PPC::SRAD:
III.ImmWidth = 6;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRADI;
break;
- case PPC::SRADo:
+ case PPC::SRAD_rec:
III.ImmWidth = 6;
III.TruncateImmTo = 0;
- III.ImmOpcode = PPC::SRADIo;
+ III.ImmOpcode = PPC::SRADI_rec;
break;
}
break;
@@ -3538,16 +3763,16 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg();
unsigned Opc = MI.getOpcode();
- bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLWo ||
- Opc == PPC::SRW || Opc == PPC::SRWo ||
- Opc == PPC::SLW8 || Opc == PPC::SLW8o ||
- Opc == PPC::SRW8 || Opc == PPC::SRW8o;
- bool SpecialShift64 =
- Opc == PPC::SLD || Opc == PPC::SLDo || Opc == PPC::SRD || Opc == PPC::SRDo;
- bool SetCR = Opc == PPC::SLWo || Opc == PPC::SRWo ||
- Opc == PPC::SLDo || Opc == PPC::SRDo;
- bool RightShift =
- Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo;
+ bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
+ Opc == PPC::SRW || Opc == PPC::SRW_rec ||
+ Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
+ Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
+ bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
+ Opc == PPC::SRD || Opc == PPC::SRD_rec;
+ bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
+ Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
+ bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
+ Opc == PPC::SRD_rec;
MI.setDesc(get(III.ImmOpcode));
if (ConstantOpNo == III.OpNoForForwarding) {
@@ -3651,27 +3876,21 @@ int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
// i.e. bits 0 through 31 are the same as bit 32.
static bool isSignExtendingOp(const MachineInstr &MI) {
int Opcode = MI.getOpcode();
- if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
- Opcode == PPC::LIS || Opcode == PPC::LIS8 ||
- Opcode == PPC::SRAW || Opcode == PPC::SRAWo ||
- Opcode == PPC::SRAWI || Opcode == PPC::SRAWIo ||
- Opcode == PPC::LWA || Opcode == PPC::LWAX ||
- Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 ||
- Opcode == PPC::LHA || Opcode == PPC::LHAX ||
- Opcode == PPC::LHA8 || Opcode == PPC::LHAX8 ||
- Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
- Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
- Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
- Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
- Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
- Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
- Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
- Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 ||
- Opcode == PPC::EXTSB || Opcode == PPC::EXTSBo ||
- Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo ||
- Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 ||
- Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo ||
- Opcode == PPC::SETB || Opcode == PPC::SETB8 ||
+ if (Opcode == PPC::LI || Opcode == PPC::LI8 || Opcode == PPC::LIS ||
+ Opcode == PPC::LIS8 || Opcode == PPC::SRAW || Opcode == PPC::SRAW_rec ||
+ Opcode == PPC::SRAWI || Opcode == PPC::SRAWI_rec || Opcode == PPC::LWA ||
+ Opcode == PPC::LWAX || Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 ||
+ Opcode == PPC::LHA || Opcode == PPC::LHAX || Opcode == PPC::LHA8 ||
+ Opcode == PPC::LHAX8 || Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
+ Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || Opcode == PPC::LBZU ||
+ Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
+ Opcode == PPC::LHZ || Opcode == PPC::LHZX || Opcode == PPC::LHZ8 ||
+ Opcode == PPC::LHZX8 || Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
+ Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || Opcode == PPC::EXTSB ||
+ Opcode == PPC::EXTSB_rec || Opcode == PPC::EXTSH ||
+ Opcode == PPC::EXTSH_rec || Opcode == PPC::EXTSB8 ||
+ Opcode == PPC::EXTSH8 || Opcode == PPC::EXTSW ||
+ Opcode == PPC::EXTSW_rec || Opcode == PPC::SETB || Opcode == PPC::SETB8 ||
Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 ||
Opcode == PPC::EXTSB8_32_64)
return true;
@@ -3679,8 +3898,8 @@ static bool isSignExtendingOp(const MachineInstr &MI) {
if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33)
return true;
- if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
- Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo) &&
+ if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
+ Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
MI.getOperand(3).getImm() > 0 &&
MI.getOperand(3).getImm() <= MI.getOperand(4).getImm())
return true;
@@ -3703,52 +3922,46 @@ static bool isZeroExtendingOp(const MachineInstr &MI) {
// We have some variations of rotate-and-mask instructions
// that clear higher 32-bits.
- if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
- Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo ||
+ if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
+ Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
Opcode == PPC::RLDICL_32_64) &&
MI.getOperand(3).getImm() >= 32)
return true;
- if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
+ if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
MI.getOperand(3).getImm() >= 32 &&
MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm())
return true;
- if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
- Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
+ if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
+ Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
MI.getOperand(3).getImm() <= MI.getOperand(4).getImm())
return true;
// There are other instructions that clear higher 32-bits.
- if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
- Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
+ if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
+ Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8 ||
- Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
- Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo ||
- Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW ||
- Opcode == PPC::SLW || Opcode == PPC::SLWo ||
- Opcode == PPC::SRW || Opcode == PPC::SRWo ||
- Opcode == PPC::SLW8 || Opcode == PPC::SRW8 ||
- Opcode == PPC::SLWI || Opcode == PPC::SLWIo ||
- Opcode == PPC::SRWI || Opcode == PPC::SRWIo ||
- Opcode == PPC::LWZ || Opcode == PPC::LWZX ||
- Opcode == PPC::LWZU || Opcode == PPC::LWZUX ||
- Opcode == PPC::LWBRX || Opcode == PPC::LHBRX ||
- Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
- Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
- Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
- Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
- Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 ||
- Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8 ||
- Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 ||
- Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
- Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 ||
- Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
- Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
- Opcode == PPC::ANDIo || Opcode == PPC::ANDISo ||
- Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWIo ||
- Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWIo ||
+ Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
+ Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec ||
+ Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW || Opcode == PPC::SLW ||
+ Opcode == PPC::SLW_rec || Opcode == PPC::SRW || Opcode == PPC::SRW_rec ||
+ Opcode == PPC::SLW8 || Opcode == PPC::SRW8 || Opcode == PPC::SLWI ||
+ Opcode == PPC::SLWI_rec || Opcode == PPC::SRWI ||
+ Opcode == PPC::SRWI_rec || Opcode == PPC::LWZ || Opcode == PPC::LWZX ||
+ Opcode == PPC::LWZU || Opcode == PPC::LWZUX || Opcode == PPC::LWBRX ||
+ Opcode == PPC::LHBRX || Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
+ Opcode == PPC::LHZU || Opcode == PPC::LHZUX || Opcode == PPC::LBZ ||
+ Opcode == PPC::LBZX || Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
+ Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 || Opcode == PPC::LWZU8 ||
+ Opcode == PPC::LWZUX8 || Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 ||
+ Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || Opcode == PPC::LHZU8 ||
+ Opcode == PPC::LHZUX8 || Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
+ Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
+ Opcode == PPC::ANDI_rec || Opcode == PPC::ANDIS_rec ||
+ Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWI_rec ||
+ Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWI_rec ||
Opcode == PPC::MFVSRWZ)
return true;
@@ -3842,14 +4055,14 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
return false;
}
- case PPC::ANDIo:
- case PPC::ANDISo:
+ case PPC::ANDI_rec:
+ case PPC::ANDIS_rec:
case PPC::ORI:
case PPC::ORIS:
case PPC::XORI:
case PPC::XORIS:
- case PPC::ANDIo8:
- case PPC::ANDISo8:
+ case PPC::ANDI8_rec:
+ case PPC::ANDIS8_rec:
case PPC::ORI8:
case PPC::ORIS8:
case PPC::XORI8:
@@ -4044,12 +4257,10 @@ MachineInstr *PPCInstrInfo::findLoopInstr(
// Return true if we get the base operand, byte offset, and memory width of an
// instruction. Width is the size of the memory that is being loaded/stored.
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
- const MachineInstr &LdSt,
- const MachineOperand *&BaseReg,
- int64_t &Offset,
- unsigned &Width,
- const TargetRegisterInfo *TRI) const {
- assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+ const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
+ unsigned &Width, const TargetRegisterInfo *TRI) const {
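+ // Callers may probe arbitrary instructions, so fail gracefully on
+ // non-memory operations instead of asserting.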
+ if (!LdSt.mayLoadOrStore())
+ return false;
// Handle only loads/stores with base register followed by immediate offset.
if (LdSt.getNumExplicitOperands() != 3)
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 19ab30cb0908..2fe8df0e1d68 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -280,7 +280,7 @@ public:
unsigned FalseReg) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -346,8 +346,6 @@ public:
bool DefinesPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred) const override;
- bool isPredicable(const MachineInstr &MI) const override;
-
// Comparison optimization.
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
@@ -422,6 +420,16 @@ public:
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
+ bool foldFrameOffset(MachineInstr &MI) const;
+ bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const;
+ bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const;
+ bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg,
+ unsigned &XFormOpcode,
+ int64_t &OffsetOfImmInstr,
+ ImmInstrInfo &III) const;
+ bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
+ MachineInstr *&ADDIMI, int64_t &OffsetAddi,
+ int64_t OffsetImm) const;
/// Fixup killed/dead flag for register \p RegNo between instructions [\p
/// StartMI, \p EndMI]. Some PostRA transformations may violate register
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 6a3f43d69133..b38ca3af63f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -61,10 +61,6 @@ def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;
-def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>,
- SDTCisVec<1>
-]>;
-
def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;
@@ -117,6 +113,10 @@ def SDT_PPCextswsli : SDTypeProfile<1, 2, [ // extswsli
SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2>
]>;
+def SDT_PPCFPMinMax : SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>
+]>;
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@@ -165,7 +165,8 @@ def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
-
+def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_PPCFPMinMax, []>;
+def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_PPCFPMinMax, []>;
def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
@@ -199,7 +200,6 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
-def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
@@ -486,7 +486,7 @@ def mul_without_simm16 : BinOpWithoutSImm16Operand<mul>;
// PowerPC Flag Definitions.
class isPPC64 { bit PPC64 = 1; }
-class isDOT { bit RC = 1; }
+class isRecordForm { bit RC = 1; }
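+// A record form ("." suffix in assembly) sets the RC bit, so the instruction
+// also updates a CR field (CR0 for integer ops, CR1 for FP ops) based on its
+// result.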
class RegConstraint<string C> {
string Constraints = C;
@@ -961,9 +961,9 @@ multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XForm_6<opcode, xo, OOL, IOL,
+ def _rec : XForm_6<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -976,9 +976,9 @@ multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
- def o : XForm_6<opcode, xo, OOL, IOL,
+ def _rec : XForm_6<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -991,9 +991,9 @@ multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
- def o : XForm_10<opcode, xo, OOL, IOL,
+ def _rec : XForm_10<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1005,9 +1005,9 @@ multiclass XForm_11r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XForm_11<opcode, xo, OOL, IOL,
+ def _rec : XForm_11<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1019,9 +1019,9 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ def _rec : XOForm_1<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1035,9 +1035,9 @@ multiclass XOForm_1rx<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XOForm_1<opcode, xo, 0, OOL, IOL,
+ def _rec : XOForm_1<opcode, xo, 0, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
let BaseName = !strconcat(asmbase, "O") in {
let Defs = [XER] in
@@ -1045,9 +1045,9 @@ multiclass XOForm_1rx<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat("o ", asmstr)), itin,
[]>, RecFormRel;
let Defs = [XER, CR0] in
- def Oo : XOForm_1<opcode, xo, 1, OOL, IOL,
+ def O_rec : XOForm_1<opcode, xo, 1, OOL, IOL,
!strconcat(asmbase, !strconcat("o. ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1061,9 +1061,9 @@ multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ def _rec : XOForm_1<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel, PPC970_DGroup_First,
+ []>, isRecordForm, RecFormRel, PPC970_DGroup_First,
PPC970_DGroup_Cracked;
}
let BaseName = !strconcat(asmbase, "O") in {
@@ -1072,9 +1072,9 @@ multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat("o ", asmstr)), itin,
[]>, RecFormRel;
let Defs = [XER, CR0] in
- def Oo : XOForm_1<opcode, xo, 1, OOL, IOL,
+ def O_rec : XOForm_1<opcode, xo, 1, OOL, IOL,
!strconcat(asmbase, !strconcat("o. ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1087,9 +1087,9 @@ multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
- def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ def _rec : XOForm_1<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
let BaseName = !strconcat(asmbase, "O") in {
let Defs = [CARRY, XER] in
@@ -1097,9 +1097,9 @@ multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat("o ", asmstr)), itin,
[]>, RecFormRel;
let Defs = [CARRY, XER, CR0] in
- def Oo : XOForm_1<opcode, xo, 1, OOL, IOL,
+ def O_rec : XOForm_1<opcode, xo, 1, OOL, IOL,
!strconcat(asmbase, !strconcat("o. ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1111,9 +1111,9 @@ multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XOForm_3<opcode, xo, oe, OOL, IOL,
+ def _rec : XOForm_3<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
let BaseName = !strconcat(asmbase, "O") in {
let Defs = [XER] in
@@ -1121,9 +1121,9 @@ multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat("o ", asmstr)), itin,
[]>, RecFormRel;
let Defs = [XER, CR0] in
- def Oo : XOForm_3<opcode, xo, 1, OOL, IOL,
+ def O_rec : XOForm_3<opcode, xo, 1, OOL, IOL,
!strconcat(asmbase, !strconcat("o. ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1136,9 +1136,9 @@ multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
- def o : XOForm_3<opcode, xo, oe, OOL, IOL,
+ def _rec : XOForm_3<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
let BaseName = !strconcat(asmbase, "O") in {
let Defs = [CARRY, XER] in
@@ -1146,9 +1146,9 @@ multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat("o ", asmstr)), itin,
[]>, RecFormRel;
let Defs = [CARRY, XER, CR0] in
- def Oo : XOForm_3<opcode, xo, 1, OOL, IOL,
+ def O_rec : XOForm_3<opcode, xo, 1, OOL, IOL,
!strconcat(asmbase, !strconcat("o. ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1160,9 +1160,9 @@ multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : MForm_2<opcode, OOL, IOL,
+ def _rec : MForm_2<opcode, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1174,9 +1174,9 @@ multiclass MDForm_1r<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : MDForm_1<opcode, xo, OOL, IOL,
+ def _rec : MDForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1188,9 +1188,9 @@ multiclass MDSForm_1r<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : MDSForm_1<opcode, xo, OOL, IOL,
+ def _rec : MDSForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1203,9 +1203,9 @@ multiclass XSForm_1rc<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
- def o : XSForm_1<opcode, xo, OOL, IOL,
+ def _rec : XSForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1217,9 +1217,9 @@ multiclass XSForm_1r<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XSForm_1<opcode, xo, OOL, IOL,
+ def _rec : XSForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1231,9 +1231,9 @@ multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
- def o : XForm_26<opcode, xo, OOL, IOL,
+ def _rec : XForm_26<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1245,9 +1245,9 @@ multiclass XForm_28r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
- def o : XForm_28<opcode, xo, OOL, IOL,
+ def _rec : XForm_28<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1259,9 +1259,9 @@ multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
- def o : AForm_1<opcode, xo, OOL, IOL,
+ def _rec : AForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1273,9 +1273,9 @@ multiclass AForm_2r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
- def o : AForm_2<opcode, xo, OOL, IOL,
+ def _rec : AForm_2<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1287,9 +1287,9 @@ multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
- def o : AForm_3<opcode, xo, OOL, IOL,
+ def _rec : AForm_3<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1390,12 +1390,13 @@ def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F),
}
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
- let isReturn = 1, Uses = [LR, RM] in
+ let isPredicable = 1, isReturn = 1, Uses = [LR, RM] in
def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
[(retflag)]>, Requires<[In32BitMode]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
- def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
- []>;
+ let isPredicable = 1 in
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>;
let isCodeGenOnly = 1 in {
def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
@@ -1428,9 +1429,10 @@ let Defs = [LR] in
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isBarrier = 1 in {
- def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
- "b $dst", IIC_BrB,
- [(br bb:$dst)]>;
+ let isPredicable = 1 in
+ def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
+ "b $dst", IIC_BrB,
+ [(br bb:$dst)]>;
def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst),
"ba $dst", IIC_BrB, []>;
}
@@ -1551,9 +1553,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
}
}
let Uses = [CTR, RM] in {
- def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
- "bctrl", IIC_BrB, [(PPCbctrl)]>,
- Requires<[In32BitMode]>;
+ let isPredicable = 1 in
+ def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
+ Requires<[In32BitMode]>;
let isCodeGenOnly = 1 in {
def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
@@ -1640,6 +1643,15 @@ def TCRETURNri : PPCEmitTimePseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
"#TC_RETURNr $dst $offset",
[]>;
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+ Defs = [LR, R2], Uses = [CTR, RM], RST = 2 in {
+ def BCTRL_LWZinto_toc:
+ XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 1, 32, (outs),
+ (ins memri:$src), "bctrl\n\tlwz 2, $src", IIC_BrB,
+ [(PPCbctrl_load_toc iaddr:$src)]>, Requires<[In32BitMode]>;
+}
+
let isCodeGenOnly = 1 in {
@@ -1905,15 +1917,15 @@ def LWARX : XForm_1_memOp<31, 20, (outs gprc:$rD), (ins memrr:$src),
// Instructions to support lock versions of atomics
// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
def LBARXL : XForm_1_memOp<31, 52, (outs gprc:$rD), (ins memrr:$src),
- "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm,
Requires<[HasPartwordAtomics]>;
def LHARXL : XForm_1_memOp<31, 116, (outs gprc:$rD), (ins memrr:$src),
- "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm,
Requires<[HasPartwordAtomics]>;
def LWARXL : XForm_1_memOp<31, 20, (outs gprc:$rD), (ins memrr:$src),
- "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT;
+ "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm;
// The atomic instructions use the destination register as well as the next one
// or two registers in order (modulo 31).
@@ -1926,14 +1938,14 @@ def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$rD), (ins gprc:$rA, u5imm:$FC),
let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
def STBCX : XForm_1_memOp<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
"stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
- isDOT, Requires<[HasPartwordAtomics]>;
+ isRecordForm, Requires<[HasPartwordAtomics]>;
def STHCX : XForm_1_memOp<31, 726, (outs), (ins gprc:$rS, memrr:$dst),
"sthcx. $rS, $dst", IIC_LdStSTWCX, []>,
- isDOT, Requires<[HasPartwordAtomics]>;
+ isRecordForm, Requires<[HasPartwordAtomics]>;
def STWCX : XForm_1_memOp<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
- "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
+ "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isRecordForm;
}
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
@@ -2100,6 +2112,7 @@ def LFIWZX : XForm_25_memOp<31, 887, (outs f8rc:$frD), (ins memrr:$src),
}
// Load Multiple
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
"lmw $rD, $src", IIC_LdStLMW, []>;
@@ -2254,6 +2267,7 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
}
// Store Multiple
+let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
"stmw $rS, $dst", IIC_LdStLMW, []>;
@@ -2287,9 +2301,9 @@ def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
[(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>,
RecFormRel, PPC970_DGroup_Cracked;
let Defs = [CARRY, CR0] in
-def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
+def ADDIC_rec : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IIC_IntGeneral,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm),
"addis $rD, $rA, $imm", IIC_IntSimple,
@@ -2319,14 +2333,14 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CR0] in {
-def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+def ANDI_rec : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IIC_IntGeneral,
[(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
- isDOT;
-def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ isRecordForm;
+def ANDIS_rec : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IIC_IntGeneral,
[(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
- isDOT;
+ isRecordForm;
}
def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IIC_IntSimple,
@@ -2687,6 +2701,7 @@ def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+let hasSideEffects = 0 in {
let Defs = [LR] in {
def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
"mtlr $rS", IIC_SprMTSPR>,
@@ -2697,6 +2712,7 @@ def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
"mflr $rT", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+}
let isCodeGenOnly = 1 in {
// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed
@@ -2798,8 +2814,8 @@ let Uses = [RM] in {
PPC970_DGroup_Single, PPC970_Unit_FPU;
let Defs = [CR1] in
- def MFFSo : XForm_42<63, 583, (outs f8rc:$rT), (ins),
- "mffs. $rT", IIC_IntMFFS, []>, isDOT;
+ def MFFS_rec : XForm_42<63, 583, (outs f8rc:$rT), (ins),
+ "mffs. $rT", IIC_IntMFFS, []>, isRecordForm;
def MFFSCE : X_FRT5_XO2_XO3_XO10<63, 0, 1, 583, (outs f8rc:$rT), (ins),
"mffsce $rT", IIC_IntMFFS, []>,
@@ -3040,10 +3056,10 @@ def RLWINM : MForm_2<21,
"rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
[]>, RecFormRel;
let Defs = [CR0] in
-def RLWINMo : MForm_2<21,
+def RLWINM_rec : MForm_2<21,
(outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
"rlwinm. $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
- []>, isDOT, RecFormRel, PPC970_DGroup_Cracked;
+ []>, isRecordForm, RecFormRel, PPC970_DGroup_Cracked;
}
defm RLWNM : MForm_2r<23, (outs gprc:$rA),
(ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME),
@@ -3233,6 +3249,11 @@ def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentr
// the function label.
def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+// Pseudo-instruction marked for deletion. When deleting the instruction would
+// cause iterator invalidation in MIR transformation passes, this pseudo can be
+// used instead. It will be removed unconditionally at pre-emit time (prior to
+// branch selection).
+def UNENCODED_NOP: PPCEmitTimePseudo<(outs), (ins), "#UNENCODED_NOP", []>;
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
@@ -3275,10 +3296,10 @@ def : Pat<(f64 (fpextend f32:$src)),
// source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
// The rule for seq_cst is duplicated to work with both 64 bits and 32 bits
// versions of Power.
-def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
-def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
-def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>;
-def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
+def : Pat<(atomic_fence (i64 7), (timm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (i32 7), (timm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (timm), (timm)), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (timm), (timm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
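+// Note: the fence ordering/scope operands are target constants, so they are
+// matched with timm rather than imm.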
let Predicates = [HasFPU] in {
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
@@ -4066,24 +4087,24 @@ def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)),
def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
(SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-def ANDIo_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
- "#ANDIo_1_EQ_BIT",
+def ANDI_rec_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDI_rec_1_EQ_BIT",
[(set i1:$dst, (trunc (not i32:$in)))]>;
-def ANDIo_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
- "#ANDIo_1_GT_BIT",
+def ANDI_rec_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDI_rec_1_GT_BIT",
[(set i1:$dst, (trunc i32:$in))]>;
-def ANDIo_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
- "#ANDIo_1_EQ_BIT8",
+def ANDI_rec_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDI_rec_1_EQ_BIT8",
[(set i1:$dst, (trunc (not i64:$in)))]>;
-def ANDIo_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
- "#ANDIo_1_GT_BIT8",
+def ANDI_rec_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDI_rec_1_GT_BIT8",
[(set i1:$dst, (trunc i64:$in))]>;
def : Pat<(i1 (not (trunc i32:$in))),
- (ANDIo_1_EQ_BIT $in)>;
+ (ANDI_rec_1_EQ_BIT $in)>;
def : Pat<(i1 (not (trunc i64:$in))),
- (ANDIo_1_EQ_BIT8 $in)>;
+ (ANDI_rec_1_EQ_BIT8 $in)>;
//===----------------------------------------------------------------------===//
// PowerPC Instructions used for assembler/disassembler only
@@ -4167,22 +4188,22 @@ def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA),
def MTFSFI : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
"mtfsfi $BF, $U, $W", IIC_IntMFFS>;
-def MTFSFIo : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
- "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isDOT;
+def MTFSFI_rec : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
+ "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isRecordForm;
def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>;
-def : InstAlias<"mtfsfi. $BF, $U", (MTFSFIo crrc:$BF, i32imm:$U, 0)>;
+def : InstAlias<"mtfsfi. $BF, $U", (MTFSFI_rec crrc:$BF, i32imm:$U, 0)>;
let Predicates = [HasFPU] in {
def MTFSF : XFLForm_1<63, 711, (outs),
(ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
"mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>;
-def MTFSFo : XFLForm_1<63, 711, (outs),
+def MTFSF_rec : XFLForm_1<63, 711, (outs),
(ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
- "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isDOT;
+ "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isRecordForm;
def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>;
-def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSFo i32imm:$FLM, f8rc:$FRB, 0, 0)>;
+def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSF_rec i32imm:$FLM, f8rc:$FRB, 0, 0)>;
}
def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
@@ -4200,8 +4221,8 @@ def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB),
def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
let Defs = [CR0] in
-def SLBFEEo : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB),
- "slbfee. $RT, $RB", IIC_SprSLBFEE, []>, isDOT;
+def SLBFEE_rec : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB),
+ "slbfee. $RT, $RB", IIC_SprSLBFEE, []>, isRecordForm;
def TLBIA : XForm_0<31, 370, (outs), (ins),
"tlbia", IIC_SprTLBIA, []>;
@@ -4244,7 +4265,7 @@ def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
def TLBSX2D : XForm_base_r3xo<31, 914, (outs),
(ins gprc:$RST, gprc:$A, gprc:$B),
"tlbsx. $RST, $A, $B", IIC_LdStLoad, []>,
- Requires<[IsPPC4xx]>, isDOT;
+ Requires<[IsPPC4xx]>, isRecordForm;
def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>;
@@ -4462,10 +4483,10 @@ def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>;
def : InstAlias<"xnop", (XORI R0, R0, 0)>;
def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
-def : InstAlias<"mr. $rA, $rB", (OR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"mr. $rA, $rB", (OR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
-def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"not. $rA, $rB", (NOR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
@@ -4531,13 +4552,13 @@ def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm",
(ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm",
(ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
-def SUBICo : PPCAsmPseudo<"subic. $rA, $rB, $imm",
+def SUBIC_rec : PPCAsmPseudo<"subic. $rA, $rB, $imm",
(ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
@@ -4590,109 +4611,109 @@ def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>,
def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
-def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
+def EXTLWI_rec : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTRWI : PPCAsmPseudo<"extrwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
-def EXTRWIo : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b",
+def EXTRWI_rec : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def INSLWI : PPCAsmPseudo<"inslwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
-def INSLWIo : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b",
+def INSLWI_rec : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def INSRWI : PPCAsmPseudo<"insrwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
-def INSRWIo : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b",
+def INSRWI_rec : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def ROTRWI : PPCAsmPseudo<"rotrwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
-def ROTRWIo : PPCAsmPseudo<"rotrwi. $rA, $rS, $n",
+def ROTRWI_rec : PPCAsmPseudo<"rotrwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
-def SLWIo : PPCAsmPseudo<"slwi. $rA, $rS, $n",
+def SLWI_rec : PPCAsmPseudo<"slwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
-def SRWIo : PPCAsmPseudo<"srwi. $rA, $rS, $n",
+def SRWI_rec : PPCAsmPseudo<"srwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def CLRRWI : PPCAsmPseudo<"clrrwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
-def CLRRWIo : PPCAsmPseudo<"clrrwi. $rA, $rS, $n",
+def CLRRWI_rec : PPCAsmPseudo<"clrrwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
-def CLRLSLWIo : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n",
+def CLRLSLWI_rec : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
-def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINM_rec gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
-def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
+def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNM_rec gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
-def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
+def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINM_rec gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
def : InstAlias<"cntlzw $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>;
-def : InstAlias<"cntlzw. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>;
+def : InstAlias<"cntlzw. $rA, $rS", (CNTLZW_rec gprc:$rA, gprc:$rS)>;
// The POWER variant
def : MnemonicAlias<"cntlz", "cntlzw">;
def : MnemonicAlias<"cntlz.", "cntlzw.">;
def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
-def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b",
+def EXTLDI_rec : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def EXTRDI : PPCAsmPseudo<"extrdi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
-def EXTRDIo : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b",
+def EXTRDI_rec : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def INSRDI : PPCAsmPseudo<"insrdi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
-def INSRDIo : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b",
+def INSRDI_rec : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def ROTRDI : PPCAsmPseudo<"rotrdi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
-def ROTRDIo : PPCAsmPseudo<"rotrdi. $rA, $rS, $n",
+def ROTRDI_rec : PPCAsmPseudo<"rotrdi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
-def SLDIo : PPCAsmPseudo<"sldi. $rA, $rS, $n",
+def SLDI_rec : PPCAsmPseudo<"sldi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
-def SRDIo : PPCAsmPseudo<"srdi. $rA, $rS, $n",
+def SRDI_rec : PPCAsmPseudo<"srdi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def CLRRDI : PPCAsmPseudo<"clrrdi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
-def CLRRDIo : PPCAsmPseudo<"clrrdi. $rA, $rS, $n",
+def CLRRDI_rec : PPCAsmPseudo<"clrrdi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def CLRLSLDI : PPCAsmPseudo<"clrlsldi $rA, $rS, $b, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
-def CLRLSLDIo : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n",
+def CLRLSLDI_rec : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
def SUBPCIS : PPCAsmPseudo<"subpcis $RT, $D", (ins g8rc:$RT, s16imm:$D)>;
def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
-def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
+def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICL_rec g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
-def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
+def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCL_rec g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def : InstAlias<"clrldi $rA, $rS, $n",
(RLDICL_32_64 g8rc:$rA, gprc:$rS, 0, u6imm:$n)>;
-def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICL_rec g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def : InstAlias<"lnia $RT", (ADDPCIS g8rc:$RT, 0)>;
def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
-def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b",
+def RLWINMbm_rec : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
-def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b",
+def RLWIMIbm_rec : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
-def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
+def RLWNMbm_rec : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
// These generic branch instruction forms are used for the assembler parser only.
@@ -4921,7 +4942,7 @@ let mayStore = 1 in
def CP_PASTE : X_L1_RA5_RB5<31, 902, "paste" , gprc, IIC_LdStPASTE, []>;
let mayStore = 1, Defs = [CR0] in
-def CP_PASTEo : X_L1_RA5_RB5<31, 902, "paste.", gprc, IIC_LdStPASTE, []>, isDOT;
+def CP_PASTE_rec : X_L1_RA5_RB5<31, 902, "paste.", gprc, IIC_LdStPASTE, []>, isRecordForm;
def CP_COPYx : PPCAsmPseudo<"copy $rA, $rB" , (ins gprc:$rA, gprc:$rB)>;
def CP_PASTEx : PPCAsmPseudo<"paste $rA, $rB", (ins gprc:$rA, gprc:$rB)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 2aad5860d87f..be6b30ffa08b 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -120,11 +120,11 @@ multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
[(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>;
let Defs = [CR6] in
- def o : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ def _rec : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[(set InTy:$XT,
(InTy (PPCvcmp_o InTy:$XA, InTy:$XB, xo)))]>,
- isDOT;
+ isRecordForm;
}
}
@@ -152,7 +152,6 @@ def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
let Predicates = [HasVSX] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
let hasSideEffects = 0 in { // VSX instructions don't have side effects.
-let Uses = [RM] in {
// Load indexed instructions
let mayLoad = 1, mayStore = 0 in {
@@ -214,6 +213,7 @@ let Uses = [RM] in {
}
} // mayStore
+ let Uses = [RM] in {
// Add/Mul Instructions
let isCommutable = 1 in {
def XSADDDP : XX3Form<60, 32,
@@ -1255,6 +1255,55 @@ def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
} // AddedComplexity
} // HasVSX
+def FpMinMax {
+ dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
+ (COPY_TO_REGCLASS $B, VSFRC)),
+ VSSRC);
+ dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
+ (COPY_TO_REGCLASS $B, VSFRC)),
+ VSSRC);
+}
+
+let AddedComplexity = 400, Predicates = [HasVSX] in {
+ // f32 Min.
+ def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
+ (f32 FpMinMax.F32Min)>;
+ def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
+ (f32 FpMinMax.F32Min)>;
+ def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Min)>;
+ def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Min)>;
+ // F32 Max.
+ def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
+ (f32 FpMinMax.F32Max)>;
+ def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
+ (f32 FpMinMax.F32Max)>;
+ def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Max)>;
+ def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Max)>;
+
+ // f64 Min.
+ def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
+ (f64 (XSMINDP $A, $B))>;
+ def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
+ (f64 (XSMINDP $A, $B))>;
+ def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
+ (f64 (XSMINDP $A, $B))>;
+ def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+ (f64 (XSMINDP $A, $B))>;
+ // f64 Max.
+ def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
+ (f64 (XSMAXDP $A, $B))>;
+ def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
+ (f64 (XSMAXDP $A, $B))>;
+ def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
+ (f64 (XSMAXDP $A, $B))>;
+ def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+ (f64 (XSMAXDP $A, $B))>;
+}
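For reference, a minimal C++ sketch of the IEEE-754 minNum contract that the fminnum_ieee node models (and that xsmindp is used to provide here). This is a hedged illustration of the node's semantics, not of the lowering itself; the helper name is invented:

    #include <cassert>
    #include <cmath>

    // Sketch of IEEE-754-2008 minNum: a quiet-NaN operand is dropped in
    // favor of the numeric operand; otherwise the smaller value is returned.
    static double minnum_ieee(double a, double b) {
      if (std::isnan(a)) return b;
      if (std::isnan(b)) return a;
      return a < b ? a : b;
    }

    int main() {
      assert(minnum_ieee(1.0, 2.0) == 1.0);
      assert(minnum_ieee(std::nan(""), 2.0) == 2.0); // NaN operand dropped
      return 0;
    }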
+
def ScalarLoads {
dag Li8 = (i32 (extloadi8 xoaddr:$src));
dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
@@ -1330,7 +1379,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
// VSX scalar loads introduced in ISA 2.07
- let mayLoad = 1, mayStore = 0 in {
+ let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
let CodeSize = 3 in
def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src),
"lxsspx $XT, $src", IIC_LdStLFD, []>;
@@ -1355,7 +1404,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
- let mayStore = 1, mayLoad = 0 in {
+ let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
let CodeSize = 3 in
def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
"stxsspx $XT, $dst", IIC_LdStSTFD, []>;
@@ -1912,7 +1961,7 @@ def VectorExtractions {
- The order of elements after the move to GPR is reversed, so we invert
the bits of the index prior to truncating to the range 0-7
*/
- dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDIo8 $Idx, 8)));
+ dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDI8_rec $Idx, 8)));
dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC));
dag BE_MV_VBYTE = (MFVSRD
(EXTRACT_SUBREG
@@ -1931,7 +1980,7 @@ def VectorExtractions {
the bits of the index prior to truncating to the range 0-3
*/
dag BE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (ANDIo8 $Idx, 4), 1, 62)));
+ (RLDICR (ANDI8_rec $Idx, 4), 1, 62)));
dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC));
dag BE_MV_VHALF = (MFVSRD
(EXTRACT_SUBREG
@@ -1949,7 +1998,7 @@ def VectorExtractions {
the bits of the index prior to truncating to the range 0-1
*/
dag BE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (ANDIo8 $Idx, 2), 2, 61)));
+ (RLDICR (ANDI8_rec $Idx, 2), 2, 61)));
dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC));
dag BE_MV_VWORD = (MFVSRD
(EXTRACT_SUBREG
@@ -1965,7 +2014,7 @@ def VectorExtractions {
element indices.
*/
dag BE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (ANDIo8 $Idx, 1), 3, 60)));
+ (RLDICR (ANDI8_rec $Idx, 1), 3, 60)));
dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC));
dag BE_VARIABLE_DWORD =
(MFVSRD (EXTRACT_SUBREG
@@ -2477,6 +2526,43 @@ def : Pat<(i64 (bitconvert f64:$S)),
// (move to FPR, nothing else needed)
def : Pat<(f64 (bitconvert i64:$S)),
(f64 (MTVSRD $S))>;
+
+// Rounding to integer.
+def : Pat<(i64 (lrint f64:$S)),
+ (i64 (MFVSRD (FCTID $S)))>;
+def : Pat<(i64 (lrint f32:$S)),
+ (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
+def : Pat<(i64 (llrint f64:$S)),
+ (i64 (MFVSRD (FCTID $S)))>;
+def : Pat<(i64 (llrint f32:$S)),
+ (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
+def : Pat<(i64 (lround f64:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
+def : Pat<(i64 (lround f32:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
+def : Pat<(i64 (llround f64:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
+def : Pat<(i64 (llround f32:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
+}
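The lround/llround patterns round first with XSRDPI (round to nearest, ties away from zero) and then convert with FCTID, while lrint/llrint convert directly under the current rounding mode. A small hedged illustration of that difference using the C++ standard library:

    #include <cassert>
    #include <cfenv>
    #include <cmath>

    int main() {
      // lround: halfway cases round away from zero, which is what the
      // XSRDPI + FCTID sequence above is intended to produce.
      assert(std::lround(2.5) == 3);
      assert(std::lround(-2.5) == -3);
      // lrint: honors the current rounding mode (round-to-nearest-even by
      // default), matching the direct FCTID conversion.
      std::fesetround(FE_TONEAREST);
      assert(std::lrint(2.5) == 2);
      return 0;
    }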
+
+let Predicates = [HasVSX] in {
+// Rounding for single precision.
+def : Pat<(f32 (fround f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPI
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (fnearbyint f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIC
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (ffloor f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIM
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (fceil f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIP
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (ftrunc f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIZ
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
}
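Each f32 pattern simply widens to the VSX scalar class and reuses the double-precision round instruction for the matching mode: XSRDPI (nearest, ties away from zero), XSRDPIC (current rounding mode), XSRDPIM (toward minus infinity), XSRDPIP (toward plus infinity), and XSRDPIZ (toward zero). A hedged C++ sketch of the libm behavior these nodes stand for:

    #include <cassert>
    #include <cmath>

    int main() {
      float x = -1.5f;
      assert(std::round(x) == -2.0f); // fround -> XSRDPI  (ties away)
      assert(std::floor(x) == -2.0f); // ffloor -> XSRDPIM (toward -inf)
      assert(std::ceil(x)  == -1.0f); // fceil  -> XSRDPIP (toward +inf)
      assert(std::trunc(x) == -1.0f); // ftrunc -> XSRDPIZ (toward zero)
      // fnearbyint -> XSRDPIC rounds in the current mode (nearest-even here,
      // so the -1.5 tie also lands on the even value -2).
      assert(std::nearbyint(x) == -2.0f);
      return 0;
    }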
// Materialize a zero-vector of long long
@@ -2502,7 +2588,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
list<dag> pattern>
- : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isDOT;
+ : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isRecordForm;
// [PO VRT XO VRB XO /], but only the left 64 bits (or fewer) of the VRB are
// used, so we use a different operand class for VRB.
@@ -2520,7 +2606,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
list<dag> pattern>
- : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isDOT;
+ : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isRecordForm;
// [PO T XO B XO BX /]
class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
@@ -2550,7 +2636,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
list<dag> pattern>
- : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isDOT;
+ : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isRecordForm;
// [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
@@ -2562,7 +2648,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
list<dag> pattern>
- : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isDOT;
+ : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isRecordForm;
//===--------------------------------------------------------------------===//
// Quad-Precision Scalar Move Instructions:
@@ -2884,13 +2970,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
//===--------------------------------------------------------------------===//
// Maximum/Minimum Type-C/Type-J DP
- // XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU, so we use vsrc for XT
- def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsrc, vsfrc, vsfrc,
- IIC_VecFP, []>;
+ def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
+ IIC_VecFP,
+ [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
IIC_VecFP, []>;
- def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsrc, vsfrc, vsfrc,
- IIC_VecFP, []>;
+ def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc,
+ IIC_VecFP,
+ [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>;
def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
IIC_VecFP, []>;
@@ -2898,18 +2985,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Vector Byte-Reverse H/W/D/Q Word
def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>;
- def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc, []>;
- def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>;
+ def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc,
+ [(set v4i32:$XT, (bswap v4i32:$XB))]>;
+ def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc,
+ [(set v2i64:$XT, (bswap v2i64:$XB))]>;
def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;
// Vector Reverse
- def : Pat<(v8i16 (PPCxxreverse v8i16 :$A)),
+ def : Pat<(v8i16 (bswap v8i16 :$A)),
(v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
- def : Pat<(v4i32 (PPCxxreverse v4i32 :$A)),
- (v4i32 (XXBRW $A))>;
- def : Pat<(v2i64 (PPCxxreverse v2i64 :$A)),
- (v2i64 (XXBRD $A))>;
- def : Pat<(v1i128 (PPCxxreverse v1i128 :$A)),
+ def : Pat<(v1i128 (bswap v1i128 :$A)),
(v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
// Vector Permute
@@ -2927,7 +3012,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayLoad = 1, mayStore = 0 in {
+ let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
// Load Vector
def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
"lxv $XT, $src", IIC_LdStLFD, []>;
@@ -2972,7 +3057,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayStore = 1, mayLoad = 0 in {
+ let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
// Store Vector
def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
"stxv $XT, $dst", IIC_LdStSTFD, []>;
@@ -3697,6 +3782,15 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(f128 (fpextend f32:$src)),
(f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
+ def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)),
+ (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
+ (COPY_TO_REGCLASS $XB, VSSRC)),
+ VSSRC))>;
+ def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
+ (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
+ (COPY_TO_REGCLASS $XB, VSSRC)),
+ VSSRC))>;
+
} // end HasP9Vector, AddedComplexity
let AddedComplexity = 400 in {
@@ -3710,7 +3804,7 @@ let AddedComplexity = 400 in {
}
}
-let Predicates = [HasP9Vector] in {
+let Predicates = [HasP9Vector], hasSideEffects = 0 in {
let mayStore = 1 in {
def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
(ins spilltovsrrc:$XT, memrr:$dst),
@@ -3865,8 +3959,20 @@ def DblToULongLoad {
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A)))));
}
+// FP load dags (for f32 -> v4f32)
+def LoadFP {
+ dag A = (f32 (load xoaddr:$A));
+ dag B = (f32 (load xoaddr:$B));
+ dag C = (f32 (load xoaddr:$C));
+ dag D = (f32 (load xoaddr:$D));
+}
+
// FP merge dags (for f32 -> v4f32)
def MrgFP {
+ dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC);
+ dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC);
+ dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC);
+ dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC);
dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC),
(COPY_TO_REGCLASS $C, VSRC), 0));
dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC),
@@ -4022,7 +4128,18 @@ let AddedComplexity = 400 in {
(v2f64 (XXPERMDI
(COPY_TO_REGCLASS $A, VSRC),
(COPY_TO_REGCLASS $B, VSRC), 0))>;
-
+  // Using VMRGEW to assemble the final vector would be a lower latency
+  // solution. However, we choose to go with the slightly higher latency
+  // XXPERMDI for two reasons:
+  // 1. This is likely to occur in unrolled loops where register pressure is
+  //    high, so we want to use the latter as it has access to all 64 VSX
+  //    registers.
+  // 2. Using Altivec instructions in this sequence would likely cause the
+  //    allocation of Altivec registers even for the loads, which in turn
+  //    would force the use of LXSIWZX for the loads, adding a cycle of
+  //    latency to each load that would otherwise be able to use LFIWZX.
+ def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
+ (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B),
+ (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>;
def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
(VMRGEW MrgFP.AC, MrgFP.BD)>;
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
@@ -4089,7 +4206,18 @@ let AddedComplexity = 400 in {
(v2f64 (XXPERMDI
(COPY_TO_REGCLASS $B, VSRC),
(COPY_TO_REGCLASS $A, VSRC), 0))>;
-
+  // Using VMRGEW to assemble the final vector would be a lower latency
+  // solution. However, we choose to go with the slightly higher latency
+  // XXPERMDI for two reasons:
+  // 1. This is likely to occur in unrolled loops where register pressure is
+  //    high, so we want to use the latter as it has access to all 64 VSX
+  //    registers.
+  // 2. Using Altivec instructions in this sequence would likely cause the
+  //    allocation of Altivec registers even for the loads, which in turn
+  //    would force the use of LXSIWZX for the loads, adding a cycle of
+  //    latency to each load that would otherwise be able to use LFIWZX.
+ def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
+ (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C),
+ (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>;
def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
(VMRGEW MrgFP.AC, MrgFP.BD)>;
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index d252cfbd26b1..b761f337533b 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -1,4 +1,4 @@
-//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===//
+//===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,19 +6,42 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements a pass to prepare loops for pre-increment addressing
-// modes. Additional PHIs are created for loop induction variables used by
-// load/store instructions so that the pre-increment forms can be used.
-// Generically, this means transforming loops like this:
-// for (int i = 0; i < n; ++i)
-// array[i] = c;
-// to look like this:
-// T *p = array[-1];
-// for (int i = 0; i < n; ++i)
-// *++p = c;
+// This file implements a pass to prepare loops for PPC preferred addressing
+// modes, leveraging different instruction forms (e.g. DS/DQ form, D/DS form
+// with update).
+// Additional PHIs are created for loop induction variables used by load/store
+// instructions so that preferred addressing modes can be used.
+//
+// 1: DS/DQ form preparation, prepare the load/store instructions so that they
+// can satisfy the DS/DQ form displacement requirements.
+// Generically, this means transforming loops like this:
+// for (int i = 0; i < n; ++i) {
+// unsigned long x1 = *(unsigned long *)(p + i + 5);
+// unsigned long x2 = *(unsigned long *)(p + i + 9);
+// }
+//
+// to look like this:
+//
+// unsigned NewP = p + 5;
+// for (int i = 0; i < n; ++i) {
+// unsigned long x1 = *(unsigned long *)(i + NewP);
+// unsigned long x2 = *(unsigned long *)(i + NewP + 4);
+// }
+//
+// 2: D/DS form with update preparation, prepare the load/store instructions so
+// that we can use update form to do pre-increment.
+// Generically, this means transforming loops like this:
+// for (int i = 0; i < n; ++i)
+// array[i] = c;
+//
+// to look like this:
+//
+// T *p = array[-1];
+// for (int i = 0; i < n; ++i)
+// *++p = c;
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppc-loop-preinc-prep"
+#define DEBUG_TYPE "ppc-loop-instr-form-prep"
#include "PPC.h"
#include "PPCSubtarget.h"
@@ -32,7 +55,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
@@ -42,6 +64,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -49,6 +72,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
#include <iterator>
@@ -56,13 +80,49 @@
using namespace llvm;
-// By default, we limit this to creating 16 PHIs (which is a little over half
-// of the allocatable register set).
-static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
+// By default, we limit this to creating 16 common bases hoisted out of loops
+// per function. 16 is a little over half of the allocatable register set.
+static cl::opt<unsigned> MaxVarsPrep("ppc-formprep-max-vars",
cl::Hidden, cl::init(16),
- cl::desc("Potential PHI threshold for PPC preinc loop prep"));
-
-STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form");
+ cl::desc("Potential common base number threshold per function for PPC loop "
+ "prep"));
+
+static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
+ cl::init(true), cl::Hidden,
+  cl::desc("Prefer update form when the DS form is also an update form"));
+
+// The sum of the following three per-loop thresholds for all loops cannot be
+// larger than MaxVarsPrep.
+// By default, we limit this to creating 9 PHIs for one loop.
+// 9 and 3 for each kind of prep are experimental values tuned on Power9.
+static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars",
+ cl::Hidden, cl::init(3),
+ cl::desc("Potential PHI threshold per loop for PPC loop prep of update "
+ "form"));
+
+static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars",
+ cl::Hidden, cl::init(3),
+ cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"));
+
+static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",
+ cl::Hidden, cl::init(3),
+ cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));
+
+
+// It would not be profitable if the common base has only one load/store; ISEL
+// should already be able to choose the best load/store form based on the
+// offset for a single load/store. Set the minimal profitable value's default
+// to 2 and make it an option.
+static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold",
+ cl::Hidden, cl::init(2),
+ cl::desc("Minimal common base load/store instructions triggering DS/DQ form "
+ "preparation"));
+
+STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form");
+STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form");
+STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form");
+STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten");
+STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten");
STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten");
namespace {
@@ -82,16 +142,22 @@ namespace {
SmallVector<BucketElement, 16> Elements;
};
- class PPCLoopPreIncPrep : public FunctionPass {
+  // "UpdateForm" is not a real PPC instruction form; it stands for a D-form
+  // load/store with update, like ldu/stdu, or the prefetch intrinsic.
+  // For DS form instructions, the displacement must be a multiple of 4.
+  // For DQ form instructions, the displacement must be a multiple of 16.
+ enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 };
+
+ class PPCLoopInstrFormPrep : public FunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
- PPCLoopPreIncPrep() : FunctionPass(ID) {
- initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
+ PPCLoopInstrFormPrep() : FunctionPass(ID) {
+ initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry());
}
- PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
- initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
+ PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
+ initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -111,12 +177,19 @@ namespace {
ScalarEvolution *SE;
bool PreserveLCSSA;
+  /// Number of successful preparations for Update/DS/DQ forms in all
+  /// innermost loops. Each successful preparation hoists one common base out
+  /// of the loop, which may increase register pressure, much like LICM does.
+  /// Make sure the total preparation count can be controlled by an option.
+ unsigned SuccPrepCount;
+
bool runOnLoop(Loop *L);
  /// Check if the required PHI node already exists in Loop \p L.
bool alreadyPrepared(Loop *L, Instruction* MemI,
const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV);
+ const SCEVConstant *BasePtrIncSCEV,
+ InstrForm Form);
/// Collect condition matched(\p isValidCandidate() returns true)
/// candidates in Loop \p L.
@@ -134,32 +207,50 @@ namespace {
/// Prepare all candidates in \p Buckets for update form.
bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets);
+  /// Prepare all candidates in \p Buckets for displacement form (currently
+  /// DS/DQ form).
+ bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
+ InstrForm Form);
+
/// Prepare for one chain \p BucketChain, find the best base element and
/// update all other elements in \p BucketChain accordingly.
+ /// \p Form is used to find the best base element.
+  /// On success, the best base element is stored as the first element of
+  /// \p BucketChain.
+  /// Return false if no base element is found; otherwise return true.
+ bool prepareBaseForDispFormChain(Bucket &BucketChain,
+ InstrForm Form);
+
+ /// Prepare for one chain \p BucketChain, find the best base element and
+ /// update all other elements in \p BucketChain accordingly.
+  /// On success, the best base element is stored as the first element of
+  /// \p BucketChain.
+  /// Return false if no base element is found; otherwise return true.
bool prepareBaseForUpdateFormChain(Bucket &BucketChain);
/// Rewrite load/store instructions in \p BucketChain according to
/// preparation.
bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
- SmallSet<BasicBlock *, 16> &BBChanged);
+ SmallSet<BasicBlock *, 16> &BBChanged,
+ InstrForm Form);
};
} // end anonymous namespace
-char PPCLoopPreIncPrep::ID = 0;
-static const char *name = "Prepare loop for pre-inc. addressing modes";
-INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+char PPCLoopInstrFormPrep::ID = 0;
+static const char *name = "Prepare loop for ppc preferred instruction forms";
+INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
static const std::string PHINodeNameSuffix = ".phi";
static const std::string CastNodeNameSuffix = ".cast";
static const std::string GEPNodeIncNameSuffix = ".inc";
static const std::string GEPNodeOffNameSuffix = ".off";
-FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
- return new PPCLoopPreIncPrep(TM);
+FunctionPass *llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM) {
+ return new PPCLoopInstrFormPrep(TM);
}
static bool IsPtrInBounds(Value *BasePtr) {
@@ -193,7 +284,7 @@ static Value *GetPointerOperand(Value *MemI) {
return nullptr;
}
-bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
+bool PPCLoopInstrFormPrep::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -203,6 +294,7 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
DT = DTWP ? &DTWP->getDomTree() : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
ST = TM ? TM->getSubtargetImpl(F) : nullptr;
+ SuccPrepCount = 0;
bool MadeChange = false;
@@ -213,7 +305,7 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
return MadeChange;
}
-void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
+void PPCLoopInstrFormPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
SmallVector<Bucket, 16> &Buckets,
unsigned MaxCandidateNum) {
assert((MemI && GetPointerOperand(MemI)) &&
@@ -236,7 +328,7 @@ void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
}
}
-SmallVector<Bucket, 16> PPCLoopPreIncPrep::collectCandidates(
+SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
Loop *L,
std::function<bool(const Instruction *, const Value *)> isValidCandidate,
unsigned MaxCandidateNum) {
@@ -277,13 +369,82 @@ SmallVector<Bucket, 16> PPCLoopPreIncPrep::collectCandidates(
return Buckets;
}
-// TODO: implement a more clever base choosing policy.
+bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
+ InstrForm Form) {
+ // RemainderOffsetInfo details:
+  // key:      value of (Offset urem DispConstraint). For DSForm, it is in
+  //           the range [0, 4).
+ // first of pair: the index of first BucketElement whose remainder is equal
+ // to key. For key 0, this value must be 0.
+ // second of pair: number of load/stores with the same remainder.
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> RemainderOffsetInfo;
+
+ for (unsigned j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
+ if (!BucketChain.Elements[j].Offset)
+ RemainderOffsetInfo[0] = std::make_pair(0, 1);
+ else {
+ unsigned Remainder =
+ BucketChain.Elements[j].Offset->getAPInt().urem(Form);
+ if (RemainderOffsetInfo.find(Remainder) == RemainderOffsetInfo.end())
+ RemainderOffsetInfo[Remainder] = std::make_pair(j, 1);
+ else
+ RemainderOffsetInfo[Remainder].second++;
+ }
+ }
+  // Currently we choose the most profitable base as the one which has the
+  // maximum number of load/stores with the same remainder.
+  // FIXME: adjust the base selection strategy according to the load/store
+  // offset distribution.
+  // For example, suppose we have one candidate chain for DS form preparation
+  // containing the following load/stores with different remainders:
+  //   1: 10 load/stores whose remainder is 1;
+  //   2: 9 load/stores whose remainder is 2;
+  //   3: 1 load/store for remainder 3 and 0 for remainder 0.
+  // We will choose the first load/store whose remainder is 1 as the base and
+  // adjust all other load/stores according to the new base, so we get 10
+  // DS-form and 10 X-form accesses.
+  // But we could be more clever: for this case we could use two bases, one for
+  // remainder 1 and the other for remainder 2, and thus get 19 DS-form and
+  // only 1 X-form access.
+ unsigned MaxCountRemainder = 0;
+ for (unsigned j = 0; j < (unsigned)Form; j++)
+ if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) &&
+ RemainderOffsetInfo[j].second >
+ RemainderOffsetInfo[MaxCountRemainder].second)
+ MaxCountRemainder = j;
+
+  // Abort when there are too few instructions with a common base.
+ if (RemainderOffsetInfo[MaxCountRemainder].second < DispFormPrepMinThreshold)
+ return false;
+
+  // If the first value is the most profitable, there is no need to adjust the
+  // BucketChain elements, as the first value was already subtracted from them
+  // when collecting.
+ if (MaxCountRemainder == 0)
+ return true;
+
+ // Adjust load/store to the new chosen base.
+ const SCEV *Offset =
+ BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset;
+ BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
+ for (auto &E : BucketChain.Elements) {
+ if (E.Offset)
+ E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
+ else
+ E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
+ }
+
+ std::swap(BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first],
+ BucketChain.Elements[0]);
+ return true;
+}
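A standalone, hedged C++ sketch of the base-selection policy implemented above: histogram the offsets by their remainder modulo the form constraint, pick the most populous remainder class, and give up below the profitability threshold. The function name and signature are illustrative, not part of the pass:

    #include <cstdint>
    #include <map>
    #include <optional>
    #include <vector>

    // Returns the index of the first element of the most common remainder
    // class, or nothing if that class has fewer than MinThreshold members
    // (mirroring DispFormPrepMinThreshold).
    std::optional<size_t> pickBaseIndex(const std::vector<int64_t> &Offsets,
                                        uint64_t Form,
                                        unsigned MinThreshold = 2) {
      // remainder -> {index of its first element, member count}
      std::map<uint64_t, std::pair<size_t, unsigned>> Info;
      for (size_t I = 0; I != Offsets.size(); ++I) {
        uint64_t Rem = static_cast<uint64_t>(Offsets[I]) % Form; // urem
        auto It = Info.find(Rem);
        if (It == Info.end())
          Info.emplace(Rem, std::make_pair(I, 1u));
        else
          ++It->second.second;
      }
      if (Info.empty())
        return std::nullopt;
      auto Best = Info.begin();
      for (auto It = Info.begin(); It != Info.end(); ++It)
        if (It->second.second > Best->second.second)
          Best = It;
      if (Best->second.second < MinThreshold)
        return std::nullopt;
      return Best->second.first;
    }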
+
+// FIXME: implement a more clever base choosing policy.
// Currently we always choose an existing load/store offset. This may lead to
// suboptimal code sequences. For example, for one DS chain with offsets
// {-32769, 2003, 2007, 2011}, we choose -32769 as the base offset, leaving
// displacements of {0, 34772, 34776, 34780} for the load/stores. Though each
// offset is now a multiple of 4, it cannot be represented by an sint16.
-bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
+bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
// We have a choice now of which instruction's memory operand we use as the
// base for the generated PHI. Always picking the first instruction in each
// bucket does not work well, specifically because that instruction might
@@ -323,8 +484,9 @@ bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
return true;
}
-bool PPCLoopPreIncPrep::rewriteLoadStores(
- Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged) {
+bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
+ SmallSet<BasicBlock *, 16> &BBChanged,
+ InstrForm Form) {
bool MadeChange = false;
const SCEVAddRecExpr *BasePtrSCEV =
cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
@@ -345,19 +507,30 @@ bool PPCLoopPreIncPrep::rewriteLoadStores(
Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
BasePtr->getType()->getPointerAddressSpace());
- const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
- if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
+ if (!SE->isLoopInvariant(BasePtrSCEV->getStart(), L))
return MadeChange;
const SCEVConstant *BasePtrIncSCEV =
dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
if (!BasePtrIncSCEV)
return MadeChange;
- BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
+
+  // Some DS form load/store instructions can also be used in update form when
+  // the stride is a multiple of 4. Use the update form if it is preferred.
+ bool CanPreInc = (Form == UpdateForm ||
+ ((Form == DSForm) && !BasePtrIncSCEV->getAPInt().urem(4) &&
+ PreferUpdateForm));
+ const SCEV *BasePtrStartSCEV = nullptr;
+ if (CanPreInc)
+ BasePtrStartSCEV =
+ SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncSCEV);
+ else
+ BasePtrStartSCEV = BasePtrSCEV->getStart();
+
if (!isSafeToExpand(BasePtrStartSCEV, *SE))
return MadeChange;
- if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV))
+ if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form))
return MadeChange;
LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
@@ -377,31 +550,61 @@ bool PPCLoopPreIncPrep::rewriteLoadStores(
// Note that LoopPredecessor might occur in the predecessor list multiple
// times, and we need to add it the right number of times.
- for (const auto &PI : predecessors(Header)) {
+ for (auto PI : predecessors(Header)) {
if (PI != LoopPredecessor)
continue;
NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
}
- Instruction *InsPoint = &*Header->getFirstInsertionPt();
- GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
- I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
- getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
- PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
- for (const auto &PI : predecessors(Header)) {
- if (PI == LoopPredecessor)
- continue;
+ Instruction *PtrInc = nullptr;
+ Instruction *NewBasePtr = nullptr;
+ if (CanPreInc) {
+ Instruction *InsPoint = &*Header->getFirstInsertionPt();
+ PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+ getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
+ cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
+ for (auto PI : predecessors(Header)) {
+ if (PI == LoopPredecessor)
+ continue;
- NewPHI->addIncoming(PtrInc, PI);
- }
+ NewPHI->addIncoming(PtrInc, PI);
+ }
+ if (PtrInc->getType() != BasePtr->getType())
+ NewBasePtr = new BitCastInst(
+ PtrInc, BasePtr->getType(),
+ getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
+ else
+ NewBasePtr = PtrInc;
+ } else {
+      // Note that LoopPredecessor might occur in the predecessor list multiple
+      // times, and we must make sure not to add any more incoming values for
+      // it to the PHI.
+ for (auto PI : predecessors(Header)) {
+ if (PI == LoopPredecessor)
+ continue;
- Instruction *NewBasePtr;
- if (PtrInc->getType() != BasePtr->getType())
- NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
- getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
- else
- NewBasePtr = PtrInc;
+ // For the latch predecessor, we need to insert a GEP just before the
+        // terminator to increment the address.
+ BasicBlock *BB = PI;
+ Instruction *InsPoint = BB->getTerminator();
+ PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+ getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
+
+ cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
+
+ NewPHI->addIncoming(PtrInc, PI);
+ }
+ PtrInc = NewPHI;
+ if (NewPHI->getType() != BasePtr->getType())
+ NewBasePtr =
+ new BitCastInst(NewPHI, BasePtr->getType(),
+ getInstrName(NewPHI, CastNodeNameSuffix),
+ &*Header->getFirstInsertionPt());
+ else
+ NewBasePtr = NewPHI;
+ }
if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
BBChanged.insert(IDel->getParent());
@@ -460,12 +663,20 @@ bool PPCLoopPreIncPrep::rewriteLoadStores(
}
MadeChange = true;
- UpdFormChainRewritten++;
+
+ SuccPrepCount++;
+
+ if (Form == DSForm && !CanPreInc)
+ DSFormChainRewritten++;
+ else if (Form == DQForm)
+ DQFormChainRewritten++;
+ else if (Form == UpdateForm || (Form == DSForm && CanPreInc))
+ UpdFormChainRewritten++;
return MadeChange;
}
-bool PPCLoopPreIncPrep::updateFormPrep(Loop *L,
+bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L,
SmallVector<Bucket, 16> &Buckets) {
bool MadeChange = false;
if (Buckets.empty())
@@ -475,7 +686,30 @@ bool PPCLoopPreIncPrep::updateFormPrep(Loop *L,
// The base address of each bucket is transformed into a phi and the others
// are rewritten based on new base.
if (prepareBaseForUpdateFormChain(Bucket))
- MadeChange |= rewriteLoadStores(L, Bucket, BBChanged);
+ MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm);
+
+ if (MadeChange)
+ for (auto &BB : L->blocks())
+ if (BBChanged.count(BB))
+ DeleteDeadPHIs(BB);
+ return MadeChange;
+}
+
+bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
+ InstrForm Form) {
+ bool MadeChange = false;
+
+ if (Buckets.empty())
+ return MadeChange;
+
+ SmallSet<BasicBlock *, 16> BBChanged;
+ for (auto &Bucket : Buckets) {
+ if (Bucket.Elements.size() < DispFormPrepMinThreshold)
+ continue;
+ if (prepareBaseForDispFormChain(Bucket, Form))
+ MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, Form);
+ }
+
if (MadeChange)
for (auto &BB : L->blocks())
if (BBChanged.count(BB))
@@ -483,13 +717,14 @@ bool PPCLoopPreIncPrep::updateFormPrep(Loop *L,
return MadeChange;
}
-// In order to prepare for the pre-increment a PHI is added.
+// In order to prepare for the preferred instruction form, a PHI is added.
// This function will check to see if that PHI already exists and will return
-// true if it found an existing PHI with the same start and increment as the
+// true if it finds an existing PHI whose start and increment match the
// one we wanted to create.
-bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI,
+bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV) {
+ const SCEVConstant *BasePtrIncSCEV,
+ InstrForm Form) {
BasicBlock *BB = MemI->getParent();
if (!BB)
return false;
@@ -526,12 +761,25 @@ bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI,
CurrentPHINode->getIncomingBlock(1) == PredBB) ||
(CurrentPHINode->getIncomingBlock(1) == LatchBB &&
CurrentPHINode->getIncomingBlock(0) == PredBB)) {
- if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV &&
- PHIBasePtrIncSCEV == BasePtrIncSCEV) {
+ if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
// The existing PHI (CurrentPHINode) has the same start and increment
- // as the PHI that we wanted to create.
- ++PHINodeAlreadyExists;
- return true;
+ // as the PHI that we wanted to create.
+ if (Form == UpdateForm &&
+ PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
+ ++PHINodeAlreadyExistsUpdate;
+ return true;
+ }
+ if (Form == DSForm || Form == DQForm) {
+ const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
+ SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV));
+ if (Diff && !Diff->getAPInt().urem(Form)) {
+ if (Form == DSForm)
+ ++PHINodeAlreadyExistsDS;
+ else
+ ++PHINodeAlreadyExistsDQ;
+ return true;
+ }
+ }
}
}
}
@@ -539,13 +787,17 @@ bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI,
return false;
}
-bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
+bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
bool MadeChange = false;
// Only prep. the inner-most loop
if (!L->empty())
return MadeChange;
+  // Return if we have already done enough preparations.
+ if (SuccPrepCount >= MaxVarsPrep)
+ return MadeChange;
+
LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
BasicBlock *LoopPredecessor = L->getLoopPredecessor();
@@ -562,7 +814,6 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n");
return MadeChange;
}
-
// Check if a load/store has update form. This lambda is used by function
// collectCandidates which can collect candidates for types defined by lambda.
auto isUpdateFormCandidate = [&] (const Instruction *I,
@@ -591,15 +842,53 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
}
return true;
};
+
+ // Check if a load/store has DS form.
+ auto isDSFormCandidate = [] (const Instruction *I, const Value *PtrValue) {
+ assert((PtrValue && I) && "Invalid parameter!");
+ if (isa<IntrinsicInst>(I))
+ return false;
+ Type *PointerElementType = PtrValue->getType()->getPointerElementType();
+ return (PointerElementType->isIntegerTy(64)) ||
+ (PointerElementType->isFloatTy()) ||
+ (PointerElementType->isDoubleTy()) ||
+ (PointerElementType->isIntegerTy(32) &&
+ llvm::any_of(I->users(),
+ [](const User *U) { return isa<SExtInst>(U); }));
+ };
+
+ // Check if a load/store has DQ form.
+ auto isDQFormCandidate = [&] (const Instruction *I, const Value *PtrValue) {
+ assert((PtrValue && I) && "Invalid parameter!");
+ return !isa<IntrinsicInst>(I) && ST && ST->hasP9Vector() &&
+ (PtrValue->getType()->getPointerElementType()->isVectorTy());
+ };
- // Collect buckets of comparable addresses used by loads, stores and prefetch
// intrinsic for update form.
SmallVector<Bucket, 16> UpdateFormBuckets =
- collectCandidates(L, isUpdateFormCandidate, MaxVars);
+ collectCandidates(L, isUpdateFormCandidate, MaxVarsUpdateForm);
// Prepare for update form.
if (!UpdateFormBuckets.empty())
MadeChange |= updateFormPrep(L, UpdateFormBuckets);
+ // Collect buckets of comparable addresses used by loads and stores for DS
+ // form.
+ SmallVector<Bucket, 16> DSFormBuckets =
+ collectCandidates(L, isDSFormCandidate, MaxVarsDSForm);
+
+ // Prepare for DS form.
+ if (!DSFormBuckets.empty())
+ MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm);
+
+ // Collect buckets of comparable addresses used by loads and stores for DQ
+ // form.
+ SmallVector<Bucket, 16> DQFormBuckets =
+ collectCandidates(L, isDQFormCandidate, MaxVarsDQForm);
+
+ // Prepare for DQ form.
+ if (!DQFormBuckets.empty())
+ MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm);
+
return MadeChange;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
new file mode 100644
index 000000000000..83cca11b27a3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
@@ -0,0 +1,164 @@
+//===-- PPCLowerMASSVEntries.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering of MASSV (SIMD) entries for specific PowerPC
+// subtargets.
+// The following is an example of a conversion specific to the Power9 subtarget:
+// __sind2_massv ---> __sind2_P9
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "ppc-lower-massv-entries"
+
+using namespace llvm;
+
+namespace {
+
+// Length of the suffix "massv", which is specific to IBM MASSV library entries.
+const unsigned MASSVSuffixLength = 5;
+
+static StringRef MASSVFuncs[] = {
+#define TLI_DEFINE_MASSV_VECFUNCS_NAMES
+#include "llvm/Analysis/VecFuncs.def"
+};
+
+class PPCLowerMASSVEntries : public ModulePass {
+public:
+ static char ID;
+
+ PPCLowerMASSVEntries() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override;
+
+ StringRef getPassName() const override { return "PPC Lower MASS Entries"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+private:
+ static bool isMASSVFunc(StringRef Name);
+ static StringRef getCPUSuffix(const PPCSubtarget *Subtarget);
+ static std::string createMASSVFuncName(Function &Func,
+ const PPCSubtarget *Subtarget);
+ bool lowerMASSVCall(CallInst *CI, Function &Func, Module &M,
+ const PPCSubtarget *Subtarget);
+};
+
+} // namespace
+
+/// Checks if the specified function name represents an entry in the MASSV
+/// library.
+bool PPCLowerMASSVEntries::isMASSVFunc(StringRef Name) {
+ auto Iter = std::find(std::begin(MASSVFuncs), std::end(MASSVFuncs), Name);
+ return Iter != std::end(MASSVFuncs);
+}
+
+// FIXME:
+/// Returns a string corresponding to the specified PowerPC subtarget. e.g.:
+/// "P8" for Power8, "P9" for Power9. The string is used as a suffix while
+/// generating subtarget-specific MASSV library functions. Current support
+/// includes Power8 and Power9 subtargets.
+StringRef PPCLowerMASSVEntries::getCPUSuffix(const PPCSubtarget *Subtarget) {
+ // Assume Power8 when Subtarget is unavailable.
+ if (!Subtarget)
+ return "P8";
+ if (Subtarget->hasP9Vector())
+ return "P9";
+ if (Subtarget->hasP8Vector())
+ return "P8";
+
+ report_fatal_error("Unsupported Subtarget: MASSV is supported only on "
+ "Power8 and Power9 subtargets.");
+}
+
+/// Creates a PowerPC subtarget-specific name corresponding to the specified
+/// generic MASSV function and the given PowerPC subtarget.
+std::string
+PPCLowerMASSVEntries::createMASSVFuncName(Function &Func,
+ const PPCSubtarget *Subtarget) {
+ StringRef Suffix = getCPUSuffix(Subtarget);
+ auto GenericName = Func.getName().drop_back(MASSVSuffixLength).str();
+ std::string MASSVEntryName = GenericName + Suffix.str();
+ return MASSVEntryName;
+}
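A minimal sketch of the renaming this helper performs, assuming a Power9 subtarget; the free function below is illustrative and not the pass's actual API:

    #include <cassert>
    #include <string>

    // Drop the trailing "massv" (MASSVSuffixLength == 5 characters) and
    // append the CPU suffix picked by getCPUSuffix(), e.g. "P9" on Power9.
    static std::string massvNameFor(const std::string &Generic,
                                    const std::string &CPUSuffix) {
      return Generic.substr(0, Generic.size() - 5) + CPUSuffix;
    }

    int main() {
      assert(massvNameFor("__sind2_massv", "P9") == "__sind2_P9");
      return 0;
    }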
+
+/// Lowers generic MASSV entries to PowerPC subtarget-specific MASSV entries.
+/// e.g.: __sind2_massv --> __sind2_P9 for a Power9 subtarget.
+/// Both function prototypes and their callsites are updated during lowering.
+bool PPCLowerMASSVEntries::lowerMASSVCall(CallInst *CI, Function &Func,
+ Module &M,
+ const PPCSubtarget *Subtarget) {
+ if (CI->use_empty())
+ return false;
+
+ std::string MASSVEntryName = createMASSVFuncName(Func, Subtarget);
+ FunctionCallee FCache = M.getOrInsertFunction(
+ MASSVEntryName, Func.getFunctionType(), Func.getAttributes());
+
+ CallSite CS(CI);
+ CI->setCalledFunction(FCache);
+
+ return true;
+}
+
+bool PPCLowerMASSVEntries::runOnModule(Module &M) {
+ bool Changed = false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return Changed;
+
+ auto &TM = TPC->getTM<PPCTargetMachine>();
+ const PPCSubtarget *Subtarget;
+
+ for (Function &Func : M) {
+ if (!Func.isDeclaration())
+ continue;
+
+ if (!isMASSVFunc(Func.getName()))
+ continue;
+
+ // Call to lowerMASSVCall() invalidates the iterator over users upon
+ // replacing the users. Precomputing the current list of users allows us to
+ // replace all the call sites.
+ SmallVector<User *, 4> MASSVUsers;
+ for (auto *User: Func.users())
+ MASSVUsers.push_back(User);
+
+ for (auto *User : MASSVUsers) {
+ auto *CI = dyn_cast<CallInst>(User);
+ if (!CI)
+ continue;
+
+ Subtarget = &TM.getSubtarget<PPCSubtarget>(*CI->getParent()->getParent());
+ Changed |= lowerMASSVCall(CI, Func, M, Subtarget);
+ }
+ }
+
+ return Changed;
+}
+
+char PPCLowerMASSVEntries::ID = 0;
+
+char &llvm::PPCLowerMASSVEntriesID = PPCLowerMASSVEntries::ID;
+
+INITIALIZE_PASS(PPCLowerMASSVEntries, DEBUG_TYPE, "Lower MASSV entries", false,
+ false)
+
+ModulePass *llvm::createPPCLowerMASSVEntriesPass() {
+ return new PPCLowerMASSVEntries();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index ac8ac060f460..74192cb20cd0 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -18,6 +18,8 @@
//
//===---------------------------------------------------------------------===//
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
@@ -26,12 +28,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
-#include "MCTargetDesc/PPCPredicates.h"
using namespace llvm;
@@ -160,33 +162,33 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
static unsigned
getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
unsigned Opcode = MI->getOpcode();
- if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
- Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo)
+ if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
+ Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
return MI->getOperand(3).getImm();
- if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
- MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
+ if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
+ MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
return MI->getOperand(3).getImm();
- if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
- Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
+ if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
+ Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
- MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
+ MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
return 32 + MI->getOperand(3).getImm();
- if (Opcode == PPC::ANDIo) {
+ if (Opcode == PPC::ANDI_rec) {
uint16_t Imm = MI->getOperand(2).getImm();
return 48 + countLeadingZeros(Imm);
}
- if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
- Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
+ if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
+ Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
// The result ranges from 0 to 32.
return 58;
- if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
- Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo)
+ if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
+ Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
// The result ranges from 0 to 64.
return 57;
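As a self-contained check of the andi. case above: the 16-bit immediate is zero-extended into the low 16 bits of the 64-bit result, so the result is known to have at least 48 plus the immediate's own leading-zero count of leading zeros. A hedged C++20 sketch (the helper name is invented):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // Known leading zeros of `andi. rT, rS, Imm`: the result fits in the
    // low 16 bits of a 64-bit register.
    static unsigned knownLeadingZerosOfAndi(uint16_t Imm) {
      return 48 + std::countl_zero(Imm);
    }

    int main() {
      assert(knownLeadingZerosOfAndi(0x00FF) == 56); // 48 + 8
      assert(knownLeadingZerosOfAndi(0x8000) == 48); // 48 + 0
      return 0;
    }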
@@ -331,108 +333,121 @@ bool PPCMIPeephole::simplifyCode(void) {
// is identified by an immediate value of 0 or 3.
int Immed = MI.getOperand(3).getImm();
- if (Immed != 1) {
-
- // For each of these simplifications, we need the two source
- // regs to match. Unfortunately, MachineCSE ignores COPY and
- // SUBREG_TO_REG, so for example we can see
- // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
- // We have to look through chains of COPY and SUBREG_TO_REG
- // to find the real source values for comparison.
- unsigned TrueReg1 =
- TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
- unsigned TrueReg2 =
- TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
-
- if (TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)) {
- MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
- unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
-
- // If this is a splat fed by a splatting load, the splat is
- // redundant. Replace with a copy. This doesn't happen directly due
- // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
- // a load of a double to a vector of 64-bit integers.
- auto isConversionOfLoadAndSplat = [=]() -> bool {
- if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
- return false;
- unsigned DefReg =
- TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
- if (Register::isVirtualRegister(DefReg)) {
- MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
- if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
- return true;
- }
- return false;
- };
- if (DefMI && (Immed == 0 || Immed == 3)) {
- if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
- LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
- "to load-and-splat/copy: ");
- LLVM_DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
- MI.getOperand(0).getReg())
- .add(MI.getOperand(1));
- ToErase = &MI;
- Simplified = true;
- }
- }
+ if (Immed == 1)
+ break;
- // If this is a splat or a swap fed by another splat, we
- // can replace it with a copy.
- if (DefOpc == PPC::XXPERMDI) {
- unsigned FeedImmed = DefMI->getOperand(3).getImm();
- unsigned FeedReg1 =
- TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
- unsigned FeedReg2 =
- TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
-
- if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
- LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
- "to splat/copy: ");
- LLVM_DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
- MI.getOperand(0).getReg())
- .add(MI.getOperand(1));
- ToErase = &MI;
- Simplified = true;
- }
-
- // If this is a splat fed by a swap, we can simplify modify
- // the splat to splat the other value from the swap's input
- // parameter.
- else if ((Immed == 0 || Immed == 3)
- && FeedImmed == 2 && FeedReg1 == FeedReg2) {
- LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
- LLVM_DEBUG(MI.dump());
- MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
- MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
- MI.getOperand(3).setImm(3 - Immed);
- Simplified = true;
- }
-
- // If this is a swap fed by a swap, we can replace it
- // with a copy from the first swap's input.
- else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
- LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
- LLVM_DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
- MI.getOperand(0).getReg())
- .add(DefMI->getOperand(1));
- ToErase = &MI;
- Simplified = true;
- }
- } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
- (DefMI->getOperand(2).getImm() == 0 ||
- DefMI->getOperand(2).getImm() == 3)) {
- // Splat fed by another splat - switch the output of the first
- // and remove the second.
- DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
- ToErase = &MI;
- Simplified = true;
- LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
- LLVM_DEBUG(MI.dump());
- }
+ // For each of these simplifications, we need the two source
+ // regs to match. Unfortunately, MachineCSE ignores COPY and
+ // SUBREG_TO_REG, so for example we can see
+ // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
+ // We have to look through chains of COPY and SUBREG_TO_REG
+ // to find the real source values for comparison.
+ unsigned TrueReg1 =
+ TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
+ unsigned TrueReg2 =
+ TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
+
+ if (!(TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)))
+ break;
+
+ MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
+
+ if (!DefMI)
+ break;
+
+ unsigned DefOpc = DefMI->getOpcode();
+
+ // If this is a splat fed by a splatting load, the splat is
+ // redundant. Replace with a copy. This doesn't happen directly due
+ // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
+ // a load of a double to a vector of 64-bit integers.
+ auto isConversionOfLoadAndSplat = [=]() -> bool {
+ if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
+ return false;
+ unsigned FeedReg1 =
+ TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
+ if (Register::isVirtualRegister(FeedReg1)) {
+ MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
+ if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
+ return true;
+ }
+ return false;
+ };
+ if ((Immed == 0 || Immed == 3) &&
+ (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
+ LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
+ "to load-and-splat/copy: ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+
+ // If this is a splat or a swap fed by another splat, we
+ // can replace it with a copy.
+ if (DefOpc == PPC::XXPERMDI) {
+ unsigned DefReg1 = DefMI->getOperand(1).getReg();
+ unsigned DefReg2 = DefMI->getOperand(2).getReg();
+ unsigned DefImmed = DefMI->getOperand(3).getImm();
+
+ // If the two inputs are not the same register, check to see if
+ // they originate from the same virtual register after only
+ // copy-like instructions.
+ if (DefReg1 != DefReg2) {
+ unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
+ unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
+
+ if (!(FeedReg1 == FeedReg2 &&
+ Register::isVirtualRegister(FeedReg1)))
+ break;
+ }
+
+ if (DefImmed == 0 || DefImmed == 3) {
+ LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
+ "to splat/copy: ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
}
+
+ // If this is a splat fed by a swap, we can modify the splat to
+ // splat the other value from the swap's input parameter.
+ else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
+ LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
+ LLVM_DEBUG(MI.dump());
+ MI.getOperand(1).setReg(DefReg1);
+ MI.getOperand(2).setReg(DefReg2);
+ MI.getOperand(3).setImm(3 - Immed);
+ Simplified = true;
+ }
+
+ // If this is a swap fed by a swap, we can replace it
+ // with a copy from the first swap's input.
+ else if (Immed == 2 && DefImmed == 2) {
+ LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(DefMI->getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+ } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
+ (DefMI->getOperand(2).getImm() == 0 ||
+ DefMI->getOperand(2).getImm() == 3)) {
+ // Splat fed by another splat - switch the output of the first
+ // and remove the second.
+ DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+ ToErase = &MI;
+ Simplified = true;
+ LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
+ LLVM_DEBUG(MI.dump());
}
break;
}
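
To make the XXPERMDI immediate algebra above easier to follow: with both source operands equal, immediate 0 splats doubleword 0, 3 splats doubleword 1, 2 swaps, and 1 is the identity, which is why a splat fed by a swap becomes a splat with immediate 3 - Immed, and a swap of a swap becomes a copy. Below is a minimal standalone model of that algebra, a sketch for illustration only (the helper name xxpermdi and the test values are invented, not backend code):

#include <array>
#include <cassert>
#include <cstdint>

using V2 = std::array<uint64_t, 2>;

// DM bit 1 picks the doubleword taken from A, DM bit 0 the one from B.
static V2 xxpermdi(V2 A, V2 B, unsigned DM) {
  return {A[(DM >> 1) & 1], B[DM & 1]};
}

int main() {
  V2 S = {0x1111, 0x2222};
  assert((xxpermdi(S, S, 0) == V2{0x1111, 0x1111})); // splat doubleword 0
  assert((xxpermdi(S, S, 3) == V2{0x2222, 0x2222})); // splat doubleword 1
  assert((xxpermdi(S, S, 2) == V2{0x2222, 0x1111})); // swap
  V2 Swap = xxpermdi(S, S, 2);
  // A splat fed by a swap splats the other doubleword: 3 - Immed.
  assert(xxpermdi(Swap, Swap, 0) == xxpermdi(S, S, 3 - 0));
  // A swap fed by a swap is a plain copy.
  assert(xxpermdi(Swap, Swap, 2) == S);
  return 0;
}
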
@@ -805,6 +820,153 @@ bool PPCMIPeephole::simplifyCode(void) {
combineSEXTAndSHL(MI, ToErase);
break;
}
+ case PPC::RLWINM:
+ case PPC::RLWINM_rec:
+ case PPC::RLWINM8:
+ case PPC::RLWINM8_rec: {
+ unsigned FoldingReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(FoldingReg))
+ break;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
+ if (SrcMI->getOpcode() != PPC::RLWINM &&
+ SrcMI->getOpcode() != PPC::RLWINM_rec &&
+ SrcMI->getOpcode() != PPC::RLWINM8 &&
+ SrcMI->getOpcode() != PPC::RLWINM8_rec)
+ break;
+ assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
+ MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
+ SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
+ "Invalid PPC::RLWINM Instruction!");
+ uint64_t SHSrc = SrcMI->getOperand(2).getImm();
+ uint64_t SHMI = MI.getOperand(2).getImm();
+ uint64_t MBSrc = SrcMI->getOperand(3).getImm();
+ uint64_t MBMI = MI.getOperand(3).getImm();
+ uint64_t MESrc = SrcMI->getOperand(4).getImm();
+ uint64_t MEMI = MI.getOperand(4).getImm();
+
+ assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
+ "Invalid PPC::RLWINM Instruction!");
+
+ // If MBMI is bigger than MEMI, we can never get a run of ones.
+ // RotatedSrcMask non-wrap:
+ // 0........31|32........63
+ // RotatedSrcMask: B---E B---E
+ // MaskMI: -----------|--E B------
+ // Result: ----- --- (Bad candidate)
+ //
+ // RotatedSrcMask wrap:
+ // 0........31|32........63
+ // RotatedSrcMask: --E B----|--E B----
+ // MaskMI: -----------|--E B------
+ // Result: --- -----|--- ----- (Bad candidate)
+ //
+ // One special case is RotatedSrcMask is a full set mask.
+ // RotatedSrcMask full:
+ // 0........31|32........63
+ // RotatedSrcMask: ------EB---|-------EB---
+ // MaskMI: -----------|--E B------
+ // Result: -----------|--- ------- (Good candidate)
+
+ // Mark special case.
+ bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
+
+ // For the other MBMI > MEMI cases, just bail out.
+ if ((MBMI > MEMI) && !SrcMaskFull)
+ break;
+
+ // Handle MBMI <= MEMI cases.
+ APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
+ // In MI, we only need the low 32 bits of SrcMI, so consider just the
+ // low 32 bits of the SrcMI mask. Note that in APInt the lowest bit is
+ // at index 0, while in the PowerPC ISA the lowest bit is at index 63.
+ APInt MaskSrc =
+ APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
+ // The current APInt::getBitsSetWithWrap clears all bits when loBit
+ // is equal to hiBit.
+ // If MBSrc - MESrc == 1, we expect a fully set mask instead of an
+ // empty one.
+ if (SrcMaskFull && (MBSrc - MESrc == 1))
+ MaskSrc.setAllBits();
+
+ APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
+ APInt FinalMask = RotatedSrcMask & MaskMI;
+ uint32_t NewMB, NewME;
+
+ // If final mask is 0, MI result should be 0 too.
+ if (FinalMask.isNullValue()) {
+ bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 ||
+ MI.getOpcode() == PPC::RLWINM8_rec);
+
+ Simplified = true;
+
+ LLVM_DEBUG(dbgs() << "Replace Instr: ");
+ LLVM_DEBUG(MI.dump());
+
+ if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
+ // Replace MI with "LI 0"
+ MI.RemoveOperand(4);
+ MI.RemoveOperand(3);
+ MI.RemoveOperand(2);
+ MI.getOperand(1).ChangeToImmediate(0);
+ MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI));
+ } else {
+ // Replace MI with "ANDI_rec reg, 0"
+ MI.RemoveOperand(4);
+ MI.RemoveOperand(3);
+ MI.getOperand(2).setImm(0);
+ MI.setDesc(TII->get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ if (SrcMI->getOperand(1).isKill()) {
+ MI.getOperand(1).setIsKill(true);
+ SrcMI->getOperand(1).setIsKill(false);
+ } else
+ // About to replace MI.getOperand(1), clear its kill flag.
+ MI.getOperand(1).setIsKill(false);
+ }
+
+ LLVM_DEBUG(dbgs() << "With: ");
+ LLVM_DEBUG(MI.dump());
+ } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB,
+ NewME) && NewMB <= NewME) || SrcMaskFull) {
+ // Here we only handle the MBMI <= MEMI case, so NewMB must be no
+ // bigger than NewME. Otherwise we would get a 64-bit value after
+ // folding, while MI returns a 32-bit value.
+
+ Simplified = true;
+ LLVM_DEBUG(dbgs() << "Converting Instr: ");
+ LLVM_DEBUG(MI.dump());
+
+ uint16_t NewSH = (SHSrc + SHMI) % 32;
+ MI.getOperand(2).setImm(NewSH);
+ // If SrcMI mask is full, no need to update MBMI and MEMI.
+ if (!SrcMaskFull) {
+ MI.getOperand(3).setImm(NewMB);
+ MI.getOperand(4).setImm(NewME);
+ }
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ if (SrcMI->getOperand(1).isKill()) {
+ MI.getOperand(1).setIsKill(true);
+ SrcMI->getOperand(1).setIsKill(false);
+ } else
+ // About to replace MI.getOperand(1), clear its kill flag.
+ MI.getOperand(1).setIsKill(false);
+
+ LLVM_DEBUG(dbgs() << "To: ");
+ LLVM_DEBUG(MI.dump());
+ }
+ if (Simplified) {
+ // If FoldingReg has no non-debug use and SrcMI has no implicit def
+ // (i.e. it is not RLWINM_rec or RLWINM8_rec), it is safe to delete
+ // its defining instruction SrcMI. Otherwise keep it.
+ ++NumRotatesCollapsed;
+ if (MRI->use_nodbg_empty(FoldingReg) && !SrcMI->hasImplicitDef()) {
+ ToErase = SrcMI;
+ LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+ LLVM_DEBUG(SrcMI->dump());
+ }
+ }
+ break;
+ }
}
}
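
The RLWINM folding above rests on the identity rotl(x & m, n) == rotl(x, n) & rotl(m, n): two rotate-and-mask operations compose into a single one whenever the combined mask rotl(MaskSrc, SHMI) & MaskMI is a (possibly wrapping) run of ones. A minimal sketch of the same arithmetic in plain 32-bit C++, illustrative only (the helper names rotl32, ppcMask, and rlwinm are invented; MB/ME use the PPC big-endian bit numbering, where bit 0 is the most significant bit):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

// MASK(MB, ME): IBM bits MB..ME set, wrapping around when MB > ME.
static uint32_t ppcMask(unsigned MB, unsigned ME) {
  uint32_t M = 0;
  for (unsigned I = MB;; I = (I + 1) & 31) {
    M |= 1u << (31 - I); // IBM bit I is value bit 31 - I
    if (I == ME)
      break;
  }
  return M;
}

static uint32_t rlwinm(uint32_t RS, unsigned SH, unsigned MB, unsigned ME) {
  return rotl32(RS, SH) & ppcMask(MB, ME);
}

int main() {
  uint32_t RA = 0xDEADBEEF;
  // rlwinm rB, rA, 4, 0, 27 followed by rlwinm rC, rB, 4, 0, 23.
  uint32_t RC = rlwinm(rlwinm(RA, 4, 0, 27), 4, 0, 23);
  // Composed mask rotl(MASK(0,27), 4) & MASK(0,23) == MASK(0,23), a run
  // of ones, so a single rlwinm with SH = (4 + 4) % 32 gives the same
  // result.
  assert((rotl32(ppcMask(0, 27), 4) & ppcMask(0, 23)) == ppcMask(0, 23));
  assert(RC == rlwinm(RA, 8, 0, 23));
  return 0;
}
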
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index dfae19804d94..2b341b5952c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -42,7 +42,7 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
/// function. This is only valid after the initial scan of the function by
/// PEI.
- bool MustSaveLR;
+ bool MustSaveLR = false;
/// MustSaveTOC - Indicates that the TOC save needs to be performed in the
/// prologue of the function. This is typically the case when there are
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index b1c0433641dd..a4b4bf2973d1 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -35,6 +35,8 @@ STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
STATISTIC(NumberOfSelfCopies,
"Number of self copy instructions eliminated");
+STATISTIC(NumFrameOffFoldInPreEmit,
+ "Number of folding frame offset by using r+r in pre-emit peephole");
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
@@ -161,8 +163,19 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) override {
- if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole)
+ if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
+ // Remove UNENCODED_NOP even when this pass is disabled.
+ // This needs to be done unconditionally so we don't emit zeros
+ // in the instruction stream.
+ SmallVector<MachineInstr *, 4> InstrsToErase;
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
+ if (MI.getOpcode() == PPC::UNENCODED_NOP)
+ InstrsToErase.push_back(&MI);
+ for (MachineInstr *MI : InstrsToErase)
+ MI->eraseFromParent();
return false;
+ }
bool Changed = false;
const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -171,6 +184,10 @@ namespace {
Changed |= removeRedundantLIs(MBB, TRI);
for (MachineInstr &MI : MBB) {
unsigned Opc = MI.getOpcode();
+ if (Opc == PPC::UNENCODED_NOP) {
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
// Detect self copies - these can result from running AADB.
if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
const MCInstrDesc &MCID = TII->get(Opc);
@@ -202,6 +219,12 @@ namespace {
InstrsToErase.push_back(DefMIToErase);
}
}
+ if (TII->foldFrameOffset(MI)) {
+ Changed = true;
+ NumFrameOffFoldInPreEmit++;
+ LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
+ LLVM_DEBUG(MI.dump());
+ }
}
// Eliminate conditional branch based on a constant CR bit by
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 3b71ed219c17..90cc81beb89d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -375,10 +376,10 @@ public:
};
private:
- const PPCInstrInfo *TII;
- MachineFunction *MF;
- MachineRegisterInfo *MRI;
- const MachineBranchProbabilityInfo *MBPI;
+ const PPCInstrInfo *TII = nullptr;
+ MachineFunction *MF = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
// A vector to contain all the CR logical operations
SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps;
@@ -470,21 +471,21 @@ PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) {
} else {
MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(),
Ret.SubregDef1, Ret.CopyDefs.first);
+ assert(Def1 && "Must be able to find a definition of operand 1.");
Ret.DefsSingleUse &=
MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg());
Ret.DefsSingleUse &=
MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg());
- assert(Def1 && "Must be able to find a definition of operand 1.");
if (isBinary(MIParam)) {
Ret.IsBinary = 1;
MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(),
Ret.SubregDef2,
Ret.CopyDefs.second);
+ assert(Def2 && "Must be able to find a definition of operand 2.");
Ret.DefsSingleUse &=
MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg());
Ret.DefsSingleUse &=
MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg());
- assert(Def2 && "Must be able to find a definition of operand 2.");
Ret.TrueDefs = std::make_pair(Def1, Def2);
} else {
Ret.TrueDefs = std::make_pair(Def1, nullptr);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 9ec26a19bdaa..01b97ba6ab20 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -380,7 +380,7 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co
// This is eiher:
// 1) A fixed frame index object which we know are aligned so
// as long as we have a valid DForm/DSForm/DQForm (non XForm) we don't
- // need to consider the alignement here.
+ // need to consider the alignment here.
// 2) A not fixed object but in that case we now know that the min required
// alignment is no more than 1 based on the previous check.
if (InstrInfo->isXFormMemOp(Opcode))
@@ -747,12 +747,18 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
Register SrcReg = MI.getOperand(0).getReg();
// Search up the BB to find the definition of the CR bit.
- MachineBasicBlock::reverse_iterator Ins;
+ MachineBasicBlock::reverse_iterator Ins = MI;
+ MachineBasicBlock::reverse_iterator Rend = MBB.rend();
+ ++Ins;
unsigned CRBitSpillDistance = 0;
- for (Ins = MI; Ins != MBB.rend(); Ins++) {
+ bool SeenUse = false;
+ for (; Ins != Rend; ++Ins) {
// Definition found.
if (Ins->modifiesRegister(SrcReg, TRI))
break;
+ // Use found.
+ if (Ins->readsRegister(SrcReg, TRI))
+ SeenUse = true;
// Unable to find CR bit definition within maximum search distance.
if (CRBitSpillDistance == MaxCRBitSpillDist) {
Ins = MI;
@@ -767,17 +773,35 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
if (Ins == MBB.rend())
Ins = MI;
+ bool SpillsKnownBit = false;
// There is no need to extract the CR bit if its value is already known.
switch (Ins->getOpcode()) {
case PPC::CRUNSET:
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LI8 : PPC::LI), Reg)
.addImm(0);
+ SpillsKnownBit = true;
break;
case PPC::CRSET:
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LIS8 : PPC::LIS), Reg)
.addImm(-32768);
+ SpillsKnownBit = true;
break;
default:
+ // On Power9, we can use SETB to extract the LT bit. This only works for
+ // the LT bit since SETB produces -1/1/0 for LT/GT/<neither>. So the value
+ // of the bit we care about (32-bit sign bit) will be set to the value of
+ // the LT bit (regardless of the other bits in the CR field).
+ if (Subtarget.isISA3_0()) {
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR1LT ||
+ SrcReg == PPC::CR2LT || SrcReg == PPC::CR3LT ||
+ SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT ||
+ SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) {
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg)
+ .addReg(getCRFromCRBit(SrcReg), RegState::Undef);
+ break;
+ }
+ }
+
// We need to move the CR field that contains the CR bit we are spilling.
// The super register may not be explicitly defined (i.e. it can be defined
// by a CR-logical that only defines the subreg) so we state that the CR
@@ -803,8 +827,13 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
.addReg(Reg, RegState::Kill),
FrameIndex);
+ bool KillsCRBit = MI.killsRegister(SrcReg, TRI);
// Discard the pseudo instruction.
MBB.erase(II);
+ if (SpillsKnownBit && KillsCRBit && !SeenUse) {
+ Ins->setDesc(TII.get(PPC::UNENCODED_NOP));
+ Ins->RemoveOperand(0);
+ }
}
void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
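
A quick standalone check of the SETB reasoning in the spill path above: since SETB yields -1/1/0 for LT/GT/neither, the 32-bit sign bit of the result always equals the LT bit of the CR field, whatever the other bits hold. The sketch below is illustrative only (the setb helper is an invented model of the instruction's semantics, not backend code):

#include <cassert>
#include <cstdint>

static int32_t setb(bool LT, bool GT) { return LT ? -1 : (GT ? 1 : 0); }

int main() {
  for (int LT = 0; LT <= 1; ++LT)
    for (int GT = 0; GT <= 1; ++GT) {
      if (LT && GT)
        continue; // LT and GT are mutually exclusive in a CR field.
      bool SignBit = (static_cast<uint32_t>(setb(LT, GT)) >> 31) & 1;
      assert(SignBit == static_cast<bool>(LT));
    }
  return 0;
}
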
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index a50e05920cd4..a5fbb0c6ec64 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -61,6 +61,15 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
public:
PPCRegisterInfo(const PPCTargetMachine &TM);
+ /// getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode
+ /// for a given imm form load/store opcode \p ImmOpcode.
+ /// FIXME: move this to PPCInstrInfo class.
+ unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const {
+ if (!ImmToIdxMap.count(ImmOpcode))
+ return PPC::INSTRUCTION_LIST_END;
+ return ImmToIdxMap.find(ImmOpcode)->second;
+ }
+
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index ad7f9208b787..0997f68bd999 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -61,7 +61,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU,
void PPCSubtarget::initializeEnvironment() {
StackAlignment = Align(16);
- DarwinDirective = PPC::DIR_NONE;
+ CPUDirective = PPC::DIR_NONE;
HasMFOCRF = false;
Has64BitSupport = false;
Use64BitRegs = false;
@@ -100,6 +100,7 @@ void PPCSubtarget::initializeEnvironment() {
IsPPC6xx = false;
IsE500 = false;
FeatureMFTB = false;
+ AllowsUnalignedFPAccess = false;
DeprecatedDST = false;
HasLazyResolverStubs = false;
HasICBT = false;
@@ -192,7 +193,7 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
bool PPCSubtarget::enableMachineScheduler() const { return true; }
bool PPCSubtarget::enableMachinePipeliner() const {
- return (DarwinDirective == PPC::DIR_PWR9) && EnableMachinePipeliner;
+ return (CPUDirective == PPC::DIR_PWR9) && EnableMachinePipeliner;
}
bool PPCSubtarget::useDFAforSMS() const { return false; }
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
index d96c2893aee9..044e982740e9 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -57,6 +57,7 @@ namespace PPC {
DIR_PWR7,
DIR_PWR8,
DIR_PWR9,
+ DIR_PWR_FUTURE,
DIR_64
};
}
@@ -84,7 +85,7 @@ protected:
InstrItineraryData InstrItins;
/// Which cpu directive was used.
- unsigned DarwinDirective;
+ unsigned CPUDirective;
/// Used by the ISel to turn in optimizations for POWER4-derived architectures
bool HasMFOCRF;
@@ -123,6 +124,7 @@ protected:
bool IsPPC4xx;
bool IsPPC6xx;
bool FeatureMFTB;
+ bool AllowsUnalignedFPAccess;
bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsLittleEndian;
@@ -169,8 +171,11 @@ public:
Align getStackAlignment() const { return StackAlignment; }
/// getDarwinDirective - Returns the -m directive specified for the cpu.
+ unsigned getDarwinDirective() const { return CPUDirective; }
+
+ /// getCPUDirective - Returns the -m directive specified for the cpu.
///
- unsigned getDarwinDirective() const { return DarwinDirective; }
+ unsigned getCPUDirective() const { return CPUDirective; }
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
@@ -270,6 +275,7 @@ public:
bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
bool isE500() const { return IsE500; }
bool isFeatureMFTB() const { return FeatureMFTB; }
+ bool allowsUnalignedFPAccess() const { return AllowsUnalignedFPAccess; }
bool isDeprecatedDST() const { return DeprecatedDST; }
bool hasICBT() const { return HasICBT; }
bool hasInvariantFunctionDescriptors() const {
@@ -347,6 +353,41 @@ public:
/// True if the GV will be accessed via an indirect symbol.
bool isGVIndirectSymbol(const GlobalValue *GV) const;
+ /// True if the ABI is descriptor based.
+ bool usesFunctionDescriptors() const {
+ // Both 32-bit and 64-bit AIX are descriptor based. For ELF only the 64-bit
+ // v1 ABI uses descriptors.
+ return isAIXABI() || (is64BitELFABI() && !isELFv2ABI());
+ }
+
+ unsigned descriptorTOCAnchorOffset() const {
+ assert(usesFunctionDescriptors() &&
+ "Should only be called when the target uses descriptors.");
+ return IsPPC64 ? 8 : 4;
+ }
+
+ unsigned descriptorEnvironmentPointerOffset() const {
+ assert(usesFunctionDescriptors() &&
+ "Should only be called when the target uses descriptors.");
+ return IsPPC64 ? 16 : 8;
+ }
+
+ MCRegister getEnvironmentPointerRegister() const {
+ assert(usesFunctionDescriptors() &&
+ "Should only be called when the target uses descriptors.");
+ return IsPPC64 ? PPC::X11 : PPC::R11;
+ }
+
+ MCRegister getTOCPointerRegister() const {
+ assert((is64BitELFABI() || isAIXABI()) &&
+ "Should only be called when the target is a TOC based ABI.");
+ return IsPPC64 ? PPC::X2 : PPC::R2;
+ }
+
+ MCRegister getStackPointerRegister() const {
+ return IsPPC64 ? PPC::X1 : PPC::R1;
+ }
+
bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
};
} // End llvm namespace
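
The descriptor offsets added above correspond to the classic three-word function descriptor: entry point, TOC anchor, environment pointer. A hypothetical layout sketch for the 64-bit case follows (the 32-bit AIX descriptor halves each field, giving the 4/8 offsets; the struct name is invented for illustration):

#include <cstddef>
#include <cstdint>

struct FunctionDescriptor64 {
  uint64_t EntryPoint; // offset 0: address of the function's code
  uint64_t TOCAnchor;  // offset 8: descriptorTOCAnchorOffset()
  uint64_t EnvPointer; // offset 16: descriptorEnvironmentPointerOffset()
};

static_assert(offsetof(FunctionDescriptor64, TOCAnchor) == 8,
              "TOC anchor offset");
static_assert(offsetof(FunctionDescriptor64, EnvPointer) == 16,
              "environment pointer offset");
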
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 8f313d9d01c4..17e1196eea59 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index f6ae1402732f..2caf4c99a1f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -51,8 +51,8 @@ opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
cl::desc("Disable CTR loops for PPC"));
static cl::
-opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
- cl::desc("Disable PPC loop preinc prep"));
+opt<bool> DisableInstrFormPrep("disable-ppc-instr-form-prep", cl::Hidden,
+ cl::desc("Disable PPC loop instr form prep"));
static cl::opt<bool>
VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
@@ -77,7 +77,7 @@ EnableGEPOpt("ppc-gep-opt", cl::Hidden,
static cl::opt<bool>
EnablePrefetch("enable-ppc-prefetching",
- cl::desc("disable software prefetching on PPC"),
+ cl::desc("enable software prefetching on PPC"),
cl::init(false), cl::Hidden);
static cl::opt<bool>
@@ -94,7 +94,7 @@ static cl::opt<bool>
ReduceCRLogical("ppc-reduce-cr-logicals",
cl::desc("Expand eligible cr-logical binary ops to branches"),
cl::init(true), cl::Hidden);
-extern "C" void LLVMInitializePowerPCTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target());
@@ -104,7 +104,7 @@ extern "C" void LLVMInitializePowerPCTarget() {
#ifndef NDEBUG
initializePPCCTRLoopsVerifyPass(PR);
#endif
- initializePPCLoopPreIncPrepPass(PR);
+ initializePPCLoopInstrFormPrepPass(PR);
initializePPCTOCRegDepsPass(PR);
initializePPCEarlyReturnPass(PR);
initializePPCVSXCopyPass(PR);
@@ -119,6 +119,7 @@ extern "C" void LLVMInitializePowerPCTarget() {
initializePPCPreEmitPeepholePass(PR);
initializePPCTLSDynamicCallPass(PR);
initializePPCMIPeepholePass(PR);
+ initializePPCLowerMASSVEntriesPass(PR);
}
/// Return the datalayout string of a subtarget.
@@ -210,26 +211,10 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
if (TT.isMacOSX())
return PPCTargetMachine::PPC_ABI_UNKNOWN;
- if (TT.isOSFreeBSD()) {
- switch (TT.getArch()) {
- case Triple::ppc64le:
- case Triple::ppc64:
- if (TT.getOSMajorVersion() >= 13)
- return PPCTargetMachine::PPC_ABI_ELFv2;
- else
- return PPCTargetMachine::PPC_ABI_ELFv1;
- case Triple::ppc:
- default:
- return PPCTargetMachine::PPC_ABI_UNKNOWN;
- }
- }
-
switch (TT.getArch()) {
case Triple::ppc64le:
return PPCTargetMachine::PPC_ABI_ELFv2;
case Triple::ppc64:
- if (TT.getEnvironment() == llvm::Triple::ELFv2)
- return PPCTargetMachine::PPC_ABI_ELFv2;
return PPCTargetMachine::PPC_ABI_ELFv1;
default:
return PPCTargetMachine::PPC_ABI_UNKNOWN;
@@ -415,6 +400,9 @@ void PPCPassConfig::addIRPasses() {
addPass(createPPCBoolRetToIntPass());
addPass(createAtomicExpandPass());
+ // Lower generic MASSV routines to PowerPC subtarget-specific entries.
+ addPass(createPPCLowerMASSVEntriesPass());
+
// For the BG/Q (or if explicitly requested), add explicit data prefetch
// intrinsics.
bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
@@ -441,8 +429,8 @@ void PPCPassConfig::addIRPasses() {
}
bool PPCPassConfig::addPreISel() {
- if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
- addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
+ if (!DisableInstrFormPrep && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine()));
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createHardwareLoopsPass());
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f9459c246e45..e05699cc95ec 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -84,10 +84,10 @@ int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 4 * TTI::TCC_Basic;
}
-int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCost(IID, Idx, Imm, Ty);
+ return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty);
assert(Ty->isIntegerTy());
@@ -118,10 +118,10 @@ int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
-int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);
+ return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty);
assert(Ty->isIntegerTy());
@@ -283,24 +283,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
case Intrinsic::loop_decrement:
return true;
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::setjmp:
-
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
- // let's return it to _setjmp state
-# pragma pop_macro("setjmp")
-# undef setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::longjmp:
-
// Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
// because, although it does clobber the counter register, the
// control can't then return to inside the loop unless there is also
@@ -554,7 +536,7 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
- if (ST->getDarwinDirective() == PPC::DIR_A2) {
+ if (ST->getCPUDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
// helps expose latency-hiding opportunities to the instruction scheduler.
UP.Partial = UP.Runtime = true;
@@ -580,7 +562,7 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
// on combining the loads generated for consecutive accesses, and failure to
// do so is particularly expensive. This makes it much more likely (compared
// to only using concatenation unrolling).
- if (ST->getDarwinDirective() == PPC::DIR_A2)
+ if (ST->getCPUDirective() == PPC::DIR_A2)
return true;
return LoopHasReductions;
@@ -602,8 +584,8 @@ unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
assert(ClassID == GPRRC || ClassID == FPRRC ||
ClassID == VRRC || ClassID == VSXRC);
if (ST->hasVSX()) {
- assert(ClassID == GPRRC || ClassID == VSXRC);
- return ClassID == GPRRC ? 32 : 64;
+ assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);
+ return ClassID == VSXRC ? 64 : 32;
}
assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
return 32;
@@ -612,8 +594,14 @@ unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
if (Vector)
return ST->hasVSX() ? VSXRC : VRRC;
- else if (Ty && Ty->getScalarType()->isFloatTy())
+ else if (Ty && (Ty->getScalarType()->isFloatTy() ||
+ Ty->getScalarType()->isDoubleTy()))
return ST->hasVSX() ? VSXRC : FPRRC;
+ else if (Ty && (Ty->getScalarType()->isFP128Ty() ||
+ Ty->getScalarType()->isPPC_FP128Ty()))
+ return VRRC;
+ else if (Ty && Ty->getScalarType()->isHalfTy())
+ return VSXRC;
else
return GPRRC;
}
@@ -650,9 +638,10 @@ unsigned PPCTTIImpl::getCacheLineSize() const {
return CacheLineSize;
// On P7, P8 or P9 we have a cache line size of 128.
- unsigned Directive = ST->getDarwinDirective();
+ unsigned Directive = ST->getCPUDirective();
+ // Assume that the Future CPU has the same cache line size as the others.
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
- Directive == PPC::DIR_PWR9)
+ Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE)
return 128;
// On other processors return a default of 64 bytes.
@@ -666,7 +655,7 @@ unsigned PPCTTIImpl::getPrefetchDistance() const {
}
unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
- unsigned Directive = ST->getDarwinDirective();
+ unsigned Directive = ST->getCPUDirective();
// The 440 has no SIMD support, but floating-point instructions
// have a 5-cycle latency, so unroll by 5x for latency hiding.
if (Directive == PPC::DIR_440)
@@ -684,8 +673,9 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// For P7 and P8, floating-point instructions have a 6-cycle latency and
// there are two execution units, so unroll by 12x for latency hiding.
// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
+ // Assume that the Future CPU behaves the same as the others.
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
- Directive == PPC::DIR_PWR9)
+ Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE)
return 12;
// For most things, modern systems have two execution units (and
@@ -720,10 +710,13 @@ int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
return Cost * 2;
}
-int PPCTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
- TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
+int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
@@ -833,8 +826,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return Cost;
}
-int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace, const Instruction *I) {
+int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ MaybeAlign Alignment, unsigned AddressSpace,
+ const Instruction *I) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
@@ -892,7 +886,8 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
// to be decomposed based on the alignment factor.
// Add the cost of each scalar load or store.
- Cost += LT.first*(SrcBytes/Alignment-1);
+ assert(Alignment);
+ Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);
// For a vector type, there is also scalarization overhead (only for
// stores, loads are expanded using the vector-load + permutation sequence,
@@ -923,7 +918,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
// Firstly, the cost of load/store operation.
- int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+ int Cost =
+ getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
// PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
@@ -935,6 +931,20 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
return Cost;
}
+unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+}
+
+unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type*> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
+ if (ID == Intrinsic::bswap && ST->hasP9Vector())
+ return TLI->getTypeLegalizationCost(DL, RetTy).first;
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
+}
+
bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 83a70364bf68..35388d14f606 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -46,9 +46,10 @@ public:
using BaseT::getIntImmCost;
int getIntImmCost(const APInt &Imm, Type *Ty);
- int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
- int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
@@ -90,14 +91,15 @@ public:
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
@@ -106,6 +108,11 @@ public:
unsigned AddressSpace,
bool UseMaskForCond = false,
bool UseMaskForGaps = false);
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF);
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type*> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
/// @}
};
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 5e150be544ed..3e6d1c7939f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 99b5dec74668..649bd648a6cf 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -23,7 +23,7 @@ Target &llvm::getThePPC64LETarget() {
return ThePPC64LETarget;
}
-extern "C" void LLVMInitializePowerPCTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetInfo() {
RegisterTarget<Triple::ppc, /*HasJIT=*/true> X(getThePPC32Target(), "ppc32",
"PowerPC 32", "PPC");
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 300ba8dc675c..53562f42a184 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -15,6 +15,7 @@
#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCAssembler.h"
@@ -37,10 +38,15 @@
using namespace llvm;
+#define DEBUG_TYPE "riscv-asm-parser"
+
// Include the auto-generated portion of the compress emitter.
#define GEN_COMPRESS_INSTR
#include "RISCVGenCompressInstEmitter.inc"
+STATISTIC(RISCVNumInstrsCompressed,
+ "Number of RISC-V Compressed instructions emitted");
+
namespace {
struct RISCVOperand;
@@ -188,6 +194,18 @@ public:
Parser.addAliasForDirective(".word", ".4byte");
Parser.addAliasForDirective(".dword", ".8byte");
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ if (Options.ABIName.back() == 'f' &&
+ !getSTI().getFeatureBits()[RISCV::FeatureStdExtF]) {
+ errs() << "Hard-float 'f' ABI can't be used for a target that "
+ "doesn't support the F instruction set extension (ignoring "
+ "target-abi)\n";
+ } else if (Options.ABIName.back() == 'd' &&
+ !getSTI().getFeatureBits()[RISCV::FeatureStdExtD]) {
+ errs() << "Hard-float 'd' ABI can't be used for a target that "
+ "doesn't support the D instruction set extension (ignoring "
+ "target-abi)\n";
+ }
}
};
@@ -258,6 +276,11 @@ public:
bool isMem() const override { return false; }
bool isSystemRegister() const { return Kind == KindTy::SystemRegister; }
+ bool isGPR() const {
+ return Kind == KindTy::Register &&
+ RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum);
+ }
+
static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm,
RISCVMCExpr::VariantKind &VK) {
if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
@@ -738,7 +761,9 @@ public:
} // end anonymous namespace.
#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
#define GET_MATCHER_IMPLEMENTATION
+#define GET_MNEMONIC_SPELL_CHECKER
#include "RISCVGenAsmMatcher.inc"
static Register convertFPR64ToFPR32(Register Reg) {
@@ -775,24 +800,45 @@ bool RISCVAsmParser::generateImmOutOfRangeError(
return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
}
+static std::string RISCVMnemonicSpellCheck(StringRef S,
+ const FeatureBitset &FBS,
+ unsigned VariantID = 0);
+
bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
+ FeatureBitset MissingFeatures;
auto Result =
- MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures,
+ MatchingInlineAsm);
switch (Result) {
default:
break;
case Match_Success:
return processInstruction(Inst, IDLoc, Operands, Out);
- case Match_MissingFeature:
- return Error(IDLoc, "instruction use requires an option to be enabled");
- case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_MissingFeature: {
+ assert(MissingFeatures.any() && "Unknown missing features!");
+ bool FirstFeature = true;
+ std::string Msg = "instruction requires the following:";
+ for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
+ if (MissingFeatures[i]) {
+ Msg += FirstFeature ? " " : ", ";
+ Msg += getSubtargetFeatureName(i);
+ FirstFeature = false;
+ }
+ }
+ return Error(IDLoc, Msg);
+ }
+ case Match_MnemonicFail: {
+ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ std::string Suggestion = RISCVMnemonicSpellCheck(
+ ((RISCVOperand &)*Operands[0]).getToken(), FBS);
+ return Error(IDLoc, "unrecognized instruction mnemonic" + Suggestion);
+ }
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0U) {
@@ -1587,7 +1633,8 @@ bool RISCVAsmParser::parseDirectiveOption() {
void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
MCInst CInst;
bool Res = compressInst(CInst, Inst, getSTI(), S.getContext());
- CInst.setLoc(Inst.getLoc());
+ if (Res)
+ ++RISCVNumInstrsCompressed;
S.EmitInstruction((Res ? CInst : Inst), getSTI());
}
@@ -1837,7 +1884,7 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
return false;
}
-extern "C" void LLVMInitializeRISCVAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVAsmParser() {
RegisterMCAsmParser<RISCVAsmParser> X(getTheRISCV32Target());
RegisterMCAsmParser<RISCVAsmParser> Y(getTheRISCV64Target());
}
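
For reference, the Match_MissingFeature diagnostic built above composes the feature names registered through AssemblerPredicate (see the RISCV.td hunk later in this diff). Assembling a mul without the M extension would then produce something like the line below; this is a hypothetical transcript, and the exact location prefix depends on the driver:

error: instruction requires the following: 'M' (Integer Multiplication and Division)
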
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 15943ba42156..1461a40227bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -38,7 +38,6 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
};
} // end anonymous namespace
@@ -49,7 +48,7 @@ static MCDisassembler *createRISCVDisassembler(const Target &T,
return new RISCVDisassembler(STI, Ctx);
}
-extern "C" void LLVMInitializeRISCVDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVDisassembler() {
// Register the disassembler for each target.
TargetRegistry::RegisterMCDisassembler(getTheRISCV32Target(),
createRISCVDisassembler);
@@ -315,7 +314,6 @@ static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn,
DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &OS,
raw_ostream &CS) const {
// TODO: This will need modification when supporting instruction set
// extensions with instructions > 32-bits (up to 176 bits wide).
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index cab2bbcb81bc..08b75795ed4b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/RISCVFixupKinds.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
@@ -54,7 +55,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsPCRel) {
switch (Kind) {
default:
- llvm_unreachable("invalid fixup kind!");
+ Ctx.reportError(Fixup.getLoc(), "Unsupported relocation type");
+ return ELF::R_RISCV_NONE;
case FK_Data_4:
case FK_PCRel_4:
return ELF::R_RISCV_32_PCREL;
@@ -87,7 +89,14 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
switch (Kind) {
default:
- llvm_unreachable("invalid fixup kind!");
+ Ctx.reportError(Fixup.getLoc(), "Unsupported relocation type");
+ return ELF::R_RISCV_NONE;
+ case FK_Data_1:
+ Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
+ return ELF::R_RISCV_NONE;
+ case FK_Data_2:
+ Ctx.reportError(Fixup.getLoc(), "2-byte data relocations not supported");
+ return ELF::R_RISCV_NONE;
case FK_Data_4:
if (Expr->getKind() == MCExpr::Target &&
cast<RISCVMCExpr>(Expr)->getKind() == RISCVMCExpr::VK_RISCV_32_PCREL)
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 8b5fe6dd8252..22bb80ae34e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -63,8 +63,9 @@ bool RISCVInstPrinter::applyTargetSpecificCLOption(StringRef Opt) {
return false;
}
-void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void RISCVInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
bool Res = false;
const MCInst *NewMI = MI;
MCInst UncompressedMI;
@@ -73,7 +74,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (Res)
NewMI = const_cast<MCInst *>(&UncompressedMI);
if (NoAliases || !printAliasInstr(NewMI, STI, O))
- printInstruction(NewMI, STI, O);
+ printInstruction(NewMI, Address, STI, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
index 189d72626f3e..aeb2ea636060 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
@@ -27,8 +27,8 @@ public:
bool applyTargetSpecificCLOption(StringRef Opt) override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
void printRegName(raw_ostream &O, unsigned RegNo) const override;
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -43,8 +43,8 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index ae25ec818171..7aa9b5e7d683 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -139,7 +139,11 @@ bool RISCVMCExpr::evaluatePCRelLo(MCValue &Res, const MCAsmLayout *Layout,
findAssociatedFragment()->getParent())
return false;
- uint64_t AUIPCOffset = AUIPCSymbol->getOffset();
+ // We must use TargetFixup rather than AUIPCSymbol here. They will almost
+ // always have the same offset, except for the case when AUIPCSymbol is at
+ // the end of a fragment and the fixup comes from offset 0 in the next
+ // fragment.
+ uint64_t AUIPCOffset = TargetFixup->getOffset();
Res = MCValue::get(Target.getSymA(), nullptr,
Target.getConstant() + (Fixup->getOffset() - AUIPCOffset));
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 5a4c86e48f1e..c37482be3c2c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -51,7 +51,8 @@ static MCRegisterInfo *createRISCVMCRegisterInfo(const Triple &TT) {
}
static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TT) {
+ const Triple &TT,
+ const MCTargetOptions &Options) {
MCAsmInfo *MAI = new RISCVMCAsmInfo(TT);
Register SP = MRI.getDwarfRegNum(RISCV::X2, true);
@@ -92,7 +93,7 @@ static MCTargetStreamer *createRISCVAsmTargetStreamer(MCStreamer &S,
return new RISCVTargetAsmStreamer(S, OS);
}
-extern "C" void LLVMInitializeRISCVTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) {
TargetRegistry::RegisterMCAsmInfo(*T, createRISCVMCAsmInfo);
TargetRegistry::RegisterMCInstrInfo(*T, createRISCVMCInstrInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
index 46530a8f74a8..82afa13aece3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
@@ -16,45 +16,53 @@ def FeatureStdExtM
: SubtargetFeature<"m", "HasStdExtM", "true",
"'M' (Integer Multiplication and Division)">;
def HasStdExtM : Predicate<"Subtarget->hasStdExtM()">,
- AssemblerPredicate<"FeatureStdExtM">;
+ AssemblerPredicate<"FeatureStdExtM",
+ "'M' (Integer Multiplication and Division)">;
def FeatureStdExtA
: SubtargetFeature<"a", "HasStdExtA", "true",
"'A' (Atomic Instructions)">;
def HasStdExtA : Predicate<"Subtarget->hasStdExtA()">,
- AssemblerPredicate<"FeatureStdExtA">;
+ AssemblerPredicate<"FeatureStdExtA",
+ "'A' (Atomic Instructions)">;
def FeatureStdExtF
: SubtargetFeature<"f", "HasStdExtF", "true",
"'F' (Single-Precision Floating-Point)">;
def HasStdExtF : Predicate<"Subtarget->hasStdExtF()">,
- AssemblerPredicate<"FeatureStdExtF">;
+ AssemblerPredicate<"FeatureStdExtF",
+ "'F' (Single-Precision Floating-Point)">;
def FeatureStdExtD
: SubtargetFeature<"d", "HasStdExtD", "true",
"'D' (Double-Precision Floating-Point)",
[FeatureStdExtF]>;
def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">,
- AssemblerPredicate<"FeatureStdExtD">;
+ AssemblerPredicate<"FeatureStdExtD",
+ "'D' (Double-Precision Floating-Point)">;
def FeatureStdExtC
: SubtargetFeature<"c", "HasStdExtC", "true",
"'C' (Compressed Instructions)">;
def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">,
- AssemblerPredicate<"FeatureStdExtC">;
+ AssemblerPredicate<"FeatureStdExtC",
+ "'C' (Compressed Instructions)">;
def FeatureRVCHints
: SubtargetFeature<"rvc-hints", "EnableRVCHintInstrs", "true",
"Enable RVC Hint Instructions.">;
def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">,
- AssemblerPredicate<"FeatureRVCHints">;
+ AssemblerPredicate<"FeatureRVCHints",
+ "RVC Hint Instructions">;
def Feature64Bit
: SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">;
def IsRV64 : Predicate<"Subtarget->is64Bit()">,
- AssemblerPredicate<"Feature64Bit">;
+ AssemblerPredicate<"Feature64Bit",
+ "RV64I Base Instruction Set">;
def IsRV32 : Predicate<"!Subtarget->is64Bit()">,
- AssemblerPredicate<"!Feature64Bit">;
+ AssemblerPredicate<"!Feature64Bit",
+ "RV32I Base Instruction Set">;
def RV64 : HwMode<"+64bit">;
def RV32 : HwMode<"-64bit">;
@@ -69,6 +77,11 @@ def FeatureRelax
: SubtargetFeature<"relax", "EnableLinkerRelax", "true",
"Enable Linker relaxation.">;
+foreach i = {1-31} in
+ def FeatureReserveX#i :
+ SubtargetFeature<"reserve-x"#i, "UserReservedRegister[RISCV::X"#i#"]",
+ "true", "Reserve X"#i>;
+
//===----------------------------------------------------------------------===//
// Named operands for CSR instructions.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 57631dcb5115..f4aa28bcc0c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -16,6 +16,7 @@
#include "MCTargetDesc/RISCVMCExpr.h"
#include "RISCVTargetMachine.h"
#include "TargetInfo/RISCVTargetInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -31,6 +32,9 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
+STATISTIC(RISCVNumInstrsCompressed,
+ "Number of RISC-V Compressed instructions emitted");
+
namespace {
class RISCVAsmPrinter : public AsmPrinter {
public:
@@ -64,6 +68,8 @@ void RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
MCInst CInst;
bool Res = compressInst(CInst, Inst, *TM.getMCSubtargetInfo(),
OutStreamer->getContext());
+ if (Res)
+ ++RISCVNumInstrsCompressed;
AsmPrinter::EmitToStreamer(*OutStreamer, Res ? CInst : Inst);
}
@@ -115,6 +121,14 @@ bool RISCVAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
case MachineOperand::MO_Register:
OS << RISCVInstPrinter::getRegisterName(MO.getReg());
return false;
+ case MachineOperand::MO_GlobalAddress:
+ PrintSymbolOperand(MO, OS);
+ return false;
+ case MachineOperand::MO_BlockAddress: {
+ MCSymbol *Sym = GetBlockAddressSymbol(MO.getBlockAddress());
+ Sym->print(OS, MAI);
+ return false;
+ }
default:
break;
}
@@ -141,7 +155,7 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
// Force static initialization.
-extern "C" void LLVMInitializeRISCVAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVAsmPrinter() {
RegisterAsmPrinter<RISCVAsmPrinter> X(getTheRISCV32Target());
RegisterAsmPrinter<RISCVAsmPrinter> Y(getTheRISCV64Target());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index da5cd16e750c..84bce0f48562 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -319,9 +319,9 @@ static void doMaskedAtomicBinOpExpansion(
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Xchg:
- BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
- .addReg(RISCV::X0)
- .addReg(IncrReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+ .addReg(IncrReg)
+ .addImm(0);
break;
case AtomicRMWInst::Add:
BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 2ef388884897..c60fc3fc6b42 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/MC/MCDwarf.h"
using namespace llvm;
@@ -31,6 +32,13 @@ bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const {
MFI.isFrameAddressTaken();
}
+bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
+}
+
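
hasBP() is true exactly when a frame combines runtime-sized allocation with stack realignment: FP is needed to restore the frame in the epilogue and SP moves with each dynamic allocation, so a third base register must address fixed locals. A hypothetical function that would now compile instead of hitting the fatal error removed in a later hunk of this file:

    void consume(void *, void *); // placeholder callee, not from this patch

    void f(unsigned n) {
      alignas(64) char big[64];        // over-aligned local forces realignment
      void *dyn = __builtin_alloca(n); // variable-sized stack object
      consume(big, dyn);
    }
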
// Determines the size of the frame and maximum call frame size.
void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -99,22 +107,15 @@ static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; }
void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
-
MachineFrameInfo &MFI = MF.getFrameInfo();
auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
const RISCVInstrInfo *TII = STI.getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
- if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) {
- report_fatal_error(
- "RISC-V backend can't currently handle functions that need stack "
- "realignment and have variable sized objects");
- }
-
Register FPReg = getFPReg(STI);
Register SPReg = getSPReg(STI);
+ Register BPReg = RISCVABI::getBPReg();
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -131,6 +132,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
if (StackSize == 0 && !MFI.adjustsStack())
return;
+ // If the stack pointer has been marked as reserved, then produce an error if
+ // the frame requires stack allocation.
+ if (STI.isRegisterReservedByUser(SPReg))
+ MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
+ MF.getFunction(), "Stack pointer required, but has been reserved."});
+
uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
// Split the SP adjustment to reduce the offsets of callee saved spill.
if (FirstSPAdjustAmount)
@@ -167,6 +174,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// Generate new FP.
if (hasFP(MF)) {
+ if (STI.isRegisterReservedByUser(FPReg))
+ MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
+ MF.getFunction(), "Frame pointer required, but has been reserved."});
+
adjustReg(MBB, MBBI, DL, FPReg, SPReg,
StackSize - RVFI->getVarArgsSaveSize(), MachineInstr::FrameSetup);
@@ -184,11 +195,16 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
"SecondSPAdjustAmount should be greater than zero");
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount,
MachineInstr::FrameSetup);
- // Emit ".cfi_def_cfa_offset StackSize"
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+
+ // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0",
+ // don't emit an sp-based .cfi_def_cfa_offset
+ if (!hasFP(MF)) {
+ // Emit ".cfi_def_cfa_offset StackSize"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
}
if (hasFP(MF)) {
@@ -213,20 +229,43 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(VR)
.addImm(ShiftAmount);
}
+ // FP will be used to restore the frame in the epilogue, so we need
+ // another base register BP to record SP after realignment. SP will
+ // track the current stack after allocating variable-sized objects.
+ if (hasBP(MF)) {
+ // move BP, SP
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), BPReg)
+ .addReg(SPReg)
+ .addImm(0);
+ }
}
}
}
void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
- DebugLoc DL = MBBI->getDebugLoc();
Register FPReg = getFPReg(STI);
Register SPReg = getSPReg(STI);
+ // Get the insert location for the epilogue. If there were no terminators in
+ // the block, get the last instruction.
+ MachineBasicBlock::iterator MBBI = MBB.end();
+ DebugLoc DL;
+ if (!MBB.empty()) {
+ MBBI = MBB.getFirstTerminator();
+ if (MBBI == MBB.end())
+ MBBI = MBB.getLastNonDebugInstr();
+ DL = MBBI->getDebugLoc();
+
+ // If this is not a terminator, the actual insert location should be after the
+ // last instruction.
+ if (!MBBI->isTerminator())
+ MBBI = std::next(MBBI);
+ }
+
// Skip to before the restores of callee-saved registers
// FIXME: assumes exactly one instruction is used to restore each
// callee-saved register.
@@ -252,14 +291,6 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg, SecondSPAdjustAmount,
MachineInstr::FrameDestroy);
-
- // Emit ".cfi_def_cfa_offset FirstSPAdjustAmount"
- unsigned CFIIndex =
- MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr,
- -FirstSPAdjustAmount));
- BuildMI(MBB, LastFrameDestroy, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
}
if (FirstSPAdjustAmount)
@@ -300,12 +331,14 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
Offset += FirstSPAdjustAmount;
else
Offset += MF.getFrameInfo().getStackSize();
- } else if (RI->needsStackRealignment(MF)) {
- assert(!MFI.hasVarSizedObjects() &&
- "Unexpected combination of stack realignment and varsized objects");
+ } else if (RI->needsStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
// If the stack was realigned, the frame pointer is set in order to allow
- // SP to be restored, but we still access stack objects using SP.
- FrameReg = RISCV::X2;
+ // SP to be restored, so we need another base register to record the stack
+ // after realignment.
+ if (hasBP(MF))
+ FrameReg = RISCVABI::getBPReg();
+ else
+ FrameReg = RISCV::X2;
Offset += MF.getFrameInfo().getStackSize();
} else {
FrameReg = RI->getFrameRegister(MF);
@@ -327,6 +360,9 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(RISCV::X1);
SavedRegs.set(RISCV::X8);
}
+ // Mark BP as used if function has dedicated base pointer.
+ if (hasBP(MF))
+ SavedRegs.set(RISCVABI::getBPReg());
// If interrupt is enabled and there are calls in the handler,
// unconditionally save all Caller-saved registers and
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index f4a5949773d9..3a16cf93cf10 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -40,6 +40,8 @@ public:
bool hasFP(const MachineFunction &MF) const override;
+ bool hasBP(const MachineFunction &MF) const;
+
bool hasReservedCallFrame(const MachineFunction &MF) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 1a12d9177d2a..f66d06c20e37 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
// SelectionDAG operations.
namespace {
class RISCVDAGToDAGISel final : public SelectionDAGISel {
- const RISCVSubtarget *Subtarget;
+ const RISCVSubtarget *Subtarget = nullptr;
public:
explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine)
@@ -173,7 +173,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
switch (ConstraintID) {
- case InlineAsm::Constraint_i:
case InlineAsm::Constraint_m:
// We just support simple memory operands that have a single address
// operand and need no special handling.
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5ec31721b29c..5a2cffbc824c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -50,6 +51,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
RISCVABI::ABI ABI = Subtarget.getTargetABI();
assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
+ if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
+ !Subtarget.hasStdExtF()) {
+ errs() << "Hard-float 'f' ABI can't be used for a target that "
+ "doesn't support the F instruction set extension (ignoring "
+ "target-abi)\n";
+ ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
+ } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
+ !Subtarget.hasStdExtD()) {
+ errs() << "Hard-float 'd' ABI can't be used for a target that "
+ "doesn't support the D instruction set extension (ignoring "
+ "target-abi)\n";
+ ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
+ }
+
switch (ABI) {
default:
report_fatal_error("Don't know how to lower this ABI");
@@ -146,7 +161,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::SETGE, ISD::SETNE};
ISD::NodeType FPOpToExtend[] = {
- ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
+ ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
+ ISD::FP_TO_FP16};
if (Subtarget.hasStdExtF()) {
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
@@ -158,6 +174,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
for (auto Op : FPOpToExtend)
setOperationAction(Op, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
}
if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
@@ -175,6 +193,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
for (auto Op : FPOpToExtend)
setOperationAction(Op, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
}
setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
@@ -188,6 +208,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
Subtarget.is64Bit() ? Legal : Custom);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+
if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
setMinCmpXchgSizeInBits(32);
@@ -2007,10 +2030,6 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
auto &Caller = MF.getFunction();
auto CallerCC = Caller.getCallingConv();
- // Do not tail call opt functions with "disable-tail-calls" attribute.
- if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
- return false;
-
// Exception-handling functions need a special set of instructions to
// indicate a return to the hardware. Tail-calling another function would
// probably break this.
@@ -2244,6 +2263,16 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
Glue = Chain.getValue(1);
}
+ // Validate that none of the argument registers have been marked as
+ // reserved; if any have, report an error. Do the same for the return
+ // address if this is not a tail call.
+ validateCCReservedRegs(RegsToPass, MF);
+ if (!IsTailCall &&
+ MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
+ MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
+ MF.getFunction(),
+ "Return address register required, but has been reserved."});
+
// If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
// TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
// split it and then direct call can be matched by PseudoCALL.
@@ -2359,6 +2388,9 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
+
// Stores the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
@@ -2388,6 +2420,13 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Register RegLo = VA.getLocReg();
assert(RegLo < RISCV::X31 && "Invalid register pair");
Register RegHi = RegLo + 1;
+
+ if (STI.isRegisterReservedByUser(RegLo) ||
+ STI.isRegisterReservedByUser(RegHi))
+ MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
+ MF.getFunction(),
+ "Return value register required, but has been reserved."});
+
Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
@@ -2399,6 +2438,11 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Val = convertValVTToLocVT(DAG, Val, VA, DL);
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
+ if (STI.isRegisterReservedByUser(VA.getLocReg()))
+ MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
+ MF.getFunction(),
+ "Return value register required, but has been reserved."});
+
// Guarantee that all emitted copies are stuck together.
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
@@ -2437,6 +2481,19 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}
+void RISCVTargetLowering::validateCCReservedRegs(
+ const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
+ MachineFunction &MF) const {
+ const Function &F = MF.getFunction();
+ const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
+
+ if (std::any_of(std::begin(Regs), std::end(Regs), [&STI](auto Reg) {
+ return STI.isRegisterReservedByUser(Reg.first);
+ }))
+ F.getContext().diagnose(DiagnosticInfoUnsupported{
+ F, "Argument register required, but has been reserved."});
+}
+
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((RISCVISD::NodeType)Opcode) {
case RISCVISD::FIRST_NUMBER:
@@ -2845,3 +2902,22 @@ bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
return true;
}
+
+#define GET_REGISTER_MATCHER
+#include "RISCVGenAsmMatcher.inc"
+
+Register
+RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const {
+ Register Reg = MatchRegisterAltName(RegName);
+ if (Reg == RISCV::NoRegister)
+ Reg = MatchRegisterName(RegName);
+ if (Reg == RISCV::NoRegister)
+ report_fatal_error(
+ Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
+ BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
+ if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
+ report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
+ StringRef(RegName) + "\"."));
+ return Reg;
+}
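
getRegisterByName() backs the llvm.read_register and llvm.write_register intrinsics, which front ends typically reach through GNU named register variables. A hypothetical use, assuming x9 was reserved at compile time (for example with clang's -ffixed-x9, as the header comment later in this diff notes):

    register long Sideband __asm__("x9"); // file-scope named register variable

    long readSideband() {
      return Sideband; // lowered through llvm.read_register("x9")
    }
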
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 18fc7350bbbf..b2ad75d67024 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -147,6 +147,13 @@ public:
bool shouldExtendTypeInLibCall(EVT Type) const override;
+ /// Returns the register with the specified architectural or ABI name. This
+ /// method is necessary to lower the llvm.read_register.* and
+ /// llvm.write_register.* intrinsics. Allocatable registers must be reserved
+ /// with the clang -ffixed-xX flag for access to be allowed.
+ Register getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const override;
+
private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -210,6 +217,12 @@ private:
Value *AlignedAddr, Value *CmpVal,
Value *NewVal, Value *Mask,
AtomicOrdering Ord) const override;
+
+ /// Generate error diagnostics if any register used by CC has been marked
+ /// reserved.
+ void validateCCReservedRegs(
+ const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
+ MachineFunction &MF) const;
};
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 084839299530..3b416ce3d3f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -24,6 +24,9 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#define GEN_CHECK_COMPRESS_INSTR
+#include "RISCVGenCompressInstEmitter.inc"
+
#define GET_INSTRINFO_CTOR_DTOR
#include "RISCVGenInstrInfo.inc"
@@ -84,8 +87,8 @@ unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DstReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DstReg,
+ MCRegister SrcReg, bool KillSrc) const {
if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc))
@@ -451,7 +454,18 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
- default: { return get(Opcode).getSize(); }
+ default: {
+ if (MI.getParent() && MI.getParent()->getParent()) {
+ const auto MF = MI.getMF();
+ const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
+ const MCRegisterInfo &MRI = *TM.getMCRegisterInfo();
+ const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
+ const RISCVSubtarget &ST = MF->getSubtarget<RISCVSubtarget>();
+ if (isCompressibleInst(MI, &ST, MRI, STI))
+ return 2;
+ }
+ return get(Opcode).getSize();
+ }
case TargetOpcode::EH_LABEL:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
@@ -542,3 +556,242 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
return true;
}
+
+// Return true if the base operand and byte offset of an instruction can be
+// extracted, along with the memory width. Width is the size of memory that is
+// being loaded/stored.
+bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
+ const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
+ unsigned &Width, const TargetRegisterInfo *TRI) const {
+ if (!LdSt.mayLoadOrStore())
+ return false;
+
+ // Here we assume the standard RISC-V ISA, which uses a base+offset
+ // addressing mode. You'll need to relax these conditions to support custom
+ // load/store instructions.
+ if (LdSt.getNumExplicitOperands() != 3)
+ return false;
+ if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
+ return false;
+
+ if (!LdSt.hasOneMemOperand())
+ return false;
+
+ Width = (*LdSt.memoperands_begin())->getSize();
+ BaseReg = &LdSt.getOperand(1);
+ Offset = LdSt.getOperand(2).getImm();
+ return true;
+}
+
+bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
+ const MachineInstr &MIa, const MachineInstr &MIb) const {
+ assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
+ assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
+
+ if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
+ MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
+ return false;
+
+ // Retrieve the base register, offset from the base register and width. Width
+ // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
+ // base registers are identical, and the offset of the lower memory access
+ // plus its width does not reach past the offset of the higher memory access,
+ // then the accesses are disjoint.
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+ int64_t OffsetA = 0, OffsetB = 0;
+ unsigned int WidthA = 0, WidthB = 0;
+ if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+ getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+ if (BaseOpA->isIdenticalTo(*BaseOpB)) {
+ int LowOffset = std::min(OffsetA, OffsetB);
+ int HighOffset = std::max(OffsetA, OffsetB);
+ int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ if (LowOffset + LowWidth <= HighOffset)
+ return true;
+ }
+ }
+ return false;
+}
+
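
The check in areMemAccessesTriviallyDisjoint above is a one-dimensional interval test: with a common base register, the lower access must end at or before the higher one begins. A self-contained model of that test, reflecting the assumed semantics rather than the LLVM API:

    #include <algorithm>
    #include <cstdint>

    static bool disjoint(int64_t OffA, unsigned WidthA,
                         int64_t OffB, unsigned WidthB) {
      int64_t Low = std::min(OffA, OffB);
      int64_t High = std::max(OffA, OffB);
      unsigned LowWidth = (Low == OffA) ? WidthA : WidthB;
      return Low + LowWidth <= High; // e.g. disjoint(0, 4, 8, 4) -> true
    }
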
+std::pair<unsigned, unsigned>
+RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
+ return std::make_pair(TF & Mask, TF & ~Mask);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace RISCVII;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_CALL, "riscv-call"},
+ {MO_PLT, "riscv-plt"},
+ {MO_LO, "riscv-lo"},
+ {MO_HI, "riscv-hi"},
+ {MO_PCREL_LO, "riscv-pcrel-lo"},
+ {MO_PCREL_HI, "riscv-pcrel-hi"},
+ {MO_GOT_HI, "riscv-got-hi"},
+ {MO_TPREL_LO, "riscv-tprel-lo"},
+ {MO_TPREL_HI, "riscv-tprel-hi"},
+ {MO_TPREL_ADD, "riscv-tprel-add"},
+ {MO_TLS_GOT_HI, "riscv-tls-got-hi"},
+ {MO_TLS_GD_HI, "riscv-tls-gd-hi"}};
+ return makeArrayRef(TargetFlags);
+}
+bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
+ MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
+ const Function &F = MF.getFunction();
+
+ // Can F be deduplicated by the linker? If it can, don't outline from it.
+ if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
+ return false;
+
+ // Don't outline from functions with section markings; the program could
+ // expect that all the code is in the named section.
+ if (F.hasSection())
+ return false;
+
+ // It's safe to outline from MF.
+ return true;
+}
+
+bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
+ // More accurate safety checking is done in getOutliningCandidateInfo.
+ return true;
+}
+
+// Enum values indicating how an outlined call should be constructed.
+enum MachineOutlinerConstructionID {
+ MachineOutlinerDefault
+};
+
+outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo(
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+
+ // First we need to filter out candidates where the X5 register (i.e. t0)
+ // can't be used to set up the function call.
+ auto CannotInsertCall = [](outliner::Candidate &C) {
+ const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
+
+ C.initLRU(*TRI);
+ LiveRegUnits LRU = C.LRU;
+ return !LRU.available(RISCV::X5);
+ };
+
+ RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
+ RepeatedSequenceLocs.end(),
+ CannotInsertCall),
+ RepeatedSequenceLocs.end());
+
+ // If the sequence doesn't have enough candidates left, then we're done.
+ if (RepeatedSequenceLocs.size() < 2)
+ return outliner::OutlinedFunction();
+
+ unsigned SequenceSize = 0;
+
+ auto I = RepeatedSequenceLocs[0].front();
+ auto E = std::next(RepeatedSequenceLocs[0].back());
+ for (; I != E; ++I)
+ SequenceSize += getInstSizeInBytes(*I);
+
+ // call t0, function = 8 bytes.
+ unsigned CallOverhead = 8;
+ for (auto &C : RepeatedSequenceLocs)
+ C.setCallInfo(MachineOutlinerDefault, CallOverhead);
+
+ // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
+ unsigned FrameOverhead = 4;
+ if (RepeatedSequenceLocs[0].getMF()->getSubtarget()
+ .getFeatureBits()[RISCV::FeatureStdExtC])
+ FrameOverhead = 2;
+
+ return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
+ FrameOverhead, MachineOutlinerDefault);
+}
+
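
The constants above imply a simple break-even rule: each call site costs 8 bytes (auipc+jalr via call t0) and the single outlined body costs its own size plus a 4-byte jr t0 (2 bytes with the C extension). A toy calculation under those assumptions:

    // N = number of candidates, S = sequence size in bytes, Frame = 4 or 2.
    static bool outliningShrinksCode(unsigned N, unsigned S, unsigned Frame) {
      unsigned Before = N * S;            // N inlined copies
      unsigned After = N * 8 + S + Frame; // N calls plus one outlined body
      return After < Before;
    }
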
+outliner::InstrType
+RISCVInstrInfo::getOutliningType(MachineBasicBlock::iterator &MBBI,
+ unsigned Flags) const {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock *MBB = MI.getParent();
+ const TargetRegisterInfo *TRI =
+ MBB->getParent()->getSubtarget().getRegisterInfo();
+
+ // Positions generally can't safely be outlined.
+ if (MI.isPosition()) {
+ // We can manually strip out CFI instructions later.
+ if (MI.isCFIInstruction())
+ return outliner::InstrType::Invisible;
+
+ return outliner::InstrType::Illegal;
+ }
+
+ // Don't trust the user to write safe inline assembly.
+ if (MI.isInlineAsm())
+ return outliner::InstrType::Illegal;
+
+ // We can't outline branches to other basic blocks.
+ if (MI.isTerminator() && !MBB->succ_empty())
+ return outliner::InstrType::Illegal;
+
+ // We need support for tail calls to outlined functions before return
+ // statements can be allowed.
+ if (MI.isReturn())
+ return outliner::InstrType::Illegal;
+
+ // Don't allow modifying the X5 register, which we use as the return address
+ // for these outlined functions.
+ if (MI.modifiesRegister(RISCV::X5, TRI) ||
+ MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
+ return outliner::InstrType::Illegal;
+
+ // Make sure the operands don't reference something unsafe.
+ for (const auto &MO : MI.operands())
+ if (MO.isMBB() || MO.isBlockAddress() || MO.isCPI())
+ return outliner::InstrType::Illegal;
+
+ // Don't allow instructions which won't be materialized to impact outlining
+ // analysis.
+ if (MI.isMetaInstruction())
+ return outliner::InstrType::Invisible;
+
+ return outliner::InstrType::Legal;
+}
+
+void RISCVInstrInfo::buildOutlinedFrame(
+ MachineBasicBlock &MBB, MachineFunction &MF,
+ const outliner::OutlinedFunction &OF) const {
+
+ // Strip out any CFI instructions
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+ auto I = MBB.begin();
+ auto E = MBB.end();
+ for (; I != E; ++I) {
+ if (I->isCFIInstruction()) {
+ I->removeFromParent();
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Add in a return instruction to the end of the outlined frame.
+ MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
+ .addReg(RISCV::X0, RegState::Define)
+ .addReg(RISCV::X5)
+ .addImm(0));
+}
+
+MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
+ Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+ MachineFunction &MF, const outliner::Candidate &C) const {
+
+ // Add in a call instruction to the outlined function at the given location.
+ It = MBB.insert(It,
+ BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
+ .addGlobalAddress(M.getNamedValue(MF.getName()), 0,
+ RISCVII::MO_CALL));
+ return It;
+}
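
Taken together, buildOutlinedFrame and insertOutlinedCall implement a t0-linked calling convention, leaving the normal return address in ra untouched. The emitted shape, shown here only as an illustration in comments:

    // call site:               outlined function:
    //   call t0, OUTLINED_0      ...repeated sequence...
    //   ...                      jr t0   (the JALR x0, x5, 0 built above)
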
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index d3ae04aefe04..625b61875133 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -34,7 +34,7 @@ public:
int &FrameIndex) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DstReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -86,6 +86,50 @@ public:
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
+ bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
+ const MachineOperand *&BaseOp,
+ int64_t &Offset, unsigned &Width,
+ const TargetRegisterInfo *TRI) const;
+
+ bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+ const MachineInstr &MIb) const override;
+
+
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+
+ // Return true if the function can safely be outlined from.
+ virtual bool
+ isFunctionSafeToOutlineFrom(MachineFunction &MF,
+ bool OutlineFromLinkOnceODRs) const override;
+
+ // Return true if MBB is safe to outline from, and return any target-specific
+ // information in Flags.
+ virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const override;
+
+ // Calculate target-specific information for a set of outlining candidates.
+ outliner::OutlinedFunction getOutliningCandidateInfo(
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+
+ // Return if/how a given MachineInstr should be outlined.
+ virtual outliner::InstrType
+ getOutliningType(MachineBasicBlock::iterator &MBBI,
+ unsigned Flags) const override;
+
+ // Insert a custom frame for outlined functions.
+ virtual void
+ buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
+ const outliner::OutlinedFunction &OF) const override;
+
+ // Insert a call to an outlined function into a given basic block.
+ virtual MachineBasicBlock::iterator
+ insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &It, MachineFunction &MF,
+ const outliner::Candidate &C) const override;
protected:
const RISCVSubtarget &STI;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index db2ecc49d14e..8e9ad4965583 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -636,9 +636,6 @@ def : InstAlias<"jr $rs, $offset", (JALR X0, GPR:$rs, simm12:$offset
def : InstAlias<"jalr $rs, $offset", (JALR X1, GPR:$rs, simm12:$offset), 0>;
def : InstAlias<"jalr $rd, $rs, $offset", (JALR GPR:$rd, GPR:$rs, simm12:$offset), 0>;
-// TODO call
-// TODO tail
-
def : InstAlias<"fence", (FENCE 0xF, 0xF)>; // 0xF == iorw
def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, INSTRET.Encoding, X0)>;
@@ -901,7 +898,7 @@ def : Pat<(brind GPR:$rs1), (PseudoBRIND GPR:$rs1, 0)>;
def : Pat<(brind (add GPR:$rs1, simm12:$imm12)),
(PseudoBRIND GPR:$rs1, simm12:$imm12)>;
-// PsuedoCALLReg is a generic pseudo instruction for calls which will eventually
+// PseudoCALLReg is a generic pseudo instruction for calls which will eventually
// expand to auipc and jalr while encoding, with any given register used as the
// destination.
// Define AsmString to print "call" when compile with -S flag.
@@ -1022,13 +1019,13 @@ defm : StPat<store, SW, GPR>, Requires<[IsRV32]>;
// Manual: Volume I.
// fence acquire -> fence r, rw
-def : Pat<(atomic_fence (XLenVT 4), (imm)), (FENCE 0b10, 0b11)>;
+def : Pat<(atomic_fence (XLenVT 4), (timm)), (FENCE 0b10, 0b11)>;
// fence release -> fence rw, w
-def : Pat<(atomic_fence (XLenVT 5), (imm)), (FENCE 0b11, 0b1)>;
+def : Pat<(atomic_fence (XLenVT 5), (timm)), (FENCE 0b11, 0b1)>;
// fence acq_rel -> fence.tso
-def : Pat<(atomic_fence (XLenVT 6), (imm)), (FENCE_TSO)>;
+def : Pat<(atomic_fence (XLenVT 6), (timm)), (FENCE_TSO)>;
// fence seq_cst -> fence rw, rw
-def : Pat<(atomic_fence (XLenVT 7), (imm)), (FENCE 0b11, 0b11)>;
+def : Pat<(atomic_fence (XLenVT 7), (timm)), (FENCE 0b11, 0b11)>;
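
The imm to timm switch marks the fence-ordering operand as a target constant, so it must stay an immediate through instruction selection. These four patterns are what C++ atomic fences ultimately select; for example:

    #include <atomic>

    void acquireFence() { std::atomic_thread_fence(std::memory_order_acquire); } // fence r, rw
    void releaseFence() { std::atomic_thread_fence(std::memory_order_release); } // fence rw, w
    void seqCstFence()  { std::atomic_thread_fence(std::memory_order_seq_cst); } // fence rw, rw
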
// Lowering for atomic load and store is defined in RISCVInstrInfoA.td.
// Although these are lowered to fence+load/store instructions defined in the
@@ -1097,6 +1094,16 @@ let Predicates = [IsRV32], usesCustomInserter = 1, hasSideEffects = 0,
mayLoad = 0, mayStore = 0, hasNoSchedulingInfo = 1 in
def ReadCycleWide : Pseudo<(outs GPR:$lo, GPR:$hi), (ins), [], "", "">;
+/// traps
+
+// We lower `trap` to `unimp`, as this causes a hard exception on nearly all
+// systems.
+def : Pat<(trap), (UNIMP)>;
+
+// We lower `debugtrap` to `ebreak`, as this will get the attention of the
+// debugger if possible.
+def : Pat<(debugtrap), (EBREAK)>;
+
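
With ISD::TRAP and ISD::DEBUGTRAP marked Legal in RISCVISelLowering.cpp earlier in this diff, the two patterns above give the compiler builtins direct lowerings:

    void hardStop()  { __builtin_trap(); }      // selects to unimp
    void breakHere() { __builtin_debugtrap(); } // selects to ebreak (clang builtin)
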
//===----------------------------------------------------------------------===//
// Standard extensions
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 38ba3f9fb24e..7321f4bd9d2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -20,7 +20,7 @@
def AtomicMemOpOperand : AsmOperandClass {
let Name = "AtomicMemOpOperand";
let RenderMethod = "addRegOperands";
- let PredicateMethod = "isReg";
+ let PredicateMethod = "isGPR";
let ParserMethod = "parseAtomicMemOp";
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 66557687c0b6..1d41994ef1e3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -66,21 +66,35 @@ RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = getFrameLowering(MF);
+ const RISCVFrameLowering *TFI = getFrameLowering(MF);
BitVector Reserved(getNumRegs());
+ // Mark any registers requested to be reserved as such
+ for (size_t Reg = 0; Reg < getNumRegs(); Reg++) {
+ if (MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(Reg))
+ markSuperRegs(Reserved, Reg);
+ }
+
// Use markSuperRegs to ensure any register aliases are also reserved
markSuperRegs(Reserved, RISCV::X0); // zero
- markSuperRegs(Reserved, RISCV::X1); // ra
markSuperRegs(Reserved, RISCV::X2); // sp
markSuperRegs(Reserved, RISCV::X3); // gp
markSuperRegs(Reserved, RISCV::X4); // tp
if (TFI->hasFP(MF))
markSuperRegs(Reserved, RISCV::X8); // fp
+ // Reserve the base register if we need to realign the stack and allocate
+ // variable-sized objects at runtime.
+ if (TFI->hasBP(MF))
+ markSuperRegs(Reserved, RISCVABI::getBPReg()); // bp
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
+bool RISCVRegisterInfo::isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ return !MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(PhysReg);
+}
+
bool RISCVRegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
return PhysReg == RISCV::X0;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 56a50fe6ddc0..30b639517fde 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -30,6 +30,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const override;
bool isConstantPhysReg(unsigned PhysReg) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index f114c6ac1925..83e7e2d52cc1 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -50,6 +50,7 @@ RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(
RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
StringRef ABIName, const TargetMachine &TM)
: RISCVGenSubtargetInfo(TT, CPU, FS),
+ UserReservedRegister(RISCV::NUM_TARGET_REGS),
FrameLowering(initializeSubtargetDependencies(TT, CPU, FS, ABIName)),
InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {
CallLoweringInfo.reset(new RISCVCallLowering(*getTargetLowering()));
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 7d0373a5253a..605d4abcc9ae 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -46,6 +46,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
unsigned XLen = 32;
MVT XLenVT = MVT::i32;
RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
+ BitVector UserReservedRegister;
RISCVFrameLowering FrameLowering;
RISCVInstrInfo InstrInfo;
RISCVRegisterInfo RegInfo;
@@ -93,6 +94,10 @@ public:
MVT getXLenVT() const { return XLenVT; }
unsigned getXLen() const { return XLen; }
RISCVABI::ABI getTargetABI() const { return TargetABI; }
+ bool isRegisterReservedByUser(Register i) const {
+ assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
+ return UserReservedRegister[i];
+ }
protected:
// GlobalISel related APIs.
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 5ffc6eda6bd7..2bb26988c7da 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -25,12 +25,13 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-extern "C" void LLVMInitializeRISCVTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
auto PR = PassRegistry::getPassRegistry();
@@ -63,9 +64,35 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
- TLOF(std::make_unique<RISCVELFTargetObjectFile>()),
- Subtarget(TT, CPU, FS, Options.MCOptions.getABIName(), *this) {
+ TLOF(std::make_unique<RISCVELFTargetObjectFile>()) {
initAsmInfo();
+
+ // RISC-V supports the MachineOutliner.
+ setMachineOutliner(true);
+}
+
+const RISCVSubtarget *
+RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+ std::string Key = CPU + FS;
+ auto &I = SubtargetMap[Key];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, FS,
+ Options.MCOptions.getABIName(), *this);
+ }
+ return I.get();
}
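
getSubtargetImpl() now keys a cache on the concatenated CPU and feature strings, so functions carrying different "target-features" attributes get distinct subtargets while identical ones share one. The caching idiom, reduced to a standalone sketch:

    #include "llvm/ADT/StringMap.h"
    #include <memory>

    template <typename T, typename MakeFn>
    T *getOrCreate(llvm::StringMap<std::unique_ptr<T>> &Map,
                   llvm::StringRef Key, MakeFn Make) {
      auto &Slot = Map[Key]; // default-constructs an empty slot on a miss
      if (!Slot)
        Slot = Make();       // built once per unique key
      return Slot.get();
    }
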
TargetTransformInfo
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
index ebf3f3c07955..a4476fa40a7d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -22,7 +22,7 @@
namespace llvm {
class RISCVTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- RISCVSubtarget Subtarget;
+ mutable StringMap<std::unique_ptr<RISCVSubtarget>> SubtargetMap;
public:
RISCVTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -30,9 +30,11 @@ public:
Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT);
- const RISCVSubtarget *getSubtargetImpl(const Function &) const override {
- return &Subtarget;
- }
+ const RISCVSubtarget *getSubtargetImpl(const Function &F) const override;
+ // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget;
+ // subtargets are per-function entities based on the target-specific
+ // attributes of each function.
+ const RISCVSubtarget *getSubtargetImpl() const = delete;
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 2c6400cbb1eb..90fcd679c523 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -29,7 +29,7 @@ int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
getST()->is64Bit());
}
-int RISCVTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty) {
assert(Ty->isIntegerTy() &&
"getIntImmCost can only estimate cost of materialising integers");
@@ -85,8 +85,8 @@ int RISCVTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return TTI::TCC_Free;
}
-int RISCVTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
// Prevent hoisting in unknown cases.
return TTI::TCC_Free;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index f361b25a0c70..d219ba81bb56 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -42,9 +42,9 @@ public:
TLI(ST->getTargetLowering()) {}
int getIntImmCost(const APInt &Imm, Type *Ty);
- int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
- int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
index e44984a3fcc5..4f265d556380 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
@@ -20,7 +20,7 @@ Target &llvm::getTheRISCV64Target() {
return TheRISCV64Target;
}
-extern "C" void LLVMInitializeRISCVTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetInfo() {
RegisterTarget<Triple::riscv32> X(getTheRISCV32Target(), "riscv32",
"32-bit RISC-V", "RISCV");
RegisterTarget<Triple::riscv64> Y(getTheRISCV64Target(), "riscv64",
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
index bc5395768ca1..432ebb294d46 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
@@ -37,17 +37,8 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
errs() << "64-bit ABIs are not supported for 32-bit targets (ignoring "
"target-abi)\n";
TargetABI = ABI_Unknown;
- } else if (ABIName.endswith("f") && !FeatureBits[RISCV::FeatureStdExtF]) {
- errs() << "Hard-float 'f' ABI can't be used for a target that "
- "doesn't support the F instruction set extension (ignoring "
- "target-abi)\n";
- TargetABI = ABI_Unknown;
- } else if (ABIName.endswith("d") && !FeatureBits[RISCV::FeatureStdExtD]) {
- errs() << "Hard-float 'd' ABI can't be used for a target that "
- "doesn't support the D instruction set extension (ignoring "
- "target-abi)\n";
- TargetABI = ABI_Unknown;
} else if (IsRV32E && TargetABI != ABI_ILP32E && TargetABI != ABI_Unknown) {
+ // TODO: move this checking to RISCVTargetLowering and RISCVAsmParser
errs()
<< "Only the ilp32e ABI is supported for RV32E (ignoring target-abi)\n";
TargetABI = ABI_Unknown;
@@ -66,6 +57,12 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
return ABI_LP64;
return ABI_ILP32;
}
+
+// To avoid the BP value being clobbered by a function call, we need to choose
+// a callee-saved register to hold it. RV32E only has X8 and X9 as callee-saved
+// registers, and X8 will be used as fp, so we choose X9 as bp.
+Register getBPReg() { return RISCV::X9; }
+
} // namespace RISCVABI
namespace RISCVFeatures {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
index 30e475e80a01..cf078df9609a 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
+#include "RISCVRegisterInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
@@ -47,20 +48,26 @@ enum {
InstFormatMask = 31
};
+// RISC-V Specific Machine Operand Flags
enum {
- MO_None,
- MO_CALL,
- MO_PLT,
- MO_LO,
- MO_HI,
- MO_PCREL_LO,
- MO_PCREL_HI,
- MO_GOT_HI,
- MO_TPREL_LO,
- MO_TPREL_HI,
- MO_TPREL_ADD,
- MO_TLS_GOT_HI,
- MO_TLS_GD_HI,
+ MO_None = 0,
+ MO_CALL = 1,
+ MO_PLT = 2,
+ MO_LO = 3,
+ MO_HI = 4,
+ MO_PCREL_LO = 5,
+ MO_PCREL_HI = 6,
+ MO_GOT_HI = 7,
+ MO_TPREL_LO = 8,
+ MO_TPREL_HI = 9,
+ MO_TPREL_ADD = 10,
+ MO_TLS_GOT_HI = 11,
+ MO_TLS_GD_HI = 12,
+
+ // Used to differentiate between target-specific "direct" flags and "bitmask"
+ // flags. A machine operand can only have one "direct" flag, but can have
+ // multiple "bitmask" flags.
+ MO_DIRECT_FLAG_MASK = 15
};
} // namespace RISCVII
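
Giving the flags explicit values makes MO_DIRECT_FLAG_MASK meaningful: the low four bits hold exactly one "direct" flag, and decomposeMachineOperandsTargetFlags (added in RISCVInstrInfo.cpp earlier in this diff) splits a flag word accordingly. For instance:

    unsigned TF = RISCVII::MO_PCREL_HI;                    // direct value 6
    unsigned Direct = TF & RISCVII::MO_DIRECT_FLAG_MASK;   // == MO_PCREL_HI
    unsigned Bitmask = TF & ~RISCVII::MO_DIRECT_FLAG_MASK; // == 0, none set
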
@@ -195,6 +202,9 @@ enum ABI {
ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
StringRef ABIName);
+// Returns the register used to hold the stack pointer after realignment.
+Register getBPReg();
+
} // namespace RISCVABI
namespace RISCVFeatures {
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index f6be9dd01249..2d3137f38821 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -1308,7 +1308,7 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
return true;
}
-extern "C" void LLVMInitializeSparcAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcAsmParser() {
RegisterMCAsmParser<SparcAsmParser> A(getTheSparcTarget());
RegisterMCAsmParser<SparcAsmParser> B(getTheSparcV9Target());
RegisterMCAsmParser<SparcAsmParser> C(getTheSparcelTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index db8e7850300f..7319924a24ba 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -37,7 +37,7 @@ static cl::opt<bool> DisableDelaySlotFiller(
namespace {
struct Filler : public MachineFunctionPass {
- const SparcSubtarget *Subtarget;
+ const SparcSubtarget *Subtarget = nullptr;
static char ID;
Filler() : MachineFunctionPass(ID) {}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index bee331874e96..1caa33341300 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -36,7 +36,6 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
};
}
@@ -48,7 +47,7 @@ static MCDisassembler *createSparcDisassembler(const Target &T,
}
-extern "C" void LLVMInitializeSparcDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheSparcTarget(),
createSparcDisassembler);
@@ -332,7 +331,6 @@ static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const {
uint32_t Insn;
bool isLittleEndian = getContext().getAsmInfo()->isLittleEndian();
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.h b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.h
index 154a2b467e16..b165bc93780f 100755
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.h
@@ -23,7 +23,7 @@ namespace llvm {
class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass
: public MachineFunctionPass {
protected:
- const SparcSubtarget *Subtarget;
+ const SparcSubtarget *Subtarget = nullptr;
const int LAST_OPERAND = -1;
// this vector holds free registers that we allocate in groups for some of the
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
index c479459786d7..8a673de69911 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
@@ -43,10 +43,11 @@ void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const
OS << '%' << StringRef(getRegisterName(RegNo)).lower();
}
-void SparcInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void SparcInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
if (!printAliasInstr(MI, STI, O) && !printSparcAliasInstr(MI, STI, O))
- printInstruction(MI, STI, O);
+ printInstruction(MI, Address, STI, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
index 499bcadb0d4d..cb85fe98ed42 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
@@ -24,15 +24,15 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
bool printSparcAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &OS);
bool isV9(const MCSubtargetInfo &STI) const;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index ce593bb66770..7eb27f55baac 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -33,7 +33,8 @@ using namespace llvm;
#include "SparcGenRegisterInfo.inc"
static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TT) {
+ const Triple &TT,
+ const MCTargetOptions &Options) {
MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT);
unsigned Reg = MRI.getDwarfRegNum(SP::O6, true);
MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0);
@@ -42,7 +43,8 @@ static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI,
}
static MCAsmInfo *createSparcV9MCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TT) {
+ const Triple &TT,
+ const MCTargetOptions &Options) {
MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT);
unsigned Reg = MRI.getDwarfRegNum(SP::O6, true);
MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 2047);
@@ -89,7 +91,7 @@ static MCInstPrinter *createSparcMCInstPrinter(const Triple &T,
return new SparcInstPrinter(MAI, MII, MRI);
}
-extern "C" void LLVMInitializeSparcTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(getTheSparcTarget(), createSparcMCAsmInfo);
RegisterMCAsmInfoFn Y(getTheSparcV9Target(), createSparcV9MCAsmInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 4d5cbfbadc9d..f0caf3bc284f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -439,7 +439,7 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
// Force static initialization.
-extern "C" void LLVMInitializeSparcAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcAsmPrinter() {
RegisterAsmPrinter<SparcAsmPrinter> X(getTheSparcTarget());
RegisterAsmPrinter<SparcAsmPrinter> Y(getTheSparcV9Target());
RegisterAsmPrinter<SparcAsmPrinter> Z(getTheSparcelTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 4e61c341b703..afb69899e724 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -31,7 +31,7 @@ namespace {
class SparcDAGToDAGISel : public SelectionDAGISel {
/// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can
/// make the right decision when generating code for different targets.
- const SparcSubtarget *Subtarget;
+ const SparcSubtarget *Subtarget = nullptr;
public:
explicit SparcDAGToDAGISel(SparcTargetMachine &tm) : SelectionDAGISel(tm) {}
@@ -380,7 +380,6 @@ SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
SDValue Op0, Op1;
switch (ConstraintID) {
default: return true;
- case InlineAsm::Constraint_i:
case InlineAsm::Constraint_o:
case InlineAsm::Constraint_m: // memory
if (!SelectADDRrr(Op, Op0, Op1))
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 4a2ba00ac6c2..d853d0608519 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1016,7 +1016,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register SparcTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+Register SparcTargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = StringSwitch<unsigned>(RegName)
.Case("i0", SP::I0).Case("i1", SP::I1).Case("i2", SP::I2).Case("i3", SP::I3)
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h
index 3d798cec0c16..2838ca4bdc66 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -98,7 +98,7 @@ namespace llvm {
return MVT::i32;
}
- Register getRegisterByName(const char* RegName, EVT VT,
+ Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
/// If a physical register, this returns the register that receives the
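The named-register hook now takes a low-level type (LLT) instead of an EVT, so the same interface serves both SelectionDAG and GlobalISel. A sketch of forming the new type argument; the header path and the commented call are assumptions for illustration:

#include "llvm/Support/LowLevelTypeImpl.h"
llvm::LLT S64 = llvm::LLT::scalar(64);            // a plain 64-bit scalar
// Register R = TLI.getRegisterByName("i0", S64, MF);   // hypothetical use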
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index d18ab3b1370b..9a200a36cd3e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -497,8 +497,6 @@ let Predicates = [Is64Bit], Constraints = "$swap = $rd", asi = 0b10000000 in {
let Predicates = [Is64Bit] in {
-def : Pat<(atomic_fence imm, imm), (MEMBARi 0xf)>;
-
// atomic_load_64 addr -> load addr
def : Pat<(i64 (atomic_load_64 ADDRrr:$src)), (LDXrr ADDRrr:$src)>;
def : Pat<(i64 (atomic_load_64 ADDRri:$src)), (LDXri ADDRri:$src)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 3d3d314a26bb..31185aa508af 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -304,8 +304,8 @@ bool SparcInstrInfo::reverseBranchCondition(
void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
unsigned numSubRegs = 0;
unsigned movOpc = 0;
const unsigned *subRegIdx = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.h
index b587b28c25fc..f0b3dde6dec3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -81,7 +81,7 @@ public:
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 73dbdc4f443e..f26f4a1c1a84 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -1676,7 +1676,10 @@ def : Pat<(store (i32 0), ADDRri:$dst), (STri ADDRri:$dst, (i32 G0))>;
// store bar for all atomic_fence in V8.
let Predicates = [HasNoV9] in
- def : Pat<(atomic_fence imm, imm), (STBAR)>;
+ def : Pat<(atomic_fence timm, timm), (STBAR)>;
+
+let Predicates = [HasV9] in
+ def : Pat<(atomic_fence timm, timm), (MEMBARi 0xf)>;
// atomic_load addr -> load addr
def : Pat<(i32 (atomic_load_8 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
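These patterns select the IR-level atomic fence: timm (a target immediate) replaces imm since the fence operands must be constants at selection time, and the V9 pattern, moved here from SparcInstr64Bit.td, now applies to any V9 subtarget rather than only 64-bit ones. A minimal sketch of the IR both patterns match, built with IRBuilder:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;
// Builds the fence that lowers to STBAR on V8 and "membar 0xf" on V9.
void emitSeqCstFence(IRBuilder<> &B) {
  B.CreateFence(AtomicOrdering::SequentiallyConsistent);
}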
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index c1e3f8c36982..76f387842f73 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -20,7 +20,7 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-extern "C" void LLVMInitializeSparcTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTarget() {
// Register the target.
RegisterTargetMachine<SparcV8TargetMachine> X(getTheSparcTarget());
RegisterTargetMachine<SparcV9TargetMachine> Y(getTheSparcV9Target());
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index eafa2b4b2f13..3bf5907012da 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -23,7 +23,7 @@ Target &llvm::getTheSparcelTarget() {
return TheSparcelTarget;
}
-extern "C" void LLVMInitializeSparcTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTargetInfo() {
RegisterTarget<Triple::sparc, /*HasJIT=*/true> X(getTheSparcTarget(), "sparc",
"Sparc", "Sparc");
RegisterTarget<Triple::sparcv9, /*HasJIT=*/true> Y(
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 93c4ce4b5ccc..607266d552a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -1304,14 +1304,23 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
if (getParser().parseExpression(Expr))
return MatchOperand_NoMatch;
+ auto isOutOfRangeConstant = [&](const MCExpr *E) -> bool {
+ if (auto *CE = dyn_cast<MCConstantExpr>(E)) {
+ int64_t Value = CE->getValue();
+ if ((Value & 1) || Value < MinVal || Value > MaxVal)
+ return true;
+ }
+ return false;
+ };
+
// For consistency with the GNU assembler, treat immediates as offsets
// from ".".
if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
- int64_t Value = CE->getValue();
- if ((Value & 1) || Value < MinVal || Value > MaxVal) {
+ if (isOutOfRangeConstant(CE)) {
Error(StartLoc, "offset out of range");
return MatchOperand_ParseFail;
}
+ int64_t Value = CE->getValue();
MCSymbol *Sym = Ctx.createTempSymbol();
Out.EmitLabel(Sym);
const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
@@ -1319,6 +1328,15 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx);
}
+ // For consistency with the GNU assembler, conservatively assume that a
+ // constant offset must by itself be within the given size range.
+ if (const auto *BE = dyn_cast<MCBinaryExpr>(Expr))
+ if (isOutOfRangeConstant(BE->getLHS()) ||
+ isOutOfRangeConstant(BE->getRHS())) {
+ Error(StartLoc, "offset out of range");
+ return MatchOperand_ParseFail;
+ }
+
// Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol.
const MCExpr *Sym = nullptr;
if (AllowTLS && getLexer().is(AsmToken::Colon)) {
@@ -1371,6 +1389,6 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
}
// Force static initialization.
-extern "C" void LLVMInitializeSystemZAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZAsmParser() {
RegisterMCAsmParser<SystemZAsmParser> X(getTheSystemZTarget());
}
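The new lambda centralizes the "even and within [MinVal, MaxVal]" rule so it can also be applied to each side of a label+constant expression, not just to a bare constant. A self-contained restatement of the predicate (the bounds in the example call are illustrative, not from this patch):

#include <cstdint>
static bool offsetOutOfRange(int64_t V, int64_t MinVal, int64_t MaxVal) {
  // PC-relative SystemZ offsets are halfword-scaled, hence the evenness test.
  return (V & 1) || V < MinVal || V > MaxVal;
}
// e.g. offsetOutOfRange(3, -0x10000, 0xfffe) -> true (odd constant)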
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 70c26db33ced..e42aa14fe589 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -34,7 +34,6 @@ public:
DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
};
@@ -46,7 +45,7 @@ static MCDisassembler *createSystemZDisassembler(const Target &T,
return new SystemZDisassembler(STI, Ctx);
}
-extern "C" void LLVMInitializeSystemZDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheSystemZTarget(),
createSystemZDisassembler);
@@ -449,7 +448,6 @@ static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &OS,
raw_ostream &CS) const {
// Get the first two bytes of the instruction.
Size = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
index 91cb35dd72f2..5893b227c08c 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -41,8 +41,12 @@ void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
raw_ostream &O) {
- if (MO.isReg())
- O << '%' << getRegisterName(MO.getReg());
+ if (MO.isReg()) {
+ if (!MO.getReg())
+ O << '0';
+ else
+ O << '%' << getRegisterName(MO.getReg());
+ }
else if (MO.isImm())
O << MO.getImm();
else if (MO.isExpr())
@@ -51,10 +55,10 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
llvm_unreachable("Invalid operand");
}
-void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot,
- const MCSubtargetInfo &STI) {
- printInstruction(MI, O);
+void SystemZInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}
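printInst and the tblgen-generated printInstruction now carry the instruction's Address, which lets the printer render PC-relative operands as resolved absolute targets in disassembly. A trivial sketch of the arithmetic this enables:

#include <cstdint>
// For an instruction at Address with displacement Disp, the printer can
// now show the absolute target instead of a ".+Disp" style expression.
static uint64_t branchTarget(uint64_t Address, int64_t Disp) {
  return Address + Disp;   // e.g. 0x1000 + 0x20 -> 0x1020
}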
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
index 4235d4e21792..5628e9252f03 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -27,7 +27,7 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
// Automatically generated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
// Print an address with the given base, displacement and index.
@@ -40,8 +40,8 @@ public:
// Override MCInstPrinter.
void printRegName(raw_ostream &O, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
private:
// Print various types of operand.
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 3c0300cfd8f0..eb2112674a12 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -147,7 +147,8 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) {
}
static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TT) {
+ const Triple &TT,
+ const MCTargetOptions &Options) {
MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
MCCFIInstruction Inst =
MCCFIInstruction::createDefCfa(nullptr,
@@ -182,7 +183,7 @@ static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
return new SystemZInstPrinter(MAI, MII, MRI);
}
-extern "C" void LLVMInitializeSystemZTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetMC() {
// Register the MCAsmInfo.
TargetRegistry::RegisterMCAsmInfo(getTheSystemZTarget(),
createSystemZMCAsmInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h
index 88cf589a3f10..0808160f627c 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h
@@ -55,7 +55,7 @@ const unsigned CCMASK_ARITH = CCMASK_ANY;
// Condition-code mask assignments for logical operations.
const unsigned CCMASK_LOGICAL_ZERO = CCMASK_0 | CCMASK_2;
-const unsigned CCMASK_LOGICAL_NONZERO = CCMASK_1 | CCMASK_2;
+const unsigned CCMASK_LOGICAL_NONZERO = CCMASK_1 | CCMASK_3;
const unsigned CCMASK_LOGICAL_CARRY = CCMASK_2 | CCMASK_3;
const unsigned CCMASK_LOGICAL_NOCARRY = CCMASK_0 | CCMASK_1;
const unsigned CCMASK_LOGICAL_BORROW = CCMASK_LOGICAL_NOCARRY;
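This one-character change is a real bug fix: a logical operation sets CC 0 or 2 for a zero result and CC 1 or 3 for a nonzero result, so the old mask (CCMASK_1 | CCMASK_2) wrongly counted CC 2, zero with carry, as nonzero. A quick invariant check in terms of the constants above (it would sit inside the SystemZ namespace):

static_assert((CCMASK_LOGICAL_ZERO | CCMASK_LOGICAL_NONZERO) == CCMASK_ANY &&
              (CCMASK_LOGICAL_ZERO & CCMASK_LOGICAL_NONZERO) == 0,
              "zero/nonzero CC masks must partition CCMASK_ANY");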
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 10023e9e169c..67c4aa08f90d 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -16,11 +16,13 @@
#include "SystemZConstantPoolValue.h"
#include "SystemZMCInstLower.h"
#include "TargetInfo/SystemZTargetInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/TargetRegistry.h"
@@ -543,9 +545,9 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
else {
MCSymbol *DotSym = OutContext.createTempSymbol();
const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext);
+ OutStreamer.EmitLabel(DotSym);
OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BRCLAsm)
.addImm(0).addExpr(Dot), STI);
- OutStreamer.EmitLabel(DotSym);
return 6;
}
}
@@ -553,8 +555,17 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
SystemZMCInstLower &Lower) {
MCContext &Ctx = MF->getContext();
- if (MF->getFunction().getFnAttribute("mnop-mcount")
- .getValueAsString() == "true") {
+ if (MF->getFunction().hasFnAttribute("mrecord-mcount")) {
+ MCSymbol *DotSym = OutContext.createTempSymbol();
+ OutStreamer->PushSection();
+ OutStreamer->SwitchSection(
+ Ctx.getELFSection("__mcount_loc", ELF::SHT_PROGBITS, ELF::SHF_ALLOC));
+ OutStreamer->EmitSymbolValue(DotSym, 8);
+ OutStreamer->PopSection();
+ OutStreamer->EmitLabel(DotSym);
+ }
+
+ if (MF->getFunction().hasFnAttribute("mnop-mcount")) {
EmitNop(Ctx, *OutStreamer, 6, getSubtargetInfo());
return;
}
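The backend now also honors an "mrecord-mcount" function attribute (mirroring GCC's -mrecord-mcount): the address of each fentry call site is recorded as an 8-byte entry in an __mcount_loc section, which tools such as the Linux kernel's ftrace post-process. Roughly the per-call-site output, shown as a sketch in assembler notation:

// .section __mcount_loc,"a",@progbits
// .quad .Ltmp0                    // address of the fentry call site
// .text
// .Ltmp0: brasl %r0, __fentry__@PLT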
@@ -572,7 +583,11 @@ void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
unsigned NumNOPBytes = MI.getOperand(1).getImm();
- SM.recordStackMap(MI);
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
+
+ SM.recordStackMap(*MILabel, MI);
assert(NumNOPBytes % 2 == 0 && "Invalid number of NOP bytes requested!");
// Scan ahead to trim the shadow.
@@ -601,7 +616,11 @@ void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
// [<def>], <id>, <numBytes>, <target>, <numArgs>
void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
SystemZMCInstLower &Lower) {
- SM.recordPatchPoint(MI);
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
+
+ SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers Opers(&MI);
unsigned EncodedBytes = 0;
@@ -705,6 +724,6 @@ void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
}
// Force static initialization.
-extern "C" void LLVMInitializeSystemZAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZAsmPrinter() {
RegisterAsmPrinter<SystemZAsmPrinter> X(getTheSystemZTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h
index 82f29b6361f1..4432adc6a269 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -124,6 +124,13 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
return true;
}
+inline bool CC_SystemZ_GHC_Error(unsigned &, MVT &, MVT &,
+ CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+ CCState &) {
+ report_fatal_error("No registers left in GHC calling convention");
+ return false;
+}
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index bbd51546ac9f..b1b7ad47671f 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -58,9 +58,34 @@ def RetCC_SystemZ : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
+// z/Linux argument calling conventions for GHC
+//===----------------------------------------------------------------------===//
+def CC_SystemZ_GHC : CallingConv<[
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, R8, SpLim
+ CCIfType<[i64], CCAssignToReg<[R7D, R8D, R10D, R11D, R12D, R13D,
+ R6D, R2D, R3D, R4D, R5D, R9D]>>,
+
+ // Pass in STG registers: F1, ..., F6
+ CCIfType<[f32], CCAssignToReg<[F8S, F9S, F10S, F11S, F0S, F1S]>>,
+
+ // Pass in STG registers: D1, ..., D6
+ CCIfType<[f64], CCAssignToReg<[F12D, F13D, F14D, F15D, F2D, F3D]>>,
+
+ // Pass in STG registers: XMM1, ..., XMM6
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfFixed<CCAssignToReg<[V16, V17, V18, V19, V20, V21]>>>>,
+
+ // Fail otherwise
+ CCCustom<"CC_SystemZ_GHC_Error">
+]>;
+
+//===----------------------------------------------------------------------===//
// z/Linux argument calling conventions
//===----------------------------------------------------------------------===//
def CC_SystemZ : CallingConv<[
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_SystemZ_GHC>>,
+
// Promote i32 to i64 if it has an explicit extension type.
// The convention is that true integer arguments that are smaller
// than 64 bits should be marked as extended, but structures that
@@ -128,3 +153,5 @@ def CSR_SystemZ_AllRegs : CalleeSavedRegs<(add (sequence "R%dD", 2, 15),
def CSR_SystemZ_AllRegs_Vector : CalleeSavedRegs<(add (sequence "R%dD", 2, 15),
(sequence "V%d", 0, 31))>;
+def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>;
+
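CSR_SystemZ_NoRegs pairs with the GHC convention above: GHC code saves no registers across calls, and (as enforced later in SystemZISelLowering.cpp) GHC functions return void. A minimal sketch of opting a function into the convention at the IR level:

#include "llvm/IR/Function.h"
// GHC is IR-level calling convention 10; such functions must return void
// and are expected to tail-call into the next STG closure.
void useGHCConvention(llvm::Function &F) {
  F.setCallingConv(llvm::CallingConv::GHC);
}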
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 946eb2ba7c79..2f0cf0317029 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -87,6 +88,8 @@ private:
SmallVectorImpl<MachineInstr *> &CCUsers);
bool convertToLoadAndTest(MachineInstr &MI, MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
+ bool convertToLogical(MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers,
unsigned ConvOpc = 0);
@@ -103,14 +106,6 @@ char SystemZElimCompare::ID = 0;
} // end anonymous namespace
-// Return true if CC is live out of MBB.
-static bool isCCLiveOut(MachineBasicBlock &MBB) {
- for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(SystemZ::CC))
- return true;
- return false;
-}
-
// Returns true if MI is an instruction whose output equals the value in Reg.
static bool preservesValueOf(MachineInstr &MI, unsigned Reg) {
switch (MI.getOpcode()) {
@@ -302,9 +297,60 @@ bool SystemZElimCompare::convertToLoadAndTest(
MIB.setMemRefs(MI.memoperands());
MI.eraseFromParent();
+ // Mark instruction as not raising an FP exception if applicable. We already
+ // verified earlier that this move is valid.
+ if (!Compare.mayRaiseFPException())
+ MIB.setMIFlag(MachineInstr::MIFlag::NoFPExcept);
+
+ return true;
+}
+
+// See if MI has an equivalent "logical" opcode that can be used in its
+// place, and if so replace MI. This is useful for EQ/NE comparisons where
+// the "nsw" flag is missing, since the "logical" opcode always sets CC to
+// reflect whether the result is zero or nonzero.
+bool SystemZElimCompare::convertToLogical(
+ MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers) {
+
+ unsigned ConvOpc = 0;
+ switch (MI.getOpcode()) {
+ case SystemZ::AR: ConvOpc = SystemZ::ALR; break;
+ case SystemZ::ARK: ConvOpc = SystemZ::ALRK; break;
+ case SystemZ::AGR: ConvOpc = SystemZ::ALGR; break;
+ case SystemZ::AGRK: ConvOpc = SystemZ::ALGRK; break;
+ case SystemZ::A: ConvOpc = SystemZ::AL; break;
+ case SystemZ::AY: ConvOpc = SystemZ::ALY; break;
+ case SystemZ::AG: ConvOpc = SystemZ::ALG; break;
+ default: break;
+ }
+ if (!ConvOpc || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc))
+ return false;
+
+ // Operands should be identical, so just change the opcode and remove the
+ // dead flag on CC.
+ MI.setDesc(TII->get(ConvOpc));
+ MI.clearRegisterDeads(SystemZ::CC);
return true;
}
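A sketch of the source-level shape this targets: without "nsw" on the add, a separate compare against zero previously had to stay, but an EQ/NE test only needs zero vs. nonzero, which the logical form (ALR instead of AR) encodes directly in CC.

int addFeedsZeroTest(int a, int b) {
  return (a + b) == 0;   // the compare can fold into the logical add's CC
}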
+#ifndef NDEBUG
+static bool isAddWithImmediate(unsigned Opcode) {
+  switch (Opcode) {
+ case SystemZ::AHI:
+ case SystemZ::AHIK:
+ case SystemZ::AGHI:
+ case SystemZ::AGHIK:
+ case SystemZ::AFI:
+ case SystemZ::AIH:
+ case SystemZ::AGFI:
+ return true;
+ default: break;
+ }
+ return false;
+}
+#endif
+
// The CC users in CCUsers are testing the result of a comparison of some
// value X against zero and we know that any CC value produced by MI would
// also reflect the value of X. ConvOpc may be used to pass the transformed
@@ -315,65 +361,116 @@ bool SystemZElimCompare::adjustCCMasksForInstr(
MachineInstr &MI, MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers,
unsigned ConvOpc) {
+ unsigned CompareFlags = Compare.getDesc().TSFlags;
+ unsigned CompareCCValues = SystemZII::getCCValues(CompareFlags);
int Opcode = (ConvOpc ? ConvOpc : MI.getOpcode());
const MCInstrDesc &Desc = TII->get(Opcode);
unsigned MIFlags = Desc.TSFlags;
- // See which compare-style condition codes are available.
- unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags);
+ // If Compare may raise an FP exception, we can only eliminate it
+ // if MI itself would have already raised the exception.
+ if (Compare.mayRaiseFPException()) {
+ // If the caller will change MI to use ConvOpc, only test whether
+ // ConvOpc is suitable; it is on the caller to set the MI flag.
+ if (ConvOpc && !Desc.mayRaiseFPException())
+ return false;
+ // If the caller will not change MI, we test the MI flag here.
+ if (!ConvOpc && !MI.mayRaiseFPException())
+ return false;
+ }
+ // See which compare-style condition codes are available.
+ unsigned CCValues = SystemZII::getCCValues(MIFlags);
+ unsigned ReusableCCMask = CCValues;
// For unsigned comparisons with zero, only equality makes sense.
- unsigned CompareFlags = Compare.getDesc().TSFlags;
if (CompareFlags & SystemZII::IsLogical)
ReusableCCMask &= SystemZ::CCMASK_CMP_EQ;
-
+ unsigned OFImplies = 0;
+ bool LogicalMI = false;
+ bool MIEquivalentToCmp = false;
+ if (MI.getFlag(MachineInstr::NoSWrap) &&
+ (MIFlags & SystemZII::CCIfNoSignedWrap)) {
+ // If MI has the NSW flag set in combination with the
+ // SystemZII::CCIfNoSignedWrap flag, all CCValues are valid.
+ }
+ else if ((MIFlags & SystemZII::CCIfNoSignedWrap) &&
+ MI.getOperand(2).isImm()) {
+ // Signed addition of immediate. If adding a positive immediate
+ // overflows, the result must be less than zero. If adding a negative
+ // immediate overflows, the result must be larger than zero (except in
+ // the special case of adding the minimum value of the result range, in
+ // which case we cannot predict whether the result is larger than or
+ // equal to zero).
+ assert(isAddWithImmediate(Opcode) && "Expected an add with immediate.");
+ assert(!MI.mayLoadOrStore() && "Expected an immediate term.");
+ int64_t RHS = MI.getOperand(2).getImm();
+ if (SystemZ::GRX32BitRegClass.contains(MI.getOperand(0).getReg()) &&
+ RHS == INT32_MIN)
+ return false;
+ OFImplies = (RHS > 0 ? SystemZ::CCMASK_CMP_LT : SystemZ::CCMASK_CMP_GT);
+ }
+ else if ((MIFlags & SystemZII::IsLogical) && CCValues) {
+    // Use CCMASK_CMP_EQ to match with CCUsers. On success the CCMasks will be
+ // converted to CCMASK_LOGICAL_ZERO or CCMASK_LOGICAL_NONZERO.
+ LogicalMI = true;
+ ReusableCCMask = SystemZ::CCMASK_CMP_EQ;
+ }
+ else {
+ ReusableCCMask &= SystemZII::getCompareZeroCCMask(MIFlags);
+ assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues");
+ MIEquivalentToCmp =
+ ReusableCCMask == CCValues && CCValues == CompareCCValues;
+ }
if (ReusableCCMask == 0)
return false;
- unsigned CCValues = SystemZII::getCCValues(MIFlags);
- assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues");
-
- bool MIEquivalentToCmp =
- (ReusableCCMask == CCValues &&
- CCValues == SystemZII::getCCValues(CompareFlags));
-
if (!MIEquivalentToCmp) {
// Now check whether these flags are enough for all users.
SmallVector<MachineOperand *, 4> AlterMasks;
for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) {
- MachineInstr *MI = CCUsers[I];
+ MachineInstr *CCUserMI = CCUsers[I];
// Fail if this isn't a use of CC that we understand.
- unsigned Flags = MI->getDesc().TSFlags;
+ unsigned Flags = CCUserMI->getDesc().TSFlags;
unsigned FirstOpNum;
if (Flags & SystemZII::CCMaskFirst)
FirstOpNum = 0;
else if (Flags & SystemZII::CCMaskLast)
- FirstOpNum = MI->getNumExplicitOperands() - 2;
+ FirstOpNum = CCUserMI->getNumExplicitOperands() - 2;
else
return false;
// Check whether the instruction predicate treats all CC values
// outside of ReusableCCMask in the same way. In that case it
// doesn't matter what those CC values mean.
- unsigned CCValid = MI->getOperand(FirstOpNum).getImm();
- unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm();
+ unsigned CCValid = CCUserMI->getOperand(FirstOpNum).getImm();
+ unsigned CCMask = CCUserMI->getOperand(FirstOpNum + 1).getImm();
+ assert(CCValid == CompareCCValues && (CCMask & ~CCValid) == 0 &&
+ "Corrupt CC operands of CCUser.");
unsigned OutValid = ~ReusableCCMask & CCValid;
unsigned OutMask = ~ReusableCCMask & CCMask;
if (OutMask != 0 && OutMask != OutValid)
return false;
- AlterMasks.push_back(&MI->getOperand(FirstOpNum));
- AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1));
+ AlterMasks.push_back(&CCUserMI->getOperand(FirstOpNum));
+ AlterMasks.push_back(&CCUserMI->getOperand(FirstOpNum + 1));
}
// All users are OK. Adjust the masks for MI.
for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) {
AlterMasks[I]->setImm(CCValues);
unsigned CCMask = AlterMasks[I + 1]->getImm();
- if (CCMask & ~ReusableCCMask)
- AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) |
- (CCValues & ~ReusableCCMask));
+ if (LogicalMI) {
+ // Translate the CCMask into its "logical" value.
+ CCMask = (CCMask == SystemZ::CCMASK_CMP_EQ ?
+ SystemZ::CCMASK_LOGICAL_ZERO : SystemZ::CCMASK_LOGICAL_NONZERO);
+ CCMask &= CCValues; // Logical subtracts never set CC=0.
+ } else {
+ if (CCMask & ~ReusableCCMask)
+ CCMask = (CCMask & ReusableCCMask) | (CCValues & ~ReusableCCMask);
+ CCMask |= (CCMask & OFImplies) ? SystemZ::CCMASK_ARITH_OVERFLOW : 0;
+ }
+ AlterMasks[I + 1]->setImm(CCMask);
}
}
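The OFImplies logic leans on two's-complement wrapping. A self-contained check of the claim, assuming the usual two's-complement behavior of the int32_t conversion:

#include <cassert>
#include <cstdint>
void checkOverflowImpliesSign() {
  // Adding a positive immediate can only overflow upward past INT32_MAX ...
  uint32_t Up = uint32_t(INT32_MAX) + 1u;
  assert(int32_t(Up) < 0);              // ... landing below zero (CC -> LT)
  // Adding a negative immediate (other than INT32_MIN) overflows downward ...
  uint32_t Down = uint32_t(INT32_MIN) - 1u;
  assert(int32_t(Down) > 0);            // ... landing above zero (CC -> GT)
}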
@@ -450,7 +547,9 @@ bool SystemZElimCompare::optimizeCompareZero(
}
// Try to eliminate Compare by reusing a CC result from MI.
if ((!CCRefs && convertToLoadAndTest(MI, Compare, CCUsers)) ||
- (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) {
+ (!CCRefs.Def &&
+ (adjustCCMasksForInstr(MI, Compare, CCUsers) ||
+ convertToLogical(MI, Compare, CCUsers)))) {
EliminatedComparisons += 1;
return true;
}
@@ -461,6 +560,12 @@ bool SystemZElimCompare::optimizeCompareZero(
CCRefs |= getRegReferences(MI, SystemZ::CC);
if (CCRefs.Use && CCRefs.Def)
break;
+ // Eliminating a Compare that may raise an FP exception will move
+ // raising the exception to some earlier MI. We cannot do this if
+ // there is anything in between that might change exception flags.
+ if (Compare.mayRaiseFPException() &&
+ (MI.isCall() || MI.hasUnmodeledSideEffects()))
+ break;
}
// Also do a forward search to handle cases where an instruction after the
@@ -595,7 +700,9 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
// Walk backwards through the block looking for comparisons, recording
// all CC users as we go. The subroutines can delete Compare and
// instructions before it.
- bool CompleteCCUsers = !isCCLiveOut(MBB);
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(MBB);
+ bool CompleteCCUsers = !LiveRegs.contains(SystemZ::CC);
SmallVector<MachineInstr *, 4> CCUsers;
MachineBasicBlock::iterator MBBI = MBB.end();
while (MBBI != MBB.begin()) {
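The hand-rolled successor scan is replaced by the generic liveness helper, which also understands pristine (saved-but-untouched) registers. The idiom in isolation, as a sketch:

#include "llvm/CodeGen/LivePhysRegs.h"
using namespace llvm;
// Returns true if Reg is live out of MBB, including via pristine regs.
static bool isLiveOutOf(const MachineBasicBlock &MBB,
                        const TargetRegisterInfo &TRI, MCPhysReg Reg) {
  LivePhysRegs LPR(TRI);
  LPR.addLiveOuts(MBB);
  return LPR.contains(Reg);
}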
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 0b8b6880accc..3cdf6bf98ee0 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -21,8 +21,8 @@
using namespace llvm;
namespace {
-// The ABI-defined register save slots, relative to the incoming stack
-// pointer.
+// The ABI-defined register save slots, relative to the CFA (i.e.
+// incoming stack pointer + SystemZMC::CallFrameSize).
static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
{ SystemZ::R2D, 0x10 },
{ SystemZ::R3D, 0x18 },
@@ -47,18 +47,125 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
SystemZFrameLowering::SystemZFrameLowering()
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8),
- -SystemZMC::CallFrameSize, Align(8),
- false /* StackRealignable */) {
+ 0, Align(8), false /* StackRealignable */),
+ RegSpillOffsets(0) {
+  // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not
+  // equal to the incoming stack pointer, but to the incoming stack pointer
+  // plus 160. Instead of using a Local Area Offset, the register save area
+  // is occupied by fixed frame objects, and all offsets are actually
+  // relative to the CFA.
+
// Create a mapping from register number to save slot offset.
+ // These offsets are relative to the start of the register save area.
RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
}
-const TargetFrameLowering::SpillSlot *
-SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
- NumEntries = array_lengthof(SpillOffsetTable);
- return SpillOffsetTable;
+static bool usePackedStack(MachineFunction &MF) {
+ bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
+ bool IsVarArg = MF.getFunction().isVarArg();
+ bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
+ bool BackChain = MF.getFunction().hasFnAttribute("backchain");
+  bool FrameAddressTaken = MF.getFrameInfo().isFrameAddressTaken();
+ if (HasPackedStackAttr && BackChain)
+ report_fatal_error("packed-stack with backchain is currently unsupported.");
+ return HasPackedStackAttr && !IsVarArg && CallConv && !BackChain &&
+         !FrameAddressTaken;
+}
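usePackedStack keys off plain string function attributes. A sketch of setting one programmatically; whether the clang driver exposes this as -mpacked-stack (mirroring GCC's s390 flag) is an assumption here:

#include "llvm/IR/Function.h"
// Opt a function into the packed layout checked above; combining it with
// "backchain" is rejected with a fatal error, per usePackedStack.
void requestPackedStack(llvm::Function &F) {
  F.addFnAttr("packed-stack");
}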
+
+bool SystemZFrameLowering::
+assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ bool IsVarArg = MF.getFunction().isVarArg();
+ if (CSI.empty())
+ return true; // Early exit if no callee saved registers are modified!
+
+ unsigned LowGPR = 0;
+ unsigned HighGPR = SystemZ::R15D;
+ int StartSPOffset = SystemZMC::CallFrameSize;
+ int CurrOffset;
+ if (!usePackedStack(MF)) {
+ for (auto &CS : CSI) {
+ unsigned Reg = CS.getReg();
+ int Offset = RegSpillOffsets[Reg];
+ if (Offset) {
+ if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) {
+ LowGPR = Reg;
+ StartSPOffset = Offset;
+ }
+ Offset -= SystemZMC::CallFrameSize;
+ int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
+ CS.setFrameIdx(FrameIdx);
+ } else
+ CS.setFrameIdx(INT32_MAX);
+ }
+
+ // Save the range of call-saved registers, for use by the
+ // prologue/epilogue inserters.
+ ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
+ if (IsVarArg) {
+ // Also save the GPR varargs, if any. R6D is call-saved, so would
+ // already be included, but we also need to handle the call-clobbered
+ // argument registers.
+ unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
+ if (FirstGPR < SystemZ::NumArgGPRs) {
+ unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
+ int Offset = RegSpillOffsets[Reg];
+ if (StartSPOffset > Offset) {
+ LowGPR = Reg; StartSPOffset = Offset;
+ }
+ }
+ }
+ ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
+
+ CurrOffset = -SystemZMC::CallFrameSize;
+ } else {
+ // Packed stack: put all the GPRs at the top of the Register save area.
+ uint32_t LowGR64Num = UINT32_MAX;
+ for (auto &CS : CSI) {
+ unsigned Reg = CS.getReg();
+ if (SystemZ::GR64BitRegClass.contains(Reg)) {
+ unsigned GR64Num = SystemZMC::getFirstReg(Reg);
+ int Offset = -8 * (15 - GR64Num + 1);
+ if (LowGR64Num > GR64Num) {
+ LowGR64Num = GR64Num;
+ StartSPOffset = SystemZMC::CallFrameSize + Offset;
+ }
+ int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
+ CS.setFrameIdx(FrameIdx);
+ } else
+ CS.setFrameIdx(INT32_MAX);
+ }
+ if (LowGR64Num < UINT32_MAX)
+ LowGPR = SystemZMC::GR64Regs[LowGR64Num];
+
+ // Save the range of call-saved registers, for use by the
+ // prologue/epilogue inserters.
+ ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
+ ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
+
+ CurrOffset = LowGPR ? -(SystemZMC::CallFrameSize - StartSPOffset) : 0;
+ }
+
+ // Create fixed stack objects for the remaining registers.
+ for (auto &CS : CSI) {
+ if (CS.getFrameIdx() != INT32_MAX)
+ continue;
+ unsigned Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ unsigned Size = TRI->getSpillSize(*RC);
+ CurrOffset -= Size;
+ assert(CurrOffset % 8 == 0 &&
+ "8-byte alignment required for for all register save slots");
+ int FrameIdx = MFFrame.CreateFixedSpillStackObject(Size, CurrOffset);
+ CS.setFrameIdx(FrameIdx);
+ }
+
+ return true;
}
void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -141,53 +248,21 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool IsVarArg = MF.getFunction().isVarArg();
DebugLoc DL;
- // Scan the call-saved GPRs and find the bounds of the register spill area.
- unsigned LowGPR = 0;
- unsigned HighGPR = SystemZ::R15D;
- unsigned StartOffset = -1U;
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
- if (SystemZ::GR64BitRegClass.contains(Reg)) {
- unsigned Offset = RegSpillOffsets[Reg];
- assert(Offset && "Unexpected GPR save");
- if (StartOffset > Offset) {
- LowGPR = Reg;
- StartOffset = Offset;
- }
- }
- }
-
- // Save the range of call-saved registers, for use by the epilogue inserter.
- ZFI->setLowSavedGPR(LowGPR);
- ZFI->setHighSavedGPR(HighGPR);
-
- // Include the GPR varargs, if any. R6D is call-saved, so would
- // be included by the loop above, but we also need to handle the
- // call-clobbered argument registers.
- if (IsVarArg) {
- unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
- if (FirstGPR < SystemZ::NumArgGPRs) {
- unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
- unsigned Offset = RegSpillOffsets[Reg];
- if (StartOffset > Offset) {
- LowGPR = Reg; StartOffset = Offset;
- }
- }
- }
-
// Save GPRs
- if (LowGPR) {
- assert(LowGPR != HighGPR && "Should be saving %r15 and something else");
+ SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs();
+ if (SpillGPRs.LowGPR) {
+ assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR &&
+ "Should be saving %r15 and something else");
// Build an STMG instruction.
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
// Add the explicit register operands.
- addSavedGPR(MBB, MIB, LowGPR, false);
- addSavedGPR(MBB, MIB, HighGPR, false);
+ addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
+ addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);
// Add the address.
- MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+ MIB.addReg(SystemZ::R15D).addImm(SpillGPRs.GPROffset);
// Make sure all call-saved GPRs are included as operands and are
// marked as live on entry.
@@ -248,30 +323,29 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// Restore call-saved GPRs (but not call-clobbered varargs, which at
// this point might hold return values).
- unsigned LowGPR = ZFI->getLowSavedGPR();
- unsigned HighGPR = ZFI->getHighSavedGPR();
- unsigned StartOffset = RegSpillOffsets[LowGPR];
- if (LowGPR) {
+ SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs();
+ if (RestoreGPRs.LowGPR) {
// If we saved any of %r2-%r5 as varargs, we should also be saving
// and restoring %r6. If we're saving %r6 or above, we should be
// restoring it too.
- assert(LowGPR != HighGPR && "Should be loading %r15 and something else");
+ assert(RestoreGPRs.LowGPR != RestoreGPRs.HighGPR &&
+ "Should be loading %r15 and something else");
// Build an LMG instruction.
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));
// Add the explicit register operands.
- MIB.addReg(LowGPR, RegState::Define);
- MIB.addReg(HighGPR, RegState::Define);
+ MIB.addReg(RestoreGPRs.LowGPR, RegState::Define);
+ MIB.addReg(RestoreGPRs.HighGPR, RegState::Define);
// Add the address.
MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D);
- MIB.addImm(StartOffset);
+ MIB.addImm(RestoreGPRs.GPROffset);
// Do a second scan adding regs as being defined by instruction
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
unsigned Reg = CSI[I].getReg();
- if (Reg != LowGPR && Reg != HighGPR &&
+ if (Reg != RestoreGPRs.LowGPR && Reg != RestoreGPRs.HighGPR &&
SystemZ::GR64BitRegClass.contains(Reg))
MIB.addReg(Reg, RegState::ImplicitDefine);
}
@@ -284,16 +358,20 @@ void SystemZFrameLowering::
processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
+
+ if (!usePackedStack(MF))
+ // Always create the full incoming register save area.
+ getOrCreateFramePointerSaveIndex(MF);
+
// Get the size of our stack frame to be allocated ...
uint64_t StackSize = (MFFrame.estimateStackSize(MF) +
SystemZMC::CallFrameSize);
// ... and the maximum offset we may need to reach into the
// caller's frame to access the save area or stack arguments.
- int64_t MaxArgOffset = SystemZMC::CallFrameSize;
+ int64_t MaxArgOffset = 0;
for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I)
if (MFFrame.getObjectOffset(I) >= 0) {
- int64_t ArgOffset = SystemZMC::CallFrameSize +
- MFFrame.getObjectOffset(I) +
+ int64_t ArgOffset = MFFrame.getObjectOffset(I) +
MFFrame.getObjectSize(I);
MaxArgOffset = std::max(MaxArgOffset, ArgOffset);
}
@@ -351,6 +429,23 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
const std::vector<CalleeSavedInfo> &CSI = MFFrame.getCalleeSavedInfo();
bool HasFP = hasFP(MF);
+  // In the GHC calling convention, C stack space, including the ABI-defined
+  // 160-byte base area, is (de)allocated by GHC itself. LLVM may still use
+  // that space for spill slots of the tail-recursive GHC functions, so do
+  // not allocate any further stack here.
+ if (MF.getFunction().getCallingConv() == CallingConv::GHC) {
+ if (MFFrame.getStackSize() > 2048 * sizeof(long)) {
+ report_fatal_error(
+ "Pre allocated stack space for GHC function is too small");
+ }
+ if (HasFP) {
+ report_fatal_error(
+ "In GHC calling convention a frame pointer is not supported");
+ }
+ MFFrame.setStackSize(MFFrame.getStackSize() + SystemZMC::CallFrameSize);
+ return;
+ }
+
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc DL;
@@ -358,7 +453,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// The current offset of the stack pointer from the CFA.
int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
- if (ZFI->getLowSavedGPR()) {
+ if (ZFI->getSpillGPRRegs().LowGPR) {
// Skip over the GPR saves.
if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG)
++MBBI;
@@ -369,7 +464,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
for (auto &Save : CSI) {
unsigned Reg = Save.getReg();
if (SystemZ::GR64BitRegClass.contains(Reg)) {
- int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg];
+ int FI = Save.getFrameIdx();
+ int64_t Offset = MFFrame.getObjectOffset(FI);
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
@@ -382,10 +478,19 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// We need to allocate the ABI-defined 160-byte base area whenever
// we allocate stack space for our own use and whenever we call another
// function.
- if (StackSize || MFFrame.hasVarSizedObjects() || MFFrame.hasCalls()) {
+ bool HasStackObject = false;
+ for (unsigned i = 0, e = MFFrame.getObjectIndexEnd(); i != e; ++i)
+ if (!MFFrame.isDeadObjectIndex(i)) {
+ HasStackObject = true;
+ break;
+ }
+ if (HasStackObject || MFFrame.hasCalls())
StackSize += SystemZMC::CallFrameSize;
- MFFrame.setStackSize(StackSize);
- }
+ // Don't allocate the incoming reg save area.
+ StackSize = StackSize > SystemZMC::CallFrameSize
+ ? StackSize - SystemZMC::CallFrameSize
+ : 0;
+ MFFrame.setStackSize(StackSize);
if (StackSize) {
// Determine if we want to store a backchain.
@@ -410,7 +515,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
if (StoreBackchain)
BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
- .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0).addReg(0);
+ .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0)
+ .addReg(0);
}
if (HasFP) {
@@ -478,11 +584,15 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ // See SystemZFrameLowering::emitPrologue
+ if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+ return;
+
// Skip the return instruction.
assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
uint64_t StackSize = MFFrame.getStackSize();
- if (ZFI->getLowSavedGPR()) {
+ if (ZFI->getRestoreGPRRegs().LowGPR) {
--MBBI;
unsigned Opcode = MBBI->getOpcode();
if (Opcode != SystemZ::LMG)
@@ -527,6 +637,16 @@ SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return true;
}
+int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
+ // Our incoming SP is actually SystemZMC::CallFrameSize below the CFA, so
+ // add that difference here.
+ int64_t Offset =
+ TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg);
+ return Offset + SystemZMC::CallFrameSize;
+}
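A worked instance of the adjustment, using the 160-byte call frame size stated above: the generic TargetFrameLowering answer is CFA-relative, and our SP sits CallFrameSize below the CFA.

constexpr int CallFrameSize = 160;            // SystemZMC::CallFrameSize
constexpr int CFARelative   = -CallFrameSize; // e.g. the FP save slot below
static_assert(CFARelative + CallFrameSize == 0,
              "bottom of the register save area sits right at incoming SP");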
+
MachineBasicBlock::iterator SystemZFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -543,3 +663,15 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
llvm_unreachable("Unexpected call frame instruction");
}
}
+
+int SystemZFrameLowering::
+getOrCreateFramePointerSaveIndex(MachineFunction &MF) const {
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ int FI = ZFI->getFramePointerSaveIndex();
+ if (!FI) {
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ FI = MFFrame.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
+ ZFI->setFramePointerSaveIndex(FI);
+ }
+ return FI;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 71ef3e4dc240..4189a92b8294 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -24,8 +24,10 @@ public:
// Override TargetFrameLowering.
bool isFPCloseToIncomingSP() const override { return false; }
- const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const
- override;
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
@@ -43,6 +45,8 @@ public:
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
@@ -52,6 +56,9 @@ public:
unsigned getRegSpillOffset(unsigned Reg) const {
return RegSpillOffsets[Reg];
}
+
+ // Get or create the frame index of where the old frame pointer is stored.
+ int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 751034c2d41a..3927a977e6fc 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -347,9 +347,12 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override {
const Function &F = MF.getFunction();
- if (F.getFnAttribute("mnop-mcount").getValueAsString() == "true" &&
- F.getFnAttribute("fentry-call").getValueAsString() != "true")
- report_fatal_error("mnop-mcount only supported with fentry-call");
+ if (F.getFnAttribute("fentry-call").getValueAsString() != "true") {
+ if (F.hasFnAttribute("mnop-mcount"))
+ report_fatal_error("mnop-mcount only supported with fentry-call");
+ if (F.hasFnAttribute("mrecord-mcount"))
+ report_fatal_error("mrecord-mcount only supported with fentry-call");
+ }
Subtarget = &MF.getSubtarget<SystemZSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
@@ -1494,8 +1497,9 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR ||
ChildOpcode == ISD::XOR)
break;
- // Check whether this expression matches OR-with-complement.
- if (Opcode == ISD::OR && ChildOpcode == ISD::XOR) {
+ // Check whether this expression matches OR-with-complement
+ // (or matches an alternate pattern for NXOR).
+ if (ChildOpcode == ISD::XOR) {
auto Op0 = Node->getOperand(0);
if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1)))
if (Op0Op1->getZExtValue() == (uint64_t)-1)
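The relaxed check lets the complement test fire under XOR roots as well, so an NXOR-style pattern can be selected where the subtarget provides it. Source-level shapes, as a sketch; both carry the (xor x, -1) complement this code looks for:

unsigned orWithComplement(unsigned a, unsigned b) { return ~a | b; }
unsigned notXor(unsigned a, unsigned b)           { return ~a ^ b; }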
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index e0ca9da93561..c73905d3357a 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -19,8 +19,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>
@@ -32,12 +33,16 @@ using namespace llvm;
namespace {
// Represents information about a comparison.
struct Comparison {
- Comparison(SDValue Op0In, SDValue Op1In)
- : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
+ Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
+ : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
+ Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
// The operands to the comparison.
SDValue Op0, Op1;
+ // Chain if this is a strict floating-point comparison.
+ SDValue Chain;
+
// The opcode that should be used to compare Op0 and Op1.
unsigned Opcode;
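The Chain field is only set for strict comparisons, which reach the DAG as STRICT_FSETCC / STRICT_FSETCCS nodes. At the IR level these originate from the constrained-FP intrinsics; a sketch, assuming the IRBuilder helper available in this release:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;
// Quiet strict compare: raises FP exceptions only for signaling NaNs.
Value *strictOLT(IRBuilder<> &B, Value *L, Value *R) {
  return B.CreateConstrainedFPCmp(Intrinsic::experimental_constrained_fcmp,
                                  CmpInst::FCMP_OLT, L, R);
}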
@@ -132,6 +137,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
if (isTypeLegal(VT)) {
// Lower SET_CC into an IPM-based sequence.
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
// Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
setOperationAction(ISD::SELECT, VT, Expand);
@@ -212,6 +219,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
if (Subtarget.hasFPExtension())
setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
+
+ // And similarly for STRICT_[SU]INT_TO_FP.
+ setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
+ if (Subtarget.hasFPExtension())
+ setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
}
}
@@ -251,6 +263,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
if (!Subtarget.hasFPExtension()) {
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
}
// We have native support for a 64-bit CTLZ, via FLOGR.
@@ -373,6 +387,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
// and inverting the result as necessary.
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ if (Subtarget.hasVectorEnhancements1())
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
}
@@ -392,6 +409,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
}
if (Subtarget.hasVectorEnhancements2()) {
@@ -408,6 +429,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
}
// Handle floating-point types.
@@ -558,16 +583,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
+ setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
+ setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
}
}
- // We have fused multiply-addition for f32 and f64 but not f128.
- setOperationAction(ISD::FMA, MVT::f32, Legal);
- setOperationAction(ISD::FMA, MVT::f64, Legal);
- if (Subtarget.hasVectorEnhancements1())
- setOperationAction(ISD::FMA, MVT::f128, Legal);
- else
+ // We only have fused f128 multiply-addition on vector registers.
+ if (!Subtarget.hasVectorEnhancements1()) {
setOperationAction(ISD::FMA, MVT::f128, Expand);
+ setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
+ }
// We don't have a copysign instruction on vector registers.
if (Subtarget.hasVectorEnhancements1())
@@ -612,7 +637,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::STRICT_FP_ROUND);
setTargetDAGCombine(ISD::FP_EXTEND);
+ setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::SDIV);
setTargetDAGCombine(ISD::UDIV);
@@ -634,6 +661,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// than "STC;MVC". Handle the choice in target-specific code instead.
MaxStoresPerMemset = 0;
MaxStoresPerMemsetOptSize = 0;
+
+ // Default to having -disable-strictnode-mutation on
+ IsStrictFPEnabled = true;
}
EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
@@ -643,7 +673,8 @@ EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
return VT.changeVectorElementTypeToInteger();
}
-bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
+ const MachineFunction &MF, EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
@@ -1406,7 +1437,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
// ...and a similar frame index for the caller-allocated save area
// that will be used to store the incoming registers.
- int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
+ int64_t RegSaveOffset = -SystemZMC::CallFrameSize;
unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
@@ -1675,6 +1706,9 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (RetLocs.empty())
return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
+ if (CallConv == CallingConv::GHC)
+ report_fatal_error("GHC functions return void only");
+
// Copy the result values into the output registers.
SDValue Glue;
SmallVector<SDValue, 4> RetOps;
@@ -2161,6 +2195,10 @@ static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
// negation to set CC, so avoiding separate LOAD AND TEST and
// LOAD (NEGATIVE/COMPLEMENT) instructions.
static void adjustForFNeg(Comparison &C) {
+ // This optimization is invalid for strict comparisons, since FNEG
+ // does not raise any exceptions.
+ if (C.Chain)
+ return;
auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
if (C1 && C1->isZero()) {
for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
@@ -2448,7 +2486,7 @@ static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
SDValue Call, unsigned CCValid, uint64_t CC,
ISD::CondCode Cond) {
- Comparison C(Call, SDValue());
+ Comparison C(Call, SDValue(), SDValue());
C.Opcode = Opcode;
C.CCValid = CCValid;
if (Cond == ISD::SETEQ)
@@ -2479,8 +2517,11 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
- ISD::CondCode Cond, const SDLoc &DL) {
+ ISD::CondCode Cond, const SDLoc &DL,
+ SDValue Chain = SDValue(),
+ bool IsSignaling = false) {
if (CmpOp1.getOpcode() == ISD::Constant) {
+ assert(!Chain);
uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
unsigned Opcode, CCValid;
if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
@@ -2492,13 +2533,19 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
}
- Comparison C(CmpOp0, CmpOp1);
+ Comparison C(CmpOp0, CmpOp1, Chain);
C.CCMask = CCMaskForCondCode(Cond);
if (C.Op0.getValueType().isFloatingPoint()) {
C.CCValid = SystemZ::CCMASK_FCMP;
- C.Opcode = SystemZISD::FCMP;
+ if (!C.Chain)
+ C.Opcode = SystemZISD::FCMP;
+ else if (!IsSignaling)
+ C.Opcode = SystemZISD::STRICT_FCMP;
+ else
+ C.Opcode = SystemZISD::STRICT_FCMPS;
adjustForFNeg(C);
} else {
+ assert(!C.Chain);
C.CCValid = SystemZ::CCMASK_ICMP;
C.Opcode = SystemZISD::ICMP;
// Choose the type of comparison. Equality and inequality tests can
@@ -2556,6 +2603,10 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
}
+ if (C.Chain) {
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
+ }
return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
}
@@ -2600,24 +2651,51 @@ static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
}
// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
-// be done directly. IsFP is true if CC is for a floating-point rather than
-// integer comparison.
-static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
+// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
+// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
+// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
+// floating-point comparisons.
+enum class CmpMode { Int, FP, StrictFP, SignalingFP };
+static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
switch (CC) {
case ISD::SETOEQ:
case ISD::SETEQ:
- return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
+ switch (Mode) {
+ case CmpMode::Int: return SystemZISD::VICMPE;
+ case CmpMode::FP: return SystemZISD::VFCMPE;
+ case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
+ case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
+ }
+ llvm_unreachable("Bad mode");
case ISD::SETOGE:
case ISD::SETGE:
- return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
+ switch (Mode) {
+ case CmpMode::Int: return 0;
+ case CmpMode::FP: return SystemZISD::VFCMPHE;
+ case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
+ case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
+ }
+ llvm_unreachable("Bad mode");
case ISD::SETOGT:
case ISD::SETGT:
- return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
+ switch (Mode) {
+ case CmpMode::Int: return SystemZISD::VICMPH;
+ case CmpMode::FP: return SystemZISD::VFCMPH;
+ case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
+ case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
+ }
+ llvm_unreachable("Bad mode");
case ISD::SETUGT:
- return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
+ switch (Mode) {
+ case CmpMode::Int: return SystemZISD::VICMPHL;
+ case CmpMode::FP: return 0;
+ case CmpMode::StrictFP: return 0;
+ case CmpMode::SignalingFP: return 0;
+ }
+ llvm_unreachable("Bad mode");
default:
return 0;
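The four CmpMode values separate IEEE-754 quiet comparisons (Invalid only for signaling NaNs) from signaling ones (Invalid for any NaN). A rough standalone sketch of the distinction; whether operator> really lowers to a signaling compare is toolchain-dependent, which is precisely what the STRICT_* opcodes pin down:

    #include <cfenv>
    #include <cmath>
    #include <cstdio>

    int main() {
      double qnan = std::nan("");

      std::feclearexcept(FE_ALL_EXCEPT);
      bool q = std::isgreater(qnan, 0.0);   // quiet predicate (VFCMPH-like)
      int qInv = std::fetestexcept(FE_INVALID);

      std::feclearexcept(FE_ALL_EXCEPT);
      bool s = qnan > 0.0;                  // signaling predicate
      int sInv = std::fetestexcept(FE_INVALID); // (STRICT_VFCMPHS-like)

      std::printf("quiet:     %d FE_INVALID=%d\n", q, qInv != 0);
      std::printf("signaling: %d FE_INVALID=%d\n", s, sInv != 0);
      return 0;
    }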
@@ -2626,17 +2704,16 @@ static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
// Return the SystemZISD vector comparison operation for CC or its inverse,
// or 0 if neither can be done directly. Indicate in Invert whether the
-// result is for the inverse of CC. IsFP is true if CC is for a
-// floating-point rather than integer comparison.
-static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
+// result is for the inverse of CC. Mode is as above.
+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
bool &Invert) {
- if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+ if (unsigned Opcode = getVectorComparison(CC, Mode)) {
Invert = false;
return Opcode;
}
- CC = ISD::getSetCCInverse(CC, !IsFP);
- if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+ CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
+ if (unsigned Opcode = getVectorComparison(CC, Mode)) {
Invert = true;
return Opcode;
}
@@ -2645,44 +2722,73 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
}
// Return a v2f64 that contains the extended form of elements Start and Start+1
-// of v4f32 value Op.
+// of v4f32 value Op. If Chain is nonnull, return the strict form.
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
- SDValue Op) {
+ SDValue Op, SDValue Chain) {
int Mask[] = { Start, -1, Start + 1, -1 };
Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
+ if (Chain) {
+ SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
+ return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
+ }
return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
}
// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
-// producing a result of type VT.
+// producing a result of type VT. If Chain is nonnull, return the strict form.
SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
const SDLoc &DL, EVT VT,
SDValue CmpOp0,
- SDValue CmpOp1) const {
+ SDValue CmpOp1,
+ SDValue Chain) const {
// There is no hardware support for v4f32 (unless we have the vector
// enhancements facility 1), so extend the vector into two v2f64s
// and compare those.
if (CmpOp0.getValueType() == MVT::v4f32 &&
!Subtarget.hasVectorEnhancements1()) {
- SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
- SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
- SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
- SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
+ SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
+ SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
+ SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
+ SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
+ if (Chain) {
+ SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
+ SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
+ SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
+ SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
+ H1.getValue(1), L1.getValue(1),
+ HRes.getValue(1), LRes.getValue(1) };
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue Ops[2] = { Res, NewChain };
+ return DAG.getMergeValues(Ops, DL);
+ }
SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
}
+ if (Chain) {
+ SDVTList VTs = DAG.getVTList(VT, MVT::Other);
+ return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
+ }
return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
}
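A scalar sketch of the v4f32 fallback path (toy C++, not the SelectionDAG API): each pair of f32 lanes is widened to f64 (VEXTEND), compared there, and the two 64-bit mask vectors are truncated lane-wise back to 32 bits (PACK).

    #include <cstdint>
    #include <cstdio>

    int main() {
      float a[4] = {1, 5, 3, 0}, b[4] = {2, 4, 3, -1};
      uint64_t wide[4];                 // results of the two v2f64 compares
      for (int i = 0; i < 4; ++i)
        wide[i] = ((double)a[i] > (double)b[i]) ? ~0ull : 0ull;
      uint32_t packed[4];               // PACK keeps the low half per lane
      for (int i = 0; i < 4; ++i)
        packed[i] = (uint32_t)wide[i];
      for (int i = 0; i < 4; ++i)
        std::printf("lane %d: %08x\n", i, (unsigned)packed[i]);
      return 0;
    }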
// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
-// an integer mask of type VT.
+// an integer mask of type VT. If Chain is nonnull, we have a strict
+// floating-point comparison. If in addition IsSignaling is true, we have
+// a strict signaling floating-point comparison.
SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
const SDLoc &DL, EVT VT,
ISD::CondCode CC,
SDValue CmpOp0,
- SDValue CmpOp1) const {
+ SDValue CmpOp1,
+ SDValue Chain,
+ bool IsSignaling) const {
bool IsFP = CmpOp0.getValueType().isFloatingPoint();
+ assert (!Chain || IsFP);
+ assert (!IsSignaling || Chain);
+ CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
+ Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
bool Invert = false;
SDValue Cmp;
switch (CC) {
@@ -2692,9 +2798,14 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
LLVM_FALLTHROUGH;
case ISD::SETO: {
assert(IsFP && "Unexpected integer comparison");
- SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
- SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
+ SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+ DL, VT, CmpOp1, CmpOp0, Chain);
+ SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
+ DL, VT, CmpOp0, CmpOp1, Chain);
Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ LT.getValue(1), GE.getValue(1));
break;
}
@@ -2704,9 +2815,14 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
LLVM_FALLTHROUGH;
case ISD::SETONE: {
assert(IsFP && "Unexpected integer comparison");
- SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
- SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
+ SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+ DL, VT, CmpOp1, CmpOp0, Chain);
+ SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+ DL, VT, CmpOp0, CmpOp1, Chain);
Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ LT.getValue(1), GT.getValue(1));
break;
}
@@ -2714,15 +2830,17 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
// matter whether we try the inversion or the swap first, since
// there are no cases where both work.
default:
- if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
- Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
else {
CC = ISD::getSetCCSwappedOperands(CC);
- if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
- Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
else
llvm_unreachable("Unhandled comparison");
}
+ if (Chain)
+ Chain = Cmp.getValue(1);
break;
}
if (Invert) {
@@ -2730,6 +2848,10 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
}
+ if (Chain && Chain.getNode() != Cmp.getNode()) {
+ SDValue Ops[2] = { Cmp, Chain };
+ Cmp = DAG.getMergeValues(Ops, DL);
+ }
return Cmp;
}
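When only the inverse condition maps to an instruction, Invert is set and the 0/-1 lane masks are flipped afterwards by the XOR above. In scalar form (an illustrative sketch only), computing SETULE via the available "ordered greater than":

    #include <cstdint>
    #include <cstdio>

    int main() {
      double a[2] = {1.0, 4.0}, b[2] = {2.0, 3.0};
      uint64_t m[2];
      for (int i = 0; i < 2; ++i)
        m[i] = (a[i] > b[i]) ? ~0ull : 0ull;  // VFCMPH: a OGT b
      for (int i = 0; i < 2; ++i)
        m[i] ^= ~0ull;                        // Invert: a ULE b
      std::printf("lane0=%016llx lane1=%016llx\n",
                  (unsigned long long)m[0], (unsigned long long)m[1]);
      return 0;
    }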
@@ -2748,6 +2870,29 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
}
+SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
+ SelectionDAG &DAG,
+ bool IsSignaling) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue CmpOp0 = Op.getOperand(1);
+ SDValue CmpOp1 = Op.getOperand(2);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
+ SDLoc DL(Op);
+ EVT VT = Op.getNode()->getValueType(0);
+ if (VT.isVector()) {
+ SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
+ Chain, IsSignaling);
+ return Res.getValue(Op.getResNo());
+ }
+
+ Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
+ SDValue CCReg = emitCmp(DAG, DL, C);
+ CCReg->setFlags(Op->getFlags());
+ SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
+ SDValue Ops[2] = { Result, CCReg.getValue(1) };
+ return DAG.getMergeValues(Ops, DL);
+}
+
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue CmpOp0 = Op.getOperand(2);
@@ -2828,17 +2973,26 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
SDValue Result;
if (Subtarget.isPC32DBLSymbol(GV, CM)) {
- // Assign anchors at 1<<12 byte boundaries.
- uint64_t Anchor = Offset & ~uint64_t(0xfff);
- Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
- Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
-
- // The offset can be folded into the address if it is aligned to a halfword.
- Offset -= Anchor;
- if (Offset != 0 && (Offset & 1) == 0) {
- SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
- Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
- Offset = 0;
+ if (isInt<32>(Offset)) {
+ // Assign anchors at 1<<12 byte boundaries.
+ uint64_t Anchor = Offset & ~uint64_t(0xfff);
+ Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
+ Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+
+ // The offset can be folded into the address if it is aligned to a
+ // halfword.
+ Offset -= Anchor;
+ if (Offset != 0 && (Offset & 1) == 0) {
+ SDValue Full =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
+ Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
+ Offset = 0;
+ }
+ } else {
+ // Conservatively load a constant offset greater than 32 bits into a
+ // register below.
+ Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
+ Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}
} else {
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
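The anchoring arithmetic in the branch above can be checked in isolation; the offset below is an arbitrary example value:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t Offset = 0x12345;                              // example only
      uint64_t Anchor = uint64_t(Offset) & ~uint64_t(0xfff); // 4K-aligned
      int64_t Rem = Offset - int64_t(Anchor);
      // LARL materializes halfword-aligned addresses, so the remainder can
      // be folded into the anchor only when it is even.
      bool Foldable = Rem != 0 && (Rem & 1) == 0;
      std::printf("anchor=0x%llx rem=%lld foldable=%d\n",
                  (unsigned long long)Anchor, (long long)Rem, (int)Foldable);
      return 0;
    }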
@@ -2865,6 +3019,10 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
SDValue Chain = DAG.getEntryNode();
SDValue Glue;
+ if (DAG.getMachineFunction().getFunction().getCallingConv() ==
+ CallingConv::GHC)
+ report_fatal_error("In GHC calling convention TLS is not supported");
+
// __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
@@ -2931,6 +3089,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
+ if (DAG.getMachineFunction().getFunction().getCallingConv() ==
+ CallingConv::GHC)
+ report_fatal_error("In GHC calling convention TLS is not supported");
+
SDValue TP = lowerThreadPointer(DL, DAG);
// Get the offset of GA from the thread pointer, based on the TLS model.
@@ -3060,14 +3222,10 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- // If the back chain frame index has not been allocated yet, do so.
- SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
- int BackChainIdx = FI->getFramePointerSaveIndex();
- if (!BackChainIdx) {
- // By definition, the frame address is the address of the back chain.
- BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
- FI->setFramePointerSaveIndex(BackChainIdx);
- }
+ // By definition, the frame address is the address of the back chain.
+ auto *TFL =
+ static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+ int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
// FIXME The frontend should detect this case.
@@ -3585,7 +3743,7 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
// Get the known-zero mask for the operand.
KnownBits Known = DAG.computeKnownBits(Op);
- unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
+ unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
if (NumSignificantBits == 0)
return DAG.getConstant(0, DL, VT);
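KnownBits::getMaxValue() returns ~Known.Zero (every bit not proven zero may be one), so the rewrite above is behavior-preserving while naming the intent. A 64-bit toy version of the bound:

    #include <cstdint>
    #include <cstdio>

    // Toy stand-in for APInt::getActiveBits(): bits needed to represent V.
    static unsigned activeBits(uint64_t V) {
      unsigned N = 0;
      for (; V; V >>= 1)
        ++N;
      return N;
    }

    int main() {
      uint64_t KnownZero = ~0xFFull;    // only the low byte may be nonzero
      uint64_t MaxValue = ~KnownZero;   // == KnownBits::getMaxValue()
      std::printf("at most %u significant bits\n", activeBits(MaxValue));
      return 0;
    }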
@@ -3861,6 +4019,9 @@ SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
+ if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+ report_fatal_error("Variable-sized stack allocations are not supported "
+ "in GHC calling convention");
return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
SystemZ::R15D, Op.getValueType());
}
@@ -3871,6 +4032,10 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
+ if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+ report_fatal_error("Variable-sized stack allocations are not supported "
+ "in GHC calling convention");
+
SDValue Chain = Op.getOperand(0);
SDValue NewSP = Op.getOperand(1);
SDValue Backchain;
@@ -4935,6 +5100,10 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSELECT_CC(Op, DAG);
case ISD::SETCC:
return lowerSETCC(Op, DAG);
+ case ISD::STRICT_FSETCC:
+ return lowerSTRICT_FSETCC(Op, DAG, false);
+ case ISD::STRICT_FSETCCS:
+ return lowerSTRICT_FSETCC(Op, DAG, true);
case ISD::GlobalAddress:
return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
case ISD::GlobalTLSAddress:
@@ -5140,6 +5309,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(IABS);
OPCODE(ICMP);
OPCODE(FCMP);
+ OPCODE(STRICT_FCMP);
+ OPCODE(STRICT_FCMPS);
OPCODE(TM);
OPCODE(BR_CCMASK);
OPCODE(SELECT_CCMASK);
@@ -5202,14 +5373,22 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VICMPHS);
OPCODE(VICMPHLS);
OPCODE(VFCMPE);
+ OPCODE(STRICT_VFCMPE);
+ OPCODE(STRICT_VFCMPES);
OPCODE(VFCMPH);
+ OPCODE(STRICT_VFCMPH);
+ OPCODE(STRICT_VFCMPHS);
OPCODE(VFCMPHE);
+ OPCODE(STRICT_VFCMPHE);
+ OPCODE(STRICT_VFCMPHES);
OPCODE(VFCMPES);
OPCODE(VFCMPHS);
OPCODE(VFCMPHES);
OPCODE(VFTCI);
OPCODE(VEXTEND);
+ OPCODE(STRICT_VEXTEND);
OPCODE(VROUND);
+ OPCODE(STRICT_VROUND);
OPCODE(VTM);
OPCODE(VFAE_CC);
OPCODE(VFAEZ_CC);
@@ -5732,6 +5911,19 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS(
return SDValue();
}
+static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
+ SDValue Chain1 = N1->getOperand(0);
+ SDValue Chain2 = N2->getOperand(0);
+
+ // Trivial case: both nodes take the same chain.
+ if (Chain1 == Chain2)
+ return Chain1;
+
+ // FIXME - we could handle more complex cases via TokenFactor,
+ // assuming we can verify that this would not create a cycle.
+ return SDValue();
+}
+
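A toy model of the chain test, with plain structs standing in for SDNode and nullptr for an empty SDValue:

    #include <cstdio>

    struct Node { Node *Chain; };   // operand 0 is the input chain

    // Two strict FP nodes may be merged only when their exception ordering
    // is trivially compatible, i.e. they read the same input chain.
    static Node *mergeInputChains(Node *N1, Node *N2) {
      if (N1->Chain == N2->Chain)
        return N1->Chain;
      return nullptr; // a TokenFactor could cover more, absent cycles
    }

    int main() {
      Node Entry{nullptr};
      Node A{&Entry}, B{&Entry}, C{&A};
      std::printf("A,B mergeable: %d\n", mergeInputChains(&A, &B) != nullptr);
      std::printf("A,C mergeable: %d\n", mergeInputChains(&A, &C) != nullptr);
      return 0;
    }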
SDValue SystemZTargetLowering::combineFP_ROUND(
SDNode *N, DAGCombinerInfo &DCI) const {
@@ -5744,8 +5936,9 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
// (extract_vector_elt (VROUND X) 2)
//
// This is a special case since the target doesn't really support v2f32s.
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0);
+ SDValue Op0 = N->getOperand(OpNo);
if (N->getValueType(0) == MVT::f32 &&
Op0.hasOneUse() &&
Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5761,20 +5954,34 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
U->getOperand(1).getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
SDValue OtherRound = SDValue(*U->use_begin(), 0);
- if (OtherRound.getOpcode() == ISD::FP_ROUND &&
- OtherRound.getOperand(0) == SDValue(U, 0) &&
+ if (OtherRound.getOpcode() == N->getOpcode() &&
+ OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
OtherRound.getValueType() == MVT::f32) {
- SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
- MVT::v4f32, Vec);
+ SDValue VRound, Chain;
+ if (N->isStrictFPOpcode()) {
+ Chain = MergeInputChains(N, OtherRound.getNode());
+ if (!Chain)
+ continue;
+ VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
+ {MVT::v4f32, MVT::Other}, {Chain, Vec});
+ Chain = VRound.getValue(1);
+ } else
+ VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+ MVT::v4f32, Vec);
DCI.AddToWorklist(VRound.getNode());
SDValue Extract1 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
DCI.AddToWorklist(Extract1.getNode());
DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+ if (Chain)
+ DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
SDValue Extract0 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ if (Chain)
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+ N->getVTList(), Extract0, Chain);
return Extract0;
}
}
@@ -5795,8 +6002,9 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
// (extract_vector_elt (VEXTEND X) 1)
//
// This is a special case since the target doesn't really support v2f32s.
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0);
+ SDValue Op0 = N->getOperand(OpNo);
if (N->getValueType(0) == MVT::f64 &&
Op0.hasOneUse() &&
Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5812,20 +6020,34 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
U->getOperand(1).getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
SDValue OtherExtend = SDValue(*U->use_begin(), 0);
- if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
- OtherExtend.getOperand(0) == SDValue(U, 0) &&
+ if (OtherExtend.getOpcode() == N->getOpcode() &&
+ OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
OtherExtend.getValueType() == MVT::f64) {
- SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
- MVT::v2f64, Vec);
+ SDValue VExtend, Chain;
+ if (N->isStrictFPOpcode()) {
+ Chain = MergeInputChains(N, OtherExtend.getNode());
+ if (!Chain)
+ continue;
+ VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
+ {MVT::v2f64, MVT::Other}, {Chain, Vec});
+ Chain = VExtend.getValue(1);
+ } else
+ VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
+ MVT::v2f64, Vec);
DCI.AddToWorklist(VExtend.getNode());
SDValue Extract1 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
DCI.AddToWorklist(Extract1.getNode());
DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
+ if (Chain)
+ DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
SDValue Extract0 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ if (Chain)
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+ N->getVTList(), Extract0, Chain);
return Extract0;
}
}
@@ -6165,7 +6387,9 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
@@ -7523,7 +7747,8 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
// Replace pseudo with a normal load-and-test that models the def as
// well.
BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
- .addReg(SrcReg);
+ .addReg(SrcReg)
+ .setMIFlags(MI.getFlags());
MI.eraseFromParent();
return MBB;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 23cdcc72bc42..defcaa6eb6eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -245,7 +245,7 @@ enum NodeType : unsigned {
VICMPHS,
VICMPHLS,
- // Compare floating-point vector operands 0 and 1 to preoduce the usual 0/-1
+ // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
// vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and
// greater than" and VFCMPHE for "ordered and greater than or equal to".
VFCMPE,
@@ -290,6 +290,24 @@ enum NodeType : unsigned {
// Operand 1: the bit mask
TDC,
+ // Strict variants of scalar floating-point comparisons.
+ // Quiet and signaling versions.
+ STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
+ STRICT_FCMPS,
+
+ // Strict variants of vector floating-point comparisons.
+ // Quiet and signaling versions.
+ STRICT_VFCMPE,
+ STRICT_VFCMPH,
+ STRICT_VFCMPHE,
+ STRICT_VFCMPES,
+ STRICT_VFCMPHS,
+ STRICT_VFCMPHES,
+
+ // Strict variants of VEXTEND and VROUND.
+ STRICT_VEXTEND,
+ STRICT_VROUND,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@@ -404,7 +422,8 @@ public:
bool isCheapToSpeculateCtlz() const override { return true; }
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT) const override;
- bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
@@ -530,11 +549,15 @@ private:
// Implement LowerOperation for individual opcodes.
SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
const SDLoc &DL, EVT VT,
- SDValue CmpOp0, SDValue CmpOp1) const;
+ SDValue CmpOp0, SDValue CmpOp1, SDValue Chain) const;
SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL,
EVT VT, ISD::CondCode CC,
- SDValue CmpOp0, SDValue CmpOp1) const;
+ SDValue CmpOp0, SDValue CmpOp1,
+ SDValue Chain = SDValue(),
+ bool IsSignaling = false) const;
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG,
+ bool IsSignaling) const;
SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 9c95e8aec940..6d03274fe8a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -221,13 +221,13 @@ let Predicates = [FeatureNoVectorEnhancements1] in {
// Convert a signed integer register value to a floating-point one.
let Uses = [FPC], mayRaiseFPException = 1 in {
- def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
- def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>;
- def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
+ def CEFBR : UnaryRRE<"cefbr", 0xB394, any_sint_to_fp, FP32, GR32>;
+ def CDFBR : UnaryRRE<"cdfbr", 0xB395, any_sint_to_fp, FP64, GR32>;
+ def CXFBR : UnaryRRE<"cxfbr", 0xB396, any_sint_to_fp, FP128, GR32>;
- def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>;
- def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>;
- def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+ def CEGBR : UnaryRRE<"cegbr", 0xB3A4, any_sint_to_fp, FP32, GR64>;
+ def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, any_sint_to_fp, FP64, GR64>;
+ def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, any_sint_to_fp, FP128, GR64>;
}
// The FP extension feature provides versions of the above that allow
@@ -254,13 +254,13 @@ let Predicates = [FeatureFPExtension] in {
def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>;
}
- def : Pat<(f32 (uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>;
- def : Pat<(f64 (uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>;
- def : Pat<(f128 (uint_to_fp GR32:$src)), (CXLFBR 0, GR32:$src, 0)>;
+ def : Pat<(f32 (any_uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>;
+ def : Pat<(f64 (any_uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>;
+ def : Pat<(f128 (any_uint_to_fp GR32:$src)), (CXLFBR 0, GR32:$src, 0)>;
- def : Pat<(f32 (uint_to_fp GR64:$src)), (CELGBR 0, GR64:$src, 0)>;
- def : Pat<(f64 (uint_to_fp GR64:$src)), (CDLGBR 0, GR64:$src, 0)>;
- def : Pat<(f128 (uint_to_fp GR64:$src)), (CXLGBR 0, GR64:$src, 0)>;
+ def : Pat<(f32 (any_uint_to_fp GR64:$src)), (CELGBR 0, GR64:$src, 0)>;
+ def : Pat<(f64 (any_uint_to_fp GR64:$src)), (CDLGBR 0, GR64:$src, 0)>;
+ def : Pat<(f128 (any_uint_to_fp GR64:$src)), (CXLGBR 0, GR64:$src, 0)>;
}
// Convert a floating-point register value to a signed integer value,
@@ -467,16 +467,16 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
// f64 multiplication of two FP32 registers.
let Uses = [FPC], mayRaiseFPException = 1 in
def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
-def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
- (f64 (fpextend FP32:$src2))),
+def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)),
+ (f64 (any_fpextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
FP32:$src1, subreg_h32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
let Uses = [FPC], mayRaiseFPException = 1 in
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
-def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
- (f64 (extloadf32 bdxaddr12only:$addr))),
+def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)),
+ (f64 (any_extloadf32 bdxaddr12only:$addr))),
(MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
bdxaddr12only:$addr)>;
@@ -484,8 +484,8 @@ def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
let Uses = [FPC], mayRaiseFPException = 1 in
def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
let Predicates = [FeatureNoVectorEnhancements1] in
- def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
- (f128 (fpextend FP64:$src2))),
+ def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)),
+ (f128 (any_fpextend FP64:$src2))),
(MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
FP64:$src1, subreg_h64), FP64:$src2)>;
@@ -493,8 +493,8 @@ let Predicates = [FeatureNoVectorEnhancements1] in
let Uses = [FPC], mayRaiseFPException = 1 in
def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
let Predicates = [FeatureNoVectorEnhancements1] in
- def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
- (f128 (extloadf64 bdxaddr12only:$addr))),
+ def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)),
+ (f128 (any_extloadf64 bdxaddr12only:$addr))),
(MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
bdxaddr12only:$addr)>;
@@ -537,19 +537,19 @@ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
//===----------------------------------------------------------------------===//
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in {
- def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>;
- def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>;
- def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>;
+ def CEBR : CompareRRE<"cebr", 0xB309, z_any_fcmp, FP32, FP32>;
+ def CDBR : CompareRRE<"cdbr", 0xB319, z_any_fcmp, FP64, FP64>;
+ def CXBR : CompareRRE<"cxbr", 0xB349, z_any_fcmp, FP128, FP128>;
- def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>;
- def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>;
+ def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, load, 4>;
+ def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, load, 8>;
- def KEBR : CompareRRE<"kebr", 0xB308, null_frag, FP32, FP32>;
- def KDBR : CompareRRE<"kdbr", 0xB318, null_frag, FP64, FP64>;
- def KXBR : CompareRRE<"kxbr", 0xB348, null_frag, FP128, FP128>;
+ def KEBR : CompareRRE<"kebr", 0xB308, z_strict_fcmps, FP32, FP32>;
+ def KDBR : CompareRRE<"kdbr", 0xB318, z_strict_fcmps, FP64, FP64>;
+ def KXBR : CompareRRE<"kxbr", 0xB348, z_strict_fcmps, FP128, FP128>;
- def KEB : CompareRXE<"keb", 0xED08, null_frag, FP32, load, 4>;
- def KDB : CompareRXE<"kdb", 0xED18, null_frag, FP64, load, 8>;
+ def KEB : CompareRXE<"keb", 0xED08, z_strict_fcmps, FP32, load, 4>;
+ def KDB : CompareRXE<"kdb", 0xED18, z_strict_fcmps, FP64, load, 8>;
}
// Test Data Class.
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index c9dbe3da686d..f064d33ac2f3 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -75,8 +75,9 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
// SystemZ::CCMASK_*.
bits<4> CCValues = 0;
- // The subset of CCValues that have the same meaning as they would after
- // a comparison of the first operand against zero.
+ // The subset of CCValues that have the same meaning as they would after a
+ // comparison of the first operand against zero. "Logical" instructions
+ // leave this blank as they set CC in a different way.
bits<4> CompareZeroCCMask = 0;
// True if the instruction is conditional and if the CC mask operand
@@ -87,9 +88,16 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
bit CCMaskLast = 0;
// True if the instruction is the "logical" rather than "arithmetic" form,
- // in cases where a distinction exists.
+ // in cases where a distinction exists. Except for logical compares, if the
+ // instruction sets this flag along with a non-zero CCValues field, it is
+ // assumed to set CC to either CCMASK_LOGICAL_ZERO or
+ // CCMASK_LOGICAL_NONZERO.
bit IsLogical = 0;
+ // True if the (add or sub) instruction sets CC like a compare of the
+ // result against zero, but only if the 'nsw' flag is set.
+ bit CCIfNoSignedWrap = 0;
+
let TSFlags{0} = SimpleBDXLoad;
let TSFlags{1} = SimpleBDXStore;
let TSFlags{2} = Has20BitOffset;
@@ -101,6 +109,7 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
let TSFlags{18} = CCMaskFirst;
let TSFlags{19} = CCMaskLast;
let TSFlags{20} = IsLogical;
+ let TSFlags{21} = CCIfNoSignedWrap;
}
//===----------------------------------------------------------------------===//
@@ -3200,6 +3209,8 @@ class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let CCMaskLast = 1;
+ let NumOpsKey = !subst("loc", "sel", mnemonic);
+ let NumOpsValue = "2";
}
// Like CondBinaryRRF, but used for the raw assembly form. The condition-code
@@ -3239,6 +3250,8 @@ class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
[(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
cond4:$valid, cond4:$M4))]> {
let CCMaskLast = 1;
+ let NumOpsKey = mnemonic;
+ let NumOpsValue = "3";
}
// Like CondBinaryRRFa, but used for the raw assembly form. The condition-code
@@ -4789,7 +4802,8 @@ class TestBinarySILPseudo<SDPatternOperator operator, ImmOpWithPattern imm>
// Like CondBinaryRRF, but expanded after RA depending on the choice of
// register.
-class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
+class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1,
+ RegisterOperand cls2>
: Pseudo<(outs cls1:$R1),
(ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3),
[(set cls1:$R1, (z_select_ccmask cls2:$R2, cls1:$R1src,
@@ -4797,17 +4811,21 @@ class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let CCMaskLast = 1;
+ let NumOpsKey = !subst("loc", "sel", mnemonic);
+ let NumOpsValue = "2";
}
// Like CondBinaryRRFa, but expanded after RA depending on the choice of
// register.
-class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2,
- RegisterOperand cls3>
+class CondBinaryRRFaPseudo<string mnemonic, RegisterOperand cls1,
+ RegisterOperand cls2, RegisterOperand cls3>
: Pseudo<(outs cls1:$R1),
(ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
[(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
cond4:$valid, cond4:$M4))]> {
let CCMaskLast = 1;
+ let NumOpsKey = mnemonic;
+ let NumOpsValue = "3";
}
// Like CondBinaryRIE, but expanded after RA depending on the choice of
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index bc783608d45b..97c8fa7aa32e 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -765,8 +765,8 @@ bool SystemZInstrInfo::PredicateInstruction(
void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
// Split 128-bit GPR moves into two 64-bit moves. Add implicit uses of the
// super register in case one of the subregs is undefined.
// This handles ADDR128 too.
@@ -791,12 +791,12 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Move 128-bit floating-point values between VR128 and FP128.
if (SystemZ::VR128BitRegClass.contains(DestReg) &&
SystemZ::FP128BitRegClass.contains(SrcReg)) {
- unsigned SrcRegHi =
- RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64),
- SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
- unsigned SrcRegLo =
- RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64),
- SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
+ MCRegister SrcRegHi =
+ RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64),
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
+ MCRegister SrcRegLo =
+ RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64),
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg)
.addReg(SrcRegHi, getKillRegState(KillSrc))
@@ -805,12 +805,12 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (SystemZ::FP128BitRegClass.contains(DestReg) &&
SystemZ::VR128BitRegClass.contains(SrcReg)) {
- unsigned DestRegHi =
- RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64),
- SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
- unsigned DestRegLo =
- RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64),
- SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
+ MCRegister DestRegHi =
+ RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64),
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
+ MCRegister DestRegLo =
+ RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64),
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
if (DestRegHi != SrcReg)
copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false);
@@ -945,6 +945,12 @@ static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) {
}
}
+static void transferMIFlag(MachineInstr *OldMI, MachineInstr *NewMI,
+ MachineInstr::MIFlag Flag) {
+ if (OldMI->getFlag(Flag))
+ NewMI->setFlag(Flag);
+}
+
MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
MachineBasicBlock *MBB = MI.getParent();
@@ -1050,6 +1056,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
.addImm(0)
.addImm(MI.getOperand(2).getImm());
transferDeadCC(&MI, BuiltMI);
+ transferMIFlag(&MI, BuiltMI, MachineInstr::NoSWrap);
return BuiltMI;
}
@@ -1200,6 +1207,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
if (MemDesc.TSFlags & SystemZII::HasIndex)
MIB.addReg(0);
transferDeadCC(&MI, MIB);
+ transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
return MIB;
}
}
@@ -1748,6 +1756,28 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
}
+bool SystemZInstrInfo::verifyInstruction(const MachineInstr &MI,
+ StringRef &ErrInfo) const {
+ const MCInstrDesc &MCID = MI.getDesc();
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ if (I >= MCID.getNumOperands())
+ break;
+ const MachineOperand &Op = MI.getOperand(I);
+ const MCOperandInfo &MCOI = MCID.OpInfo[I];
+ // Addressing modes have register and immediate operands. Op should be a
+ // register (or frame index) operand if MCOI.RegClass contains a valid
+ // register class, or an immediate otherwise.
+ if (MCOI.OperandType == MCOI::OPERAND_MEMORY &&
+ ((MCOI.RegClass != -1 && !Op.isReg() && !Op.isFI()) ||
+ (MCOI.RegClass == -1 && !Op.isImm()))) {
+ ErrInfo = "Addressing mode operands corrupt!";
+ return false;
+ }
+ }
+
+ return true;
+}
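The invariant being verified: within an addressing mode, a slot whose operand info carries a register class must hold a register or frame index, and a slot without one must hold an immediate. A self-contained mock of the predicate (the types are stand-ins, not the MC classes):

    #include <cstdio>

    struct OperandInfo { int RegClass; };     // -1 means "no register class"
    struct Operand { bool IsReg, IsFI, IsImm; };

    static bool verifyMemOperand(const OperandInfo &Info, const Operand &Op) {
      if (Info.RegClass != -1)
        return Op.IsReg || Op.IsFI;           // base/index slots
      return Op.IsImm;                        // displacement/length slots
    }

    int main() {
      OperandInfo BaseSlot{7}, DispSlot{-1};
      Operand Reg{true, false, false}, Imm{false, false, true};
      std::printf("base <- reg: %d\n", verifyMemOperand(BaseSlot, Reg));
      std::printf("disp <- reg: %d (corrupt)\n",
                  verifyMemOperand(DispSlot, Reg));
      return 0;
    }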
+
bool SystemZInstrInfo::
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
const MachineInstr &MIb) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 6dc6e72aa52a..8391970c7d9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -46,7 +46,8 @@ enum {
CompareZeroCCMaskShift = 14,
CCMaskFirst = (1 << 18),
CCMaskLast = (1 << 19),
- IsLogical = (1 << 20)
+ IsLogical = (1 << 20),
+ CCIfNoSignedWrap = (1 << 21)
};
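CCIfNoSignedWrap records that an add or sub sets CC like a compare of its result against zero only in the absence of signed wrap. A quick demonstration of why overflow breaks the equivalence:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t a = INT32_MAX, b = 1;
      int32_t wrapped = (int32_t)((uint32_t)a + (uint32_t)b); // wrapping add
      long long wide = (long long)a + b;     // the true mathematical sum
      // The true sum is positive but the stored result is negative, so the
      // add's CC (overflow) cannot substitute for "compare result, 0"
      // unless 'nsw' rules this case out.
      std::printf("wrapped=%d true=%lld\n", wrapped, wide);
      return 0;
    }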
static inline unsigned getAccessSize(unsigned int Flags) {
@@ -242,7 +243,7 @@ public:
bool PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -322,6 +323,10 @@ public:
MachineBasicBlock::iterator MBBI,
unsigned Reg, uint64_t Value) const;
+ // Perform target specific instruction verification.
+ bool verifyInstruction(const MachineInstr &MI,
+ StringRef &ErrInfo) const override;
+
// Sometimes, it is possible for the target to tell, even without
// aliasing information, that two MIs access different memory
// addresses. This function returns true if two MIs access different
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 8b334756611a..9579dcc0d1b6 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -492,7 +492,7 @@ let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
let isCommutable = 1 in {
// Expands to SELR or SELFHR or a branch-and-move sequence,
// depending on the choice of registers.
- def SELRMux : CondBinaryRRFaPseudo<GRX32, GRX32, GRX32>;
+ def SELRMux : CondBinaryRRFaPseudo<"selrmux", GRX32, GRX32, GRX32>;
defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>;
defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>;
@@ -525,7 +525,7 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
let isCommutable = 1 in {
// Expands to LOCR or LOCFHR or a branch-and-move sequence,
// depending on the choice of registers.
- def LOCRMux : CondBinaryRRFPseudo<GRX32, GRX32>;
+ def LOCRMux : CondBinaryRRFPseudo<"locrmux", GRX32, GRX32>;
defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
}
@@ -915,7 +915,7 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
//===----------------------------------------------------------------------===//
// Addition producing a signed overflow flag.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+let Defs = [CC], CCValues = 0xF, CCIfNoSignedWrap = 1 in {
// Addition of a register.
let isCommutable = 1 in {
defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>;
@@ -957,7 +957,7 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
defm : SXB<z_sadd, GR64, AGFR>;
// Addition producing a carry.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xF, IsLogical = 1 in {
// Addition of a register.
let isCommutable = 1 in {
defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, z_uadd, GR32, GR32>;
@@ -997,7 +997,7 @@ let Defs = [CC] in {
defm : ZXB<z_uadd, GR64, ALGFR>;
// Addition producing and using a carry.
-let Defs = [CC], Uses = [CC] in {
+let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in {
// Addition of a register.
def ALCR : BinaryRRE<"alcr", 0xB998, z_addcarry, GR32, GR32>;
def ALCGR : BinaryRRE<"alcgr", 0xB988, z_addcarry, GR64, GR64>;
@@ -1017,7 +1017,8 @@ def ALSIHN : BinaryRIL<"alsihn", 0xCCB, null_frag, GRH32, simm32>,
//===----------------------------------------------------------------------===//
// Subtraction producing a signed overflow flag.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8,
+ CCIfNoSignedWrap = 1 in {
// Subtraction of a register.
defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssub, GR32, GR32>;
def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>;
@@ -1066,7 +1067,7 @@ def : Pat<(z_saddo GR64:$src1, imm64lf32n:$src2),
(SGR GR64:$src1, (LLILF imm64lf32n:$src2))>;
// Subtraction producing a carry.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0x7, IsLogical = 1 in {
// Subtraction of a register.
defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, z_usub, GR32, GR32>;
def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>;
@@ -1104,7 +1105,7 @@ def : Pat<(add GR64:$src1, imm64zx32n:$src2),
(SLGFI GR64:$src1, imm64zx32n:$src2)>;
// Subtraction producing and using a carry.
-let Defs = [CC], Uses = [CC] in {
+let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in {
// Subtraction of a register.
def SLBR : BinaryRRE<"slbr", 0xB999, z_subcarry, GR32, GR32>;
def SLBGR : BinaryRRE<"slbgr", 0xB989, z_subcarry, GR64, GR64>;
@@ -2069,7 +2070,7 @@ let Predicates = [FeatureProcessorAssist] in {
def PPA : SideEffectTernaryRRFc<"ppa", 0xB2E8, GR64, GR64, imm32zx4>;
def : Pat<(int_s390_ppa_txassist GR32:$src),
(PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
- 0, 1)>;
+ zero_reg, 1)>;
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 02364bbda5c1..c945122ee577 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1034,7 +1034,7 @@ let Predicates = [FeatureVector] in {
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
}
- def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
+ def : FPConversion<VCDGB, any_sint_to_fp, v128db, v128g, 0, 0>;
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
@@ -1042,7 +1042,7 @@ let Predicates = [FeatureVector] in {
def VCEFB : TernaryVRRa<"vcefb", 0xE7C3, null_frag, v128sb, v128g, 2, 0>;
def WCEFB : TernaryVRRa<"wcefb", 0xE7C3, null_frag, v32sb, v32f, 2, 8>;
}
- def : FPConversion<VCEFB, sint_to_fp, v128sb, v128f, 0, 0>;
+ def : FPConversion<VCEFB, any_sint_to_fp, v128sb, v128f, 0, 0>;
}
// Convert from logical.
@@ -1051,7 +1051,7 @@ let Predicates = [FeatureVector] in {
def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
}
- def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
+ def : FPConversion<VCDLGB, any_uint_to_fp, v128db, v128g, 0, 0>;
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
@@ -1059,7 +1059,7 @@ let Predicates = [FeatureVector] in {
def VCELFB : TernaryVRRa<"vcelfb", 0xE7C1, null_frag, v128sb, v128g, 2, 0>;
def WCELFB : TernaryVRRa<"wcelfb", 0xE7C1, null_frag, v32sb, v32f, 2, 8>;
}
- def : FPConversion<VCELFB, uint_to_fp, v128sb, v128f, 0, 0>;
+ def : FPConversion<VCELFB, any_uint_to_fp, v128sb, v128f, 0, 0>;
}
// Convert to fixed.
@@ -1134,7 +1134,7 @@ let Predicates = [FeatureVector] in {
// Load lengthened.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
- def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
+ def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_any_vextend, v128db, v128sb, 2, 0>;
def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>;
}
let Predicates = [FeatureVectorEnhancements1] in {
@@ -1156,7 +1156,7 @@ let Predicates = [FeatureVector] in {
def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
}
- def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
+ def : Pat<(v4f32 (z_any_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>;
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
@@ -1175,7 +1175,7 @@ let Predicates = [FeatureVector] in {
// Maximum.
multiclass VectorMax<Instruction insn, TypedReg tr> {
def : FPMinMax<insn, any_fmaxnum, tr, 4>;
- def : FPMinMax<insn, fmaximum, tr, 1>;
+ def : FPMinMax<insn, any_fmaximum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
@@ -1201,7 +1201,7 @@ let Predicates = [FeatureVector] in {
// Minimum.
multiclass VectorMin<Instruction insn, TypedReg tr> {
def : FPMinMax<insn, any_fminnum, tr, 4>;
- def : FPMinMax<insn, fminimum, tr, 1>;
+ def : FPMinMax<insn, any_fminimum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
@@ -1364,32 +1364,32 @@ let Predicates = [FeatureVector] in {
// Compare scalar.
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
- def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
+ def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
- def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>;
- def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>;
+ def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2>;
+ def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_any_fcmp, v128xb, 4>;
}
}
// Compare and signal scalar.
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
- def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
+ def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
- def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>;
- def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, null_frag, v128xb, 4>;
+ def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2>;
+ def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, z_strict_fcmps, v128xb, 4>;
}
}
// Compare equal.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
- defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+ defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_any_vfcmpe, z_vfcmpes,
v128g, v128db, 3, 0>;
defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
v64g, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
- defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+ defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_any_vfcmpe, z_vfcmpes,
v128f, v128sb, 2, 0>;
defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
v32f, v32sb, 2, 8>;
@@ -1401,11 +1401,11 @@ let Predicates = [FeatureVector] in {
// Compare and signal equal.
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureVectorEnhancements1] in {
- defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag,
+ defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, z_strict_vfcmpes, null_frag,
v128g, v128db, 3, 4>;
defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
v64g, v64db, 3, 12>;
- defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, null_frag, null_frag,
+ defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, z_strict_vfcmpes, null_frag,
v128f, v128sb, 2, 4>;
defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag,
v32f, v32sb, 2, 12>;
@@ -1416,12 +1416,12 @@ let Predicates = [FeatureVector] in {
// Compare high.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
- defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
+ defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_any_vfcmph, z_vfcmphs,
v128g, v128db, 3, 0>;
defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
v64g, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
- defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs,
+ defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_any_vfcmph, z_vfcmphs,
v128f, v128sb, 2, 0>;
defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
v32f, v32sb, 2, 8>;
@@ -1433,11 +1433,11 @@ let Predicates = [FeatureVector] in {
// Compare and signal high.
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureVectorEnhancements1] in {
- defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag,
+ defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, z_strict_vfcmphs, null_frag,
v128g, v128db, 3, 4>;
defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
v64g, v64db, 3, 12>;
- defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, null_frag, null_frag,
+ defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, z_strict_vfcmphs, null_frag,
v128f, v128sb, 2, 4>;
defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag,
v32f, v32sb, 2, 12>;
@@ -1448,12 +1448,12 @@ let Predicates = [FeatureVector] in {
// Compare high or equal.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
- defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+ defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_any_vfcmphe, z_vfcmphes,
v128g, v128db, 3, 0>;
defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
v64g, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
- defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+ defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_any_vfcmphe, z_vfcmphes,
v128f, v128sb, 2, 0>;
defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
v32f, v32sb, 2, 8>;
@@ -1465,11 +1465,11 @@ let Predicates = [FeatureVector] in {
// Compare and signal high or equal.
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureVectorEnhancements1] in {
- defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag,
+ defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, z_strict_vfcmphes, null_frag,
v128g, v128db, 3, 4>;
defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,
v64g, v64db, 3, 12>;
- defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, null_frag, null_frag,
+ defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, z_strict_vfcmphes, null_frag,
v128f, v128sb, 2, 4>;
defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag,
v32f, v32sb, 2, 12>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index 724111229569..b1964321c78a 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -162,7 +162,7 @@ private:
void relaxBranches();
const SystemZInstrInfo *TII = nullptr;
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
SmallVector<MBBInfo, 16> MBBs;
SmallVector<TerminatorInfo, 16> Terminators;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 9eec3f37bc28..d1f6511ceea3 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -13,10 +13,22 @@
namespace llvm {
+namespace SystemZ {
+// A struct to hold the low and high GPR registers to be saved/restored as
+// well as the offset into the register save area of the low register.
+struct GPRRegs {
+ unsigned LowGPR;
+ unsigned HighGPR;
+ unsigned GPROffset;
+ GPRRegs() : LowGPR(0), HighGPR(0), GPROffset(0) {}
+ };
+}
+
class SystemZMachineFunctionInfo : public MachineFunctionInfo {
virtual void anchor();
- unsigned LowSavedGPR;
- unsigned HighSavedGPR;
+
+ SystemZ::GPRRegs SpillGPRRegs;
+ SystemZ::GPRRegs RestoreGPRRegs;
unsigned VarArgsFirstGPR;
unsigned VarArgsFirstFPR;
unsigned VarArgsFrameIndex;
@@ -27,19 +39,29 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
public:
explicit SystemZMachineFunctionInfo(MachineFunction &MF)
- : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
- VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0),
- ManipulatesSP(false), NumLocalDynamics(0) {}
-
- // Get and set the first call-saved GPR that should be saved and restored
- // by this function. This is 0 if no GPRs need to be saved or restored.
- unsigned getLowSavedGPR() const { return LowSavedGPR; }
- void setLowSavedGPR(unsigned Reg) { LowSavedGPR = Reg; }
-
- // Get and set the last call-saved GPR that should be saved and restored
- // by this function.
- unsigned getHighSavedGPR() const { return HighSavedGPR; }
- void setHighSavedGPR(unsigned Reg) { HighSavedGPR = Reg; }
+ : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0),
+ RegSaveFrameIndex(0), FramePointerSaveIndex(0), ManipulatesSP(false),
+ NumLocalDynamics(0) {}
+
+ // Get and set the first and last call-saved GPR that should be saved by
+ // this function and the SP offset for the STMG. These are 0 if no GPRs
+ // need to be saved or restored.
+ SystemZ::GPRRegs getSpillGPRRegs() const { return SpillGPRRegs; }
+ void setSpillGPRRegs(unsigned Low, unsigned High, unsigned Offs) {
+ SpillGPRRegs.LowGPR = Low;
+ SpillGPRRegs.HighGPR = High;
+ SpillGPRRegs.GPROffset = Offs;
+ }
+
+ // Get and set the first and last call-saved GPR that should be restored by
+ // this function and the SP offset for the LMG. These are 0 if no GPRs
+ // need to be saved or restored.
+ SystemZ::GPRRegs getRestoreGPRRegs() const { return RestoreGPRRegs; }
+ void setRestoreGPRRegs(unsigned Low, unsigned High, unsigned Offs) {
+ RestoreGPRRegs.LowGPR = Low;
+ RestoreGPRRegs.HighGPR = High;
+ RestoreGPRRegs.GPROffset = Offs;
+ }
// Get and set the number of fixed (as opposed to variable) arguments
// that are passed in GPRs to this function.
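A minimal usage sketch of the new spill accessors (register choices and the 48-byte offset are invented for illustration; SystemZ frame lowering supplies the real values when it emits the STMG):

#include "SystemZMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

// Hypothetical example: record an STMG that saves %r6-%r15 at offset 48
// into the register save area, then read the range back.
static void recordSpillRange(llvm::MachineFunction &MF) {
  auto *ZFI = MF.getInfo<llvm::SystemZMachineFunctionInfo>();
  ZFI->setSpillGPRRegs(llvm::SystemZ::R6D, llvm::SystemZ::R15D, 48);
  llvm::SystemZ::GPRRegs Spill = ZFI->getSpillGPRRegs();
  (void)Spill; // Spill.LowGPR, Spill.HighGPR and Spill.GPROffset are set.
}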
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td
index b2bab68a6274..bd40f6d7bf40 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -25,6 +25,7 @@ class ImmediateOp<ValueType vt, string asmop> : Operand<vt> {
let PrintMethod = "print"##asmop##"Operand";
let DecoderMethod = "decode"##asmop##"Operand";
let ParserMatchClass = !cast<AsmOperandClass>(asmop);
+ let OperandType = "OPERAND_IMMEDIATE";
}
class ImmOpWithPattern<ValueType vt, string asmop, code pred, SDNodeXForm xform,
@@ -63,13 +64,15 @@ class PCRelTLSAsmOperand<string size>
// Constructs an operand for a PC-relative address with address type VT.
// ASMOP is the associated asm operand.
-class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
- let PrintMethod = "printPCRelOperand";
- let ParserMatchClass = asmop;
-}
-class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
- let PrintMethod = "printPCRelTLSOperand";
- let ParserMatchClass = asmop;
+let OperandType = "OPERAND_PCREL" in {
+ class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
+ let PrintMethod = "printPCRelOperand";
+ let ParserMatchClass = asmop;
+ }
+ class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
+ let PrintMethod = "printPCRelTLSOperand";
+ let ParserMatchClass = asmop;
+ }
}
// Constructs both a DAG pattern and instruction operand for a PC-relative
@@ -105,6 +108,7 @@ class AddressOperand<string bitsize, string dispsize, string length,
let EncoderMethod = "get"##format##dispsize##length##"Encoding";
let DecoderMethod =
"decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
+ let OperandType = "OPERAND_MEMORY";
let MIOperandInfo = operands;
let ParserMatchClass =
!cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
@@ -508,7 +512,8 @@ defm imm64zx48 : Immediate<i64, [{
return isUInt<64>(N->getZExtValue());
}], UIMM48, "U48Imm">;
-def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>;
+let OperandType = "OPERAND_IMMEDIATE" in
+ def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>;
//===----------------------------------------------------------------------===//
// Floating-point immediates
@@ -657,4 +662,5 @@ def bdvaddr12only : BDVMode< "64", "12">;
def cond4 : PatLeaf<(i32 timm), [{ return (N->getZExtValue() < 16); }]>,
Operand<i32> {
let PrintMethod = "printCond4Operand";
+ let OperandType = "OPERAND_IMMEDIATE";
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
index 6fe383e64b74..a6a72903e573 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -258,6 +258,10 @@ def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
def z_iabs : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>;
def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>;
def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>;
+def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp,
+ [SDNPHasChain]>;
+def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp,
+ [SDNPHasChain]>;
def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp>;
def z_br_ccmask_1 : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
[SDNPHasChain]>;
@@ -328,13 +332,29 @@ def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinaryCC>;
def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinaryCC>;
def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinaryCC>;
def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
+def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
+def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
+def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>;
def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>;
def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>;
def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
+def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND",
+ SDT_ZVecUnaryConv, [SDNPHasChain]>;
def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
+def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND",
+ SDT_ZVecUnaryConv, [SDNPHasChain]>;
def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>;
def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>;
def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>;
@@ -707,6 +727,26 @@ def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
// Floating-point negative absolute.
def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
+// Strict floating-point fragments.
+def z_any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
+ [(z_strict_fcmp node:$lhs, node:$rhs),
+ (z_fcmp node:$lhs, node:$rhs)]>;
+def z_any_vfcmpe : PatFrags<(ops node:$lhs, node:$rhs),
+ [(z_strict_vfcmpe node:$lhs, node:$rhs),
+ (z_vfcmpe node:$lhs, node:$rhs)]>;
+def z_any_vfcmph : PatFrags<(ops node:$lhs, node:$rhs),
+ [(z_strict_vfcmph node:$lhs, node:$rhs),
+ (z_vfcmph node:$lhs, node:$rhs)]>;
+def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs),
+ [(z_strict_vfcmphe node:$lhs, node:$rhs),
+ (z_vfcmphe node:$lhs, node:$rhs)]>;
+def z_any_vextend : PatFrags<(ops node:$src),
+ [(z_strict_vextend node:$src),
+ (z_vextend node:$src)]>;
+def z_any_vround : PatFrags<(ops node:$src),
+ [(z_strict_vround node:$src),
+ (z_vround node:$src)]>;
+
// Create a unary operator that loads from memory and then performs
// the given operation on it.
class loadu<SDPatternOperator operator, SDPatternOperator load = load>
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td
index 65300fb47627..501a69488397 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -148,9 +148,9 @@ multiclass BlockLoadStore<SDPatternOperator load, ValueType vt,
// registers in CLS against zero. The instruction has separate R1 and R2
// operands, but they must be the same when the instruction is used like this.
multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> {
- def : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
+ def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
// The sign of the zero makes no difference.
- def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
+ def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
}
// Use INSN for performing binary operation OPERATION of type VT
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 39ace5594b7f..0d5e7af92523 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -87,6 +87,52 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
VirtReg, Order, Hints, MF, VRM, Matrix);
+ if (VRM != nullptr) {
+ // Add any two address hints after any copy hints.
+ SmallSet<unsigned, 4> TwoAddrHints;
+ for (auto &Use : MRI->reg_nodbg_instructions(VirtReg))
+ if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) {
+ const MachineOperand *VRRegMO = nullptr;
+ const MachineOperand *OtherMO = nullptr;
+ const MachineOperand *CommuMO = nullptr;
+ if (VirtReg == Use.getOperand(0).getReg()) {
+ VRRegMO = &Use.getOperand(0);
+ OtherMO = &Use.getOperand(1);
+ if (Use.isCommutable())
+ CommuMO = &Use.getOperand(2);
+ } else if (VirtReg == Use.getOperand(1).getReg()) {
+ VRRegMO = &Use.getOperand(1);
+ OtherMO = &Use.getOperand(0);
+ } else if (VirtReg == Use.getOperand(2).getReg() &&
+ Use.isCommutable()) {
+ VRRegMO = &Use.getOperand(2);
+ OtherMO = &Use.getOperand(0);
+ } else
+ continue;
+
+ auto tryAddHint = [&](const MachineOperand *MO) -> void {
+ Register Reg = MO->getReg();
+ Register PhysReg =
+ Register::isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
+ if (PhysReg) {
+ if (MO->getSubReg())
+ PhysReg = getSubReg(PhysReg, MO->getSubReg());
+ if (VRRegMO->getSubReg())
+ PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(),
+ MRI->getRegClass(VirtReg));
+ if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
+ TwoAddrHints.insert(PhysReg);
+ }
+ };
+ tryAddHint(OtherMO);
+ if (CommuMO)
+ tryAddHint(CommuMO);
+ }
+ for (MCPhysReg OrderReg : Order)
+ if (TwoAddrHints.count(OrderReg))
+ Hints.push_back(OrderReg);
+ }
+
if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) {
SmallVector<unsigned, 8> Worklist;
SmallSet<unsigned, 4> DoneRegs;
@@ -143,58 +189,14 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
}
- if (VRM == nullptr)
- return BaseImplRetVal;
-
- // Add any two address hints after any copy hints.
- SmallSet<unsigned, 4> TwoAddrHints;
- for (auto &Use : MRI->reg_nodbg_instructions(VirtReg))
- if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) {
- const MachineOperand *VRRegMO = nullptr;
- const MachineOperand *OtherMO = nullptr;
- const MachineOperand *CommuMO = nullptr;
- if (VirtReg == Use.getOperand(0).getReg()) {
- VRRegMO = &Use.getOperand(0);
- OtherMO = &Use.getOperand(1);
- if (Use.isCommutable())
- CommuMO = &Use.getOperand(2);
- } else if (VirtReg == Use.getOperand(1).getReg()) {
- VRRegMO = &Use.getOperand(1);
- OtherMO = &Use.getOperand(0);
- } else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) {
- VRRegMO = &Use.getOperand(2);
- OtherMO = &Use.getOperand(0);
- } else
- continue;
-
- auto tryAddHint = [&](const MachineOperand *MO) -> void {
- Register Reg = MO->getReg();
- Register PhysReg =
- Register::isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
- if (PhysReg) {
- if (MO->getSubReg())
- PhysReg = getSubReg(PhysReg, MO->getSubReg());
- if (VRRegMO->getSubReg())
- PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(),
- MRI->getRegClass(VirtReg));
- if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
- TwoAddrHints.insert(PhysReg);
- }
- };
- tryAddHint(OtherMO);
- if (CommuMO)
- tryAddHint(CommuMO);
- }
- for (MCPhysReg OrderReg : Order)
- if (TwoAddrHints.count(OrderReg))
- Hints.push_back(OrderReg);
-
return BaseImplRetVal;
}
const MCPhysReg *
SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
+ if (MF->getFunction().getCallingConv() == CallingConv::GHC)
+ return CSR_SystemZ_NoRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
return Subtarget.hasVector()? CSR_SystemZ_AllRegs_Vector_SaveList
: CSR_SystemZ_AllRegs_SaveList;
@@ -209,6 +211,8 @@ const uint32_t *
SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ if (CC == CallingConv::GHC)
+ return CSR_SystemZ_NoRegs_RegMask;
if (CC == CallingConv::AnyReg)
return Subtarget.hasVector()? CSR_SystemZ_AllRegs_Vector_RegMask
: CSR_SystemZ_AllRegs_RegMask;
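The GHC calling convention treats every register as caller-owned, so the two checks above report no callee-saved registers for it. A minimal sketch (helper name invented) of how IR opts a function into that convention:

#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"

// Sketch: mark F as using the GHC calling convention; with the change
// above, SystemZ then selects CSR_SystemZ_NoRegs_SaveList/RegMask for it.
static void markAsGHC(llvm::Function &F) {
  F.setCallingConv(llvm::CallingConv::GHC);
}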
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index 2aca22c9082a..f6184cec795a 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -46,7 +46,6 @@ private:
bool shortenOn001(MachineInstr &MI, unsigned Opcode);
bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
- bool shortenSelect(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -176,23 +175,6 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
return false;
}
-// MI is a three-operand select instruction. If one of the sources match
-// the destination, convert to the equivalent load-on-condition.
-bool SystemZShortenInst::shortenSelect(MachineInstr &MI, unsigned Opcode) {
- if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
- MI.setDesc(TII->get(Opcode));
- MI.tieOperands(0, 1);
- return true;
- }
- if (MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
- TII->commuteInstruction(MI, false, 1, 2);
- MI.setDesc(TII->get(Opcode));
- MI.tieOperands(0, 1);
- return true;
- }
- return false;
-}
-
// Process all instructions in MBB. Return true if something changed.
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@@ -213,18 +195,6 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH);
break;
- case SystemZ::SELR:
- Changed |= shortenSelect(MI, SystemZ::LOCR);
- break;
-
- case SystemZ::SELFHR:
- Changed |= shortenSelect(MI, SystemZ::LOCFHR);
- break;
-
- case SystemZ::SELGR:
- Changed |= shortenSelect(MI, SystemZ::LOCGR);
- break;
-
case SystemZ::WFADB:
Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
break;
@@ -313,6 +283,14 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
Changed |= shortenOn01(MI, SystemZ::CEBR);
break;
+ case SystemZ::WFKDB:
+ Changed |= shortenOn01(MI, SystemZ::KDBR);
+ break;
+
+ case SystemZ::WFKSB:
+ Changed |= shortenOn01(MI, SystemZ::KEBR);
+ break;
+
case SystemZ::VL32:
// For z13 we prefer LDE over LE to avoid partial register dependencies.
Changed |= shortenOn0(MI, SystemZ::LDE32);
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp
index 478848c30701..f103812eb096 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp
@@ -50,6 +50,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include <deque>
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 20865037fe38..dfcdb5356485 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -29,7 +29,7 @@
using namespace llvm;
-extern "C" void LLVMInitializeSystemZTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() {
// Register the target.
RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 11c99aa11174..acec3c533585 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -62,7 +62,7 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 4 * TTI::TCC_Basic;
}
-int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
@@ -180,8 +180,8 @@ int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
return SystemZTTIImpl::getIntImmCost(Imm, Ty);
}
-int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -259,7 +259,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
if (isa<StoreInst>(&I)) {
Type *MemAccessTy = I.getOperand(0)->getType();
- NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
+ NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
}
}
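MaybeAlign replaces the old 0-means-unknown sentinel seen in this call. A small sketch of the distinction, assuming only llvm/Support/Alignment.h:

#include "llvm/Support/Alignment.h"

// Sketch: an empty MaybeAlign models "alignment unknown" (the `None`
// passed above); a constructed one carries a known power-of-two value.
static llvm::MaybeAlign pickAlignment(bool Known) {
  if (!Known)
    return llvm::MaybeAlign(); // unknown, equivalent to passing None
  return llvm::MaybeAlign(8);  // a known 8-byte alignment
}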
@@ -348,11 +348,10 @@ static unsigned getNumVectorRegs(Type *Ty) {
}
int SystemZTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
- TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo,
- ArrayRef<const Value *> Args) {
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
// TODO: return a good value for BB-VECTORIZER that includes the
// immediate loads, which we do not want to count for the loop
@@ -508,7 +507,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// Fallback to the default implementation.
return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
- Opd1PropInfo, Opd2PropInfo, Args);
+ Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
@@ -995,7 +994,7 @@ static bool isBswapIntrinsicCall(const Value *V) {
}
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment, unsigned AddressSpace,
+ MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) {
assert(!Src->isVoidTy() && "Invalid type");
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 3ba80b31439f..bc4d066881c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -40,9 +40,9 @@ public:
int getIntImmCost(const APInt &Imm, Type *Ty);
- int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
- int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
@@ -75,7 +75,8 @@ public:
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
@@ -87,7 +88,7 @@ public:
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
- int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
index 713a55ee8400..36291e079882 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -16,7 +16,7 @@ Target &llvm::getTheSystemZTarget() {
return TheSystemZTarget;
}
-extern "C" void LLVMInitializeSystemZTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetInfo() {
RegisterTarget<Triple::systemz, /*HasJIT=*/true> X(
getTheSystemZTarget(), "systemz", "SystemZ", "SystemZ");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
index 4c98e140f446..97a1eb2f190a 100644
--- a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
@@ -36,8 +36,8 @@ TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
const TargetOptions &Options)
: TheTarget(T), DL(DataLayoutString), TargetTriple(TT), TargetCPU(CPU),
TargetFS(FS), AsmInfo(nullptr), MRI(nullptr), MII(nullptr), STI(nullptr),
- RequireStructuredCFG(false), DefaultOptions(Options), Options(Options) {
-}
+ RequireStructuredCFG(false), O0WantsFastISel(false),
+ DefaultOptions(Options), Options(Options) {}
TargetMachine::~TargetMachine() = default;
@@ -184,15 +184,14 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
const Function *F = dyn_cast_or_null<Function>(GV);
if (F && F->hasFnAttribute(Attribute::NonLazyBind))
return false;
-
- bool IsTLS = GV && GV->isThreadLocal();
- bool IsAccessViaCopyRelocs =
- GV && Options.MCOptions.MCPIECopyRelocations && isa<GlobalVariable>(GV);
Triple::ArchType Arch = TT.getArch();
- bool IsPPC =
- Arch == Triple::ppc || Arch == Triple::ppc64 || Arch == Triple::ppc64le;
- // Check if we can use copy relocations. PowerPC has no copy relocations.
- if (!IsTLS && !IsPPC && (RM == Reloc::Static || IsAccessViaCopyRelocs))
+
+ // PowerPC prefers avoiding copy relocations.
+ if (Arch == Triple::ppc || TT.isPPC64())
+ return false;
+
+ // Check if we can use copy relocations.
+ if (!(GV && GV->isThreadLocal()) && RM == Reloc::Static)
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp
index 3ac9c38dfc0b..a38633e1f27e 100644
--- a/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp
@@ -195,13 +195,13 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
Mod->setDataLayout(TM->createDataLayout());
- TargetMachine::CodeGenFileType ft;
+ CodeGenFileType ft;
switch (codegen) {
case LLVMAssemblyFile:
- ft = TargetMachine::CGFT_AssemblyFile;
+ ft = CGFT_AssemblyFile;
break;
default:
- ft = TargetMachine::CGFT_ObjectFile;
+ ft = CGFT_ObjectFile;
break;
}
if (TM->addPassesToEmitFile(pass, OS, nullptr, ft)) {
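For context, a hedged sketch of driving this path through the C API (output name invented; in this LLVM version the filename parameter is a mutable char *):

#include "llvm-c/Core.h"
#include "llvm-c/TargetMachine.h"

// Emit an object file via the C API; LLVMObjectFile now maps to the
// free-standing CGFT_ObjectFile used above.
static int emitObject(LLVMTargetMachineRef TM, LLVMModuleRef M) {
  char Path[] = "out.o"; // invented output name
  char *Err = nullptr;
  if (LLVMTargetMachineEmitToFile(TM, M, Path, LLVMObjectFile, &Err)) {
    LLVMDisposeMessage(Err); // Err carries the failure message
    return 1;
  }
  return 0;
}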
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp
new file mode 100644
index 000000000000..4e7bcd36c32a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp
@@ -0,0 +1,118 @@
+//===-- VEInstPrinter.cpp - Convert VE MCInst to assembly syntax -----------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a VE MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEInstPrinter.h"
+#include "VE.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ve-asmprinter"
+
+// The generated AsmWriter VEGenAsmWriter uses "VE" as the target
+// namespace.
+namespace llvm {
+namespace VE {
+using namespace VE;
+}
+} // namespace llvm
+
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "VEGenAsmWriter.inc"
+
+void VEInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << '%' << StringRef(getRegisterName(RegNo)).lower();
+}
+
+void VEInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &OS) {
+ if (!printAliasInstr(MI, STI, OS))
+ printInstruction(MI, Address, STI, OS);
+ printAnnotation(OS, Annot);
+}
+
+void VEInstPrinter::printOperand(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(opNum);
+
+ if (MO.isReg()) {
+ printRegName(O, MO.getReg());
+ return;
+ }
+
+ if (MO.isImm()) {
+ switch (MI->getOpcode()) {
+ default:
+      // Expects signed 32-bit literals.
+ assert(isInt<32>(MO.getImm()) && "Immediate too large");
+ int32_t TruncatedImm = static_cast<int32_t>(MO.getImm());
+ O << TruncatedImm;
+ return;
+ }
+ }
+
+ assert(MO.isExpr() && "Unknown operand kind in printOperand");
+ MO.getExpr()->print(O, &MAI);
+}
+
+void VEInstPrinter::printMemASXOperand(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, const char *Modifier) {
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ printOperand(MI, opNum, STI, O);
+ O << ", ";
+ printOperand(MI, opNum + 1, STI, O);
+ return;
+ }
+
+ const MCOperand &MO = MI->getOperand(opNum + 1);
+ if (!MO.isImm() || MO.getImm() != 0) {
+ printOperand(MI, opNum + 1, STI, O);
+ }
+ O << "(,";
+ printOperand(MI, opNum, STI, O);
+ O << ")";
+}
+
+void VEInstPrinter::printMemASOperand(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, const char *Modifier) {
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ printOperand(MI, opNum, STI, O);
+ O << ", ";
+ printOperand(MI, opNum + 1, STI, O);
+ return;
+ }
+
+ const MCOperand &MO = MI->getOperand(opNum + 1);
+ if (!MO.isImm() || MO.getImm() != 0) {
+ printOperand(MI, opNum + 1, STI, O);
+ }
+ O << "(";
+ printOperand(MI, opNum, STI, O);
+ O << ")";
+}
+
+void VEInstPrinter::printCCOperand(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ int CC = (int)MI->getOperand(opNum).getImm();
+ O << VECondCodeToString((VECC::CondCodes)CC);
+}
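A hand-worked illustration of the two memory syntaxes printed above (values invented): ASX operands print as disp(,base), AS operands as disp(base):

#include <cstdio>

// Standalone sketch of the printed forms, not the printer itself.
int main() {
  long Disp = 8;
  const char *Base = "%s11";
  std::printf("%ld(,%s)\n", Disp, Base); // printMemASXOperand: 8(,%s11)
  std::printf("%ld(%s)\n", Disp, Base);  // printMemASOperand:  8(%s11)
  return 0;
}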
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h
new file mode 100644
index 000000000000..05a53d59e878
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h
@@ -0,0 +1,49 @@
+//===-- VEInstPrinter.h - Convert VE MCInst to assembly syntax ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a VE MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_INSTPRINTER_VEINSTPRINTER_H
+#define LLVM_LIB_TARGET_VE_INSTPRINTER_VEINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class VEInstPrinter : public MCInstPrinter {
+public:
+ VEInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &OS) override;
+
+ // Autogenerated by tblgen.
+ bool printAliasInstr(const MCInst *, const MCSubtargetInfo &, raw_ostream &);
+ void printInstruction(const MCInst *, uint64_t, const MCSubtargetInfo &,
+ raw_ostream &);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ void printMemASXOperand(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI, raw_ostream &OS,
+ const char *Modifier = nullptr);
+ void printMemASOperand(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI, raw_ostream &OS,
+ const char *Modifier = nullptr);
+ void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+};
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
new file mode 100644
index 000000000000..9f29fc092c69
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
@@ -0,0 +1,40 @@
+//===- VEMCAsmInfo.cpp - VE asm properties --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the VEMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCTargetOptions.h"
+
+using namespace llvm;
+
+void VEELFMCAsmInfo::anchor() {}
+
+VEELFMCAsmInfo::VEELFMCAsmInfo(const Triple &TheTriple) {
+
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
+ MaxInstLength = MinInstAlignment = 8;
+
+ // VE uses ".*byte" directive for unaligned data.
+ Data8bitsDirective = "\t.byte\t";
+ Data16bitsDirective = "\t.2byte\t";
+ Data32bitsDirective = "\t.4byte\t";
+ Data64bitsDirective = "\t.8byte\t";
+
+  // Use '.section' before the '.bss' directive; VE requires this even
+  // though the assembler manual says a plain '.bss' is supported.
+ UsesELFSectionDirectiveForBSS = true;
+
+ SupportsDebugInformation = true;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h
new file mode 100644
index 000000000000..6557d68b383c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h
@@ -0,0 +1,31 @@
+//===- VEMCAsmInfo.h - VE asm properties -----------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the VEMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCASMINFO_H
+#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfoELF.h"
+
+namespace llvm {
+
+class Triple;
+
+class VEELFMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit VEELFMCAsmInfo(const Triple &TheTriple);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCASMINFO_H
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
new file mode 100644
index 000000000000..b228617058a6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
@@ -0,0 +1,106 @@
+//===-- VEMCTargetDesc.cpp - VE Target Descriptions -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides VE specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEMCTargetDesc.h"
+#include "InstPrinter/VEInstPrinter.h"
+#include "VEMCAsmInfo.h"
+#include "VETargetStreamer.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_MC_DESC
+#include "VEGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "VEGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "VEGenRegisterInfo.inc"
+
+static MCAsmInfo *createVEMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT,
+ const MCTargetOptions &Options) {
+ MCAsmInfo *MAI = new VEELFMCAsmInfo(TT);
+ unsigned Reg = MRI.getDwarfRegNum(VE::SX11, true);
+ MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0);
+ MAI->addInitialFrameState(Inst);
+ return MAI;
+}
+
+static MCInstrInfo *createVEMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitVEMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createVEMCRegisterInfo(const Triple &TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitVEMCRegisterInfo(X, VE::SX10);
+ return X;
+}
+
+static MCSubtargetInfo *createVEMCSubtargetInfo(const Triple &TT, StringRef CPU,
+ StringRef FS) {
+ if (CPU.empty())
+ CPU = "ve";
+ return createVEMCSubtargetInfoImpl(TT, CPU, FS);
+}
+
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ return new VETargetELFStreamer(S);
+}
+
+static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new VETargetAsmStreamer(S, OS);
+}
+
+static MCInstPrinter *createVEMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ return new VEInstPrinter(MAI, MII, MRI);
+}
+
+extern "C" void LLVMInitializeVETargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(getTheVETarget(), createVEMCAsmInfo);
+
+ for (Target *T : {&getTheVETarget()}) {
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createVEMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createVEMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createVEMCSubtargetInfo);
+
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createTargetAsmStreamer);
+
+ // Register the MCInstPrinter
+ TargetRegistry::RegisterMCInstPrinter(*T, createVEMCInstPrinter);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
new file mode 100644
index 000000000000..24a5c8209be2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
@@ -0,0 +1,53 @@
+//===-- VEMCTargetDesc.h - VE Target Descriptions ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides VE specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCTARGETDESC_H
+#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+#include <memory>
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class MCTargetOptions;
+class Target;
+class Triple;
+class StringRef;
+class raw_pwrite_stream;
+class raw_ostream;
+
+Target &getTheVETarget();
+
+} // namespace llvm
+
+// Defines symbolic names for VE registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "VEGenRegisterInfo.inc"
+
+// Defines symbolic names for the VE instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "VEGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "VEGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp
new file mode 100644
index 000000000000..dfe94bbaaa4b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp
@@ -0,0 +1,44 @@
+//===-- VETargetStreamer.cpp - VE Target Streamer Methods -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides VE specific target streamer methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VETargetStreamer.h"
+#include "InstPrinter/VEInstPrinter.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+// Pin the vtable to this file.
+VETargetStreamer::VETargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+
+void VETargetStreamer::anchor() {}
+
+VETargetAsmStreamer::VETargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS)
+ : VETargetStreamer(S), OS(OS) {}
+
+void VETargetAsmStreamer::emitVERegisterIgnore(unsigned reg) {
+ OS << "\t.register "
+ << "%" << StringRef(VEInstPrinter::getRegisterName(reg)).lower()
+ << ", #ignore\n";
+}
+
+void VETargetAsmStreamer::emitVERegisterScratch(unsigned reg) {
+ OS << "\t.register "
+ << "%" << StringRef(VEInstPrinter::getRegisterName(reg)).lower()
+ << ", #scratch\n";
+}
+
+VETargetELFStreamer::VETargetELFStreamer(MCStreamer &S) : VETargetStreamer(S) {}
+
+MCELFStreamer &VETargetELFStreamer::getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h
new file mode 100644
index 000000000000..6f6a0d4b02d7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h
@@ -0,0 +1,47 @@
+//===-- VETargetStreamer.h - VE Target Streamer ----------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VETARGETSTREAMER_H
+#define LLVM_LIB_TARGET_VE_VETARGETSTREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class VETargetStreamer : public MCTargetStreamer {
+ virtual void anchor();
+
+public:
+ VETargetStreamer(MCStreamer &S);
+ /// Emit ".register <reg>, #ignore".
+ virtual void emitVERegisterIgnore(unsigned reg) = 0;
+ /// Emit ".register <reg>, #scratch".
+ virtual void emitVERegisterScratch(unsigned reg) = 0;
+};
+
+// This part is for ascii assembly output
+class VETargetAsmStreamer : public VETargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ VETargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+ void emitVERegisterIgnore(unsigned reg) override;
+ void emitVERegisterScratch(unsigned reg) override;
+};
+
+// This part is for ELF object output
+class VETargetELFStreamer : public VETargetStreamer {
+public:
+ VETargetELFStreamer(MCStreamer &S);
+ MCELFStreamer &getStreamer();
+ void emitVERegisterIgnore(unsigned reg) override {}
+ void emitVERegisterScratch(unsigned reg) override {}
+};
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
new file mode 100644
index 000000000000..be68fe7d2429
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- VETargetInfo.cpp - VE Target Implementation -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VE.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+Target &llvm::getTheVETarget() {
+ static Target TheVETarget;
+ return TheVETarget;
+}
+
+extern "C" void LLVMInitializeVETargetInfo() {
+ RegisterTarget<Triple::ve, /*HasJIT=*/false> X(getTheVETarget(), "ve",
+ "VE", "VE");
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.h b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
new file mode 100644
index 000000000000..9b61f2b63f36
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
@@ -0,0 +1,109 @@
+//===-- VE.h - Top-level interface for VE representation --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// VE back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VE_H
+#define LLVM_LIB_TARGET_VE_VE_H
+
+#include "MCTargetDesc/VEMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class FunctionPass;
+class VETargetMachine;
+class formatted_raw_ostream;
+class AsmPrinter;
+class MCInst;
+class MachineInstr;
+
+FunctionPass *createVEISelDag(VETargetMachine &TM);
+FunctionPass *createVEPromoteToI1Pass();
+
+void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP);
+} // namespace llvm
+
+namespace llvm {
+// Enums corresponding to VE condition codes, both icc's and fcc's. These
+// values must be kept in sync with the ones in the .td file.
+namespace VECC {
+enum CondCodes {
+ // Integer comparison
+ CC_IG = 0, // Greater
+ CC_IL = 1, // Less
+ CC_INE = 2, // Not Equal
+ CC_IEQ = 3, // Equal
+ CC_IGE = 4, // Greater or Equal
+ CC_ILE = 5, // Less or Equal
+
+ // Floating point comparison
+ CC_AF = 0 + 6, // Never
+ CC_G = 1 + 6, // Greater
+ CC_L = 2 + 6, // Less
+ CC_NE = 3 + 6, // Not Equal
+ CC_EQ = 4 + 6, // Equal
+ CC_GE = 5 + 6, // Greater or Equal
+ CC_LE = 6 + 6, // Less or Equal
+ CC_NUM = 7 + 6, // Number
+ CC_NAN = 8 + 6, // NaN
+ CC_GNAN = 9 + 6, // Greater or NaN
+ CC_LNAN = 10 + 6, // Less or NaN
+ CC_NENAN = 11 + 6, // Not Equal or NaN
+ CC_EQNAN = 12 + 6, // Equal or NaN
+ CC_GENAN = 13 + 6, // Greater or Equal or NaN
+ CC_LENAN = 14 + 6, // Less or Equal or NaN
+ CC_AT = 15 + 6, // Always
+};
+}
+
+inline static const char *VECondCodeToString(VECC::CondCodes CC) {
+ switch (CC) {
+ case VECC::CC_IG: return "gt";
+ case VECC::CC_IL: return "lt";
+ case VECC::CC_INE: return "ne";
+ case VECC::CC_IEQ: return "eq";
+ case VECC::CC_IGE: return "ge";
+ case VECC::CC_ILE: return "le";
+ case VECC::CC_AF: return "af";
+ case VECC::CC_G: return "gt";
+ case VECC::CC_L: return "lt";
+ case VECC::CC_NE: return "ne";
+ case VECC::CC_EQ: return "eq";
+ case VECC::CC_GE: return "ge";
+ case VECC::CC_LE: return "le";
+ case VECC::CC_NUM: return "num";
+ case VECC::CC_NAN: return "nan";
+ case VECC::CC_GNAN: return "gtnan";
+ case VECC::CC_LNAN: return "ltnan";
+ case VECC::CC_NENAN: return "nenan";
+ case VECC::CC_EQNAN: return "eqnan";
+ case VECC::CC_GENAN: return "genan";
+ case VECC::CC_LENAN: return "lenan";
+ case VECC::CC_AT: return "at";
+ }
+ llvm_unreachable("Invalid cond code");
+}
+
+// Unlike Hi_32/Lo_32, the HI32 and LO32 functions preserve the correct
+// numerical value in the LLVM data type for MC immediates (int64_t).
+inline static int64_t HI32(int64_t imm) {
+ return (int32_t)(imm >> 32);
+}
+
+inline static int64_t LO32(int64_t imm) {
+ return (int32_t)(imm);
+}
+
+} // namespace llvm
+#endif
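A quick round-trip check of HI32/LO32 (standalone copies for illustration): VEFrameLowering's lea/and/lea.sl sequence rebuilds an immediate as (HI32 << 32) plus the zero-extended LO32, which the assertion below mirrors:

#include <cassert>
#include <cstdint>

// Local copies of VE.h's HI32/LO32, kept as int64_t like the originals.
static int64_t HI32(int64_t imm) { return (int32_t)(imm >> 32); }
static int64_t LO32(int64_t imm) { return (int32_t)imm; }

int main() {
  int64_t imm = -0x123456789abLL; // arbitrary test value
  // The carry from a negative LO32 is absorbed because HI32 comes from
  // an arithmetic shift of the full 64-bit immediate.
  uint64_t rebuilt = ((uint64_t)HI32(imm) << 32) + (uint32_t)LO32(imm);
  assert((int64_t)rebuilt == imm);
  return 0;
}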
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.td b/contrib/llvm-project/llvm/lib/Target/VE/VE.td
new file mode 100644
index 000000000000..7404321b1a06
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.td
@@ -0,0 +1,56 @@
+//===-- VE.td - Describe the VE Target Machine -------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// VE Subtarget features.
+//
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "VERegisterInfo.td"
+include "VECallingConv.td"
+include "VEInstrInfo.td"
+
+def VEInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// VE processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"ve", []>;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def VEAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 0;
+}
+
+def VE : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = VEInstrInfo;
+ let AssemblyWriters = [VEAsmWriter];
+ let AllowRegisterRenaming = 1;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp
new file mode 100644
index 000000000000..918f2a1acdaf
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp
@@ -0,0 +1,78 @@
+//===-- VEAsmPrinter.cpp - VE LLVM assembly writer ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format VE assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstPrinter/VEInstPrinter.h"
+#include "MCTargetDesc/VETargetStreamer.h"
+#include "VE.h"
+#include "VEInstrInfo.h"
+#include "VETargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ve-asmprinter"
+
+namespace {
+class VEAsmPrinter : public AsmPrinter {
+ VETargetStreamer &getTargetStreamer() {
+ return static_cast<VETargetStreamer &>(*OutStreamer->getTargetStreamer());
+ }
+
+public:
+ explicit VEAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
+
+ StringRef getPassName() const override { return "VE Assembly Printer"; }
+
+ void EmitInstruction(const MachineInstr *MI) override;
+
+ static const char *getRegisterName(unsigned RegNo) {
+ return VEInstPrinter::getRegisterName(RegNo);
+ }
+};
+} // end of anonymous namespace
+
+void VEAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::DBG_VALUE:
+ // FIXME: Debug Value.
+ return;
+ }
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ do {
+ MCInst TmpInst;
+ LowerVEMachineInstrToMCInst(&*I, TmpInst, *this);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ } while ((++I != E) && I->isInsideBundle()); // Delay slot check.
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeVEAsmPrinter() {
+ RegisterAsmPrinter<VEAsmPrinter> X(getTheVETarget());
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td
new file mode 100644
index 000000000000..1a9097c79dd4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td
@@ -0,0 +1,19 @@
+//===-- VECallingConv.td - Calling Conventions VE ----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the VE architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Aurora VE
+//===----------------------------------------------------------------------===//
+
+// Callee-saved registers
+def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>;
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp
new file mode 100644
index 000000000000..ef5b5f055911
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -0,0 +1,325 @@
+//===-- VEFrameLowering.cpp - VE Frame Information ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the VE implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEFrameLowering.h"
+#include "VEInstrInfo.h"
+#include "VESubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+VEFrameLowering::VEFrameLowering(const VESubtarget &ST)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16), 0,
+ Align(16)) {}
+
+void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int NumBytes,
+ bool RequireFPUpdate) const {
+
+ DebugLoc dl;
+ const VEInstrInfo &TII =
+ *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  // Insert the following code here as the prologue:
+ //
+ // st %fp, 0(,%sp)
+ // st %lr, 8(,%sp)
+ // st %got, 24(,%sp)
+ // st %plt, 32(,%sp)
+ // or %fp, 0, %sp
+
+ BuildMI(MBB, MBBI, dl, TII.get(VE::STSri))
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addReg(VE::SX9);
+ BuildMI(MBB, MBBI, dl, TII.get(VE::STSri))
+ .addReg(VE::SX11)
+ .addImm(8)
+ .addReg(VE::SX10);
+ BuildMI(MBB, MBBI, dl, TII.get(VE::STSri))
+ .addReg(VE::SX11)
+ .addImm(24)
+ .addReg(VE::SX15);
+ BuildMI(MBB, MBBI, dl, TII.get(VE::STSri))
+ .addReg(VE::SX11)
+ .addImm(32)
+ .addReg(VE::SX16);
+ BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9)
+ .addReg(VE::SX11)
+ .addImm(0);
+}
+
+void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int NumBytes,
+ bool RequireFPUpdate) const {
+
+ DebugLoc dl;
+ const VEInstrInfo &TII =
+ *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  // Insert the following code here as the epilogue:
+ //
+ // or %sp, 0, %fp
+ // ld %got, 32(,%sp)
+ // ld %plt, 24(,%sp)
+ // ld %lr, 8(,%sp)
+ // ld %fp, 0(,%sp)
+
+ BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11)
+ .addReg(VE::SX9)
+ .addImm(0);
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX16)
+ .addReg(VE::SX11)
+ .addImm(32);
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX15)
+ .addReg(VE::SX11)
+ .addImm(24);
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX10)
+ .addReg(VE::SX11)
+ .addImm(8);
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX9)
+ .addReg(VE::SX11)
+ .addImm(0);
+}
+
+void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int NumBytes) const {
+ DebugLoc dl;
+ const VEInstrInfo &TII =
+ *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ if (NumBytes >= -64 && NumBytes < 63) {
+ BuildMI(MBB, MBBI, dl, TII.get(VE::ADXri), VE::SX11)
+ .addReg(VE::SX11)
+ .addImm(NumBytes);
+ return;
+ }
+
+  // Emit the following code. This clobbers SX13, which we know is always
+  // available here.
+ // lea %s13,%lo(NumBytes)
+ // and %s13,%s13,(32)0
+ // lea.sl %sp,%hi(NumBytes)(%sp, %s13)
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LEAzzi), VE::SX13)
+ .addImm(LO32(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm0), VE::SX13)
+ .addReg(VE::SX13)
+ .addImm(32);
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LEASLrri), VE::SX11)
+ .addReg(VE::SX11)
+ .addReg(VE::SX13)
+ .addImm(HI32(NumBytes));
+}
+
+void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int NumBytes) const {
+ DebugLoc dl;
+ const VEInstrInfo &TII =
+ *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  // Emit the following code. It is not possible to insert multiple
+  // basic blocks in the PEI pass, so we emit two pseudo instructions here.
+  //
+  //   EXTEND_STACK        // pseudo instruction
+  //   EXTEND_STACK_GUARD  // pseudo instruction
+ //
+ // EXTEND_STACK pseudo will be converted by ExpandPostRA pass into
+ // following instructions with multiple basic blocks later.
+ //
+ // thisBB:
+ // brge.l.t %sp, %sl, sinkBB
+ // syscallBB:
+ // ld %s61, 0x18(, %tp) // load param area
+ // or %s62, 0, %s0 // spill the value of %s0
+ // lea %s63, 0x13b // syscall # of grow
+ // shm.l %s63, 0x0(%s61) // store syscall # at addr:0
+ // shm.l %sl, 0x8(%s61) // store old limit at addr:8
+ // shm.l %sp, 0x10(%s61) // store new limit at addr:16
+ // monc // call monitor
+ // or %s0, 0, %s62 // restore the value of %s0
+ // sinkBB:
+ //
+ // The EXTEND_STACK_GUARD pseudo is simply eliminated by the ExpandPostRA
+ // pass. It must immediately follow the EXTEND_STACK pseudo in order to
+ // protect the iteration loop in ExpandPostRA.
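+ //
+ // The actual expansion is implemented by
+ // VEInstrInfo::expandExtendStackPseudo later in this patch.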
+
+ BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK));
+ BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK_GUARD));
+}
+
+void VEFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
+ const VEInstrInfo &TII =
+ *static_cast<const VEInstrInfo *>(Subtarget.getInstrInfo());
+ const VERegisterInfo &RegInfo =
+ *static_cast<const VERegisterInfo *>(Subtarget.getRegisterInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+ bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF);
+
+ // FIXME: unfortunately, returning false from canRealignStack
+ // actually just causes needsStackRealignment to return false,
+ // rather than reporting an error, as would be sensible. This is
+ // poor, but fixing that bogosity is going to be a large project.
+ // For now, just check whether it has lied, and report an error here.
+ if (!NeedsStackRealignment && MFI.getMaxAlignment() > getStackAlignment())
+ report_fatal_error("Function \"" + Twine(MF.getName()) +
+ "\" required "
+ "stack re-alignment, but LLVM couldn't handle it "
+ "(probably because it has a dynamic alloca).");
+
+ // Get the number of bytes to allocate from the FrameInfo
+ int NumBytes = (int)MFI.getStackSize();
+ // The VE ABI requires a reserved 176-byte area starting at %sp: the
+ // callee's frame pointer is saved at 0(%sp), the return address at
+ // 8(%sp), and the callee Register Save Area (RSA) occupies 16(%sp) up
+ // to 176(%sp).
+ //
+ // We therefore need to add that offset to the total stack size
+ // after all the stack objects are placed by
+ // PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack
+ // needs to be aligned *after* the extra size is added, we need to disable
+ // calculateFrameObjectOffsets's built-in stack alignment, by having
+ // targetHandlesStackFrameRounding return true.
+
+ // Add the extra call frame stack size, if needed. (This is the same
+ // code as in PrologEpilogInserter, but also gets disabled by
+ // targetHandlesStackFrameRounding)
+ if (MFI.adjustsStack() && hasReservedCallFrame(MF))
+ NumBytes += MFI.getMaxCallFrameSize();
+
+ // Adds the VE subtarget-specific spill area to the stack
+ // size. Also ensures target-required alignment.
+ NumBytes = Subtarget.getAdjustedFrameSize(NumBytes);
+
+ // Finally, ensure that the size is sufficiently aligned for the
+ // data on the stack.
+ if (MFI.getMaxAlignment() > 0) {
+ NumBytes = alignTo(NumBytes, MFI.getMaxAlignment());
+ }
+
+ // Update stack size with corrected value.
+ MFI.setStackSize(NumBytes);
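+ // For example, a function with 32 bytes of locals and a 64-byte maximum
+ // call frame ends up with NumBytes = alignTo(32 + 64 + 176, 16) = 272,
+ // the 176 bytes being the RSA/RA/FP area added by getAdjustedFrameSize.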
+
+ // Emit prologue instructions to save %fp, %lr, %got, and %plt
+ emitPrologueInsns(MF, MBB, MBBI, NumBytes, true);
+
+ // Emit stack adjust instructions
+ emitSPAdjustment(MF, MBB, MBBI, -NumBytes);
+
+ // Emit stack extend instructions
+ emitSPExtend(MF, MBB, MBBI, -NumBytes);
+
+ unsigned regFP = RegInfo.getDwarfRegNum(VE::SX9, true);
+
+ // Emit ".cfi_def_cfa_register 9" (%s9, the frame pointer).
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // Emit ".cfi_window_save".
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+}
+
+MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ MachineInstr &MI = *I;
+ int Size = MI.getOperand(0).getImm();
+ if (MI.getOpcode() == VE::ADJCALLSTACKDOWN)
+ Size = -Size;
+
+ if (Size)
+ emitSPAdjustment(MF, MBB, I, Size);
+ }
+ return MBB.erase(I);
+}
+
+void VEFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ int NumBytes = (int)MFI.getStackSize();
+
+ // Emit epilogue instructions to restore %fp, %lr, %got, and %plt
+ emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true);
+}
+
+bool VEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ // Reserve call frame if there are no variable sized objects on the stack.
+ return !MF.getFrameInfo().hasVarSizedObjects();
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ RegInfo->needsStackRealignment(MF) || MFI.hasVarSizedObjects() ||
+ MFI.isFrameAddressTaken();
+}
+
+int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ // Addressable stack objects are accessed using negative offsets from
+ // %fp, or positive offsets from %sp. Here everything is addressed with
+ // positive offsets from %sp (SX11).
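+ // For example, with a 272-byte frame, an object placed at offset -16 by
+ // PrologEpilogInserter is addressed as 256(,%s11).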
+ int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
+ FrameReg = VE::SX11; // %sp
+ return FrameOffset + MF.getFrameInfo().getStackSize();
+}
+
+bool VEFrameLowering::isLeafProc(MachineFunction &MF) const {
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ return !MFI.hasCalls() // No calls
+ && !MRI.isPhysRegUsed(VE::SX18) // Registers within limits
+ // (s18 is first CSR)
+ && !MRI.isPhysRegUsed(VE::SX11) // %sp un-used
+ && !hasFP(MF); // Don't need %fp
+}
+
+void VEFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ assert(isLeafProc(MF) && "TODO implement for non-leaf procs");
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h
new file mode 100644
index 000000000000..97e31d21aa43
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h
@@ -0,0 +1,81 @@
+//===-- VEFrameLowering.h - Define frame lowering for VE --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements VE-specific bits of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VEFRAMELOWERING_H
+#define LLVM_LIB_TARGET_VE_VEFRAMELOWERING_H
+
+#include "VE.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+
+namespace llvm {
+
+class VESubtarget;
+class VEFrameLowering : public TargetFrameLowering {
+public:
+ explicit VEFrameLowering(const VESubtarget &ST);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitPrologueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int NumBytes,
+ bool RequireFPUpdate) const;
+ void emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int NumBytes,
+ bool RequireFPUpdate) const;
+
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ bool hasFP(const MachineFunction &MF) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS = nullptr) const override;
+
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
+
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override {
+ static const SpillSlot Offsets[] = {
+ {VE::SX17, 40}, {VE::SX18, 48}, {VE::SX19, 56}, {VE::SX20, 64},
+ {VE::SX21, 72}, {VE::SX22, 80}, {VE::SX23, 88}, {VE::SX24, 96},
+ {VE::SX25, 104}, {VE::SX26, 112}, {VE::SX27, 120}, {VE::SX28, 128},
+ {VE::SX29, 136}, {VE::SX30, 144}, {VE::SX31, 152}, {VE::SX32, 160},
+ {VE::SX33, 168}};
+ NumEntries = array_lengthof(Offsets);
+ return Offsets;
+ }
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const override { return true; }
+
+private:
+ // Returns true if MF is a leaf procedure.
+ bool isLeafProc(MachineFunction &MF) const;
+
+ // Emits code for adjusting SP in function prologue/epilogue.
+ void emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int NumBytes) const;
+
+ // Emits code for extending SP in function prologue/epilogue.
+ void emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int NumBytes) const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
new file mode 100644
index 000000000000..43030993efb9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
@@ -0,0 +1,70 @@
+//===-- VEISelDAGToDAG.cpp - A dag to dag inst selector for VE ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the VE target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VETargetMachine.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// VEDAGToDAGISel - VE specific code to select VE machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class VEDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the VE Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const VESubtarget *Subtarget;
+
+public:
+ explicit VEDAGToDAGISel(VETargetMachine &tm) : SelectionDAGISel(tm) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<VESubtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+ }
+
+ void Select(SDNode *N) override;
+
+ StringRef getPassName() const override {
+ return "VE DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "VEGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+void VEDAGToDAGISel::Select(SDNode *N) {
+ SDLoc dl(N);
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return; // Already selected.
+ }
+
+ SelectCode(N);
+}
+
+/// createVEISelDag - This pass converts a legalized DAG into a
+/// VE-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createVEISelDag(VETargetMachine &TM) {
+ return new VEDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
new file mode 100644
index 000000000000..aa6c3c08bd75
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -0,0 +1,137 @@
+//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that VE uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEISelLowering.h"
+#include "VERegisterInfo.h"
+#include "VETargetMachine.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ve-lower"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "VEGenCallingConv.inc"
+
+bool VETargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ assert(!IsVarArg && "TODO implement var args");
+ assert(Outs.empty() && "TODO implement return values");
+ return true; // TODO support more than 'ret void'
+}
+
+SDValue
+VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SDLoc &DL, SelectionDAG &DAG) const {
+ assert(!IsVarArg && "TODO implement var args");
+ assert(Outs.empty() && "TODO implement return values");
+ assert(OutVals.empty() && "TODO implement return values");
+
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+ RetOps[0] = Chain; // Update chain.
+ return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
+}
+
+SDValue VETargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ assert(!IsVarArg && "TODO implement var args");
+ assert(Ins.empty() && "TODO implement input arguments");
+ return Chain;
+}
+
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const {
+ Register Reg = StringSwitch<Register>(RegName)
+ .Case("sp", VE::SX11) // Stack pointer
+ .Case("fp", VE::SX9) // Frame pointer
+ .Case("sl", VE::SX8) // Stack limit
+ .Case("lr", VE::SX10) // Link register
+ .Case("tp", VE::SX14) // Thread pointer
+ .Case("outer", VE::SX12) // Outer register
+ .Case("info", VE::SX17) // Info area register
+ .Case("got", VE::SX15) // Global offset table register
+ .Case("plt", VE::SX16) // Procedure linkage table register
+ .Default(0);
+
+ if (Reg)
+ return Reg;
+
+ report_fatal_error("Invalid register name for global register variable");
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+VETargetLowering::VETargetLowering(const TargetMachine &TM,
+ const VESubtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
+ // Instructions which use registers as conditionals examine all the
+ // bits (as does the pseudo SELECT_CC expansion). I don't think it
+ // matters much whether it's ZeroOrOneBooleanContent, or
+ // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
+ // former.
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent);
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i64, &VE::I64RegClass);
+
+ setStackPointerRegisterToSaveRestore(VE::SX11);
+
+ // Set function alignment to 16 bytes
+ setMinFunctionAlignment(Align(16));
+
+ // VE stores all arguments with 8-byte alignment
+ setMinStackArgumentAlignment(Align(8));
+
+ computeRegisterProperties(Subtarget->getRegisterInfo());
+}
+
+const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch ((VEISD::NodeType)Opcode) {
+ case VEISD::FIRST_NUMBER:
+ break;
+ case VEISD::RET_FLAG:
+ return "VEISD::RET_FLAG";
+ }
+ return nullptr;
+}
+
+EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
+ EVT VT) const {
+ return MVT::i64;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
new file mode 100644
index 000000000000..39b3610a0c3a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
@@ -0,0 +1,62 @@
+//===-- VEISelLowering.h - VE DAG Lowering Interface ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that VE uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VEISELLOWERING_H
+#define LLVM_LIB_TARGET_VE_VEISELLOWERING_H
+
+#include "VE.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+namespace llvm {
+class VESubtarget;
+
+namespace VEISD {
+enum NodeType : unsigned {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ RET_FLAG, // Return with a flag operand.
+};
+} // namespace VEISD
+
+class VETargetLowering : public TargetLowering {
+ const VESubtarget *Subtarget;
+
+public:
+ VETargetLowering(const TargetMachine &TM, const VESubtarget &STI);
+
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ Register getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const override;
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
+
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+ LLVMContext &Context) const override;
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
+ SelectionDAG &DAG) const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_VE_VEISELLOWERING_H
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td
new file mode 100644
index 000000000000..a8d3e786ba89
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td
@@ -0,0 +1,75 @@
+//===-- VEInstrFormats.td - VE Instruction Formats ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+class InstVE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<64> Inst;
+
+ let Namespace = "VE";
+ let Size = 8;
+
+ bits<8> op;
+ let Inst{0-7} = op;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+
+ let DecoderNamespace = "VE";
+ field bits<64> SoftFail = 0;
+}
+
+class RM<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern=[]>
+ : InstVE<outs, ins, asmstr, pattern> {
+ bits<1> cx = 0;
+ bits<7> sx;
+ bits<1> cy = 0;
+ bits<7> sy;
+ bits<1> cz = 0;
+ bits<7> sz;
+ bits<32> imm32 = 0;
+ let op = opVal;
+ let Inst{15} = cx;
+ let Inst{14-8} = sx;
+ let Inst{23} = cy;
+ let Inst{22-16} = sy;
+ let Inst{31} = cz;
+ let Inst{30-24} = sz;
+ let Inst{63-32} = imm32;
+}
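+
+// The resulting RM word, as assigned above:
+// Inst{0-7}=op, Inst{8-14}=sx, Inst{15}=cx, Inst{16-22}=sy, Inst{23}=cy,
+// Inst{24-30}=sz, Inst{31}=cz, Inst{32-63}=imm32. Judging from the
+// multiclasses in VEInstrInfo.td, cy/cz select between register and
+// immediate encodings of sy/sz (1 in the register forms, 0 otherwise).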
+
+class RR<bits<8>opVal, dag outs, dag ins, string asmstr>
+ : RM<opVal, outs, ins, asmstr> {
+ bits<1> cw = 0;
+ bits<1> cw2 = 0;
+ bits<4> cfw = 0;
+ let imm32{0-23} = 0;
+ let imm32{24} = cw;
+ let imm32{25} = cw2;
+ let imm32{26-27} = 0;
+ let imm32{28-31} = cfw;
+}
+
+class CF<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern=[]>
+ : RM<opVal, outs, ins, asmstr, pattern> {
+ bits<1> cx2;
+ bits<2> bpf;
+ bits<4> cf;
+ let cx = 0;
+ let sx{6} = cx2;
+ let sx{5-4} = bpf;
+ let sx{3-0} = cf;
+}
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern=[]>
+ : InstVE<outs, ins, asmstr, pattern> {
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
new file mode 100644
index 000000000000..bc382dcef7c3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -0,0 +1,133 @@
+//===-- VEInstrInfo.cpp - VE Instruction Information ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the VE implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEInstrInfo.h"
+#include "VE.h"
+#include "VESubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define DEBUG_TYPE "ve"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "VEGenInstrInfo.inc"
+
+// Pin the vtable to this file.
+void VEInstrInfo::anchor() {}
+
+VEInstrInfo::VEInstrInfo(VESubtarget &ST)
+ : VEGenInstrInfo(VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI(),
+ Subtarget(ST) {}
+
+bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case VE::EXTEND_STACK: {
+ return expandExtendStackPseudo(MI);
+ }
+ case VE::EXTEND_STACK_GUARD: {
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return true;
+ }
+ }
+ return false;
+}
+
+bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const VEInstrInfo &TII =
+ *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ DebugLoc dl = MBB.findDebugLoc(MI);
+
+ // Create following instructions and multiple basic blocks.
+ //
+ // thisBB:
+ // brge.l.t %sp, %sl, sinkBB
+ // syscallBB:
+ // ld %s61, 0x18(, %tp) // load param area
+ // or %s62, 0, %s0 // spill the value of %s0
+ // lea %s63, 0x13b // syscall # of grow
+ // shm.l %s63, 0x0(%s61) // store syscall # at addr:0
+ // shm.l %sl, 0x8(%s61) // store old limit at addr:8
+ // shm.l %sp, 0x10(%s61) // store new limit at addr:16
+ // monc // call monitor
+ // or %s0, 0, %s62 // restore the value of %s0
+ // sinkBB:
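+ //
+ // %s61-%s63 (SX61-SX63) are assumed to be safe to clobber here; per
+ // VERegisterInfo::getReservedRegs, sx34-sx63 are temporary registers.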
+
+ // Create new MBB
+ MachineBasicBlock *BB = &MBB;
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineBasicBlock *syscallMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = ++(BB->getIterator());
+ MF.insert(It, syscallMBB);
+ MF.insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
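+ // (The double std::next skips both this EXTEND_STACK pseudo and the
+ // EXTEND_STACK_GUARD that is required to follow it.)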
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ std::next(std::next(MachineBasicBlock::iterator(MI))),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(syscallMBB);
+ BB->addSuccessor(sinkMBB);
+ BuildMI(BB, dl, TII.get(VE::BCRLrr))
+ .addImm(VECC::CC_IGE)
+ .addReg(VE::SX11) // %sp
+ .addReg(VE::SX8) // %sl
+ .addMBB(sinkMBB);
+
+ BB = syscallMBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII.get(VE::LDSri), VE::SX61)
+ .addReg(VE::SX14)
+ .addImm(0x18);
+ BuildMI(BB, dl, TII.get(VE::ORri), VE::SX62)
+ .addReg(VE::SX0)
+ .addImm(0);
+ BuildMI(BB, dl, TII.get(VE::LEAzzi), VE::SX63)
+ .addImm(0x13b);
+ BuildMI(BB, dl, TII.get(VE::SHMri))
+ .addReg(VE::SX61)
+ .addImm(0)
+ .addReg(VE::SX63);
+ BuildMI(BB, dl, TII.get(VE::SHMri))
+ .addReg(VE::SX61)
+ .addImm(8)
+ .addReg(VE::SX8);
+ BuildMI(BB, dl, TII.get(VE::SHMri))
+ .addReg(VE::SX61)
+ .addImm(16)
+ .addReg(VE::SX11);
+ BuildMI(BB, dl, TII.get(VE::MONC));
+
+ BuildMI(BB, dl, TII.get(VE::ORri), VE::SX0)
+ .addReg(VE::SX62)
+ .addImm(0);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h
new file mode 100644
index 000000000000..6a26d0e95275
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h
@@ -0,0 +1,48 @@
+//===-- VEInstrInfo.h - VE Instruction Information --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the VE implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VEINSTRINFO_H
+#define LLVM_LIB_TARGET_VE_VEINSTRINFO_H
+
+#include "VERegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "VEGenInstrInfo.inc"
+
+namespace llvm {
+
+class VESubtarget;
+
+class VEInstrInfo : public VEGenInstrInfo {
+ const VERegisterInfo RI;
+ const VESubtarget &Subtarget;
+ virtual void anchor();
+
+public:
+ explicit VEInstrInfo(VESubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const VERegisterInfo &getRegisterInfo() const { return RI; }
+
+ // Lower pseudo instructions after register allocation.
+ bool expandPostRAPseudo(MachineInstr &MI) const override;
+
+ bool expandExtendStackPseudo(MachineInstr &MI) const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
new file mode 100644
index 000000000000..dc671aaa3f8d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
@@ -0,0 +1,288 @@
+//===-- VEInstrInfo.td - Target Description for VE Target -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the VE instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "VEInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def simm7 : PatLeaf<(imm), [{ return isInt<7>(N->getSExtValue()); }]>;
+def simm32 : PatLeaf<(imm), [{ return isInt<32>(N->getSExtValue()); }]>;
+def uimm6 : PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>;
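+
+// For reference: simm7 matches [-64, 63], simm32 matches [-2^31, 2^31 - 1],
+// and uimm6 matches [0, 63].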
+
+// ASX format of memory address
+def MEMri : Operand<iPTR> {
+ let PrintMethod = "printMemASXOperand";
+ let MIOperandInfo = (ops ptr_rc, i64imm);
+}
+
+// AS format of memory address
+def MEMASri : Operand<iPTR> {
+ let PrintMethod = "printMemASOperand";
+ let MIOperandInfo = (ops ptr_rc, i64imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget32 : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTarget32OpValue";
+}
+
+def simm7Op64 : Operand<i64> {
+ let DecoderMethod = "DecodeSIMM7";
+}
+
+def simm32Op64 : Operand<i64> {
+ let DecoderMethod = "DecodeSIMM32";
+}
+
+def uimm6Op64 : Operand<i64> {
+ let DecoderMethod = "DecodeUIMM6";
+}
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+ def CCOp : Operand<i32>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64>,
+ SDTCisVT<1, i64> ]>;
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>,
+ SDTCisVT<1, i64> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>;
+
+def retflag : SDNode<"VEISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+//===----------------------------------------------------------------------===//
+// VE Flag Conditions
+//===----------------------------------------------------------------------===//
+
+// Note that these values must be kept in sync with the CCOp::CondCode enum
+// values.
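+// The I-prefixed codes below appear to be the integer compares, while the
+// unprefixed ones are the floating-point compares (hence the NaN-aware
+// variants).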
+class CC_VAL<int N> : PatLeaf<(i32 N)>;
+def CC_IG : CC_VAL< 0>; // Greater
+def CC_IL : CC_VAL< 1>; // Less
+def CC_INE : CC_VAL< 2>; // Not Equal
+def CC_IEQ : CC_VAL< 3>; // Equal
+def CC_IGE : CC_VAL< 4>; // Greater or Equal
+def CC_ILE : CC_VAL< 5>; // Less or Equal
+def CC_AF : CC_VAL< 6>; // Always false
+def CC_G : CC_VAL< 7>; // Greater
+def CC_L : CC_VAL< 8>; // Less
+def CC_NE : CC_VAL< 9>; // Not Equal
+def CC_EQ : CC_VAL<10>; // Equal
+def CC_GE : CC_VAL<11>; // Greater or Equal
+def CC_LE : CC_VAL<12>; // Less or Equal
+def CC_NUM : CC_VAL<13>; // Number
+def CC_NAN : CC_VAL<14>; // NaN
+def CC_GNAN : CC_VAL<15>; // Greater or NaN
+def CC_LNAN : CC_VAL<16>; // Less or NaN
+def CC_NENAN : CC_VAL<17>; // Not Equal or NaN
+def CC_EQNAN : CC_VAL<18>; // Equal or NaN
+def CC_GENAN : CC_VAL<19>; // Greater or Equal or NaN
+def CC_LENAN : CC_VAL<20>; // Less or Equal or NaN
+def CC_AT : CC_VAL<21>; // Always true
+
+//===----------------------------------------------------------------------===//
+// VE Multiclasses for common instruction formats
+//===----------------------------------------------------------------------===//
+
+multiclass RMm<string opcStr, bits<8>opc,
+ RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> {
+ def rri : RM<
+ opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, immOp2:$imm32),
+ !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})")> {
+ let cy = 1;
+ let cz = 1;
+ let hasSideEffects = 0;
+ }
+ def zzi : RM<
+ opc, (outs RC:$sx), (ins immOp2:$imm32),
+ !strconcat(opcStr, " $sx, $imm32")> {
+ let cy = 0;
+ let sy = 0;
+ let cz = 0;
+ let sz = 0;
+ let hasSideEffects = 0;
+ }
+}
+
+// Multiclass for RR type instructions
+
+multiclass RRmrr<string opcStr, bits<8>opc,
+ RegisterClass RCo, ValueType Tyo,
+ RegisterClass RCi, ValueType Tyi> {
+ def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz),
+ !strconcat(opcStr, " $sx, $sy, $sz")>
+ { let cy = 1; let cz = 1; let hasSideEffects = 0; }
+}
+
+multiclass RRmri<string opcStr, bits<8>opc,
+ RegisterClass RCo, ValueType Tyo,
+ RegisterClass RCi, ValueType Tyi, Operand immOp> {
+ // VE computes (OpNode $sy, $sz), but LLVM requires the immediate to be
+ // on the RHS, so we use the following definition.
+ def ri : RR<opc, (outs RCo:$sx), (ins RCi:$sz, immOp:$sy),
+ !strconcat(opcStr, " $sx, $sy, $sz")>
+ { let cy = 0; let cz = 1; let hasSideEffects = 0; }
+}
+
+multiclass RRmiz<string opcStr, bits<8>opc,
+ RegisterClass RCo, ValueType Tyo,
+ RegisterClass RCi, ValueType Tyi, Operand immOp> {
+ def zi : RR<opc, (outs RCo:$sx), (ins immOp:$sy),
+ !strconcat(opcStr, " $sx, $sy")>
+ { let cy = 0; let cz = 0; let sz = 0; let hasSideEffects = 0; }
+}
+
+multiclass RRNDmrm<string opcStr, bits<8>opc,
+ RegisterClass RCo, ValueType Tyo,
+ RegisterClass RCi, ValueType Tyi, Operand immOp2> {
+ def rm0 : RR<opc, (outs RCo:$sx), (ins RCi:$sy, immOp2:$sz),
+ !strconcat(opcStr, " $sx, $sy, (${sz})0")> {
+ let cy = 1;
+ let cz = 0;
+ let sz{6} = 1;
+ // (guess) TableGen conservatively assumes hasSideEffects when it
+ // cannot infer it from a pattern.
+ let hasSideEffects = 0;
+ }
+}
+
+// Used by add, mul, div, and similar commutative instructions.
+// The operand order is "$sx, $sy, $sz".
+
+multiclass RRm<string opcStr, bits<8>opc,
+ RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> :
+ RRmrr<opcStr, opc, RC, Ty, RC, Ty>,
+ RRmri<opcStr, opc, RC, Ty, RC, Ty, immOp>,
+ RRmiz<opcStr, opc, RC, Ty, RC, Ty, immOp>,
+ RRNDmrm<opcStr, opc, RC, Ty, RC, Ty, immOp2>;
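+
+// For example, `defm ADX : RRm<"adds.l", 0x59, ...>` below instantiates
+// ADXrr, ADXri, ADXzi, and ADXrm0, which C++ code references as VE::ADXrr,
+// VE::ADXri, and so on.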
+
+// Branch multiclass
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in
+multiclass BCRm<string opcStr, string opcStrAt, bits<8> opc,
+ RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> {
+ def rr : CF<
+ opc, (outs),
+ (ins CCOp:$cf, RC:$sy, RC:$sz, brtarget32:$imm32),
+ !strconcat(opcStr, " $sy, $sz, $imm32")> {
+ let cy = 1;
+ let cz = 1;
+ let hasSideEffects = 0;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// LEA and LEASL instruction (load 32 bit imm to low or high part)
+let cx = 0 in
+defm LEA : RMm<"lea", 0x06, I64, i64, simm7Op64, simm32Op64>;
+let cx = 1 in
+defm LEASL : RMm<"lea.sl", 0x06, I64, i64, simm7Op64, simm32Op64>;
+
+// 5.3.2.2. Fixed-Point Arithmetic Operation Instructions
+
+// ADX instruction
+let cx = 0 in
+defm ADX : RRm<"adds.l", 0x59, I64, i64, simm7Op64, uimm6Op64>;
+
+// 5.3.2.3. Logical Arithmetic Operation Instructions
+
+let cx = 0 in {
+ defm AND : RRm<"and", 0x44, I64, i64, simm7Op64, uimm6Op64>;
+ defm OR : RRm<"or", 0x45, I64, i64, simm7Op64, uimm6Op64>;
+}
+
+// Load and Store instructions
+// As a first step, only sz and imm32 are used to represent $addr.
+let mayLoad = 1, hasSideEffects = 0 in {
+let cy = 0, sy = 0, cz = 1 in {
+let cx = 0 in
+def LDSri : RM<
+ 0x01, (outs I64:$sx), (ins MEMri:$addr),
+ "ld $sx, $addr">;
+}
+}
+
+let mayStore = 1, hasSideEffects = 0 in {
+let cx = 0, cy = 0, sy = 0, cz = 1 in {
+def STSri : RM<
+ 0x11, (outs), (ins MEMri:$addr, I64:$sx),
+ "st $sx, $addr">;
+}
+}
+
+// The return instruction is a special case of a jump.
+let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 15 /* AT */, cy = 0, sy = 0,
+ cz = 1, sz = 0x10 /* SX10 */, imm32 = 0, Uses = [SX10],
+ isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def RET : CF<
+ 0x19, (outs), (ins),
+ "b.l (,%lr)",
+ [(retflag)]>;
+
+// Branch instruction
+let cx = 0, cx2 = 0, bpf = 0 /* NONE */ in
+defm BCRL : BCRm<"br${cf}.l", "br.l", 0x18, I64, i64, simm7Op64, uimm6Op64>;
+
+let cx = 0, cy = 0, cz = 1, hasSideEffects = 0 in {
+let sy = 3 in
+def SHMri : RM<
+ 0x31, (outs), (ins MEMASri:$addr, I64:$sx),
+ "shm.l $sx, $addr">;
+}
+
+let cx = 0, sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in
+def MONC : RR<
+ 0x3F, (outs), (ins),
+ "monc">;
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [SX11], Uses = [SX11], hasSideEffects = 0 in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt, i64imm:$amt2),
+ "# ADJCALLSTACKDOWN $amt, $amt2",
+ [(callseq_start timm:$amt, timm:$amt2)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ "# ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let Defs = [SX8], Uses = [SX8, SX11], hasSideEffects = 0 in
+def EXTEND_STACK : Pseudo<(outs), (ins),
+ "# EXTEND STACK",
+ []>;
+let hasSideEffects = 0 in
+def EXTEND_STACK_GUARD : Pseudo<(outs), (ins),
+ "# EXTEND STACK GUARD",
+ []>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp
new file mode 100644
index 000000000000..6c8fc3536c34
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp
@@ -0,0 +1,69 @@
+//===-- VEMCInstLower.cpp - Convert VE MachineInstr to MCInst -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower VE MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VE.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+static MCOperand LowerSymbolOperand(const MachineInstr *MI,
+ const MachineOperand &MO,
+ const MCSymbol *Symbol, AsmPrinter &AP) {
+
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, AP.OutContext);
+ return MCOperand::createExpr(MCSym);
+}
+
+static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
+ AsmPrinter &AP) {
+ switch (MO.getType()) {
+ default:
+ report_fatal_error("unsupported operand type");
+
+ case MachineOperand::MO_Register:
+ if (MO.isImplicit())
+ break;
+ return MCOperand::createReg(MO.getReg());
+
+ case MachineOperand::MO_Immediate:
+ return MCOperand::createImm(MO.getImm());
+
+ case MachineOperand::MO_MachineBasicBlock:
+ return LowerSymbolOperand(MI, MO, MO.getMBB()->getSymbol(), AP);
+
+ case MachineOperand::MO_RegisterMask:
+ break;
+ }
+ return MCOperand();
+}
+
+void llvm::LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp = LowerOperand(MI, MO, AP);
+
+ if (MCOp.isValid())
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp
new file mode 100644
index 000000000000..e1ff614abc20
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -0,0 +1,133 @@
+//===-- VERegisterInfo.cpp - VE Register Information ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the VE implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VERegisterInfo.h"
+#include "VE.h"
+#include "VESubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+#define GET_REGINFO_TARGET_DESC
+#include "VEGenRegisterInfo.inc"
+
+// VE uses %s10 == %lr to keep the return address
+VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {}
+
+const MCPhysReg *
+VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ return CSR_SaveList;
+}
+
+const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ return CSR_RegMask;
+}
+
+const uint32_t *VERegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
+BitVector VERegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(VE::SX8); // stack limit
+ Reserved.set(VE::SX9); // frame pointer
+ Reserved.set(VE::SX10); // link register (return address)
+ Reserved.set(VE::SX11); // stack pointer
+
+ Reserved.set(VE::SX12); // outer register
+ Reserved.set(VE::SX13); // id register for dynamic linker
+
+ Reserved.set(VE::SX14); // thread pointer
+ Reserved.set(VE::SX15); // global offset table register
+ Reserved.set(VE::SX16); // procedure linkage table register
+ Reserved.set(VE::SX17); // linkage-area register
+
+ // sx18-sx33 are callee-saved registers
+ // sx34-sx63 are temporary registers
+
+ return Reserved;
+}
+
+bool VERegisterInfo::isConstantPhysReg(unsigned PhysReg) const { return false; }
+
+const TargetRegisterClass *
+VERegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ return &VE::I64RegClass;
+}
+
+static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
+ MachineInstr &MI, const DebugLoc &dl,
+ unsigned FIOperandNum, int Offset, unsigned FramePtr) {
+ // Replace the frame index with a frame-register reference directly.
+ // VE has a 32-bit offset field, so there is no need to expand this into
+ // multiple instructions; encode the offset directly.
+ MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+}
+
+void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const VEFrameLowering *TFI = getFrameLowering(MF);
+
+ unsigned FrameReg;
+ int Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg);
+
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+
+ replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg);
+}
+
+Register VERegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return VE::SX9;
+}
+
+// VE has no architectural need for stack realignment support,
+// except that LLVM unfortunately currently implements overaligned
+// stack objects by depending upon stack realignment support.
+// If that ever changes, this can probably be deleted.
+bool VERegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ if (!TargetRegisterInfo::canRealignStack(MF))
+ return false;
+
+ // VE always reserves a fixed frame pointer register, so we don't need
+ // to worry about reserving it here. Even when the current frame doesn't
+ // strictly need a frame pointer, the register still cannot be reused
+ // for anything else.
+
+ // If there's a reserved call frame, we can use %sp to access locals.
+ if (getFrameLowering(MF)->hasReservedCallFrame(MF))
+ return true;
+
+ // Otherwise, we'd need a base pointer, but those aren't implemented
+ // for VE at the moment.
+
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h
new file mode 100644
index 000000000000..9cb475f5e174
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h
@@ -0,0 +1,49 @@
+//===-- VERegisterInfo.h - VE Register Information Impl ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the VE implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VEREGISTERINFO_H
+#define LLVM_LIB_TARGET_VE_VEREGISTERINFO_H
+
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "VEGenRegisterInfo.inc"
+
+namespace llvm {
+struct VERegisterInfo : public VEGenRegisterInfo {
+public:
+ VERegisterInfo();
+
+ /// Code Generation virtual methods...
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
+ const uint32_t *getNoPreservedMask() const override;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isConstantPhysReg(unsigned PhysReg) const override;
+
+ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const override;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override;
+
+ Register getFrameRegister(const MachineFunction &MF) const override;
+
+ bool canRealignStack(const MachineFunction &MF) const override;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td
new file mode 100644
index 000000000000..ef5b9c09705a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td
@@ -0,0 +1,37 @@
+//===-- VERegisterInfo.td - VE Register defs ---------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the VE register file
+//===----------------------------------------------------------------------===//
+
+class VEReg<bits<7> Enc, string n> : Register<n> {
+ let HWEncoding{15-7} = 0;
+ let HWEncoding{6-0} = Enc;
+ let Namespace = "VE";
+}
+
+// Registers are identified with 7-bit ID numbers.
+// R - 64-bit integer or floating-point registers
+class R<bits<7> Enc, string n, list<Register> subregs = [],
+ list<Register> aliases = []>: VEReg<Enc, n> {
+ let SubRegs = subregs;
+ let Aliases = aliases;
+}
+
+// Generic integer registers - 64 bits wide
+foreach I = 0-63 in
+ def SX#I : R<I, "S"#I, []>,
+ DwarfRegNum<[I]>;
+
+// Register classes.
+//
+// The register order is defined in terms of the preferred
+// allocation order.
+def I64 : RegisterClass<"VE", [i64], 64,
+ (sequence "SX%u", 0, 63)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp
new file mode 100644
index 000000000000..861e88cdb583
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp
@@ -0,0 +1,99 @@
+//===-- VESubtarget.cpp - VE Subtarget Information ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VE specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VESubtarget.h"
+#include "VE.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ve-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "VEGenSubtargetInfo.inc"
+
+void VESubtarget::anchor() {}
+
+VESubtarget &VESubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ // Determine default and user specified characteristics
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "ve";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ return *this;
+}
+
+VESubtarget::VESubtarget(const Triple &TT, const std::string &CPU,
+ const std::string &FS, const TargetMachine &TM)
+ : VEGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+ FrameLowering(*this) {}
+
+int VESubtarget::getAdjustedFrameSize(int frameSize) const {
+
+ // VE stack frame:
+ //
+ // +----------------------------------------+
+ // | Locals and temporaries |
+ // +----------------------------------------+
+ // | Parameter area for callee |
+ // 176(fp) | |
+ // +----------------------------------------+
+ // | Register save area (RSA) for callee |
+ // | |
+ // 16(fp) | 20 * 8 bytes |
+ // +----------------------------------------+
+ // 8(fp) | Return address |
+ // +----------------------------------------+
+ // 0(fp) | Frame pointer of caller |
+ // --------+----------------------------------------+--------
+ // | Locals and temporaries for callee |
+ // +----------------------------------------+
+ // | Parameter area for callee of callee |
+ // +----------------------------------------+
+ // 16(sp) | RSA for callee of callee |
+ // +----------------------------------------+
+ // 8(sp) | Return address |
+ // +----------------------------------------+
+ // 0(sp) | Frame pointer of callee |
+ // +----------------------------------------+
+
+ // RSA frame:
+ // +----------------------------------------------+
+ // 168(fp) | %s33 |
+ // +----------------------------------------------+
+ // | %s19...%s32 |
+ // +----------------------------------------------+
+ // 48(fp) | %s18 |
+ // +----------------------------------------------+
+ // 40(fp) | Linkage area register (%s17) |
+ // +----------------------------------------------+
+ // 32(fp) | Procedure linkage table register (%plt=%s16) |
+ // +----------------------------------------------+
+ // 24(fp) | Global offset table register (%got=%s15) |
+ // +----------------------------------------------+
+ // 16(fp) | Thread pointer register (%tp=%s14) |
+ // +----------------------------------------------+
+
+ frameSize += 176; // for RSA, RA, and FP
+ frameSize = alignTo(frameSize, 16); // requires 16-byte alignment
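+ // e.g., frameSize = 100 becomes 100 + 176 = 276, aligned up to 288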
+
+ return frameSize;
+}
+
+bool VESubtarget::enableMachineScheduler() const { return true; }
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h
new file mode 100644
index 000000000000..e9637cc16023
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h
@@ -0,0 +1,73 @@
+//===-- VESubtarget.h - Define Subtarget for the VE -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the VE specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VESUBTARGET_H
+#define LLVM_LIB_TARGET_VE_VESUBTARGET_H
+
+#include "VEFrameLowering.h"
+#include "VEISelLowering.h"
+#include "VEInstrInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "VEGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class VESubtarget : public VEGenSubtargetInfo {
+ Triple TargetTriple;
+ virtual void anchor();
+
+ VEInstrInfo InstrInfo;
+ VETargetLowering TLInfo;
+ SelectionDAGTargetInfo TSInfo;
+ VEFrameLowering FrameLowering;
+
+public:
+ VESubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
+ const TargetMachine &TM);
+
+ const VEInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const VERegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const VETargetLowering *getTargetLowering() const override { return &TLInfo; }
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+
+ bool enableMachineScheduler() const override;
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ VESubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
+
+ /// Given an actual stack size as determined by FrameInfo, this function
+ /// returns the adjusted frame size, which includes space for the register
+ /// save area (RSA), the return address, and the frame pointer.
+ int getAdjustedFrameSize(int stackSize) const;
+
+ bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
new file mode 100644
index 000000000000..46f5c0dc1805
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
@@ -0,0 +1,108 @@
+//===-- VETargetMachine.cpp - Define TargetMachine for VE -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "VETargetMachine.h"
+#include "VE.h"
+#include "VETargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ve"
+
+extern "C" void LLVMInitializeVETarget() {
+ // Register the target.
+ RegisterTargetMachine<VETargetMachine> X(getTheVETarget());
+}
+
+static std::string computeDataLayout(const Triple &T) {
+ // Aurora VE is little endian
+ std::string Ret = "e";
+
+ // Use ELF mangling
+ Ret += "-m:e";
+
+  // Alignment for 64-bit integers.
+ Ret += "-i64:64";
+
+  // VE supports 32-bit and 64-bit integers in registers.
+ Ret += "-n32:64";
+
+ // Stack alignment is 64 bits
+ Ret += "-S64";
+
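+  // Note (editorial): the components above concatenate to the layout string
+  // "e-m:e-i64:64-n32:64-S64".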
+ return Ret;
+}
+
+static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
+ if (!RM.hasValue())
+ return Reloc::Static;
+ return *RM;
+}
+
+class VEELFTargetObjectFile : public TargetLoweringObjectFileELF {
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+ }
+};
+
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF() {
+ return std::make_unique<VEELFTargetObjectFile>();
+}
+
+/// Create an Aurora VE architecture model
+VETargetMachine::VETargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Optional<Reloc::Model> RM,
+ Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT)
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
+ getEffectiveRelocModel(RM),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
+ TLOF(createTLOF()), Subtarget(TT, CPU, FS, *this) {
+ initAsmInfo();
+}
+
+VETargetMachine::~VETargetMachine() {}
+
+TargetTransformInfo VETargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(VETTIImpl(this, F));
+}
+
+namespace {
+/// VE Code Generator Pass Configuration Options.
+class VEPassConfig : public TargetPassConfig {
+public:
+ VEPassConfig(VETargetMachine &TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ VETargetMachine &getVETargetMachine() const {
+ return getTM<VETargetMachine>();
+ }
+
+ bool addInstSelector() override;
+};
+} // namespace
+
+TargetPassConfig *VETargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new VEPassConfig(*this, PM);
+}
+
+bool VEPassConfig::addInstSelector() {
+ addPass(createVEISelDag(getVETargetMachine()));
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h
new file mode 100644
index 000000000000..3191d59ec1c8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h
@@ -0,0 +1,57 @@
+//===-- VETargetMachine.h - Define TargetMachine for VE ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the VE specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VETARGETMACHINE_H
+#define LLVM_LIB_TARGET_VE_VETARGETMACHINE_H
+
+#include "VEInstrInfo.h"
+#include "VESubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class VETargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ VESubtarget Subtarget;
+  // Holds strings that can be freed together with the VETargetMachine,
+  // e.g. "GCC_except_tableXX" strings.
+ std::list<std::string> StrList;
+
+public:
+ VETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT);
+ ~VETargetMachine() override;
+
+ const VESubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const VESubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
+ std::list<std::string> *getStrList() const {
+ return const_cast<std::list<std::string> *>(&StrList);
+ }
+
+ // Pass Pipeline Configuration
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+
+ bool isMachineVerifierClean() const override { return false; }
+
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h
new file mode 100644
index 000000000000..c267c4d9a578
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h
@@ -0,0 +1,50 @@
+//===- VETargetTransformInfo.h - VE specific TTI -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific
+/// to the VE target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target-independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VETARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_VE_VETARGETTRANSFORMINFO_H
+
+#include "VE.h"
+#include "VETargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+
+class VETTIImpl : public BasicTTIImplBase<VETTIImpl> {
+ using BaseT = BasicTTIImplBase<VETTIImpl>;
+ friend BaseT;
+
+ const VESubtarget *ST;
+ const VETargetLowering *TLI;
+
+ const VESubtarget *getST() const { return ST; }
+ const VETargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit VETTIImpl(const VETargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ unsigned getNumberOfRegisters(unsigned ClassID) const { return 64; }
+
+ unsigned getRegisterBitWidth(bool Vector) const { return 64; }
+
+ unsigned getMinVectorRegisterBitWidth() const { return 64; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_VE_VETARGETTRANSFORMINFO_H
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 53a96fd6a97d..ea99cee3eb3b 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -712,6 +712,42 @@ public:
return expect(AsmToken::EndOfStatement, "EOL");
}
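+  // Example (editorial): ".export_name foo, bar" exports the symbol "foo"
+  // under the wasm export name "bar".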
+ if (DirectiveID.getString() == ".export_name") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ if (expect(AsmToken::Comma, ","))
+ return true;
+ auto ExportName = expectIdent();
+ auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
+ WasmSym->setExportName(ExportName);
+ TOut.emitExportName(WasmSym, ExportName);
+ }
+
+ if (DirectiveID.getString() == ".import_module") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ if (expect(AsmToken::Comma, ","))
+ return true;
+ auto ImportModule = expectIdent();
+ auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
+ WasmSym->setImportModule(ImportModule);
+ TOut.emitImportModule(WasmSym, ImportModule);
+ }
+
+ if (DirectiveID.getString() == ".import_name") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ if (expect(AsmToken::Comma, ","))
+ return true;
+ auto ImportName = expectIdent();
+ auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
+ WasmSym->setImportName(ImportName);
+ TOut.emitImportName(WasmSym, ImportName);
+ }
+
if (DirectiveID.getString() == ".eventtype") {
auto SymName = expectIdent();
if (SymName.empty())
@@ -863,7 +899,7 @@ public:
} // end anonymous namespace
// Force static initialization.
-extern "C" void LLVMInitializeWebAssemblyAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyAsmParser() {
RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32());
RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 9a9c31cff2d5..a8cb5d18537c 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -45,11 +45,9 @@ class WebAssemblyDisassembler final : public MCDisassembler {
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
public:
@@ -66,7 +64,8 @@ static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
}
-extern "C" void LLVMInitializeWebAssemblyDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void
+LLVMInitializeWebAssemblyDisassembler() {
// Register the disassembler for each target.
TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
createWebAssemblyDisassembler);
@@ -123,7 +122,7 @@ bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream, raw_ostream &CStream) const {
+ raw_ostream &CStream) const {
Size = 0;
if (Address == 0) {
// Start of a code section: we're parsing only the function count.
@@ -158,7 +157,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
- raw_ostream & /*OS*/, raw_ostream &CS) const {
+ raw_ostream &CS) const {
CommentStream = &CS;
Size = 0;
int Opc = nextByte(Bytes, Size);
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index 221ac17b8336..b262e06e55e7 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -44,11 +44,12 @@ void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
OS << "$" << RegNo;
}
-void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &OS) {
// Print the instruction (this uses the AsmStrings from the .td files).
- printInstruction(MI, OS);
+ printInstruction(MI, Address, OS);
// Print any additional variadic operands.
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
index cf37778099a0..bee85507f044 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
@@ -37,8 +37,8 @@ public:
const MCRegisterInfo &MRI);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &OS) override;
  // Used by tblgen code.
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -49,7 +49,7 @@ public:
raw_ostream &O);
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index 8f6531563e1b..c3d259e6ff20 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -21,7 +21,8 @@ using namespace llvm;
WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() = default; // anchor.
-WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
+WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T,
+ const MCTargetOptions &Options) {
CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
// TODO: What should MaxInstLength be?
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
index 9efbbf881f59..5ba4dcf8c4b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
@@ -22,7 +22,8 @@ class Triple;
class WebAssemblyMCAsmInfo final : public MCAsmInfoWasm {
public:
- explicit WebAssemblyMCAsmInfo(const Triple &T);
+ explicit WebAssemblyMCAsmInfo(const Triple &T,
+ const MCTargetOptions &Options);
~WebAssemblyMCAsmInfo() override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 9c8ca1f13b18..027e5408c633 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -35,8 +35,9 @@ using namespace llvm;
#include "WebAssemblyGenRegisterInfo.inc"
static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/,
- const Triple &TT) {
- return new WebAssemblyMCAsmInfo(TT);
+ const Triple &TT,
+ const MCTargetOptions &Options) {
+ return new WebAssemblyMCAsmInfo(TT, Options);
}
static MCInstrInfo *createMCInstrInfo() {
@@ -95,7 +96,7 @@ static MCTargetStreamer *createNullTargetStreamer(MCStreamer &S) {
}
// Force static initialization.
-extern "C" void LLVMInitializeWebAssemblyTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTargetMC() {
for (Target *T :
{&getTheWebAssemblyTarget32(), &getTheWebAssemblyTarget64()}) {
// Register the MC asm info.
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 40926201931a..7c21ed5f974e 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -94,6 +94,12 @@ void WebAssemblyTargetAsmStreamer::emitImportName(const MCSymbolWasm *Sym,
<< ImportName << '\n';
}
+void WebAssemblyTargetAsmStreamer::emitExportName(const MCSymbolWasm *Sym,
+ StringRef ExportName) {
+ OS << "\t.export_name\t" << Sym->getName() << ", "
+ << ExportName << '\n';
+}
+
void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) {
OS << "\t.indidx \t" << *Value << '\n';
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 0164f8e572ef..9aee1a06c956 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -48,6 +48,9 @@ public:
/// .import_name
virtual void emitImportName(const MCSymbolWasm *Sym,
StringRef ImportName) = 0;
+ /// .export_name
+ virtual void emitExportName(const MCSymbolWasm *Sym,
+ StringRef ExportName) = 0;
protected:
void emitValueType(wasm::ValType Type);
@@ -68,6 +71,7 @@ public:
void emitEventType(const MCSymbolWasm *Sym) override;
void emitImportModule(const MCSymbolWasm *Sym, StringRef ImportModule) override;
void emitImportName(const MCSymbolWasm *Sym, StringRef ImportName) override;
+ void emitExportName(const MCSymbolWasm *Sym, StringRef ExportName) override;
};
/// This part is for Wasm object output
@@ -85,6 +89,8 @@ public:
StringRef ImportModule) override {}
void emitImportName(const MCSymbolWasm *Sym,
StringRef ImportName) override {}
+ void emitExportName(const MCSymbolWasm *Sym,
+ StringRef ExportName) override {}
};
/// This part is for null output
@@ -101,6 +107,7 @@ public:
void emitEventType(const MCSymbolWasm *) override {}
void emitImportModule(const MCSymbolWasm *, StringRef) override {}
void emitImportName(const MCSymbolWasm *, StringRef) override {}
+ void emitExportName(const MCSymbolWasm *, StringRef) override {}
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
index e4afe2bb2830..87317f8a7f1e 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
@@ -26,7 +26,7 @@ Target &llvm::getTheWebAssemblyTarget64() {
return TheWebAssemblyTarget64;
}
-extern "C" void LLVMInitializeWebAssemblyTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTargetInfo() {
RegisterTarget<Triple::wasm32> X(getTheWebAssemblyTarget32(), "wasm32",
"WebAssembly 32-bit", "WebAssembly");
RegisterTarget<Triple::wasm64> Y(getTheWebAssemblyTarget64(), "wasm64",
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h
index fcbd0a5082ff..fcd48e0096b6 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -79,6 +79,10 @@ void initializeWebAssemblyRegNumberingPass(PassRegistry &);
void initializeWebAssemblyPeepholePass(PassRegistry &);
void initializeWebAssemblyCallIndirectFixupPass(PassRegistry &);
+namespace WebAssembly {
+enum TargetIndex { TI_LOCAL_START, TI_GLOBAL_START, TI_OPERAND_STACK_START };
+} // end namespace WebAssembly
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 5d8b873ce23b..adcb24b4be53 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -96,8 +96,11 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
}
for (const auto &F : M) {
+ if (F.isIntrinsic())
+ continue;
+
// Emit function type info for all undefined functions
- if (F.isDeclarationForLinker() && !F.isIntrinsic()) {
+ if (F.isDeclarationForLinker()) {
SmallVector<MVT, 4> Results;
SmallVector<MVT, 4> Params;
computeSignatureVTs(F.getFunctionType(), F, TM, Params, Results);
@@ -130,6 +133,13 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
getTargetStreamer()->emitImportName(Sym, Name);
}
}
+
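+    // Editorial note: mirrors the "wasm-import-name" handling above; the
+    // attribute value becomes the symbol's export name in the object file.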
+ if (F.hasFnAttribute("wasm-export-name")) {
+ auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
+ StringRef Name = F.getFnAttribute("wasm-export-name").getValueAsString();
+ Sym->setExportName(Name);
+ getTargetStreamer()->emitExportName(Sym, Name);
+ }
}
for (const auto &G : M.globals()) {
@@ -415,7 +425,7 @@ bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
// Force static initialization.
-extern "C" void LLVMInitializeWebAssemblyAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyAsmPrinter() {
RegisterAsmPrinter<WebAssemblyAsmPrinter> X(getTheWebAssemblyTarget32());
RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(getTheWebAssemblyTarget64());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
index 579377c9a5d7..114a50a3055d 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyDebugValueManager.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -43,3 +44,10 @@ void WebAssemblyDebugValueManager::clone(MachineInstr *Insert,
MBB->insert(Insert, Clone);
}
}
+
+void WebAssemblyDebugValueManager::replaceWithLocal(unsigned LocalId) {
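+  // Editorial note: each DBG_VALUE tracked for the old register is rewritten
+  // to a target index referring to the wasm local, so debug information
+  // survives the conversion to explicit local.get/local.set instructions.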
+ for (auto *DBI : DbgValues) {
+ MachineOperand &Op = DBI->getOperand(0);
+ Op.ChangeToTargetIndex(llvm::WebAssembly::TI_LOCAL_START, LocalId);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
index 06e8805b5ad0..7eae3cb5febd 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
@@ -30,6 +30,7 @@ public:
void move(MachineInstr *Insert);
void updateReg(unsigned Reg);
void clone(MachineInstr *Insert, unsigned NewReg);
+ void replaceWithLocal(unsigned LocalId);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
index 0387957b14c2..a511b320b56b 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index ef75bb215317..acbd4c9921b0 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblyDebugValueManager.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
@@ -261,6 +262,8 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
.addImm(LocalId)
.addReg(MI.getOperand(2).getReg());
+ WebAssemblyDebugValueManager(&MI).replaceWithLocal(LocalId);
+
MI.eraseFromParent();
Changed = true;
continue;
@@ -290,6 +293,9 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
} else {
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
unsigned Opc = getLocalSetOpcode(RC);
+
+ WebAssemblyDebugValueManager(&MI).replaceWithLocal(LocalId);
+
BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))
.addImm(LocalId)
.addReg(NewReg);
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index 13f0476eb4a5..ba04fd4eb9dd 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -30,9 +30,9 @@ HANDLE_NODETYPE(SWIZZLE)
HANDLE_NODETYPE(VEC_SHL)
HANDLE_NODETYPE(VEC_SHR_S)
HANDLE_NODETYPE(VEC_SHR_U)
-HANDLE_NODETYPE(LOAD_SPLAT)
HANDLE_NODETYPE(THROW)
HANDLE_NODETYPE(MEMORY_COPY)
HANDLE_NODETYPE(MEMORY_FILL)
-// add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
+// Memory intrinsics
+HANDLE_MEM_NODETYPE(LOAD_SPLAT)
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index f83a8a984ae0..531a07b829c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h" // To access function attributes.
+#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
@@ -35,12 +36,10 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel {
/// right decision when generating code for different targets.
const WebAssemblySubtarget *Subtarget;
- bool ForCodeSize;
-
public:
WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &TM,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr), ForCodeSize(false) {
+ : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr) {
}
StringRef getPassName() const override {
@@ -52,7 +51,6 @@ public:
"********** Function: "
<< MF.getName() << '\n');
- ForCodeSize = MF.getFunction().hasOptSize();
Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
// Wasm64 is not fully supported right now (and is not specified)
@@ -220,7 +218,6 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
bool WebAssemblyDAGToDAGISel::SelectInlineAsmMemoryOperand(
const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
switch (ConstraintID) {
- case InlineAsm::Constraint_i:
case InlineAsm::Constraint_m:
    // We only support simple memory operands that have a single address
    // operand and need no special handling.
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index f06afdbcea9e..5b177c0c5d9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -180,6 +181,13 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(Op, MVT::v2i64, Expand);
}
+ // But we do have integer min and max operations
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+ setOperationAction(Op, T, Legal);
+ }
+
// Expand float operations supported for scalars but not SIMD
for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
@@ -189,6 +197,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(Op, MVT::v2f64, Expand);
}
+    // Custom-lower i64x2 comparisons, which have no supported modes (see
+    // LowerSETCC below)
+ if (Subtarget->hasUnimplementedSIMD128())
+ for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
+ setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);
+
// Expand additional SIMD ops that V8 hasn't implemented yet
if (!Subtarget->hasUnimplementedSIMD128()) {
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
@@ -461,11 +474,14 @@ const char *
WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
case WebAssemblyISD::FIRST_NUMBER:
+ case WebAssemblyISD::FIRST_MEM_OPCODE:
break;
#define HANDLE_NODETYPE(NODE) \
case WebAssemblyISD::NODE: \
return "WebAssemblyISD::" #NODE;
+#define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
#include "WebAssemblyISD.def"
+#undef HANDLE_MEM_NODETYPE
#undef HANDLE_NODETYPE
}
return nullptr;
@@ -1011,6 +1027,8 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
@@ -1425,7 +1443,11 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (Subtarget->hasUnimplementedSIMD128() &&
(SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
- Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue);
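+      // Editorial note (assumption): a memory-intrinsic node carries the
+      // load's chain and MachineMemOperand, keeping the splatted load
+      // correctly ordered against other memory operations.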
+ Result = DAG.getMemIntrinsicNode(
+ WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
+ {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
+ SplattedLoad->getOffset()},
+ SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
} else {
Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
}
@@ -1472,6 +1494,29 @@ WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}
+SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ // The legalizer does not know how to expand the comparison modes of i64x2
+ // vectors because no comparison modes are supported. We could solve this by
+ // expanding all i64x2 SETCC nodes, but that seems to expand f64x2 SETCC nodes
+ // (which return i64x2 results) as well. So instead we manually unroll i64x2
+ // comparisons here.
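+  // For example (editorial sketch): (v2i64 (setcc %a, %b, seteq)) becomes a
+  // build_vector of two i64 select_cc nodes, each yielding all-ones or zero.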
+ assert(Subtarget->hasUnimplementedSIMD128());
+ assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
+ SmallVector<SDValue, 2> LHS, RHS;
+ DAG.ExtractVectorElements(Op->getOperand(0), LHS);
+ DAG.ExtractVectorElements(Op->getOperand(1), RHS);
+ const SDValue &CC = Op->getOperand(2);
+ auto MakeLane = [&](unsigned I) {
+ return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
+ DAG.getConstant(uint64_t(-1), DL, MVT::i64),
+ DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
+ };
+ return DAG.getBuildVector(Op->getValueType(0), DL,
+ {MakeLane(0), MakeLane(1)});
+}
+
SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
SelectionDAG &DAG) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index a53e24a05542..58e088a0ba50 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -24,8 +24,16 @@ namespace WebAssemblyISD {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
#define HANDLE_NODETYPE(NODE) NODE,
+#define HANDLE_MEM_NODETYPE(NODE)
#include "WebAssemblyISD.def"
+ FIRST_MEM_OPCODE = ISD::FIRST_TARGET_MEMORY_OPCODE,
#undef HANDLE_NODETYPE
+#undef HANDLE_MEM_NODETYPE
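+// Editorial note: WebAssemblyISD.def is included a second time below with the
+// macros swapped, so only the memory node types expand; they are numbered
+// from ISD::FIRST_TARGET_MEMORY_OPCODE onwards, as required for target nodes
+// created with getMemIntrinsicNode.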
+#define HANDLE_NODETYPE(NODE)
+#define HANDLE_MEM_NODETYPE(NODE) NODE,
+#include "WebAssemblyISD.def"
+#undef HANDLE_NODETYPE
+#undef HANDLE_MEM_NODETYPE
};
} // end namespace WebAssemblyISD
@@ -108,6 +116,7 @@ private:
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 8e8126c90e72..221dacaf821b 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -14,6 +14,7 @@
#include "WebAssemblyInstrInfo.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -54,8 +55,8 @@ bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable(
void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
// This method is called by post-RA expansion, which expects only pregs to
// exist. However we need to handle both here.
auto &MRI = MBB.getParent()->getRegInfo();
@@ -230,3 +231,12 @@ bool WebAssemblyInstrInfo::reverseBranchCondition(
Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm());
return false;
}
+
+ArrayRef<std::pair<int, const char *>>
+WebAssemblyInstrInfo::getSerializableTargetIndices() const {
+ static const std::pair<int, const char *> TargetIndices[] = {
+ {WebAssembly::TI_LOCAL_START, "wasm-local-start"},
+ {WebAssembly::TI_GLOBAL_START, "wasm-global-start"},
+ {WebAssembly::TI_OPERAND_STACK_START, "wasm-operator-stack-start"}};
+ return makeArrayRef(TargetIndices);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
index fe6211663c31..5762fd964c2a 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H
#include "WebAssemblyRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
@@ -46,7 +47,7 @@ public:
AAResults *AA) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned OpIdx1,
@@ -64,6 +65,9 @@ public:
int *BytesAdded = nullptr) const override;
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ ArrayRef<std::pair<int, const char *>>
+ getSerializableTargetIndices() const override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index fc5d73dac52e..64033c993e3f 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -72,35 +72,30 @@ defm "" : SIMDLoadSplat<"v16x8", 195>;
defm "" : SIMDLoadSplat<"v32x4", 196>;
defm "" : SIMDLoadSplat<"v64x2", 197>;
-def wasm_load_splat_t : SDTypeProfile<1, 1, []>;
-def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t>;
-
-foreach args = [["v16i8", "i32", "extloadi8"], ["v8i16", "i32", "extloadi16"],
- ["v4i32", "i32", "load"], ["v2i64", "i64", "load"],
- ["v4f32", "f32", "load"], ["v2f64", "f64", "load"]] in
-def load_splat_#args[0] :
- PatFrag<(ops node:$addr), (wasm_load_splat
- (!cast<ValueType>(args[1]) (!cast<PatFrag>(args[2]) node:$addr)))>;
+def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>;
+def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>;
let Predicates = [HasUnimplementedSIMD128] in
foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"],
["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in {
def : LoadPatNoOffset<!cast<ValueType>(args[0]),
- !cast<PatFrag>("load_splat_"#args[0]),
+ load_splat,
!cast<NI>("LOAD_SPLAT_"#args[1])>;
def : LoadPatImmOff<!cast<ValueType>(args[0]),
- !cast<PatFrag>("load_splat_"#args[0]),
+ load_splat,
regPlusImm,
!cast<NI>("LOAD_SPLAT_"#args[1])>;
def : LoadPatImmOff<!cast<ValueType>(args[0]),
- !cast<PatFrag>("load_splat_"#args[0]),
+ load_splat,
or_is_add,
!cast<NI>("LOAD_SPLAT_"#args[1])>;
def : LoadPatOffsetOnly<!cast<ValueType>(args[0]),
- !cast<PatFrag>("load_splat_"#args[0]),
+ load_splat,
!cast<NI>("LOAD_SPLAT_"#args[1])>;
def : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]),
- !cast<PatFrag>("load_splat_"#args[0]),
+ load_splat,
!cast<NI>("LOAD_SPLAT_"#args[1])>;
}
@@ -732,8 +727,44 @@ defm SUB_SAT_U :
SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 92>;
// Integer multiplication: mul
+let isCommutable = 1 in
defm MUL : SIMDBinaryIntNoI64x2<mul, "mul", 93>;
+// Integer min_s / min_u / max_s / max_u
+let isCommutable = 1 in {
+defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 94>;
+defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 95>;
+defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 96>;
+defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 97>;
+} // isCommutable = 1
+
+// Integer unsigned rounding average: avgr_u
+let isCommutable = 1, Predicates = [HasUnimplementedSIMD128] in {
+defm AVGR_U : SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 217>;
+defm AVGR_U : SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 218>;
+}
+
+def add_nuw : PatFrag<(ops node:$lhs, node:$rhs),
+ (add node:$lhs, node:$rhs),
+ "return N->getFlags().hasNoUnsignedWrap();">;
+
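+// Editorial note: the pattern below recognizes the rounding-average idiom
+// (x + y + 1) >> 1, where both additions carry the no-unsigned-wrap flag, and
+// selects the corresponding AVGR_U instruction.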
+foreach nodes = [[v16i8, splat16], [v8i16, splat8]] in
+def : Pat<(srl
+ (add_nuw
+ (add_nuw (nodes[0] V128:$lhs), (nodes[0] V128:$rhs)),
+ (nodes[1] (i32 1))
+ ),
+ (nodes[0] (nodes[1] (i32 1)))
+ ),
+ (!cast<NI>("AVGR_U_"#nodes[0]) V128:$lhs, V128:$rhs)>;
+
+// Widening dot product: i32x4.dot_i16x8_s
+let isCommutable = 1 in
+defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
+ [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
+ "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
+ 219>;
+
//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 1cf397dd060b..d1f3acbd221e 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -211,6 +211,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -452,7 +453,7 @@ Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) {
CalleeFTy = F->getFunctionType();
else {
auto *CalleeTy = cast<PointerType>(Callee->getType())->getElementType();
- CalleeFTy = dyn_cast<FunctionType>(CalleeTy);
+ CalleeFTy = cast<FunctionType>(CalleeTy);
}
std::string Sig = getSignature(CalleeFTy);
@@ -750,7 +751,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
if (!II)
continue;
- Changed = true;
LandingPads.insert(II->getLandingPadInst());
IRB.SetInsertPoint(II);
@@ -830,6 +830,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
if (auto *LPI = dyn_cast<LandingPadInst>(I))
LandingPads.insert(LPI);
}
+ Changed = !LandingPads.empty();
// Handle all the landingpad for this function together, as multiple invokes
// may share a single lp
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index 7b9ae90326f0..c6cf7b6bc551 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -316,12 +316,6 @@ struct RuntimeLibcallSignatureTable {
Table[RTLIB::UO_F32] = i32_func_f32_f32;
Table[RTLIB::UO_F64] = i32_func_f64_f64;
Table[RTLIB::UO_F128] = i32_func_i64_i64_i64_i64;
- // O_FXX has the weird property that it uses the same libcall name as UO_FXX
- // This breaks our name-based lookup. Fortunately only the UO family of
- // libcalls appears to be actually used.
- Table[RTLIB::O_F32] = unsupported;
- Table[RTLIB::O_F64] = unsupported;
- Table[RTLIB::O_F128] = unsupported;
// Memory
Table[RTLIB::MEMCPY] = iPTR_func_iPTR_iPTR_iPTR;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index bdf5fe2620a4..4291b48c16be 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -45,7 +45,7 @@ static cl::opt<bool> EnableEmSjLj(
cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
cl::init(false));
-extern "C" void LLVMInitializeWebAssemblyTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() {
// Register the target.
RegisterTargetMachine<WebAssemblyTargetMachine> X(
getTheWebAssemblyTarget32());
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 1c53e90daea7..ac8ad927d334 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -46,7 +46,8 @@ unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const {
unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index f0ecc73e91de..2731dda10bec 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -61,7 +61,8 @@ public:
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr);
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
/// @}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 25be79ec2b1e..d37d812df485 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -134,7 +134,6 @@ private:
IOK_LENGTH,
IOK_SIZE,
IOK_TYPE,
- IOK_OFFSET
};
class InfixCalculator {
@@ -326,6 +325,7 @@ private:
IES_RSHIFT,
IES_PLUS,
IES_MINUS,
+ IES_OFFSET,
IES_NOT,
IES_MULTIPLY,
IES_DIVIDE,
@@ -350,16 +350,30 @@ private:
InlineAsmIdentifierInfo Info;
short BracCount;
bool MemExpr;
+ bool OffsetOperator;
+ SMLoc OffsetOperatorLoc;
+
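+    // Editorial note: at most one symbolic reference may appear in an Intel
+    // expression; a second symbol is rejected with an error.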
+ bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
+ if (Sym) {
+ ErrMsg = "cannot use more than one symbol in memory operand";
+ return true;
+ }
+ Sym = Val;
+ SymName = ID;
+ return false;
+ }
public:
IntelExprStateMachine()
: State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
- MemExpr(false) {}
+ MemExpr(false), OffsetOperator(false) {}
void addImm(int64_t imm) { Imm += imm; }
short getBracCount() { return BracCount; }
bool isMemExpr() { return MemExpr; }
+ bool isOffsetOperator() { return OffsetOperator; }
+ SMLoc getOffsetLoc() { return OffsetOperatorLoc; }
unsigned getBaseReg() { return BaseReg; }
unsigned getIndexReg() { return IndexReg; }
unsigned getScale() { return Scale; }
@@ -456,6 +470,7 @@ private:
case IES_INTEGER:
case IES_RPAREN:
case IES_REGISTER:
+ case IES_OFFSET:
State = IES_PLUS;
IC.pushOperator(IC_PLUS);
if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
@@ -500,10 +515,12 @@ private:
case IES_INTEGER:
case IES_REGISTER:
case IES_INIT:
+ case IES_OFFSET:
State = IES_MINUS;
// push minus operator if it is not a negate operator
if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
- CurrState == IES_INTEGER || CurrState == IES_RBRAC)
+ CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
+ CurrState == IES_OFFSET)
IC.pushOperator(IC_MINUS);
else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
// We have negate operator for Scale: it's illegal
@@ -556,7 +573,6 @@ private:
}
PrevState = CurrState;
}
-
bool onRegister(unsigned Reg, StringRef &ErrMsg) {
IntelExprState CurrState = State;
switch (State) {
@@ -604,7 +620,6 @@ private:
if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
return onInteger(CE->getValue(), ErrMsg);
PrevState = State;
- bool HasSymbol = Sym != nullptr;
switch (State) {
default:
State = IES_ERROR;
@@ -614,18 +629,16 @@ private:
case IES_NOT:
case IES_INIT:
case IES_LBRAC:
+ if (setSymRef(SymRef, SymRefName, ErrMsg))
+ return true;
MemExpr = true;
State = IES_INTEGER;
- Sym = SymRef;
- SymName = SymRefName;
IC.pushOperand(IC_IMM);
if (ParsingInlineAsm)
Info = IDInfo;
break;
}
- if (HasSymbol)
- ErrMsg = "cannot use more than one symbol in memory operand";
- return HasSymbol;
+ return false;
}
bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
IntelExprState CurrState = State;
@@ -738,6 +751,7 @@ private:
State = IES_ERROR;
break;
case IES_INTEGER:
+ case IES_OFFSET:
case IES_REGISTER:
case IES_RPAREN:
if (BracCount-- != 1)
@@ -792,6 +806,7 @@ private:
State = IES_ERROR;
break;
case IES_INTEGER:
+ case IES_OFFSET:
case IES_REGISTER:
case IES_RPAREN:
State = IES_RPAREN;
@@ -799,6 +814,32 @@ private:
break;
}
}
+ bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
+ const InlineAsmIdentifierInfo &IDInfo, bool ParsingInlineAsm,
+ StringRef &ErrMsg) {
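+      // Editorial note: "offset <sym>" contributes a symbol's address to the
+      // expression; the address is unknown at parse time, so a placeholder 0
+      // immediate is pushed and the symbol is recorded via setSymRef.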
+ PrevState = State;
+ switch (State) {
+ default:
+ ErrMsg = "unexpected offset operator expression";
+ return true;
+ case IES_PLUS:
+ case IES_INIT:
+ case IES_LBRAC:
+ if (setSymRef(Val, ID, ErrMsg))
+ return true;
+ OffsetOperator = true;
+ OffsetOperatorLoc = OffsetLoc;
+ State = IES_OFFSET;
+      // As we cannot yet resolve the actual value (offset), we retain the
+      // requested semantics by pushing a '0' onto the operand stack.
+ IC.pushOperand(IC_IMM);
+ if (ParsingInlineAsm) {
+ Info = IDInfo;
+ }
+ break;
+ }
+ return false;
+ }
};
bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
@@ -830,18 +871,21 @@ private:
std::unique_ptr<X86Operand> ParseOperand();
std::unique_ptr<X86Operand> ParseATTOperand();
std::unique_ptr<X86Operand> ParseIntelOperand();
- std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
+ bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
+ InlineAsmIdentifierInfo &Info, SMLoc &End);
bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start);
- bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
+ bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
+ bool &ParseError, SMLoc &End);
void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
SMLoc End);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
InlineAsmIdentifierInfo &Info,
- bool IsUnevaluatedOperand, SMLoc &End);
+ bool IsUnevaluatedOperand, SMLoc &End,
+ bool IsParsingOffsetOperator = false);
std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg,
const MCExpr *&Disp,
@@ -1112,9 +1156,10 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (RegNo == 0)
RegNo = MatchRegisterName(Tok.getString().lower());
- // The "flags" register cannot be referenced directly.
+ // The "flags" and "mxcsr" registers cannot be referenced directly.
  // Treat them as identifiers instead.
- if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
+ if (isParsingInlineAsm() && isParsingIntelSyntax() &&
+ (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
RegNo = 0;
if (!is64BitMode()) {
@@ -1408,26 +1453,44 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
// Some binary bitwise operators have named synonyms.
// Query a candidate string for being such a named operator,
// and if so, invoke the appropriate handler.
-bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
+bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
+ IntelExprStateMachine &SM,
+ bool &ParseError, SMLoc &End) {
// A named operator should be either lower or upper case, but not a mix
if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
return false;
- if (Name.equals_lower("not"))
+ if (Name.equals_lower("not")) {
SM.onNot();
- else if (Name.equals_lower("or"))
+ } else if (Name.equals_lower("or")) {
SM.onOr();
- else if (Name.equals_lower("shl"))
+ } else if (Name.equals_lower("shl")) {
SM.onLShift();
- else if (Name.equals_lower("shr"))
+ } else if (Name.equals_lower("shr")) {
SM.onRShift();
- else if (Name.equals_lower("xor"))
+ } else if (Name.equals_lower("xor")) {
SM.onXor();
- else if (Name.equals_lower("and"))
+ } else if (Name.equals_lower("and")) {
SM.onAnd();
- else if (Name.equals_lower("mod"))
+ } else if (Name.equals_lower("mod")) {
SM.onMod();
- else
+ } else if (Name.equals_lower("offset")) {
+ SMLoc OffsetLoc = getTok().getLoc();
+ const MCExpr *Val = nullptr;
+ StringRef ID;
+ InlineAsmIdentifierInfo Info;
+ ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
+ if (ParseError)
+ return true;
+ StringRef ErrMsg;
+ ParseError =
+ SM.onOffset(Val, OffsetLoc, ID, Info, isParsingInlineAsm(), ErrMsg);
+ if (ParseError)
+ return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
+ } else {
return false;
+ }
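+  // Editorial note: the "offset" branch has already advanced past its operand
+  // and set End, so only the single-token operators consume a token here.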
+ if (!Name.equals_lower("offset"))
+ End = consumeToken();
return true;
}
@@ -1470,8 +1533,12 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
break;
}
// Operator synonymous ("not", "or" etc.)
- if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM)))
+ bool ParseError = false;
+ if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
+ if (ParseError)
+ return true;
break;
+ }
// Symbol reference, when parsing assembly content
InlineAsmIdentifierInfo Info;
const MCExpr *Val;
@@ -1485,9 +1552,6 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
}
// MS InlineAsm operators (TYPE/LENGTH/SIZE)
if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
- if (OpKind == IOK_OFFSET)
- return Error(IdentLoc, "Dealing OFFSET operator as part of"
- "a compound immediate expression is yet to be supported");
if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
if (SM.onInteger(Val, ErrMsg))
return Error(IdentLoc, ErrMsg);
@@ -1589,9 +1653,9 @@ void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
SMLoc Loc = Start;
unsigned ExprLen = End.getPointer() - Start.getPointer();
// Skip everything before a symbol displacement (if we have one)
- if (SM.getSym()) {
+ if (SM.getSym() && !SM.isOffsetOperator()) {
StringRef SymName = SM.getSymName();
- if (unsigned Len = SymName.data() - Start.getPointer())
+ if (unsigned Len = SymName.data() - Start.getPointer())
InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
ExprLen = End.getPointer() - (SymName.data() + SymName.size());
@@ -1606,21 +1670,23 @@ void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
// Build an Intel Expression rewrite
StringRef BaseRegStr;
StringRef IndexRegStr;
+ StringRef OffsetNameStr;
if (SM.getBaseReg())
BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
if (SM.getIndexReg())
IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
+ if (SM.isOffsetOperator())
+ OffsetNameStr = SM.getSymName();
// Emit it
- IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(), SM.isMemExpr());
+ IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
+ SM.getImm(), SM.isMemExpr());
InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}
// Inline assembly may use variable names with namespace alias qualifiers.
-bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
- StringRef &Identifier,
- InlineAsmIdentifierInfo &Info,
- bool IsUnevaluatedOperand,
- SMLoc &End) {
+bool X86AsmParser::ParseIntelInlineAsmIdentifier(
+ const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
+ bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
MCAsmParser &Parser = getParser();
assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
Val = nullptr;
@@ -1653,9 +1719,13 @@ bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
Loc, false);
assert(InternalName.size() && "We should have an internal name here.");
- // Push a rewrite for replacing the identifier name with the internal name.
- InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
- InternalName);
+ // Push a rewrite for replacing the identifier name with the internal name,
+ // unless we are parsing the operand of an offset operator
+ if (!IsParsingOffsetOperator)
+ InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
+ InternalName);
+ else
+ Identifier = InternalName;
} else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
return false;
// Create the symbol reference.
@@ -1738,39 +1808,25 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End)
return false;
}
-/// Parse the 'offset' operator. This operator is used to specify the
-/// location rather then the content of a variable.
-std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
- SMLoc OffsetOfLoc = Tok.getLoc();
- Parser.Lex(); // Eat offset.
-
- const MCExpr *Val;
- InlineAsmIdentifierInfo Info;
- SMLoc Start = Tok.getLoc(), End;
- StringRef Identifier = Tok.getString();
- if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
- /*Unevaluated=*/false, End))
- return nullptr;
-
- void *Decl = nullptr;
- // FIXME: MS evaluates "offset <Constant>" to the underlying integral
- if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
- return ErrorOperand(Start, "offset operator cannot yet handle constants");
- else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
- Decl = Info.Var.Decl;
- // Don't emit the offset operator.
- InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
-
- // The offset operator will have an 'r' constraint, thus we need to create
- // register operand to ensure proper matching. Just pick a GPR based on
- // the size of a pointer.
- bool Parse32 = is32BitMode() || Code16GCC;
- unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
-
- return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
- OffsetOfLoc, Identifier, Decl);
+/// Parse the 'offset' operator.
+/// This operator is used to specify the location of a given operand.
+bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
+ InlineAsmIdentifierInfo &Info,
+ SMLoc &End) {
+ // Eat offset, mark start of identifier.
+ SMLoc Start = Lex().getLoc();
+ ID = getTok().getString();
+ if (!isParsingInlineAsm()) {
+ if ((getTok().isNot(AsmToken::Identifier) &&
+ getTok().isNot(AsmToken::String)) ||
+ getParser().parsePrimaryExpr(Val, End))
+ return Error(Start, "unexpected token!");
+ } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
+ return Error(Start, "unable to lookup expression");
+ } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
+ return Error(Start, "offset operator cannot yet handle constants");
+ }
+ return false;
}
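For context, a hedged sketch of the MS-style inline assembly this reworked path is meant to handle. The variable name gVar is illustrative, not from the patch; the point of the change (see the removed FIXME further down) is that offset now participates in normal immediate-expression parsing instead of being restricted to a stand-alone operand. Whether a given front end accepts the combined form depends on its MS inline-asm support:

  // Hypothetical MSVC-style x86 inline assembly (32-bit):
  int gVar;
  void f() {
    __asm {
      mov eax, offset gVar     // loads the address of gVar
      mov ebx, offset gVar + 4 // offset folded into a larger expression
    }
  }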
// Query a candidate string for being an Intel assembly operator
@@ -1780,7 +1836,6 @@ unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
.Cases("TYPE","type",IOK_TYPE)
.Cases("SIZE","size",IOK_SIZE)
.Cases("LENGTH","length",IOK_LENGTH)
- .Cases("OFFSET","offset",IOK_OFFSET)
.Default(IOK_INVALID);
}
@@ -1850,13 +1905,6 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
- // FIXME: Offset operator
- // Should be handled as part of immediate expression, as other operators
- // Currently, only supported as a stand-alone operand
- if (isParsingInlineAsm())
- if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET)
- return ParseIntelOffsetOfOperator();
-
// Parse optional Size directive.
unsigned Size;
if (ParseIntelMemoryOperandSize(Size))
@@ -1904,8 +1952,19 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
// RegNo != 0 specifies a valid segment register,
// and we are parsing a segment override
- if (!SM.isMemExpr() && !RegNo)
+ if (!SM.isMemExpr() && !RegNo) {
+ if (isParsingInlineAsm() && SM.isOffsetOperator()) {
+ const InlineAsmIdentifierInfo Info = SM.getIdentifierInfo();
+ if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
+ // Disp includes the address of a variable; make sure this is recorded
+ // for later handling.
+ return X86Operand::CreateImm(Disp, Start, End, SM.getSymName(),
+ Info.Var.Decl, Info.Var.IsGlobalLV);
+ }
+ }
+
return X86Operand::CreateImm(Disp, Start, End);
+ }
StringRef ErrMsg;
unsigned BaseReg = SM.getBaseReg();
@@ -3131,6 +3190,7 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
if (ForcedVEXEncoding != VEXEncoding_EVEX)
return Match_Unsupported;
+ break;
}
return Match_Success;
@@ -3879,7 +3939,7 @@ bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
}
// Force static initialization.
-extern "C" void LLVMInitializeX86AsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser() {
RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 3a76d023e640..d831a63b04ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -36,6 +36,7 @@ struct X86Operand final : public MCParsedAsmOperand {
StringRef SymName;
void *OpDecl;
bool AddressOf;
+ bool CallOperand;
struct TokOp {
const char *Data;
@@ -52,6 +53,7 @@ struct X86Operand final : public MCParsedAsmOperand {
struct ImmOp {
const MCExpr *Val;
+ bool LocalRef;
};
struct MemOp {
@@ -77,7 +79,7 @@ struct X86Operand final : public MCParsedAsmOperand {
};
X86Operand(KindTy K, SMLoc Start, SMLoc End)
- : Kind(K), StartLoc(Start), EndLoc(End) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), CallOperand(false) {}
StringRef getSymName() override { return SymName; }
void *getOpDecl() override { return OpDecl; }
@@ -104,8 +106,8 @@ struct X86Operand final : public MCParsedAsmOperand {
} else if (Val->getKind() == MCExpr::SymbolRef) {
if (auto *SRE = dyn_cast<MCSymbolRefExpr>(Val)) {
const MCSymbol &Sym = SRE->getSymbol();
- if (auto SymName = Sym.getName().data())
- OS << VName << SymName;
+ if (const char *SymNameStr = Sym.getName().data())
+ OS << VName << SymNameStr;
}
}
};
@@ -278,13 +280,9 @@ struct X86Operand final : public MCParsedAsmOperand {
return isImmUnsignedi8Value(CE->getValue());
}
- bool isOffsetOf() const override {
- return OffsetOfLoc.getPointer();
- }
+ bool isOffsetOfLocal() const override { return isImm() && Imm.LocalRef; }
- bool needAddressOf() const override {
- return AddressOf;
- }
+ bool needAddressOf() const override { return AddressOf; }
bool isMem() const override { return Kind == Memory; }
bool isMemUnsized() const {
@@ -613,9 +611,16 @@ struct X86Operand final : public MCParsedAsmOperand {
}
static std::unique_ptr<X86Operand> CreateImm(const MCExpr *Val,
- SMLoc StartLoc, SMLoc EndLoc) {
+ SMLoc StartLoc, SMLoc EndLoc,
+ StringRef SymName = StringRef(),
+ void *OpDecl = nullptr,
+ bool GlobalRef = true) {
auto Res = std::make_unique<X86Operand>(Immediate, StartLoc, EndLoc);
- Res->Imm.Val = Val;
+ Res->Imm.Val = Val;
+ Res->Imm.LocalRef = !GlobalRef;
+ Res->SymName = SymName;
+ Res->OpDecl = OpDecl;
+ Res->AddressOf = true;
return Res;
}
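A brief usage sketch of the extended factory (the names Val, Start, End, and Decl are assumed to be in scope and are not from the patch). Existing call sites keep compiling because the new parameters are defaulted, while the offset-operator path passes the extra identifier information:

  auto Plain = X86Operand::CreateImm(Val, Start, End); // defaults: GlobalRef=true
  auto Local = X86Operand::CreateImm(Val, Start, End, /*SymName=*/"gVar",
                                     /*OpDecl=*/Decl, /*GlobalRef=*/false);
  // Local->isOffsetOfLocal() is now true, since Imm.LocalRef = !GlobalRef.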
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 9a635bbe5f85..ea8c606d1564 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -84,6 +84,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -92,24 +93,1552 @@ using namespace llvm::X86Disassembler;
#define DEBUG_TYPE "x86-disassembler"
-void llvm::X86Disassembler::Debug(const char *file, unsigned line,
- const char *s) {
- dbgs() << file << ":" << line << ": " << s;
+#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
+
+// Specifies whether a ModR/M byte is needed and (if so) which
+// instruction each possible value of the ModR/M byte corresponds to. Once
+// this information is known, we have narrowed down to a single instruction.
+struct ModRMDecision {
+ uint8_t modrm_type;
+ uint16_t instructionIDs;
+};
+
+// Specifies which set of ModR/M->instruction tables to look at
+// given a particular opcode.
+struct OpcodeDecision {
+ ModRMDecision modRMDecisions[256];
+};
+
+// Specifies which opcode->instruction tables to look at given
+// a particular context (set of attributes). Since there are many possible
+// contexts, the decoder first uses CONTEXTS_SYM to determine which context
+// applies given a specific set of attributes. Hence there are only IC_max
+// entries in this table, rather than 2^(ATTR_max).
+struct ContextDecision {
+ OpcodeDecision opcodeDecisions[IC_max];
+};
+
+#include "X86GenDisassemblerTables.inc"
+
+static InstrUID decode(OpcodeType type, InstructionContext insnContext,
+ uint8_t opcode, uint8_t modRM) {
+ const struct ModRMDecision *dec;
+
+ switch (type) {
+ case ONEBYTE:
+ dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case TWOBYTE:
+ dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_38:
+ dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_3A:
+ dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case XOP8_MAP:
+ dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case XOP9_MAP:
+ dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case XOPA_MAP:
+ dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEDNOW_MAP:
+ dec =
+ &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ }
+
+ switch (dec->modrm_type) {
+ default:
+ llvm_unreachable("Corrupt table! Unknown modrm_type");
+ return 0;
+ case MODRM_ONEENTRY:
+ return modRMTable[dec->instructionIDs];
+ case MODRM_SPLITRM:
+ if (modFromModRM(modRM) == 0x3)
+ return modRMTable[dec->instructionIDs + 1];
+ return modRMTable[dec->instructionIDs];
+ case MODRM_SPLITREG:
+ if (modFromModRM(modRM) == 0x3)
+ return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
+ return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
+ case MODRM_SPLITMISC:
+ if (modFromModRM(modRM) == 0x3)
+ return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
+ return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
+ case MODRM_FULL:
+ return modRMTable[dec->instructionIDs + modRM];
+ }
}
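To make the table indexing concrete, a small self-contained sketch of the MODRM_SPLITREG case above (the table offset is illustrative, not a value from the generated tables):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint8_t modRM = 0xD8;          // mod = 0b11, reg = 0b011, rm = 0b000
    uint16_t instructionIDs = 100; // hypothetical start offset into modRMTable
    // MODRM_SPLITREG: 8 entries indexed by reg, plus a parallel block of 8
    // entries used when mod == 0b11 (register operand).
    uint16_t idx = instructionIDs + ((modRM & 0x38) >> 3) + 8;
    assert(idx == 111);
    return 0;
  }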
-StringRef llvm::X86Disassembler::GetInstrName(unsigned Opcode,
- const void *mii) {
- const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
- return MII->getName(Opcode);
+static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
+ uint64_t offset = insn->readerCursor - insn->startLocation;
+ if (offset >= insn->bytes.size())
+ return true;
+ byte = insn->bytes[offset];
+ return false;
}
-#define debug(s) LLVM_DEBUG(Debug(__FILE__, __LINE__, s));
+template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
+ auto r = insn->bytes;
+ uint64_t offset = insn->readerCursor - insn->startLocation;
+ if (offset + sizeof(T) > r.size())
+ return true;
+ T ret = 0;
+ for (unsigned i = 0; i < sizeof(T); ++i)
+ ret |= (uint64_t)r[offset + i] << (i * 8);
+ ptr = ret;
+ insn->readerCursor += sizeof(T);
+ return false;
+}
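consume() accumulates sizeof(T) bytes little-endian and advances the cursor, returning true on failure. A standalone sketch of the same logic, with hypothetical names outside the patch:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Mirrors consume(): returns true on failure, like the helper above.
  template <typename T>
  static bool consumeLE(const std::vector<uint8_t> &Bytes, uint64_t &Cursor,
                        T &Out) {
    if (Cursor + sizeof(T) > Bytes.size())
      return true;
    T Ret = 0;
    for (unsigned I = 0; I < sizeof(T); ++I)
      Ret |= (T)((uint64_t)Bytes[Cursor + I] << (I * 8));
    Out = Ret;
    Cursor += sizeof(T);
    return false;
  }

  int main() {
    std::vector<uint8_t> Bytes{0x78, 0x56, 0x34, 0x12};
    uint64_t Cursor = 0;
    uint32_t Imm = 0;
    assert(!consumeLE(Bytes, Cursor, Imm) && Imm == 0x12345678u);
    return 0;
  }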
+
+static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
+ return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
+}
+
+// Consumes all of an instruction's prefix bytes, and marks the
+// instruction as having them. Also sets the instruction's default operand,
+// address, and other relevant data sizes to report operands correctly.
+//
+// insn must not be empty.
+static int readPrefixes(struct InternalInstruction *insn) {
+ bool isPrefix = true;
+ uint8_t byte = 0;
+ uint8_t nextByte;
+
+ LLVM_DEBUG(dbgs() << "readPrefixes()");
+
+ while (isPrefix) {
+ // If we fail reading prefixes, just stop here and let the opcode reader
+ // deal with it.
+ if (consume(insn, byte))
+ break;
+
+ // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
+ // break and let it be disassembled as a normal "instruction".
+ if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
+ break;
+
+ if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
+ // If the byte is 0xf2 or 0xf3, and any of the following conditions are
+ // met:
+ // - it is followed by a LOCK (0xf0) prefix
+ // - it is followed by an xchg instruction
+      // then it should be disassembled as an xacquire/xrelease, not repne/rep.
+ if (((nextByte == 0xf0) ||
+ ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
+ insn->xAcquireRelease = true;
+ if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
+ break;
+ }
+ // Also if the byte is 0xf3, and the following condition is met:
+ // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
+ // "mov mem, imm" (opcode 0xc6/0xc7) instructions.
+ // then it should be disassembled as an xrelease not rep.
+ if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
+ nextByte == 0xc6 || nextByte == 0xc7)) {
+ insn->xAcquireRelease = true;
+ break;
+ }
+ if (isREX(insn, nextByte)) {
+ uint8_t nnextByte;
+ // Go to REX prefix after the current one
+ if (consume(insn, nnextByte))
+ return -1;
+ // We should be able to read next byte after REX prefix
+ if (peek(insn, nnextByte))
+ return -1;
+ --insn->readerCursor;
+ }
+ }
+
+ switch (byte) {
+ case 0xf0: // LOCK
+ insn->hasLockPrefix = true;
+ break;
+ case 0xf2: // REPNE/REPNZ
+ case 0xf3: { // REP or REPE/REPZ
+ uint8_t nextByte;
+ if (peek(insn, nextByte))
+ break;
+ // TODO:
+ // 1. There could be several 0x66
+    // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
+    //    it's not a mandatory prefix
+    // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
+    //    0x0f exactly after it to be a mandatory prefix
+ if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
+        // The last 0xf2/0xf3 is the mandatory prefix
+ insn->mandatoryPrefix = byte;
+ insn->repeatPrefix = byte;
+ break;
+ }
+ case 0x2e: // CS segment override -OR- Branch not taken
+ insn->segmentOverride = SEG_OVERRIDE_CS;
+ break;
+ case 0x36: // SS segment override -OR- Branch taken
+ insn->segmentOverride = SEG_OVERRIDE_SS;
+ break;
+ case 0x3e: // DS segment override
+ insn->segmentOverride = SEG_OVERRIDE_DS;
+ break;
+ case 0x26: // ES segment override
+ insn->segmentOverride = SEG_OVERRIDE_ES;
+ break;
+ case 0x64: // FS segment override
+ insn->segmentOverride = SEG_OVERRIDE_FS;
+ break;
+ case 0x65: // GS segment override
+ insn->segmentOverride = SEG_OVERRIDE_GS;
+ break;
+    case 0x66: { // Operand-size override
+ uint8_t nextByte;
+ insn->hasOpSize = true;
+ if (peek(insn, nextByte))
+ break;
+      // 0x66 can't overwrite an existing mandatory prefix and should be ignored
+ if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
+ insn->mandatoryPrefix = byte;
+ break;
+ }
+ case 0x67: // Address-size override
+ insn->hasAdSize = true;
+ break;
+ default: // Not a prefix byte
+ isPrefix = false;
+ break;
+ }
+
+ if (isPrefix)
+ LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
+ }
+
+ insn->vectorExtensionType = TYPE_NO_VEX_XOP;
+
+ if (byte == 0x62) {
+ uint8_t byte1, byte2;
+ if (consume(insn, byte1)) {
+ LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
+ return -1;
+ }
+
+ if (peek(insn, byte2)) {
+ LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
+ return -1;
+ }
+
+ if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
+ ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
+ insn->vectorExtensionType = TYPE_EVEX;
+ } else {
+ --insn->readerCursor; // unconsume byte1
+ --insn->readerCursor; // unconsume byte
+ }
+
+ if (insn->vectorExtensionType == TYPE_EVEX) {
+ insn->vectorExtensionPrefix[0] = byte;
+ insn->vectorExtensionPrefix[1] = byte1;
+ if (consume(insn, insn->vectorExtensionPrefix[2])) {
+ LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
+ return -1;
+ }
+ if (consume(insn, insn->vectorExtensionPrefix[3])) {
+ LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
+ return -1;
+ }
+
+ // We simulate the REX prefix for simplicity's sake
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40 |
+ (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
+ (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
+ (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
+ (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
+ }
+
+ LLVM_DEBUG(
+ dbgs() << format(
+ "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
+ insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
+ insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
+ }
+ } else if (byte == 0xc4) {
+ uint8_t byte1;
+ if (peek(insn, byte1)) {
+ LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
+ insn->vectorExtensionType = TYPE_VEX_3B;
+ else
+ --insn->readerCursor;
+
+ if (insn->vectorExtensionType == TYPE_VEX_3B) {
+ insn->vectorExtensionPrefix[0] = byte;
+ consume(insn, insn->vectorExtensionPrefix[1]);
+ consume(insn, insn->vectorExtensionPrefix[2]);
+
+ // We simulate the REX prefix for simplicity's sake
+
+ if (insn->mode == MODE_64BIT)
+ insn->rexPrefix = 0x40 |
+ (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
+ (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
+ (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
+ (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
+
+ LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
+ insn->vectorExtensionPrefix[0],
+ insn->vectorExtensionPrefix[1],
+ insn->vectorExtensionPrefix[2]));
+ }
+ } else if (byte == 0xc5) {
+ uint8_t byte1;
+ if (peek(insn, byte1)) {
+ LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
+ insn->vectorExtensionType = TYPE_VEX_2B;
+ else
+ --insn->readerCursor;
+
+ if (insn->vectorExtensionType == TYPE_VEX_2B) {
+ insn->vectorExtensionPrefix[0] = byte;
+ consume(insn, insn->vectorExtensionPrefix[1]);
+
+ if (insn->mode == MODE_64BIT)
+ insn->rexPrefix =
+ 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
+
+ switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ insn->hasOpSize = true;
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
+ insn->vectorExtensionPrefix[0],
+ insn->vectorExtensionPrefix[1]));
+ }
+ } else if (byte == 0x8f) {
+ uint8_t byte1;
+ if (peek(insn, byte1)) {
+ LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
+ return -1;
+ }
+
+ if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
+ insn->vectorExtensionType = TYPE_XOP;
+ else
+ --insn->readerCursor;
+
+ if (insn->vectorExtensionType == TYPE_XOP) {
+ insn->vectorExtensionPrefix[0] = byte;
+ consume(insn, insn->vectorExtensionPrefix[1]);
+ consume(insn, insn->vectorExtensionPrefix[2]);
+
+ // We simulate the REX prefix for simplicity's sake
+
+ if (insn->mode == MODE_64BIT)
+ insn->rexPrefix = 0x40 |
+ (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
+ (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
+ (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
+ (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
+
+ switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ insn->hasOpSize = true;
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
+ insn->vectorExtensionPrefix[0],
+ insn->vectorExtensionPrefix[1],
+ insn->vectorExtensionPrefix[2]));
+ }
+ } else if (isREX(insn, byte)) {
+ if (peek(insn, nextByte))
+ return -1;
+ insn->rexPrefix = byte;
+ LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
+ } else
+ --insn->readerCursor;
+
+ if (insn->mode == MODE_16BIT) {
+ insn->registerSize = (insn->hasOpSize ? 4 : 2);
+ insn->addressSize = (insn->hasAdSize ? 4 : 2);
+ insn->displacementSize = (insn->hasAdSize ? 4 : 2);
+ insn->immediateSize = (insn->hasOpSize ? 4 : 2);
+ } else if (insn->mode == MODE_32BIT) {
+ insn->registerSize = (insn->hasOpSize ? 2 : 4);
+ insn->addressSize = (insn->hasAdSize ? 2 : 4);
+ insn->displacementSize = (insn->hasAdSize ? 2 : 4);
+ insn->immediateSize = (insn->hasOpSize ? 2 : 4);
+ } else if (insn->mode == MODE_64BIT) {
+ if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
+ insn->registerSize = 8;
+ insn->addressSize = (insn->hasAdSize ? 4 : 8);
+ insn->displacementSize = 4;
+ insn->immediateSize = 4;
+ } else {
+ insn->registerSize = (insn->hasOpSize ? 2 : 4);
+ insn->addressSize = (insn->hasAdSize ? 4 : 8);
+ insn->displacementSize = (insn->hasOpSize ? 2 : 4);
+ insn->immediateSize = (insn->hasOpSize ? 2 : 4);
+ }
+ }
+
+ return 0;
+}
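A tiny sanity check of the 32-bit-mode defaults above (standalone and hypothetical): an operand-size prefix (0x66) halves the register and immediate widths while leaving the address width alone, so "66 B8 34 12" decodes as mov ax, 0x1234:

  #include <cassert>

  int main() {
    // 32-bit mode with a 0x66 prefix, mirroring the MODE_32BIT branch above.
    bool hasOpSize = true, hasAdSize = false;
    int registerSize = hasOpSize ? 2 : 4;
    int addressSize = hasAdSize ? 2 : 4;
    int immediateSize = hasOpSize ? 2 : 4;
    assert(registerSize == 2 && addressSize == 4 && immediateSize == 2);
    return 0;
  }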
+
+// Consumes the SIB byte to determine addressing information.
+static int readSIB(struct InternalInstruction *insn) {
+ SIBBase sibBaseBase = SIB_BASE_NONE;
+ uint8_t index, base;
+
+ LLVM_DEBUG(dbgs() << "readSIB()");
+ switch (insn->addressSize) {
+ case 2:
+ default:
+ llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
+ case 4:
+ insn->sibIndexBase = SIB_INDEX_EAX;
+ sibBaseBase = SIB_BASE_EAX;
+ break;
+ case 8:
+ insn->sibIndexBase = SIB_INDEX_RAX;
+ sibBaseBase = SIB_BASE_RAX;
+ break;
+ }
+
+ if (consume(insn, insn->sib))
+ return -1;
+
+ index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+
+ if (index == 0x4) {
+ insn->sibIndex = SIB_INDEX_NONE;
+ } else {
+ insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
+ }
+
+ insn->sibScale = 1 << scaleFromSIB(insn->sib);
+
+ base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
+
+ switch (base) {
+ case 0x5:
+ case 0xd:
+ switch (modFromModRM(insn->modRM)) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = SIB_BASE_NONE;
+ break;
+ case 0x1:
+ insn->eaDisplacement = EA_DISP_8;
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
+ break;
+ case 0x2:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
+ break;
+ default:
+ llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
+ }
+ break;
+ default:
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
+ break;
+ }
+
+ return 0;
+}
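To make the SIB field split concrete, a standalone worked example (the byte value 0xB5 is illustrative):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint8_t sib = 0xB5;               // 0b10'110'101
    uint8_t scale = 1 << (sib >> 6);  // scale bits 7-6 -> 1 << 2 = 4
    uint8_t index = (sib >> 3) & 0x7; // index bits 5-3 -> 0b110 (ESI)
    uint8_t base = sib & 0x7;         // base bits 2-0  -> 0b101
    assert(scale == 4 && index == 6 && base == 5);
    // With base == 0b101 and ModRM.mod == 0, readSIB() reports no base
    // register and a 32-bit displacement (EA_DISP_32).
    return 0;
  }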
+
+static int readDisplacement(struct InternalInstruction *insn) {
+ int8_t d8;
+ int16_t d16;
+ int32_t d32;
+ LLVM_DEBUG(dbgs() << "readDisplacement()");
+
+ insn->displacementOffset = insn->readerCursor - insn->startLocation;
+ switch (insn->eaDisplacement) {
+ case EA_DISP_NONE:
+ break;
+ case EA_DISP_8:
+ if (consume(insn, d8))
+ return -1;
+ insn->displacement = d8;
+ break;
+ case EA_DISP_16:
+ if (consume(insn, d16))
+ return -1;
+ insn->displacement = d16;
+ break;
+ case EA_DISP_32:
+ if (consume(insn, d32))
+ return -1;
+ insn->displacement = d32;
+ break;
+ }
+
+ return 0;
+}
+
+// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
+static int readModRM(struct InternalInstruction *insn) {
+ uint8_t mod, rm, reg, evexrm;
+ LLVM_DEBUG(dbgs() << "readModRM()");
+
+ if (insn->consumedModRM)
+ return 0;
+
+ if (consume(insn, insn->modRM))
+ return -1;
+ insn->consumedModRM = true;
+
+ mod = modFromModRM(insn->modRM);
+ rm = rmFromModRM(insn->modRM);
+ reg = regFromModRM(insn->modRM);
+
+ // This goes by insn->registerSize to pick the correct register, which messes
+ // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
+ // fixupReg().
+ switch (insn->registerSize) {
+ case 2:
+ insn->regBase = MODRM_REG_AX;
+ insn->eaRegBase = EA_REG_AX;
+ break;
+ case 4:
+ insn->regBase = MODRM_REG_EAX;
+ insn->eaRegBase = EA_REG_EAX;
+ break;
+ case 8:
+ insn->regBase = MODRM_REG_RAX;
+ insn->eaRegBase = EA_REG_RAX;
+ break;
+ }
+
+ reg |= rFromREX(insn->rexPrefix) << 3;
+ rm |= bFromREX(insn->rexPrefix) << 3;
+
+ evexrm = 0;
+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
+ reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
+ evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
+ }
+
+ insn->reg = (Reg)(insn->regBase + reg);
+
+ switch (insn->addressSize) {
+ case 2: {
+ EABase eaBaseBase = EA_BASE_BX_SI;
+
+ switch (mod) {
+ case 0x0:
+ if (rm == 0x6) {
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ } else {
+ insn->eaBase = (EABase)(eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_NONE;
+ }
+ break;
+ case 0x1:
+ insn->eaBase = (EABase)(eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_8;
+ insn->displacementSize = 1;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x2:
+ insn->eaBase = (EABase)(eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_16;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ case 0x3:
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ }
+ case 4:
+ case 8: {
+ EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
+
+ switch (mod) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
+ // In determining whether RIP-relative mode is used (rm=5),
+ // or whether a SIB byte is present (rm=4),
+ // the extension bits (REX.b and EVEX.x) are ignored.
+ switch (rm & 7) {
+ case 0x4: // SIB byte is present
+ insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
+ if (readSIB(insn) || readDisplacement(insn))
+ return -1;
+ break;
+ case 0x5: // RIP-relative
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_32;
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(eaBaseBase + rm);
+ break;
+ }
+ break;
+ case 0x1:
+ insn->displacementSize = 1;
+ LLVM_FALLTHROUGH;
+ case 0x2:
+ insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
+ switch (rm & 7) {
+ case 0x4: // SIB byte is present
+ insn->eaBase = EA_BASE_sib;
+ if (readSIB(insn) || readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(eaBaseBase + rm);
+ if (readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 0x3:
+ insn->eaDisplacement = EA_DISP_NONE;
+ insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
+ break;
+ }
+ break;
+ }
+ } // switch (insn->addressSize)
+
+ return 0;
+}
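And the corresponding ModR/M split, again standalone and illustrative:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint8_t modRM = 0x44;           // 0b01'000'100
    uint8_t mod = modRM >> 6;       // 0b01  -> 8-bit displacement follows
    uint8_t reg = (modRM >> 3) & 7; // 0b000 -> EAX (before fixupReg())
    uint8_t rm = modRM & 7;         // 0b100 -> a SIB byte is present
    assert(mod == 1 && reg == 0 && rm == 4);
    return 0;
  }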
+
+#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
+ static uint16_t name(struct InternalInstruction *insn, OperandType type, \
+ uint8_t index, uint8_t *valid) { \
+ *valid = 1; \
+ switch (type) { \
+ default: \
+ debug("Unhandled register type"); \
+ *valid = 0; \
+ return 0; \
+ case TYPE_Rv: \
+ return base + index; \
+ case TYPE_R8: \
+ index &= mask; \
+ if (index > 0xf) \
+ *valid = 0; \
+ if (insn->rexPrefix && index >= 4 && index <= 7) { \
+ return prefix##_SPL + (index - 4); \
+ } else { \
+ return prefix##_AL + index; \
+ } \
+ case TYPE_R16: \
+ index &= mask; \
+ if (index > 0xf) \
+ *valid = 0; \
+ return prefix##_AX + index; \
+ case TYPE_R32: \
+ index &= mask; \
+ if (index > 0xf) \
+ *valid = 0; \
+ return prefix##_EAX + index; \
+ case TYPE_R64: \
+ index &= mask; \
+ if (index > 0xf) \
+ *valid = 0; \
+ return prefix##_RAX + index; \
+ case TYPE_ZMM: \
+ return prefix##_ZMM0 + index; \
+ case TYPE_YMM: \
+ return prefix##_YMM0 + index; \
+ case TYPE_XMM: \
+ return prefix##_XMM0 + index; \
+ case TYPE_VK: \
+ index &= 0xf; \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_K0 + index; \
+ case TYPE_VK_PAIR: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_K0_K1 + (index / 2); \
+ case TYPE_MM64: \
+ return prefix##_MM0 + (index & 0x7); \
+ case TYPE_SEGMENTREG: \
+ if ((index & 7) > 5) \
+ *valid = 0; \
+ return prefix##_ES + (index & 7); \
+ case TYPE_DEBUGREG: \
+ return prefix##_DR0 + index; \
+ case TYPE_CONTROLREG: \
+ return prefix##_CR0 + index; \
+ case TYPE_BNDR: \
+ if (index > 3) \
+ *valid = 0; \
+ return prefix##_BND0 + index; \
+ case TYPE_MVSIBX: \
+ return prefix##_XMM0 + index; \
+ case TYPE_MVSIBY: \
+ return prefix##_YMM0 + index; \
+ case TYPE_MVSIBZ: \
+ return prefix##_ZMM0 + index; \
+ } \
+ }
+
+// Consult an operand type to determine the meaning of the reg or R/M field.
+// If the operand is an XMM operand, for example, the register should be XMM0
+// instead of AX, which readModRM() would otherwise report.
+//
+// @param insn - The instruction containing the operand.
+// @param type - The operand type.
+// @param index - The existing value of the field as reported by readModRM().
+// @param valid - The address of a uint8_t. The target is set to 1 if the
+// field is valid for the register class; 0 if not.
+// @return - The proper value.
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
+GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
+
+// Consult an operand specifier to determine which of the fixup*Value functions
+// to use in correcting readModRM()'s interpretation.
+//
+// @param insn - See fixup*Value().
+// @param op - The operand specifier.
+// @return - 0 if fixup was successful; -1 if the register returned was
+// invalid for its class.
+static int fixupReg(struct InternalInstruction *insn,
+ const struct OperandSpecifier *op) {
+ uint8_t valid;
+ LLVM_DEBUG(dbgs() << "fixupReg()");
+
+ switch ((OperandEncoding)op->encoding) {
+ default:
+ debug("Expected a REG or R/M encoding in fixupReg");
+ return -1;
+ case ENCODING_VVVV:
+ insn->vvvv =
+ (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
+ if (!valid)
+ return -1;
+ break;
+ case ENCODING_REG:
+ insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,
+ insn->reg - insn->regBase, &valid);
+ if (!valid)
+ return -1;
+ break;
+ CASE_ENCODING_RM:
+ if (insn->eaBase >= insn->eaRegBase) {
+ insn->eaBase = (EABase)fixupRMValue(
+ insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
+ if (!valid)
+ return -1;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+// Read the opcode (except the ModR/M byte in the case of extended or escape
+// opcodes).
+static bool readOpcode(struct InternalInstruction *insn) {
+ uint8_t current;
+ LLVM_DEBUG(dbgs() << "readOpcode()");
+
+ insn->opcodeType = ONEBYTE;
+ if (insn->vectorExtensionType == TYPE_EVEX) {
+ switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
+ default:
+ LLVM_DEBUG(
+ dbgs() << format("Unhandled mm field for instruction (0x%hhx)",
+ mmFromEVEX2of4(insn->vectorExtensionPrefix[1])));
+ return true;
+ case VEX_LOB_0F:
+ insn->opcodeType = TWOBYTE;
+ return consume(insn, insn->opcode);
+ case VEX_LOB_0F38:
+ insn->opcodeType = THREEBYTE_38;
+ return consume(insn, insn->opcode);
+ case VEX_LOB_0F3A:
+ insn->opcodeType = THREEBYTE_3A;
+ return consume(insn, insn->opcode);
+ }
+ } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
+ switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
+ default:
+ LLVM_DEBUG(
+ dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
+ mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
+ return true;
+ case VEX_LOB_0F:
+ insn->opcodeType = TWOBYTE;
+ return consume(insn, insn->opcode);
+ case VEX_LOB_0F38:
+ insn->opcodeType = THREEBYTE_38;
+ return consume(insn, insn->opcode);
+ case VEX_LOB_0F3A:
+ insn->opcodeType = THREEBYTE_3A;
+ return consume(insn, insn->opcode);
+ }
+ } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
+ insn->opcodeType = TWOBYTE;
+ return consume(insn, insn->opcode);
+ } else if (insn->vectorExtensionType == TYPE_XOP) {
+ switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
+ default:
+ LLVM_DEBUG(
+ dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
+          mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])));
+ return true;
+ case XOP_MAP_SELECT_8:
+ insn->opcodeType = XOP8_MAP;
+ return consume(insn, insn->opcode);
+ case XOP_MAP_SELECT_9:
+ insn->opcodeType = XOP9_MAP;
+ return consume(insn, insn->opcode);
+ case XOP_MAP_SELECT_A:
+ insn->opcodeType = XOPA_MAP;
+ return consume(insn, insn->opcode);
+ }
+ }
+
+ if (consume(insn, current))
+ return true;
+
+ if (current == 0x0f) {
+ LLVM_DEBUG(
+ dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
+ if (consume(insn, current))
+ return true;
+
+ if (current == 0x38) {
+ LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
+ current));
+ if (consume(insn, current))
+ return true;
+
+ insn->opcodeType = THREEBYTE_38;
+ } else if (current == 0x3a) {
+ LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
+ current));
+ if (consume(insn, current))
+ return true;
+
+ insn->opcodeType = THREEBYTE_3A;
+ } else if (current == 0x0f) {
+ LLVM_DEBUG(
+ dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
+
+ // Consume operands before the opcode to comply with the 3DNow encoding
+ if (readModRM(insn))
+ return true;
+
+ if (consume(insn, current))
+ return true;
+
+ insn->opcodeType = THREEDNOW_MAP;
+ } else {
+ LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
+ insn->opcodeType = TWOBYTE;
+ }
+ } else if (insn->mandatoryPrefix)
+    // An opcode with a mandatory prefix must start with an opcode escape.
+    // If not, the prefix was a legacy repeat prefix.
+ insn->mandatoryPrefix = 0;
+
+ // At this point we have consumed the full opcode.
+ // Anything we consume from here on must be unconsumed.
+ insn->opcode = current;
+
+ return false;
+}
+
+// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
+static bool is16BitEquivalent(const char *orig, const char *equiv) {
+ for (int i = 0;; i++) {
+ if (orig[i] == '\0' && equiv[i] == '\0')
+ return true;
+ if (orig[i] == '\0' || equiv[i] == '\0')
+ return false;
+ if (orig[i] != equiv[i]) {
+ if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
+ continue;
+ if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
+ continue;
+ if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
+ continue;
+ return false;
+ }
+ }
+}
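A quick check of the name-equivalence rules (this assumes it is compiled in the same translation unit as the static helper above; the opcode names mimic LLVM's scheme but are illustrative here):

  #include <cassert>

  int main() {
    // "MOV32rr" vs "MOV16rr": '3' pairs with '1' and '2' pairs with '6',
    // so the two names are treated as 16/32-bit equivalents.
    assert(is16BitEquivalent("MOV32rr", "MOV16rr"));
    assert(!is16BitEquivalent("MOV32rr", "ADD16rr"));
    return 0;
  }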
+
+// Determine whether this instruction is a 64-bit instruction.
+static bool is64Bit(const char *name) {
+ for (int i = 0;; ++i) {
+ if (name[i] == '\0')
+ return false;
+ if (name[i] == '6' && name[i + 1] == '4')
+ return true;
+ }
+}
+
+// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
+// for extended and escape opcodes, and using a supplied attribute mask.
+static int getInstructionIDWithAttrMask(uint16_t *instructionID,
+ struct InternalInstruction *insn,
+ uint16_t attrMask) {
+ auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
+ const ContextDecision *decision;
+ switch (insn->opcodeType) {
+ case ONEBYTE:
+ decision = &ONEBYTE_SYM;
+ break;
+ case TWOBYTE:
+ decision = &TWOBYTE_SYM;
+ break;
+ case THREEBYTE_38:
+ decision = &THREEBYTE38_SYM;
+ break;
+ case THREEBYTE_3A:
+ decision = &THREEBYTE3A_SYM;
+ break;
+ case XOP8_MAP:
+ decision = &XOP8_MAP_SYM;
+ break;
+ case XOP9_MAP:
+ decision = &XOP9_MAP_SYM;
+ break;
+ case XOPA_MAP:
+ decision = &XOPA_MAP_SYM;
+ break;
+ case THREEDNOW_MAP:
+ decision = &THREEDNOW_MAP_SYM;
+ break;
+ }
+
+ if (decision->opcodeDecisions[insnCtx]
+ .modRMDecisions[insn->opcode]
+ .modrm_type != MODRM_ONEENTRY) {
+ if (readModRM(insn))
+ return -1;
+ *instructionID =
+ decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
+ } else {
+ *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
+ }
+
+ return 0;
+}
+
+// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
+// for extended and escape opcodes. Determines the attributes and context for
+// the instruction before doing so.
+static int getInstructionID(struct InternalInstruction *insn,
+ const MCInstrInfo *mii) {
+ uint16_t attrMask;
+ uint16_t instructionID;
+
+ LLVM_DEBUG(dbgs() << "getID()");
+
+ attrMask = ATTR_NONE;
+
+ if (insn->mode == MODE_64BIT)
+ attrMask |= ATTR_64BIT;
+
+ if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
+ attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
+
+ if (insn->vectorExtensionType == TYPE_EVEX) {
+ switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ attrMask |= ATTR_EVEXKZ;
+ if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ attrMask |= ATTR_EVEXB;
+ if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ attrMask |= ATTR_EVEXK;
+ if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ attrMask |= ATTR_VEXL;
+ if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ attrMask |= ATTR_EVEXL2;
+ } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
+ switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
+ switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
+ attrMask |= ATTR_VEXL;
+ } else if (insn->vectorExtensionType == TYPE_XOP) {
+ switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ } else {
+ return -1;
+ }
+ } else if (!insn->mandatoryPrefix) {
+    // If we don't have a mandatory prefix, we should use legacy prefixes here
+ if (insn->hasOpSize && (insn->mode != MODE_16BIT))
+ attrMask |= ATTR_OPSIZE;
+ if (insn->hasAdSize)
+ attrMask |= ATTR_ADSIZE;
+ if (insn->opcodeType == ONEBYTE) {
+ if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
+ // Special support for PAUSE
+ attrMask |= ATTR_XS;
+ } else {
+ if (insn->repeatPrefix == 0xf2)
+ attrMask |= ATTR_XD;
+ else if (insn->repeatPrefix == 0xf3)
+ attrMask |= ATTR_XS;
+ }
+ } else {
+ switch (insn->mandatoryPrefix) {
+ case 0xf2:
+ attrMask |= ATTR_XD;
+ break;
+ case 0xf3:
+ attrMask |= ATTR_XS;
+ break;
+ case 0x66:
+ if (insn->mode != MODE_16BIT)
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case 0x67:
+ attrMask |= ATTR_ADSIZE;
+ break;
+ }
+ }
+
+ if (insn->rexPrefix & 0x08) {
+ attrMask |= ATTR_REXW;
+ attrMask &= ~ATTR_ADSIZE;
+ }
+
+ if (insn->mode == MODE_16BIT) {
+ // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
+ // of the AdSize prefix is inverted w.r.t. 32-bit mode.
+ if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
+ attrMask ^= ATTR_ADSIZE;
+    // If we're in 16-bit mode, this is one of the relative jumps, and the
+    // opsize prefix isn't present, we need to force the opsize attribute
+    // since the prefix is inverted relative to 32-bit mode.
+ if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
+ (insn->opcode == 0xE8 || insn->opcode == 0xE9))
+ attrMask |= ATTR_OPSIZE;
+
+ if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
+ insn->opcode >= 0x80 && insn->opcode <= 0x8F)
+ attrMask |= ATTR_OPSIZE;
+ }
+
+ if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
+ return -1;
+
+ // The following clauses compensate for limitations of the tables.
+
+ if (insn->mode != MODE_64BIT &&
+ insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
+    // The tables can't distinguish between cases where the W-bit is used to
+    // select register size and cases where it's a required part of the opcode.
+ if ((insn->vectorExtensionType == TYPE_EVEX &&
+ wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
+ (insn->vectorExtensionType == TYPE_VEX_3B &&
+ wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
+ (insn->vectorExtensionType == TYPE_XOP &&
+ wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
+
+ uint16_t instructionIDWithREXW;
+ if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
+ attrMask | ATTR_REXW)) {
+ insn->instructionID = instructionID;
+ insn->spec = &INSTRUCTIONS_SYM[instructionID];
+ return 0;
+ }
+
+ auto SpecName = mii->getName(instructionIDWithREXW);
+      // If it's not a 64-bit instruction, switch the opcode.
+ if (!is64Bit(SpecName.data())) {
+ insn->instructionID = instructionIDWithREXW;
+ insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
+ return 0;
+ }
+ }
+ }
+
+ // Absolute moves, umonitor, and movdir64b need special handling.
+  // -For 16-bit mode, because the meanings of the AdSize and OpSize prefixes
+  //  are inverted w.r.t. 32-bit mode.
+ // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
+ // any position.
+ if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
+ (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
+ (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
+ // Make sure we observed the prefixes in any position.
+ if (insn->hasAdSize)
+ attrMask |= ATTR_ADSIZE;
+ if (insn->hasOpSize)
+ attrMask |= ATTR_OPSIZE;
+
+ // In 16-bit, invert the attributes.
+ if (insn->mode == MODE_16BIT) {
+ attrMask ^= ATTR_ADSIZE;
+
+ // The OpSize attribute is only valid with the absolute moves.
+ if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
+ attrMask ^= ATTR_OPSIZE;
+ }
+
+ if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
+ return -1;
+
+ insn->instructionID = instructionID;
+ insn->spec = &INSTRUCTIONS_SYM[instructionID];
+ return 0;
+ }
+
+ if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
+ !(attrMask & ATTR_OPSIZE)) {
+ // The instruction tables make no distinction between instructions that
+    // allow OpSize anywhere (i.e., 16-bit operations) and those that need it
+    // in a particular spot (i.e., many MMX operations). In general we're
+    // conservative, but in the specific case where OpSize is present but not
+    // in the right place we check whether there's a 16-bit operation.
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithOpsize;
+ llvm::StringRef specName, specWithOpSizeName;
+
+ spec = &INSTRUCTIONS_SYM[instructionID];
+
+ if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
+ attrMask | ATTR_OPSIZE)) {
+ // ModRM required with OpSize but not present. Give up and return the
+ // version without OpSize set.
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specName = mii->getName(instructionID);
+ specWithOpSizeName = mii->getName(instructionIDWithOpsize);
+
+ if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
+ (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
+ insn->instructionID = instructionIDWithOpsize;
+ insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
+ insn->rexPrefix & 0x01) {
+ // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
+ // as XCHG %r8, %eax.
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithNewOpcode;
+ const struct InstructionSpecifier *specWithNewOpcode;
+
+ spec = &INSTRUCTIONS_SYM[instructionID];
+
+ // Borrow opcode from one of the other XCHGar opcodes
+ insn->opcode = 0x91;
+
+ if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
+ attrMask)) {
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
+
+ // Change back
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionIDWithNewOpcode;
+ insn->spec = specWithNewOpcode;
+
+ return 0;
+ }
+
+ insn->instructionID = instructionID;
+ insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
+
+ return 0;
+}
+
+// Read an operand from the opcode field of an instruction and interpret it
+// appropriately given the operand width. Handles AddRegFrm instructions.
+//
+// @param insn - the instruction whose opcode field is to be read.
+// @param size - The width (in bytes) of the register being specified.
+// 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
+// RAX.
+// @return - 0 on success; nonzero otherwise.
+static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
+ LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
+
+ if (size == 0)
+ size = insn->registerSize;
+
+ switch (size) {
+ case 1:
+ insn->opcodeRegister = (Reg)(
+ MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
+ insn->opcodeRegister < MODRM_REG_AL + 0x8) {
+ insn->opcodeRegister =
+ (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
+ }
+
+ break;
+ case 2:
+ insn->opcodeRegister = (Reg)(
+ MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ break;
+ case 4:
+ insn->opcodeRegister =
+ (Reg)(MODRM_REG_EAX +
+ ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ break;
+ case 8:
+ insn->opcodeRegister =
+ (Reg)(MODRM_REG_RAX +
+ ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ break;
+ }
+
+ return 0;
+}
+
+// Consume an immediate operand from an instruction, given the desired operand
+// size.
+//
+// @param insn - The instruction whose operand is to be read.
+// @param size - The width (in bytes) of the operand.
+// @return - 0 if the immediate was successfully consumed; nonzero
+// otherwise.
+static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
+ uint8_t imm8;
+ uint16_t imm16;
+ uint32_t imm32;
+ uint64_t imm64;
+
+ LLVM_DEBUG(dbgs() << "readImmediate()");
+
+ assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");
+
+ insn->immediateSize = size;
+ insn->immediateOffset = insn->readerCursor - insn->startLocation;
+
+ switch (size) {
+ case 1:
+ if (consume(insn, imm8))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm8;
+ break;
+ case 2:
+ if (consume(insn, imm16))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm16;
+ break;
+ case 4:
+ if (consume(insn, imm32))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm32;
+ break;
+ case 8:
+ if (consume(insn, imm64))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm64;
+ break;
+ default:
+ llvm_unreachable("invalid size");
+ }
+
+ insn->numImmediatesConsumed++;
+
+ return 0;
+}
+
+// Consume vvvv from an instruction if it has a VEX prefix.
+static int readVVVV(struct InternalInstruction *insn) {
+ LLVM_DEBUG(dbgs() << "readVVVV()");
+
+ int vvvv;
+ if (insn->vectorExtensionType == TYPE_EVEX)
+ vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
+ vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
+ else if (insn->vectorExtensionType == TYPE_VEX_3B)
+ vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
+ else if (insn->vectorExtensionType == TYPE_VEX_2B)
+ vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
+ else if (insn->vectorExtensionType == TYPE_XOP)
+ vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
+ else
+ return -1;
+
+ if (insn->mode != MODE_64BIT)
+ vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
+
+ insn->vvvv = static_cast<Reg>(vvvv);
+ return 0;
+}
+
+// Read a mask register from the EVEX prefix of an instruction.
+//
+// @param insn  - The instruction whose EVEX prefix is to be read.
+// @return - 0 on success; nonzero otherwise.
+static int readMaskRegister(struct InternalInstruction *insn) {
+ LLVM_DEBUG(dbgs() << "readMaskRegister()");
+
+ if (insn->vectorExtensionType != TYPE_EVEX)
+ return -1;
+
+ insn->writemask =
+ static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
+ return 0;
+}
+
+// Consult the specifier for an instruction and consume all operands for
+// that instruction, interpreting them along the way.
+static int readOperands(struct InternalInstruction *insn) {
+ int hasVVVV, needVVVV;
+ int sawRegImm = 0;
+
+ LLVM_DEBUG(dbgs() << "readOperands()");
+
+ // If non-zero vvvv specified, make sure one of the operands uses it.
+ hasVVVV = !readVVVV(insn);
+ needVVVV = hasVVVV && (insn->vvvv != 0);
+
+ for (const auto &Op : x86OperandSets[insn->spec->operands]) {
+ switch (Op.encoding) {
+ case ENCODING_NONE:
+ case ENCODING_SI:
+ case ENCODING_DI:
+ break;
+ CASE_ENCODING_VSIB:
+ // VSIB can use the V2 bit so check only the other bits.
+ if (needVVVV)
+ needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
+ if (readModRM(insn))
+ return -1;
+
+ // Reject if SIB wasn't used.
+ if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
+ return -1;
+
+ // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
+ if (insn->sibIndex == SIB_INDEX_NONE)
+ insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
+
+ // If EVEX.v2 is set this is one of the 16-31 registers.
+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
+ v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
+
+ // Adjust the index register to the correct size.
+ switch ((OperandType)Op.type) {
+ default:
+ debug("Unhandled VSIB index type");
+ return -1;
+ case TYPE_MVSIBX:
+ insn->sibIndex =
+ (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
+ break;
+ case TYPE_MVSIBY:
+ insn->sibIndex =
+ (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
+ break;
+ case TYPE_MVSIBZ:
+ insn->sibIndex =
+ (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
+ break;
+ }
+
+ // Apply the AVX512 compressed displacement scaling factor.
+ if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
+ insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
+ break;
+ case ENCODING_REG:
+ CASE_ENCODING_RM:
+ if (readModRM(insn))
+ return -1;
+ if (fixupReg(insn, &Op))
+ return -1;
+ // Apply the AVX512 compressed displacement scaling factor.
+ if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
+ insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
+ break;
+ case ENCODING_IB:
+ if (sawRegImm) {
+ // Saw a register immediate so don't read again and instead split the
+ // previous immediate. FIXME: This is a hack.
+ insn->immediates[insn->numImmediatesConsumed] =
+ insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
+ ++insn->numImmediatesConsumed;
+ break;
+ }
+ if (readImmediate(insn, 1))
+ return -1;
+ if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
+ sawRegImm = 1;
+ break;
+ case ENCODING_IW:
+ if (readImmediate(insn, 2))
+ return -1;
+ break;
+ case ENCODING_ID:
+ if (readImmediate(insn, 4))
+ return -1;
+ break;
+ case ENCODING_IO:
+ if (readImmediate(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Iv:
+ if (readImmediate(insn, insn->immediateSize))
+ return -1;
+ break;
+ case ENCODING_Ia:
+ if (readImmediate(insn, insn->addressSize))
+ return -1;
+ break;
+ case ENCODING_IRC:
+ insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
+ lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
+ break;
+ case ENCODING_RB:
+ if (readOpcodeRegister(insn, 1))
+ return -1;
+ break;
+ case ENCODING_RW:
+ if (readOpcodeRegister(insn, 2))
+ return -1;
+ break;
+ case ENCODING_RD:
+ if (readOpcodeRegister(insn, 4))
+ return -1;
+ break;
+ case ENCODING_RO:
+ if (readOpcodeRegister(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Rv:
+ if (readOpcodeRegister(insn, 0))
+ return -1;
+ break;
+ case ENCODING_CC:
+ insn->immediates[1] = insn->opcode & 0xf;
+ break;
+ case ENCODING_FP:
+ break;
+ case ENCODING_VVVV:
+ needVVVV = 0; // Mark that we have found a VVVV operand.
+ if (!hasVVVV)
+ return -1;
+ if (insn->mode != MODE_64BIT)
+ insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
+ if (fixupReg(insn, &Op))
+ return -1;
+ break;
+ case ENCODING_WRITEMASK:
+ if (readMaskRegister(insn))
+ return -1;
+ break;
+ case ENCODING_DUP:
+ break;
+ default:
+ LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
+ return -1;
+ }
+ }
+
+  // If we didn't find an ENCODING_VVVV operand but a non-zero vvvv is present, fail
+ if (needVVVV)
+ return -1;
+
+ return 0;
+}
namespace llvm {
-// Fill-ins to make the compiler happy. These constants are never actually
-// assigned; they are just filler to make an automatically-generated switch
-// statement work.
+// Fill-ins to make the compiler happy. These constants are never actually
+// assigned; they are just filler to make an automatically-generated switch
+// statement work.
namespace X86 {
enum {
BX_SI = 500,
@@ -140,7 +1669,6 @@ public:
public:
DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &vStream,
raw_ostream &cStream) const override;
private:
@@ -169,91 +1697,51 @@ X86GenericDisassembler::X86GenericDisassembler(
llvm_unreachable("Invalid CPU mode");
}
-namespace {
-struct Region {
- ArrayRef<uint8_t> Bytes;
- uint64_t Base;
- Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {}
-};
-} // end anonymous namespace
-
-/// A callback function that wraps the readByte method from Region.
-///
-/// @param Arg - The generic callback parameter. In this case, this should
-/// be a pointer to a Region.
-/// @param Byte - A pointer to the byte to be read.
-/// @param Address - The address to be read.
-static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) {
- auto *R = static_cast<const Region *>(Arg);
- ArrayRef<uint8_t> Bytes = R->Bytes;
- unsigned Index = Address - R->Base;
- if (Bytes.size() <= Index)
- return -1;
- *Byte = Bytes[Index];
- return 0;
-}
-
-/// logger - a callback function that wraps the operator<< method from
-/// raw_ostream.
-///
-/// @param arg - The generic callback parameter. This should be a pointe
-/// to a raw_ostream.
-/// @param log - A string to be logged. logger() adds a newline.
-static void logger(void* arg, const char* log) {
- if (!arg)
- return;
-
- raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
- vStream << log << "\n";
-}
-
-//
-// Public interface for the disassembler
-//
-
MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream, raw_ostream &CStream) const {
+ raw_ostream &CStream) const {
CommentStream = &CStream;
- InternalInstruction InternalInstr;
-
- dlog_t LoggerFn = logger;
- if (&VStream == &nulls())
- LoggerFn = nullptr; // Disable logging completely if it's going to nulls().
-
- Region R(Bytes, Address);
-
- int Ret = decodeInstruction(&InternalInstr, regionReader, (const void *)&R,
- LoggerFn, (void *)&VStream,
- (const void *)MII.get(), Address, fMode);
-
- if (Ret) {
- Size = InternalInstr.readerCursor - Address;
+ InternalInstruction Insn;
+ memset(&Insn, 0, sizeof(InternalInstruction));
+ Insn.bytes = Bytes;
+ Insn.startLocation = Address;
+ Insn.readerCursor = Address;
+ Insn.mode = fMode;
+
+ if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) ||
+ getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 ||
+ readOperands(&Insn)) {
+ Size = Insn.readerCursor - Address;
return Fail;
- } else {
- Size = InternalInstr.length;
- bool Ret = translateInstruction(Instr, InternalInstr, this);
- if (!Ret) {
- unsigned Flags = X86::IP_NO_PREFIX;
- if (InternalInstr.hasAdSize)
- Flags |= X86::IP_HAS_AD_SIZE;
- if (!InternalInstr.mandatoryPrefix) {
- if (InternalInstr.hasOpSize)
- Flags |= X86::IP_HAS_OP_SIZE;
- if (InternalInstr.repeatPrefix == 0xf2)
- Flags |= X86::IP_HAS_REPEAT_NE;
- else if (InternalInstr.repeatPrefix == 0xf3 &&
- // It should not be 'pause' f3 90
- InternalInstr.opcode != 0x90)
- Flags |= X86::IP_HAS_REPEAT;
- if (InternalInstr.hasLockPrefix)
- Flags |= X86::IP_HAS_LOCK;
- }
- Instr.setFlags(Flags);
+ }
+
+ Insn.operands = x86OperandSets[Insn.spec->operands];
+ Insn.length = Insn.readerCursor - Insn.startLocation;
+ Size = Insn.length;
+ if (Size > 15)
+ LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
+
+ bool Ret = translateInstruction(Instr, Insn, this);
+ if (!Ret) {
+ unsigned Flags = X86::IP_NO_PREFIX;
+ if (Insn.hasAdSize)
+ Flags |= X86::IP_HAS_AD_SIZE;
+ if (!Insn.mandatoryPrefix) {
+ if (Insn.hasOpSize)
+ Flags |= X86::IP_HAS_OP_SIZE;
+ if (Insn.repeatPrefix == 0xf2)
+ Flags |= X86::IP_HAS_REPEAT_NE;
+ else if (Insn.repeatPrefix == 0xf3 &&
+ // It should not be 'pause' f3 90
+ Insn.opcode != 0x90)
+ Flags |= X86::IP_HAS_REPEAT;
+ if (Insn.hasLockPrefix)
+ Flags |= X86::IP_HAS_LOCK;
}
- return (!Ret) ? Success : Fail;
+ Instr.setFlags(Flags);
}
+ return (!Ret) ? Success : Fail;
}
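Because the raw_ostream &vStream parameter is gone, out-of-tree callers of getInstruction() drop one argument. A hedged sketch of an updated call site; Disasm, Bytes, Address, and CommentStream are assumed to already exist in the caller (with the usual LLVM MC headers included):

  MCInst Inst;
  uint64_t Size;
  MCDisassembler::DecodeStatus S =
      Disasm->getInstruction(Inst, Size, Bytes, Address, CommentStream);
  if (S != MCDisassembler::Success)
    report_fatal_error("could not decode instruction");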
//
@@ -844,7 +2332,7 @@ static MCDisassembler *createX86Disassembler(const Target &T,
return new X86GenericDisassembler(STI, Ctx, std::move(MII));
}
-extern "C" void LLVMInitializeX86Disassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(),
createX86Disassembler);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
deleted file mode 100644
index e287f6625115..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ /dev/null
@@ -1,1938 +0,0 @@
-//===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the X86 Disassembler.
-// It contains the implementation of the instruction decoder.
-// Documentation for the disassembler can be found in X86Disassembler.h.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86DisassemblerDecoder.h"
-#include "llvm/ADT/StringRef.h"
-
-#include <cstdarg> /* for va_*() */
-#include <cstdio> /* for vsnprintf() */
-#include <cstdlib> /* for exit() */
-#include <cstring> /* for memset() */
-
-using namespace llvm::X86Disassembler;
-
-/// Specifies whether a ModR/M byte is needed and (if so) which
-/// instruction each possible value of the ModR/M byte corresponds to. Once
-/// this information is known, we have narrowed down to a single instruction.
-struct ModRMDecision {
- uint8_t modrm_type;
- uint16_t instructionIDs;
-};
-
-/// Specifies which set of ModR/M->instruction tables to look at
-/// given a particular opcode.
-struct OpcodeDecision {
- ModRMDecision modRMDecisions[256];
-};
-
-/// Specifies which opcode->instruction tables to look at given
-/// a particular context (set of attributes). Since there are many possible
-/// contexts, the decoder first uses CONTEXTS_SYM to determine which context
-/// applies given a specific set of attributes. Hence there are only IC_max
-/// entries in this table, rather than 2^(ATTR_max).
-struct ContextDecision {
- OpcodeDecision opcodeDecisions[IC_max];
-};
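To make the three-level indirection concrete, here is a toy lookup under invented table contents; the real ONEBYTE_SYM and friends come from the generated X86GenDisassemblerTables.inc, so treat every value below as an assumption:

```c++
// Toy three-level lookup: context -> opcode -> ModR/M decision -> UID.
#include <cstdint>
#include <cstdio>

enum { MODRM_ONEENTRY, MODRM_FULL };

struct ModRMDecision { uint8_t modrm_type; uint16_t instructionIDs; };
struct OpcodeDecision { ModRMDecision modRMDecisions[256]; };
struct ContextDecision { OpcodeDecision opcodeDecisions[1]; }; // IC_max == 1

static const uint16_t modRMTable[] = {7}; // every path decodes to UID 7

uint16_t lookup(const ContextDecision &Ctx, unsigned InsnContext,
                uint8_t Opcode, uint8_t ModRM) {
  const ModRMDecision &Dec =
      Ctx.opcodeDecisions[InsnContext].modRMDecisions[Opcode];
  if (Dec.modrm_type == MODRM_ONEENTRY)
    return modRMTable[Dec.instructionIDs]; // ModR/M byte is irrelevant
  return modRMTable[Dec.instructionIDs + ModRM]; // MODRM_FULL: index by byte
}

int main() {
  static const ContextDecision Ctx = {}; // all MODRM_ONEENTRY, slot 0
  std::printf("UID = %u\n", (unsigned)lookup(Ctx, 0, 0x90, 0)); // UID = 7
}
```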
-
-#include "X86GenDisassemblerTables.inc"
-
-#ifndef NDEBUG
-#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
-#else
-#define debug(s) do { } while (0)
-#endif
-
-/*
- * contextForAttrs - Client for the instruction context table. Takes a set of
- * attributes and returns the appropriate decode context.
- *
- * @param attrMask - Attributes, from the enumeration attributeBits.
- * @return - The InstructionContext to use when looking up an
- *                 instruction with these attributes.
- */
-static InstructionContext contextForAttrs(uint16_t attrMask) {
- return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
-}
-
-/*
- * modRMRequired - Reads the appropriate instruction table to determine whether
- * the ModR/M byte is required to decode a particular instruction.
- *
- * @param type - The opcode type (i.e., how many bytes it has).
- * @param insnContext - The context for the instruction, as returned by
- * contextForAttrs.
- * @param opcode - The last byte of the instruction's opcode, not counting
- * ModR/M extensions and escapes.
- * @return - true if the ModR/M byte is required, false otherwise.
- */
-static int modRMRequired(OpcodeType type,
- InstructionContext insnContext,
- uint16_t opcode) {
- const struct ContextDecision* decision = nullptr;
-
- switch (type) {
- case ONEBYTE:
- decision = &ONEBYTE_SYM;
- break;
- case TWOBYTE:
- decision = &TWOBYTE_SYM;
- break;
- case THREEBYTE_38:
- decision = &THREEBYTE38_SYM;
- break;
- case THREEBYTE_3A:
- decision = &THREEBYTE3A_SYM;
- break;
- case XOP8_MAP:
- decision = &XOP8_MAP_SYM;
- break;
- case XOP9_MAP:
- decision = &XOP9_MAP_SYM;
- break;
- case XOPA_MAP:
- decision = &XOPA_MAP_SYM;
- break;
- case THREEDNOW_MAP:
- decision = &THREEDNOW_MAP_SYM;
- break;
- }
-
- return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
- modrm_type != MODRM_ONEENTRY;
-}
-
-/*
- * decode - Reads the appropriate instruction table to obtain the unique ID of
- * an instruction.
- *
- * @param type - See modRMRequired().
- * @param insnContext - See modRMRequired().
- * @param opcode - See modRMRequired().
- * @param modRM - The ModR/M byte if required, or any value if not.
- * @return - The UID of the instruction, or 0 on failure.
- */
-static InstrUID decode(OpcodeType type,
- InstructionContext insnContext,
- uint8_t opcode,
- uint8_t modRM) {
- const struct ModRMDecision* dec = nullptr;
-
- switch (type) {
- case ONEBYTE:
- dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
- break;
- case TWOBYTE:
- dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
- break;
- case THREEBYTE_38:
- dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
- break;
- case THREEBYTE_3A:
- dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
- break;
- case XOP8_MAP:
- dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
- break;
- case XOP9_MAP:
- dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
- break;
- case XOPA_MAP:
- dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
- break;
- case THREEDNOW_MAP:
- dec = &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
- break;
- }
-
- switch (dec->modrm_type) {
- default:
- debug("Corrupt table! Unknown modrm_type");
- return 0;
- case MODRM_ONEENTRY:
- return modRMTable[dec->instructionIDs];
- case MODRM_SPLITRM:
- if (modFromModRM(modRM) == 0x3)
- return modRMTable[dec->instructionIDs+1];
- return modRMTable[dec->instructionIDs];
- case MODRM_SPLITREG:
- if (modFromModRM(modRM) == 0x3)
- return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
- return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
- case MODRM_SPLITMISC:
- if (modFromModRM(modRM) == 0x3)
- return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
- return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
- case MODRM_FULL:
- return modRMTable[dec->instructionIDs+modRM];
- }
-}
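Of the modrm_type arms above, MODRM_SPLITRM is the easiest to check in isolation; a sketch with a hypothetical two-entry table slice (the UIDs 100/101 are made up):

```c++
// The MODRM_SPLITRM arm: mod == 0b11 selects the register form.
#include <cstdint>
#include <cstdio>

static uint8_t modFromModRM(uint8_t ModRM) { return ModRM >> 6; }

static const uint16_t modRMTable[] = {100, 101}; // {memory form, reg form}

uint16_t splitRM(uint16_t InstructionIDs, uint8_t ModRM) {
  if (modFromModRM(ModRM) == 0x3)
    return modRMTable[InstructionIDs + 1];
  return modRMTable[InstructionIDs];
}

int main() {
  std::printf("%u %u\n", (unsigned)splitRM(0, 0x05),
              (unsigned)splitRM(0, 0xc0)); // 100 101
}
```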
-
-/*
- * specifierForUID - Given a UID, returns the name and operand specification for
- * that instruction.
- *
- * @param uid - The unique ID for the instruction. This should be returned by
- * decode(); specifierForUID will not check bounds.
- * @return - A pointer to the specification for that instruction.
- */
-static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
- return &INSTRUCTIONS_SYM[uid];
-}
-
-/*
- * consumeByte - Uses the reader function provided by the user to consume one
- * byte from the instruction's memory and advance the cursor.
- *
- * @param insn - The instruction with the reader function to use. The cursor
- * for this instruction is advanced.
- * @param byte - A pointer to a pre-allocated memory buffer to be populated
- * with the data read.
- * @return - 0 if the read was successful; nonzero otherwise.
- */
-static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
- int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
-
- if (!ret)
- ++(insn->readerCursor);
-
- return ret;
-}
-
-/*
- * lookAtByte - Like consumeByte, but does not advance the cursor.
- *
- * @param insn - See consumeByte().
- * @param byte - See consumeByte().
- * @return - See consumeByte().
- */
-static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
- return insn->reader(insn->readerArg, byte, insn->readerCursor);
-}
-
-static void unconsumeByte(struct InternalInstruction* insn) {
- insn->readerCursor--;
-}
-
-#define CONSUME_FUNC(name, type) \
- static int name(struct InternalInstruction* insn, type* ptr) { \
- type combined = 0; \
- unsigned offset; \
- for (offset = 0; offset < sizeof(type); ++offset) { \
- uint8_t byte; \
- int ret = insn->reader(insn->readerArg, \
- &byte, \
- insn->readerCursor + offset); \
- if (ret) \
- return ret; \
- combined = combined | ((uint64_t)byte << (offset * 8)); \
- } \
- *ptr = combined; \
- insn->readerCursor += sizeof(type); \
- return 0; \
- }
-
-/*
- * consume* - Use the reader function provided by the user to consume data
- * values of various sizes from the instruction's memory and advance the
- * cursor appropriately. These readers perform endian conversion.
- *
- * @param insn - See consumeByte().
- * @param ptr - A pointer to a pre-allocated memory of appropriate size to
- * be populated with the data read.
- * @return - See consumeByte().
- */
-CONSUME_FUNC(consumeInt8, int8_t)
-CONSUME_FUNC(consumeInt16, int16_t)
-CONSUME_FUNC(consumeInt32, int32_t)
-CONSUME_FUNC(consumeUInt16, uint16_t)
-CONSUME_FUNC(consumeUInt32, uint32_t)
-CONSUME_FUNC(consumeUInt64, uint64_t)
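The macro expands to a little-endian accumulation that is independent of host byte order; a self-contained equivalent over a flat buffer (the buffer-backed reader is an assumption for the demo, the real decoder goes through byteReader_t):

```c++
#include <cstdint>
#include <cstdio>

// Same nonzero-on-failure convention as CONSUME_FUNC above.
template <typename T>
int consumeLE(const uint8_t *buf, uint64_t len, uint64_t &cursor, T *ptr) {
  T combined = 0;
  for (unsigned offset = 0; offset < sizeof(T); ++offset) {
    if (cursor + offset >= len)
      return -1;
    combined = combined | (T)((uint64_t)buf[cursor + offset] << (offset * 8));
  }
  *ptr = combined;
  cursor += sizeof(T); // advance only after the whole value was read
  return 0;
}

int main() {
  const uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12};
  uint64_t cursor = 0;
  uint32_t v;
  if (!consumeLE(bytes, sizeof(bytes), cursor, &v))
    std::printf("0x%08x\n", v); // 0x12345678 on any host endianness
}
```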
-
-/*
- * dbgprintf - Uses the logging function provided by the user to log a single
- * message, typically without a carriage-return.
- *
- * @param insn - The instruction containing the logging function.
- * @param format - See printf().
- * @param ... - See printf().
- */
-static void dbgprintf(struct InternalInstruction* insn,
- const char* format,
- ...) {
- char buffer[256];
- va_list ap;
-
- if (!insn->dlog)
- return;
-
- va_start(ap, format);
- (void)vsnprintf(buffer, sizeof(buffer), format, ap);
- va_end(ap);
-
- insn->dlog(insn->dlogArg, buffer);
-}
-
-static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
- if (insn->mode == MODE_64BIT)
- return prefix >= 0x40 && prefix <= 0x4f;
- return false;
-}
-
-/*
- * setPrefixPresent - Marks that a particular prefix is present, and records
- * whether it acts as a mandatory prefix.
- *
- * @param insn - The instruction to be marked as having the prefix.
- * @param prefix - The prefix that is present.
- */
-static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix) {
- uint8_t nextByte;
- switch (prefix) {
- case 0xf0:
- insn->hasLockPrefix = true;
- break;
- case 0xf2:
- case 0xf3:
- if (lookAtByte(insn, &nextByte))
- break;
- // TODO:
- // 1. There could be several 0x66
- // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
- // it's not mandatory prefix
- // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
- // 0x0f exactly after it to be mandatory prefix
- if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
-      // The last 0xf2/0xf3 seen is the mandatory prefix
- insn->mandatoryPrefix = prefix;
- insn->repeatPrefix = prefix;
- break;
- case 0x66:
- if (lookAtByte(insn, &nextByte))
- break;
- // 0x66 can't overwrite existing mandatory prefix and should be ignored
- if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
- insn->mandatoryPrefix = prefix;
- break;
- }
-}
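A reduced model of the 0xf2/0xf3 lookahead above, with the next byte passed in directly instead of read through lookAtByte(); 64-bit mode is assumed, so any 0x40-0x4f byte counts as REX:

```c++
#include <cstdint>
#include <cstdio>

static bool isREX64(uint8_t b) { return b >= 0x40 && b <= 0x4f; }

// True when 0xf2/0xf3 should be recorded as the mandatory prefix, per the
// heuristic in setPrefixPresent() (the TODO above notes its limitations).
bool repeatIsMandatory(uint8_t nextByte) {
  return isREX64(nextByte) || nextByte == 0x0f || nextByte == 0x66;
}

int main() {
  std::printf("%d %d %d\n",
              repeatIsMandatory(0x0f),  // 1: escape byte follows, e.g. f3 0f
              repeatIsMandatory(0x48),  // 1: REX.W, heuristic assumes 0f next
              repeatIsMandatory(0xa4)); // 0: plain rep movsb
}
```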
-
-/*
- * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
- * instruction as having them. Also sets the instruction's default operand,
- * address, and other relevant data sizes to report operands correctly.
- *
- * @param insn - The instruction whose prefixes are to be read.
- * @return - 0 if the instruction could be read until the end of the prefix
- * bytes, and no prefixes conflicted; nonzero otherwise.
- */
-static int readPrefixes(struct InternalInstruction* insn) {
- bool isPrefix = true;
- uint8_t byte = 0;
- uint8_t nextByte;
-
- dbgprintf(insn, "readPrefixes()");
-
- while (isPrefix) {
- /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
- if (consumeByte(insn, &byte))
- break;
-
- /*
- * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
- * break and let it be disassembled as a normal "instruction".
- */
- if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
- break;
-
- if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) {
- /*
- * If the byte is 0xf2 or 0xf3, and any of the following conditions are
- * met:
- * - it is followed by a LOCK (0xf0) prefix
- * - it is followed by an xchg instruction
-       * then it should be disassembled as an xacquire/xrelease, not repne/rep.
- */
- if (((nextByte == 0xf0) ||
- ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
- insn->xAcquireRelease = true;
- if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
- break;
- }
- /*
- * Also if the byte is 0xf3, and the following condition is met:
- * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
- * "mov mem, imm" (opcode 0xc6/0xc7) instructions.
- * then it should be disassembled as an xrelease not rep.
- */
- if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
- nextByte == 0xc6 || nextByte == 0xc7)) {
- insn->xAcquireRelease = true;
- break;
- }
- if (isREX(insn, nextByte)) {
- uint8_t nnextByte;
- // Go to REX prefix after the current one
- if (consumeByte(insn, &nnextByte))
- return -1;
- // We should be able to read next byte after REX prefix
- if (lookAtByte(insn, &nnextByte))
- return -1;
- unconsumeByte(insn);
- }
- }
-
- switch (byte) {
- case 0xf0: /* LOCK */
- case 0xf2: /* REPNE/REPNZ */
- case 0xf3: /* REP or REPE/REPZ */
- setPrefixPresent(insn, byte);
- break;
- case 0x2e: /* CS segment override -OR- Branch not taken */
- case 0x36: /* SS segment override -OR- Branch taken */
- case 0x3e: /* DS segment override */
- case 0x26: /* ES segment override */
- case 0x64: /* FS segment override */
- case 0x65: /* GS segment override */
- switch (byte) {
- case 0x2e:
- insn->segmentOverride = SEG_OVERRIDE_CS;
- break;
- case 0x36:
- insn->segmentOverride = SEG_OVERRIDE_SS;
- break;
- case 0x3e:
- insn->segmentOverride = SEG_OVERRIDE_DS;
- break;
- case 0x26:
- insn->segmentOverride = SEG_OVERRIDE_ES;
- break;
- case 0x64:
- insn->segmentOverride = SEG_OVERRIDE_FS;
- break;
- case 0x65:
- insn->segmentOverride = SEG_OVERRIDE_GS;
- break;
- default:
- debug("Unhandled override");
- return -1;
- }
- setPrefixPresent(insn, byte);
- break;
- case 0x66: /* Operand-size override */
- insn->hasOpSize = true;
- setPrefixPresent(insn, byte);
- break;
- case 0x67: /* Address-size override */
- insn->hasAdSize = true;
- setPrefixPresent(insn, byte);
- break;
- default: /* Not a prefix byte */
- isPrefix = false;
- break;
- }
-
- if (isPrefix)
- dbgprintf(insn, "Found prefix 0x%hhx", byte);
- }
-
- insn->vectorExtensionType = TYPE_NO_VEX_XOP;
-
- if (byte == 0x62) {
- uint8_t byte1, byte2;
-
- if (consumeByte(insn, &byte1)) {
- dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
- return -1;
- }
-
- if (lookAtByte(insn, &byte2)) {
- dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
- return -1;
- }
-
- if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
- ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
- insn->vectorExtensionType = TYPE_EVEX;
- } else {
- unconsumeByte(insn); /* unconsume byte1 */
- unconsumeByte(insn); /* unconsume byte */
- }
-
- if (insn->vectorExtensionType == TYPE_EVEX) {
- insn->vectorExtensionPrefix[0] = byte;
- insn->vectorExtensionPrefix[1] = byte1;
- if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
- dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
- return -1;
- }
- if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
- dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
- return -1;
- }
-
- /* We simulate the REX prefix for simplicity's sake */
- if (insn->mode == MODE_64BIT) {
- insn->rexPrefix = 0x40
- | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
- | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
- | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
- | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
- }
-
- dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
- insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
- insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
- }
- } else if (byte == 0xc4) {
- uint8_t byte1;
-
- if (lookAtByte(insn, &byte1)) {
- dbgprintf(insn, "Couldn't read second byte of VEX");
- return -1;
- }
-
- if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
- insn->vectorExtensionType = TYPE_VEX_3B;
- else
- unconsumeByte(insn);
-
- if (insn->vectorExtensionType == TYPE_VEX_3B) {
- insn->vectorExtensionPrefix[0] = byte;
- consumeByte(insn, &insn->vectorExtensionPrefix[1]);
- consumeByte(insn, &insn->vectorExtensionPrefix[2]);
-
- /* We simulate the REX prefix for simplicity's sake */
-
- if (insn->mode == MODE_64BIT)
- insn->rexPrefix = 0x40
- | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
- | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
- | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
- | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
-
- dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
- insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
- insn->vectorExtensionPrefix[2]);
- }
- } else if (byte == 0xc5) {
- uint8_t byte1;
-
- if (lookAtByte(insn, &byte1)) {
- dbgprintf(insn, "Couldn't read second byte of VEX");
- return -1;
- }
-
- if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
- insn->vectorExtensionType = TYPE_VEX_2B;
- else
- unconsumeByte(insn);
-
- if (insn->vectorExtensionType == TYPE_VEX_2B) {
- insn->vectorExtensionPrefix[0] = byte;
- consumeByte(insn, &insn->vectorExtensionPrefix[1]);
-
- if (insn->mode == MODE_64BIT)
- insn->rexPrefix = 0x40
- | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
-
- switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
- default:
- break;
- case VEX_PREFIX_66:
- insn->hasOpSize = true;
- break;
- }
-
- dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
- insn->vectorExtensionPrefix[0],
- insn->vectorExtensionPrefix[1]);
- }
- } else if (byte == 0x8f) {
- uint8_t byte1;
-
- if (lookAtByte(insn, &byte1)) {
- dbgprintf(insn, "Couldn't read second byte of XOP");
- return -1;
- }
-
- if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */
- insn->vectorExtensionType = TYPE_XOP;
- else
- unconsumeByte(insn);
-
- if (insn->vectorExtensionType == TYPE_XOP) {
- insn->vectorExtensionPrefix[0] = byte;
- consumeByte(insn, &insn->vectorExtensionPrefix[1]);
- consumeByte(insn, &insn->vectorExtensionPrefix[2]);
-
- /* We simulate the REX prefix for simplicity's sake */
-
- if (insn->mode == MODE_64BIT)
- insn->rexPrefix = 0x40
- | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
- | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
- | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
- | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
-
- switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
- default:
- break;
- case VEX_PREFIX_66:
- insn->hasOpSize = true;
- break;
- }
-
- dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
- insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
- insn->vectorExtensionPrefix[2]);
- }
- } else if (isREX(insn, byte)) {
- if (lookAtByte(insn, &nextByte))
- return -1;
- insn->rexPrefix = byte;
- dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
- } else
- unconsumeByte(insn);
-
- if (insn->mode == MODE_16BIT) {
- insn->registerSize = (insn->hasOpSize ? 4 : 2);
- insn->addressSize = (insn->hasAdSize ? 4 : 2);
- insn->displacementSize = (insn->hasAdSize ? 4 : 2);
- insn->immediateSize = (insn->hasOpSize ? 4 : 2);
- } else if (insn->mode == MODE_32BIT) {
- insn->registerSize = (insn->hasOpSize ? 2 : 4);
- insn->addressSize = (insn->hasAdSize ? 2 : 4);
- insn->displacementSize = (insn->hasAdSize ? 2 : 4);
- insn->immediateSize = (insn->hasOpSize ? 2 : 4);
- } else if (insn->mode == MODE_64BIT) {
- if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
- insn->registerSize = 8;
- insn->addressSize = (insn->hasAdSize ? 4 : 8);
- insn->displacementSize = 4;
- insn->immediateSize = 4;
- } else {
- insn->registerSize = (insn->hasOpSize ? 2 : 4);
- insn->addressSize = (insn->hasAdSize ? 4 : 8);
- insn->displacementSize = (insn->hasOpSize ? 2 : 4);
- insn->immediateSize = (insn->hasOpSize ? 2 : 4);
- }
- }
-
- return 0;
-}
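The size-selection tail of readPrefixes() condenses to a small table; a standalone sketch (encoding the mode as a plain bit width is an assumption made for the demo):

```c++
#include <cstdio>

struct Sizes { int reg, addr, disp, imm; };

Sizes sizesFor(int bits, bool opSize, bool adSize, bool rexW) {
  if (bits == 16)
    return {opSize ? 4 : 2, adSize ? 4 : 2, adSize ? 4 : 2, opSize ? 4 : 2};
  if (bits == 32)
    return {opSize ? 2 : 4, adSize ? 2 : 4, adSize ? 2 : 4, opSize ? 2 : 4};
  if (rexW) // 64-bit with REX.W: 64-bit registers, 32-bit disp/imm
    return {8, adSize ? 4 : 8, 4, 4};
  return {opSize ? 2 : 4, adSize ? 4 : 8, opSize ? 2 : 4, opSize ? 2 : 4};
}

int main() {
  Sizes S = sizesFor(64, false, false, true);
  std::printf("reg=%d addr=%d disp=%d imm=%d\n", S.reg, S.addr, S.disp, S.imm);
}
```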
-
-static int readModRM(struct InternalInstruction* insn);
-
-/*
- * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
- * extended or escape opcodes).
- *
- * @param insn - The instruction whose opcode is to be read.
- * @return - 0 if the opcode could be read successfully; nonzero otherwise.
- */
-static int readOpcode(struct InternalInstruction* insn) {
- /* Determine the length of the primary opcode */
-
- uint8_t current;
-
- dbgprintf(insn, "readOpcode()");
-
- insn->opcodeType = ONEBYTE;
-
- if (insn->vectorExtensionType == TYPE_EVEX) {
- switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
- default:
- dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
- mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
- return -1;
- case VEX_LOB_0F:
- insn->opcodeType = TWOBYTE;
- return consumeByte(insn, &insn->opcode);
- case VEX_LOB_0F38:
- insn->opcodeType = THREEBYTE_38;
- return consumeByte(insn, &insn->opcode);
- case VEX_LOB_0F3A:
- insn->opcodeType = THREEBYTE_3A;
- return consumeByte(insn, &insn->opcode);
- }
- } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
- switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
- default:
- dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
- mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
- return -1;
- case VEX_LOB_0F:
- insn->opcodeType = TWOBYTE;
- return consumeByte(insn, &insn->opcode);
- case VEX_LOB_0F38:
- insn->opcodeType = THREEBYTE_38;
- return consumeByte(insn, &insn->opcode);
- case VEX_LOB_0F3A:
- insn->opcodeType = THREEBYTE_3A;
- return consumeByte(insn, &insn->opcode);
- }
- } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
- insn->opcodeType = TWOBYTE;
- return consumeByte(insn, &insn->opcode);
- } else if (insn->vectorExtensionType == TYPE_XOP) {
- switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
- default:
- dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
-                mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1]));
- return -1;
- case XOP_MAP_SELECT_8:
- insn->opcodeType = XOP8_MAP;
- return consumeByte(insn, &insn->opcode);
- case XOP_MAP_SELECT_9:
- insn->opcodeType = XOP9_MAP;
- return consumeByte(insn, &insn->opcode);
- case XOP_MAP_SELECT_A:
- insn->opcodeType = XOPA_MAP;
- return consumeByte(insn, &insn->opcode);
- }
- }
-
- if (consumeByte(insn, &current))
- return -1;
-
- if (current == 0x0f) {
- dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
-
- if (consumeByte(insn, &current))
- return -1;
-
- if (current == 0x38) {
- dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
- if (consumeByte(insn, &current))
- return -1;
-
- insn->opcodeType = THREEBYTE_38;
- } else if (current == 0x3a) {
- dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
- if (consumeByte(insn, &current))
- return -1;
-
- insn->opcodeType = THREEBYTE_3A;
- } else if (current == 0x0f) {
- dbgprintf(insn, "Found a 3dnow escape prefix (0x%hhx)", current);
-
- // Consume operands before the opcode to comply with the 3DNow encoding
- if (readModRM(insn))
- return -1;
-
- if (consumeByte(insn, &current))
- return -1;
-
- insn->opcodeType = THREEDNOW_MAP;
- } else {
- dbgprintf(insn, "Didn't find a three-byte escape prefix");
-
- insn->opcodeType = TWOBYTE;
- }
- } else if (insn->mandatoryPrefix)
-    // An opcode with a mandatory prefix must start with an opcode escape.
-    // If not, the prefix is a legacy repeat prefix.
- insn->mandatoryPrefix = 0;
-
- /*
- * At this point we have consumed the full opcode.
- * Anything we consume from here on must be unconsumed.
- */
-
- insn->opcode = current;
-
- return 0;
-}
-
-/*
- * getIDWithAttrMask - Determines the ID of an instruction, consuming
- * the ModR/M byte as appropriate for extended and escape opcodes,
- * and using a supplied attribute mask.
- *
- * @param instructionID - A pointer whose target is filled in with the ID of the
- * instruction.
- * @param insn - The instruction whose ID is to be determined.
- * @param attrMask - The attribute mask to search.
- * @return - 0 if the ModR/M could be read when needed or was not
- * needed; nonzero otherwise.
- */
-static int getIDWithAttrMask(uint16_t* instructionID,
- struct InternalInstruction* insn,
- uint16_t attrMask) {
- bool hasModRMExtension;
-
- InstructionContext instructionClass = contextForAttrs(attrMask);
-
- hasModRMExtension = modRMRequired(insn->opcodeType,
- instructionClass,
- insn->opcode);
-
- if (hasModRMExtension) {
- if (readModRM(insn))
- return -1;
-
- *instructionID = decode(insn->opcodeType,
- instructionClass,
- insn->opcode,
- insn->modRM);
- } else {
- *instructionID = decode(insn->opcodeType,
- instructionClass,
- insn->opcode,
- 0);
- }
-
- return 0;
-}
-
-/*
- * is16BitEquivalent - Determines whether two instruction names refer to
- * equivalent instructions but one is 16-bit whereas the other is not.
- *
- * @param orig - The instruction that is not 16-bit
- * @param equiv - The instruction that is 16-bit
- */
-static bool is16BitEquivalent(const char *orig, const char *equiv) {
- off_t i;
-
- for (i = 0;; i++) {
- if (orig[i] == '\0' && equiv[i] == '\0')
- return true;
- if (orig[i] == '\0' || equiv[i] == '\0')
- return false;
- if (orig[i] != equiv[i]) {
- if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
- continue;
- if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
- continue;
- if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
- continue;
- return false;
- }
- }
-}
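Since the pairing rules are easy to misread, here is the function lifted into a standalone harness with example names; the opcode strings are illustrative, not taken from the generated tables:

```c++
#include <cassert>
#include <cstdio>

static bool is16BitEquivalent(const char *orig, const char *equiv) {
  for (int i = 0;; i++) {
    if (orig[i] == '\0' && equiv[i] == '\0')
      return true;
    if (orig[i] == '\0' || equiv[i] == '\0')
      return false;
    if (orig[i] != equiv[i]) {
      if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
        continue; // Q/L size suffix pairs with W
      if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
        continue; // first digit of 64/32 pairs with the 1 of 16
      if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
        continue; // second digit of 64/32 pairs with the 6 of 16
      return false;
    }
  }
}

int main() {
  assert(is16BitEquivalent("MOV32rr", "MOV16rr"));   // 32 pairs with 16
  assert(is16BitEquivalent("XCHG64rr", "XCHG16rr")); // 64 pairs with 16
  assert(!is16BitEquivalent("MOV32rr", "ADD16rr"));  // different mnemonic
  std::printf("all equivalences hold\n");
}
```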
-
-/*
- * is64Bit - Determines whether this instruction is a 64-bit instruction.
- *
- * @param name - The name of the instruction to check.
- */
-static bool is64Bit(const char *name) {
- off_t i;
-
- for (i = 0;; ++i) {
- if (name[i] == '\0')
- return false;
- if (name[i] == '6' && name[i+1] == '4')
- return true;
- }
-}
-
-/*
- * getID - Determines the ID of an instruction, consuming the ModR/M byte as
- * appropriate for extended and escape opcodes. Determines the attributes and
- * context for the instruction before doing so.
- *
- * @param insn - The instruction whose ID is to be determined.
- * @return - 0 if the ModR/M could be read when needed or was not needed;
- * nonzero otherwise.
- */
-static int getID(struct InternalInstruction* insn, const void *miiArg) {
- uint16_t attrMask;
- uint16_t instructionID;
-
- dbgprintf(insn, "getID()");
-
- attrMask = ATTR_NONE;
-
- if (insn->mode == MODE_64BIT)
- attrMask |= ATTR_64BIT;
-
- if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
- attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
-
- if (insn->vectorExtensionType == TYPE_EVEX) {
- switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
- case VEX_PREFIX_66:
- attrMask |= ATTR_OPSIZE;
- break;
- case VEX_PREFIX_F3:
- attrMask |= ATTR_XS;
- break;
- case VEX_PREFIX_F2:
- attrMask |= ATTR_XD;
- break;
- }
-
- if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
- attrMask |= ATTR_EVEXKZ;
- if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
- attrMask |= ATTR_EVEXB;
- if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
- attrMask |= ATTR_EVEXK;
- if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
- attrMask |= ATTR_VEXL;
- if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
- attrMask |= ATTR_EVEXL2;
- } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
- switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
- case VEX_PREFIX_66:
- attrMask |= ATTR_OPSIZE;
- break;
- case VEX_PREFIX_F3:
- attrMask |= ATTR_XS;
- break;
- case VEX_PREFIX_F2:
- attrMask |= ATTR_XD;
- break;
- }
-
- if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
- attrMask |= ATTR_VEXL;
- } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
- switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
- case VEX_PREFIX_66:
- attrMask |= ATTR_OPSIZE;
- break;
- case VEX_PREFIX_F3:
- attrMask |= ATTR_XS;
- break;
- case VEX_PREFIX_F2:
- attrMask |= ATTR_XD;
- break;
- }
-
- if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
- attrMask |= ATTR_VEXL;
- } else if (insn->vectorExtensionType == TYPE_XOP) {
- switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
- case VEX_PREFIX_66:
- attrMask |= ATTR_OPSIZE;
- break;
- case VEX_PREFIX_F3:
- attrMask |= ATTR_XS;
- break;
- case VEX_PREFIX_F2:
- attrMask |= ATTR_XD;
- break;
- }
-
- if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
- attrMask |= ATTR_VEXL;
- } else {
- return -1;
- }
- } else if (!insn->mandatoryPrefix) {
- // If we don't have mandatory prefix we should use legacy prefixes here
- if (insn->hasOpSize && (insn->mode != MODE_16BIT))
- attrMask |= ATTR_OPSIZE;
- if (insn->hasAdSize)
- attrMask |= ATTR_ADSIZE;
- if (insn->opcodeType == ONEBYTE) {
- if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
- // Special support for PAUSE
- attrMask |= ATTR_XS;
- } else {
- if (insn->repeatPrefix == 0xf2)
- attrMask |= ATTR_XD;
- else if (insn->repeatPrefix == 0xf3)
- attrMask |= ATTR_XS;
- }
- } else {
- switch (insn->mandatoryPrefix) {
- case 0xf2:
- attrMask |= ATTR_XD;
- break;
- case 0xf3:
- attrMask |= ATTR_XS;
- break;
- case 0x66:
- if (insn->mode != MODE_16BIT)
- attrMask |= ATTR_OPSIZE;
- break;
- case 0x67:
- attrMask |= ATTR_ADSIZE;
- break;
- }
-
- }
-
- if (insn->rexPrefix & 0x08) {
- attrMask |= ATTR_REXW;
- attrMask &= ~ATTR_ADSIZE;
- }
-
- /*
- * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
- * of the AdSize prefix is inverted w.r.t. 32-bit mode.
- */
- if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
- insn->opcode == 0xE3)
- attrMask ^= ATTR_ADSIZE;
-
- // If we're in 16-bit mode and this is one of the relative jumps and opsize
- // prefix isn't present, we need to force the opsize attribute since the
- // prefix is inverted relative to 32-bit mode.
- if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
- insn->opcodeType == ONEBYTE &&
- (insn->opcode == 0xE8 || insn->opcode == 0xE9))
- attrMask |= ATTR_OPSIZE;
-
- if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
- insn->opcodeType == TWOBYTE &&
- insn->opcode >= 0x80 && insn->opcode <= 0x8F)
- attrMask |= ATTR_OPSIZE;
-
- if (getIDWithAttrMask(&instructionID, insn, attrMask))
- return -1;
-
- /* The following clauses compensate for limitations of the tables. */
-
- if (insn->mode != MODE_64BIT &&
- insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
- /*
-     * The tables can't distinguish between cases where the W-bit is used to
-     * select register size and cases where it's a required part of the opcode.
- */
- if ((insn->vectorExtensionType == TYPE_EVEX &&
- wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
- (insn->vectorExtensionType == TYPE_VEX_3B &&
- wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
- (insn->vectorExtensionType == TYPE_XOP &&
- wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
-
- uint16_t instructionIDWithREXW;
- if (getIDWithAttrMask(&instructionIDWithREXW,
- insn, attrMask | ATTR_REXW)) {
- insn->instructionID = instructionID;
- insn->spec = specifierForUID(instructionID);
- return 0;
- }
-
- auto SpecName = GetInstrName(instructionIDWithREXW, miiArg);
-      // If it is not a 64-bit instruction, switch the opcode.
- if (!is64Bit(SpecName.data())) {
- insn->instructionID = instructionIDWithREXW;
- insn->spec = specifierForUID(instructionIDWithREXW);
- return 0;
- }
- }
- }
-
- /*
- * Absolute moves, umonitor, and movdir64b need special handling.
-   * - For 16-bit mode, because the meaning of the AdSize and OpSize prefixes
-   *   is inverted w.r.t. 32-bit mode.
-   * - For 32-bit mode, because we need to ensure the ADSIZE prefix is
-   *   observed in any position.
- */
- if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
- (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
- (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
- /* Make sure we observed the prefixes in any position. */
- if (insn->hasAdSize)
- attrMask |= ATTR_ADSIZE;
- if (insn->hasOpSize)
- attrMask |= ATTR_OPSIZE;
-
- /* In 16-bit, invert the attributes. */
- if (insn->mode == MODE_16BIT) {
- attrMask ^= ATTR_ADSIZE;
-
- /* The OpSize attribute is only valid with the absolute moves. */
- if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
- attrMask ^= ATTR_OPSIZE;
- }
-
- if (getIDWithAttrMask(&instructionID, insn, attrMask))
- return -1;
-
- insn->instructionID = instructionID;
- insn->spec = specifierForUID(instructionID);
- return 0;
- }
-
- if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
- !(attrMask & ATTR_OPSIZE)) {
- /*
- * The instruction tables make no distinction between instructions that
- * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
- * particular spot (i.e., many MMX operations). In general we're
- * conservative, but in the specific case where OpSize is present but not
- * in the right place we check if there's a 16-bit operation.
- */
-
- const struct InstructionSpecifier *spec;
- uint16_t instructionIDWithOpsize;
- llvm::StringRef specName, specWithOpSizeName;
-
- spec = specifierForUID(instructionID);
-
- if (getIDWithAttrMask(&instructionIDWithOpsize,
- insn,
- attrMask | ATTR_OPSIZE)) {
- /*
- * ModRM required with OpSize but not present; give up and return version
- * without OpSize set
- */
-
- insn->instructionID = instructionID;
- insn->spec = spec;
- return 0;
- }
-
- specName = GetInstrName(instructionID, miiArg);
- specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
-
- if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
- (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
- insn->instructionID = instructionIDWithOpsize;
- insn->spec = specifierForUID(instructionIDWithOpsize);
- } else {
- insn->instructionID = instructionID;
- insn->spec = spec;
- }
- return 0;
- }
-
- if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
- insn->rexPrefix & 0x01) {
- /*
- * NOOP shouldn't decode as NOOP if REX.b is set. Instead
- * it should decode as XCHG %r8, %eax.
- */
-
- const struct InstructionSpecifier *spec;
- uint16_t instructionIDWithNewOpcode;
- const struct InstructionSpecifier *specWithNewOpcode;
-
- spec = specifierForUID(instructionID);
-
- /* Borrow opcode from one of the other XCHGar opcodes */
- insn->opcode = 0x91;
-
- if (getIDWithAttrMask(&instructionIDWithNewOpcode,
- insn,
- attrMask)) {
- insn->opcode = 0x90;
-
- insn->instructionID = instructionID;
- insn->spec = spec;
- return 0;
- }
-
- specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
-
- /* Change back */
- insn->opcode = 0x90;
-
- insn->instructionID = instructionIDWithNewOpcode;
- insn->spec = specWithNewOpcode;
-
- return 0;
- }
-
- insn->instructionID = instructionID;
- insn->spec = specifierForUID(insn->instructionID);
-
- return 0;
-}
-
-/*
- * readSIB - Consumes the SIB byte to determine addressing information for an
- * instruction.
- *
- * @param insn - The instruction whose SIB byte is to be read.
- * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
- */
-static int readSIB(struct InternalInstruction* insn) {
- SIBBase sibBaseBase = SIB_BASE_NONE;
- uint8_t index, base;
-
- dbgprintf(insn, "readSIB()");
-
- if (insn->consumedSIB)
- return 0;
-
- insn->consumedSIB = true;
-
- switch (insn->addressSize) {
- case 2:
- dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
- return -1;
- case 4:
- insn->sibIndexBase = SIB_INDEX_EAX;
- sibBaseBase = SIB_BASE_EAX;
- break;
- case 8:
- insn->sibIndexBase = SIB_INDEX_RAX;
- sibBaseBase = SIB_BASE_RAX;
- break;
- }
-
- if (consumeByte(insn, &insn->sib))
- return -1;
-
- index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
-
- if (index == 0x4) {
- insn->sibIndex = SIB_INDEX_NONE;
- } else {
- insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
- }
-
- insn->sibScale = 1 << scaleFromSIB(insn->sib);
-
- base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
-
- switch (base) {
- case 0x5:
- case 0xd:
- switch (modFromModRM(insn->modRM)) {
- case 0x0:
- insn->eaDisplacement = EA_DISP_32;
- insn->sibBase = SIB_BASE_NONE;
- break;
- case 0x1:
- insn->eaDisplacement = EA_DISP_8;
- insn->sibBase = (SIBBase)(sibBaseBase + base);
- break;
- case 0x2:
- insn->eaDisplacement = EA_DISP_32;
- insn->sibBase = (SIBBase)(sibBaseBase + base);
- break;
- case 0x3:
- debug("Cannot have Mod = 0b11 and a SIB byte");
- return -1;
- }
- break;
- default:
- insn->sibBase = (SIBBase)(sibBaseBase + base);
- break;
- }
-
- return 0;
-}
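The SIB byte packs scale/index/base into 2+3+3 bits, with REX.X/REX.B widening index and base to 4 bits; a bit-level sketch with the accessors redefined locally for illustration:

```c++
#include <cstdint>
#include <cstdio>

static uint8_t scaleFromSIB(uint8_t SIB) { return SIB >> 6; }
static uint8_t indexFromSIB(uint8_t SIB) { return (SIB >> 3) & 0x7; }
static uint8_t baseFromSIB(uint8_t SIB) { return SIB & 0x7; }

int main() {
  uint8_t SIB = 0x8d; // 10 001 101: scale bits 2 -> x4, index 1, base 5
  unsigned RexX = 1, RexB = 0;
  unsigned Scale = 1u << scaleFromSIB(SIB);
  unsigned Index = indexFromSIB(SIB) | (RexX << 3); // 9, e.g. r9 in 64-bit
  unsigned Base = baseFromSIB(SIB) | (RexB << 3);
  std::printf("scale=%u index=%u base=%u\n", Scale, Index, Base);
  // A 4-bit index of exactly 4 means "no index" (SIB_INDEX_NONE above), and
  // base 5 with mod == 0 means disp32 with no base (SIB_BASE_NONE).
}
```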
-
-/*
- * readDisplacement - Consumes the displacement of an instruction.
- *
- * @param insn - The instruction whose displacement is to be read.
- * @return - 0 if the displacement byte was successfully read; nonzero
- * otherwise.
- */
-static int readDisplacement(struct InternalInstruction* insn) {
- int8_t d8;
- int16_t d16;
- int32_t d32;
-
- dbgprintf(insn, "readDisplacement()");
-
- if (insn->consumedDisplacement)
- return 0;
-
- insn->consumedDisplacement = true;
- insn->displacementOffset = insn->readerCursor - insn->startLocation;
-
- switch (insn->eaDisplacement) {
- case EA_DISP_NONE:
- insn->consumedDisplacement = false;
- break;
- case EA_DISP_8:
- if (consumeInt8(insn, &d8))
- return -1;
- insn->displacement = d8;
- break;
- case EA_DISP_16:
- if (consumeInt16(insn, &d16))
- return -1;
- insn->displacement = d16;
- break;
- case EA_DISP_32:
- if (consumeInt32(insn, &d32))
- return -1;
- insn->displacement = d32;
- break;
- }
-
- insn->consumedDisplacement = true;
- return 0;
-}
-
-/*
- * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
- * displacement) for an instruction and interprets it.
- *
- * @param insn - The instruction whose addressing information is to be read.
- * @return - 0 if the information was successfully read; nonzero otherwise.
- */
-static int readModRM(struct InternalInstruction* insn) {
- uint8_t mod, rm, reg, evexrm;
-
- dbgprintf(insn, "readModRM()");
-
- if (insn->consumedModRM)
- return 0;
-
- if (consumeByte(insn, &insn->modRM))
- return -1;
- insn->consumedModRM = true;
-
- mod = modFromModRM(insn->modRM);
- rm = rmFromModRM(insn->modRM);
- reg = regFromModRM(insn->modRM);
-
- /*
- * This goes by insn->registerSize to pick the correct register, which messes
- * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
- * fixupReg().
- */
- switch (insn->registerSize) {
- case 2:
- insn->regBase = MODRM_REG_AX;
- insn->eaRegBase = EA_REG_AX;
- break;
- case 4:
- insn->regBase = MODRM_REG_EAX;
- insn->eaRegBase = EA_REG_EAX;
- break;
- case 8:
- insn->regBase = MODRM_REG_RAX;
- insn->eaRegBase = EA_REG_RAX;
- break;
- }
-
- reg |= rFromREX(insn->rexPrefix) << 3;
- rm |= bFromREX(insn->rexPrefix) << 3;
-
- evexrm = 0;
- if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
- reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
- evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
- }
-
- insn->reg = (Reg)(insn->regBase + reg);
-
- switch (insn->addressSize) {
- case 2: {
- EABase eaBaseBase = EA_BASE_BX_SI;
-
- switch (mod) {
- case 0x0:
- if (rm == 0x6) {
- insn->eaBase = EA_BASE_NONE;
- insn->eaDisplacement = EA_DISP_16;
- if (readDisplacement(insn))
- return -1;
- } else {
- insn->eaBase = (EABase)(eaBaseBase + rm);
- insn->eaDisplacement = EA_DISP_NONE;
- }
- break;
- case 0x1:
- insn->eaBase = (EABase)(eaBaseBase + rm);
- insn->eaDisplacement = EA_DISP_8;
- insn->displacementSize = 1;
- if (readDisplacement(insn))
- return -1;
- break;
- case 0x2:
- insn->eaBase = (EABase)(eaBaseBase + rm);
- insn->eaDisplacement = EA_DISP_16;
- if (readDisplacement(insn))
- return -1;
- break;
- case 0x3:
- insn->eaBase = (EABase)(insn->eaRegBase + rm);
- if (readDisplacement(insn))
- return -1;
- break;
- }
- break;
- }
- case 4:
- case 8: {
- EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
-
- switch (mod) {
- case 0x0:
- insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
- // In determining whether RIP-relative mode is used (rm=5),
- // or whether a SIB byte is present (rm=4),
- // the extension bits (REX.b and EVEX.x) are ignored.
- switch (rm & 7) {
- case 0x4: // SIB byte is present
- insn->eaBase = (insn->addressSize == 4 ?
- EA_BASE_sib : EA_BASE_sib64);
- if (readSIB(insn) || readDisplacement(insn))
- return -1;
- break;
- case 0x5: // RIP-relative
- insn->eaBase = EA_BASE_NONE;
- insn->eaDisplacement = EA_DISP_32;
- if (readDisplacement(insn))
- return -1;
- break;
- default:
- insn->eaBase = (EABase)(eaBaseBase + rm);
- break;
- }
- break;
- case 0x1:
- insn->displacementSize = 1;
- LLVM_FALLTHROUGH;
- case 0x2:
- insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
- switch (rm & 7) {
- case 0x4: // SIB byte is present
- insn->eaBase = EA_BASE_sib;
- if (readSIB(insn) || readDisplacement(insn))
- return -1;
- break;
- default:
- insn->eaBase = (EABase)(eaBaseBase + rm);
- if (readDisplacement(insn))
- return -1;
- break;
- }
- break;
- case 0x3:
- insn->eaDisplacement = EA_DISP_NONE;
- insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
- break;
- }
- break;
- }
- } /* switch (insn->addressSize) */
-
- return 0;
-}
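For the 32/64-bit address sizes, the mod/rm decision tree above reduces to a few rules; a condensed sketch that returns descriptive strings rather than the decoder's EABase values:

```c++
#include <cstdint>
#include <cstdio>

static uint8_t modFromModRM(uint8_t M) { return M >> 6; }
static uint8_t rmFromModRM(uint8_t M) { return M & 0x7; }

const char *addressingForm(uint8_t ModRM) {
  uint8_t Mod = modFromModRM(ModRM), RM = rmFromModRM(ModRM);
  if (Mod == 0x3)
    return "register operand";
  if (RM == 0x4) // REX.b/EVEX.x extension bits are ignored for this test
    return Mod == 0 ? "SIB" : "SIB + disp";
  if (RM == 0x5 && Mod == 0)
    return "RIP-relative + disp32"; // plain disp32 in 32-bit mode
  return Mod == 0 ? "base reg" : (Mod == 1 ? "base + disp8" : "base + disp32");
}

int main() {
  std::printf("%s\n", addressingForm(0x05)); // mod=0 rm=5
  std::printf("%s\n", addressingForm(0x44)); // mod=1 rm=4
  std::printf("%s\n", addressingForm(0xc0)); // mod=3
}
```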
-
-#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
- static uint16_t name(struct InternalInstruction *insn, \
- OperandType type, \
- uint8_t index, \
- uint8_t *valid) { \
- *valid = 1; \
- switch (type) { \
- default: \
- debug("Unhandled register type"); \
- *valid = 0; \
- return 0; \
- case TYPE_Rv: \
- return base + index; \
- case TYPE_R8: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
- if (insn->rexPrefix && \
- index >= 4 && index <= 7) { \
- return prefix##_SPL + (index - 4); \
- } else { \
- return prefix##_AL + index; \
- } \
- case TYPE_R16: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
- return prefix##_AX + index; \
- case TYPE_R32: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
- return prefix##_EAX + index; \
- case TYPE_R64: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
- return prefix##_RAX + index; \
- case TYPE_ZMM: \
- return prefix##_ZMM0 + index; \
- case TYPE_YMM: \
- return prefix##_YMM0 + index; \
- case TYPE_XMM: \
- return prefix##_XMM0 + index; \
- case TYPE_VK: \
- index &= 0xf; \
- if (index > 7) \
- *valid = 0; \
- return prefix##_K0 + index; \
- case TYPE_VK_PAIR: \
- if (index > 7) \
- *valid = 0; \
- return prefix##_K0_K1 + (index / 2); \
- case TYPE_MM64: \
- return prefix##_MM0 + (index & 0x7); \
- case TYPE_SEGMENTREG: \
- if ((index & 7) > 5) \
- *valid = 0; \
- return prefix##_ES + (index & 7); \
- case TYPE_DEBUGREG: \
- return prefix##_DR0 + index; \
- case TYPE_CONTROLREG: \
- return prefix##_CR0 + index; \
- case TYPE_BNDR: \
- if (index > 3) \
- *valid = 0; \
- return prefix##_BND0 + index; \
- case TYPE_MVSIBX: \
- return prefix##_XMM0 + index; \
- case TYPE_MVSIBY: \
- return prefix##_YMM0 + index; \
- case TYPE_MVSIBZ: \
- return prefix##_ZMM0 + index; \
- } \
- }
-
-/*
- * fixup*Value - Consults an operand type to determine the meaning of the
- * reg or R/M field. If the operand is an XMM operand, for example, the field
- * should be interpreted as XMM0 rather than AX, which is what readModRM()
- * would otherwise report.
- *
- * @param insn - The instruction containing the operand.
- * @param type - The operand type.
- * @param index - The existing value of the field as reported by readModRM().
- * @param valid - The address of a uint8_t. The target is set to 1 if the
- * field is valid for the register class; 0 if not.
- * @return - The proper value.
- */
-GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
-GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
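One subtlety baked into the TYPE_R8 case above: the presence of any REX prefix remaps encodings 4-7 from AH/CH/DH/BH to SPL/BPL/SIL/DIL. A sketch with assumed register-name tables, for display only:

```c++
#include <cstdio>

static const char *gpr8Name(unsigned Index, bool HasREX) {
  static const char *Legacy[8] = {"al", "cl", "dl", "bl",
                                  "ah", "ch", "dh", "bh"};
  static const char *Rex[16] = {"al",   "cl",   "dl",   "bl",
                                "spl",  "bpl",  "sil",  "dil",
                                "r8b",  "r9b",  "r10b", "r11b",
                                "r12b", "r13b", "r14b", "r15b"};
  return HasREX ? Rex[Index & 0xf] : Legacy[Index & 0x7];
}

int main() {
  std::printf("%s vs %s\n", gpr8Name(4, false), gpr8Name(4, true)); // ah, spl
}
```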
-
-/*
- * fixupReg - Consults an operand specifier to determine which of the
- * fixup*Value functions to use in correcting readModRM()'s interpretation.
- *
- * @param insn - See fixup*Value().
- * @param op - The operand specifier.
- * @return - 0 if fixup was successful; -1 if the register returned was
- * invalid for its class.
- */
-static int fixupReg(struct InternalInstruction *insn,
- const struct OperandSpecifier *op) {
- uint8_t valid;
-
- dbgprintf(insn, "fixupReg()");
-
- switch ((OperandEncoding)op->encoding) {
- default:
- debug("Expected a REG or R/M encoding in fixupReg");
- return -1;
- case ENCODING_VVVV:
- insn->vvvv = (Reg)fixupRegValue(insn,
- (OperandType)op->type,
- insn->vvvv,
- &valid);
- if (!valid)
- return -1;
- break;
- case ENCODING_REG:
- insn->reg = (Reg)fixupRegValue(insn,
- (OperandType)op->type,
- insn->reg - insn->regBase,
- &valid);
- if (!valid)
- return -1;
- break;
- CASE_ENCODING_RM:
- if (insn->eaBase >= insn->eaRegBase) {
- insn->eaBase = (EABase)fixupRMValue(insn,
- (OperandType)op->type,
- insn->eaBase - insn->eaRegBase,
- &valid);
- if (!valid)
- return -1;
- }
- break;
- }
-
- return 0;
-}
-
-/*
- * readOpcodeRegister - Reads an operand from the opcode field of an
- * instruction and interprets it appropriately given the operand width.
- * Handles AddRegFrm instructions.
- *
- * @param insn - the instruction whose opcode field is to be read.
- * @param size - The width (in bytes) of the register being specified.
- * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
- * RAX.
- * @return - 0 on success; nonzero otherwise.
- */
-static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
- dbgprintf(insn, "readOpcodeRegister()");
-
- if (size == 0)
- size = insn->registerSize;
-
- switch (size) {
- case 1:
- insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
- | (insn->opcode & 7)));
- if (insn->rexPrefix &&
- insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
- insn->opcodeRegister < MODRM_REG_AL + 0x8) {
- insn->opcodeRegister = (Reg)(MODRM_REG_SPL
- + (insn->opcodeRegister - MODRM_REG_AL - 4));
- }
-
- break;
- case 2:
- insn->opcodeRegister = (Reg)(MODRM_REG_AX
- + ((bFromREX(insn->rexPrefix) << 3)
- | (insn->opcode & 7)));
- break;
- case 4:
- insn->opcodeRegister = (Reg)(MODRM_REG_EAX
- + ((bFromREX(insn->rexPrefix) << 3)
- | (insn->opcode & 7)));
- break;
- case 8:
- insn->opcodeRegister = (Reg)(MODRM_REG_RAX
- + ((bFromREX(insn->rexPrefix) << 3)
- | (insn->opcode & 7)));
- break;
- }
-
- return 0;
-}
-
-/*
- * readImmediate - Consumes an immediate operand from an instruction, given the
- * desired operand size.
- *
- * @param insn - The instruction whose operand is to be read.
- * @param size - The width (in bytes) of the operand.
- * @return - 0 if the immediate was successfully consumed; nonzero
- * otherwise.
- */
-static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
- uint8_t imm8;
- uint16_t imm16;
- uint32_t imm32;
- uint64_t imm64;
-
- dbgprintf(insn, "readImmediate()");
-
- if (insn->numImmediatesConsumed == 2) {
- debug("Already consumed two immediates");
- return -1;
- }
-
- if (size == 0)
- size = insn->immediateSize;
- else
- insn->immediateSize = size;
- insn->immediateOffset = insn->readerCursor - insn->startLocation;
-
- switch (size) {
- case 1:
- if (consumeByte(insn, &imm8))
- return -1;
- insn->immediates[insn->numImmediatesConsumed] = imm8;
- break;
- case 2:
- if (consumeUInt16(insn, &imm16))
- return -1;
- insn->immediates[insn->numImmediatesConsumed] = imm16;
- break;
- case 4:
- if (consumeUInt32(insn, &imm32))
- return -1;
- insn->immediates[insn->numImmediatesConsumed] = imm32;
- break;
- case 8:
- if (consumeUInt64(insn, &imm64))
- return -1;
- insn->immediates[insn->numImmediatesConsumed] = imm64;
- break;
- }
-
- insn->numImmediatesConsumed++;
-
- return 0;
-}
-
-/*
- * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
- *
- * @param insn - The instruction whose operand is to be read.
- * @return - 0 if the vvvv was successfully consumed; nonzero
- * otherwise.
- */
-static int readVVVV(struct InternalInstruction* insn) {
- dbgprintf(insn, "readVVVV()");
-
- int vvvv;
- if (insn->vectorExtensionType == TYPE_EVEX)
- vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
- vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
- else if (insn->vectorExtensionType == TYPE_VEX_3B)
- vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
- else if (insn->vectorExtensionType == TYPE_VEX_2B)
- vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
- else if (insn->vectorExtensionType == TYPE_XOP)
- vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
- else
- return -1;
-
- if (insn->mode != MODE_64BIT)
- vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
-
- insn->vvvv = static_cast<Reg>(vvvv);
- return 0;
-}
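VEX stores vvvv inverted (one's complement); a one-line extractor matching the two-byte-VEX case, with the helper redefined locally and the byte values chosen as examples:

```c++
#include <cstdint>
#include <cstdio>

static uint8_t vvvvFromVEX2of2(uint8_t B) { return (~B >> 3) & 0xf; }

int main() {
  // c5 f8: raw bits 6:3 are 1111, inverted -> vvvv = 0 (unused)
  // c5 c0: raw bits 6:3 are 1000, inverted -> vvvv = 7 (e.g. xmm7)
  std::printf("%u %u\n", (unsigned)vvvvFromVEX2of2(0xf8),
              (unsigned)vvvvFromVEX2of2(0xc0));
}
```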
-
-/*
- * readMaskRegister - Reads a mask register from the opcode field of an
- * instruction.
- *
- * @param insn - The instruction whose opcode field is to be read.
- * @return - 0 on success; nonzero otherwise.
- */
-static int readMaskRegister(struct InternalInstruction* insn) {
- dbgprintf(insn, "readMaskRegister()");
-
- if (insn->vectorExtensionType != TYPE_EVEX)
- return -1;
-
- insn->writemask =
- static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
- return 0;
-}
-
-/*
- * readOperands - Consults the specifier for an instruction and consumes all
- * operands for that instruction, interpreting them as it goes.
- *
- * @param insn - The instruction whose operands are to be read and interpreted.
- * @return - 0 if all operands could be read; nonzero otherwise.
- */
-static int readOperands(struct InternalInstruction* insn) {
- int hasVVVV, needVVVV;
- int sawRegImm = 0;
-
- dbgprintf(insn, "readOperands()");
-
-  /* If a non-zero vvvv was specified, we need to make sure one of the
-     operands uses it. */
- hasVVVV = !readVVVV(insn);
- needVVVV = hasVVVV && (insn->vvvv != 0);
-
- for (const auto &Op : x86OperandSets[insn->spec->operands]) {
- switch (Op.encoding) {
- case ENCODING_NONE:
- case ENCODING_SI:
- case ENCODING_DI:
- break;
- CASE_ENCODING_VSIB:
- // VSIB can use the V2 bit so check only the other bits.
- if (needVVVV)
- needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
- if (readModRM(insn))
- return -1;
-
- // Reject if SIB wasn't used.
- if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
- return -1;
-
- // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
- if (insn->sibIndex == SIB_INDEX_NONE)
- insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
-
- // If EVEX.v2 is set this is one of the 16-31 registers.
- if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
- v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
- insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
-
- // Adjust the index register to the correct size.
- switch ((OperandType)Op.type) {
- default:
- debug("Unhandled VSIB index type");
- return -1;
- case TYPE_MVSIBX:
- insn->sibIndex = (SIBIndex)(SIB_INDEX_XMM0 +
- (insn->sibIndex - insn->sibIndexBase));
- break;
- case TYPE_MVSIBY:
- insn->sibIndex = (SIBIndex)(SIB_INDEX_YMM0 +
- (insn->sibIndex - insn->sibIndexBase));
- break;
- case TYPE_MVSIBZ:
- insn->sibIndex = (SIBIndex)(SIB_INDEX_ZMM0 +
- (insn->sibIndex - insn->sibIndexBase));
- break;
- }
-
- // Apply the AVX512 compressed displacement scaling factor.
- if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
- insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
- break;
- case ENCODING_REG:
- CASE_ENCODING_RM:
- if (readModRM(insn))
- return -1;
- if (fixupReg(insn, &Op))
- return -1;
- // Apply the AVX512 compressed displacement scaling factor.
- if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
- insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
- break;
- case ENCODING_IB:
- if (sawRegImm) {
- /* Saw a register immediate so don't read again and instead split the
- previous immediate. FIXME: This is a hack. */
- insn->immediates[insn->numImmediatesConsumed] =
- insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
- ++insn->numImmediatesConsumed;
- break;
- }
- if (readImmediate(insn, 1))
- return -1;
- if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
- sawRegImm = 1;
- break;
- case ENCODING_IW:
- if (readImmediate(insn, 2))
- return -1;
- break;
- case ENCODING_ID:
- if (readImmediate(insn, 4))
- return -1;
- break;
- case ENCODING_IO:
- if (readImmediate(insn, 8))
- return -1;
- break;
- case ENCODING_Iv:
- if (readImmediate(insn, insn->immediateSize))
- return -1;
- break;
- case ENCODING_Ia:
- if (readImmediate(insn, insn->addressSize))
- return -1;
- break;
- case ENCODING_IRC:
- insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
- lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
- break;
- case ENCODING_RB:
- if (readOpcodeRegister(insn, 1))
- return -1;
- break;
- case ENCODING_RW:
- if (readOpcodeRegister(insn, 2))
- return -1;
- break;
- case ENCODING_RD:
- if (readOpcodeRegister(insn, 4))
- return -1;
- break;
- case ENCODING_RO:
- if (readOpcodeRegister(insn, 8))
- return -1;
- break;
- case ENCODING_Rv:
- if (readOpcodeRegister(insn, 0))
- return -1;
- break;
- case ENCODING_CC:
- insn->immediates[1] = insn->opcode & 0xf;
- break;
- case ENCODING_FP:
- break;
- case ENCODING_VVVV:
- needVVVV = 0; /* Mark that we have found a VVVV operand. */
- if (!hasVVVV)
- return -1;
- if (insn->mode != MODE_64BIT)
- insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
- if (fixupReg(insn, &Op))
- return -1;
- break;
- case ENCODING_WRITEMASK:
- if (readMaskRegister(insn))
- return -1;
- break;
- case ENCODING_DUP:
- break;
- default:
- dbgprintf(insn, "Encountered an operand with an unknown encoding.");
- return -1;
- }
- }
-
-  /* If we didn't find an ENCODING_VVVV operand but a non-zero vvvv was present, fail */
- if (needVVVV) return -1;
-
- return 0;
-}
-
-/*
- * decodeInstruction - Reads and interprets a full instruction provided by the
- * user.
- *
- * @param insn - A pointer to the instruction to be populated. Must be
- * pre-allocated.
- * @param reader - The function to be used to read the instruction's bytes.
- * @param readerArg - A generic argument to be passed to the reader to store
- * any internal state.
- * @param logger - If non-NULL, the function to be used to write log messages
- * and warnings.
- * @param loggerArg - A generic argument to be passed to the logger to store
- * any internal state.
- * @param startLoc - The address (in the reader's address space) of the first
- * byte in the instruction.
- * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
- * decode the instruction in.
- * @return - 0 if the instruction's memory could be read; nonzero if
- * not.
- */
-int llvm::X86Disassembler::decodeInstruction(
- struct InternalInstruction *insn, byteReader_t reader,
- const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
- uint64_t startLoc, DisassemblerMode mode) {
- memset(insn, 0, sizeof(struct InternalInstruction));
-
- insn->reader = reader;
- insn->readerArg = readerArg;
- insn->dlog = logger;
- insn->dlogArg = loggerArg;
- insn->startLocation = startLoc;
- insn->readerCursor = startLoc;
- insn->mode = mode;
- insn->numImmediatesConsumed = 0;
-
- if (readPrefixes(insn) ||
- readOpcode(insn) ||
- getID(insn, miiArg) ||
- insn->instructionID == 0 ||
- readOperands(insn))
- return -1;
-
- insn->operands = x86OperandSets[insn->spec->operands];
-
- insn->length = insn->readerCursor - insn->startLocation;
-
- dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
- startLoc, insn->readerCursor, insn->length);
-
- if (insn->length > 15)
- dbgprintf(insn, "Instruction exceeds 15-byte limit");
-
- return 0;
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 7c0a42c019e3..147fe46d81b9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -19,6 +19,9 @@
#include "llvm/Support/X86DisassemblerDecoderCommon.h"
namespace llvm {
+
+class MCInstrInfo;
+
namespace X86Disassembler {
// Accessor functions for various fields of an Intel instruction
@@ -446,12 +449,12 @@ enum SIBBase {
};
/// Possible displacement types for effective-address computations.
-typedef enum {
+enum EADisplacement {
EA_DISP_NONE,
EA_DISP_8,
EA_DISP_16,
EA_DISP_32
-} EADisplacement;
+};
/// All possible values of the reg field in the ModR/M byte.
enum Reg {
@@ -502,25 +505,6 @@ enum VectorExtensionType {
TYPE_XOP = 0x4
};
-/// Type for the byte reader that the consumer must provide to
-/// the decoder. Reads a single byte from the instruction's address space.
-/// \param arg A baton that the consumer can associate with any internal
-/// state that it needs.
-/// \param byte A pointer to a single byte in memory that should be set to
-/// contain the value at address.
-/// \param address The address in the instruction's address space that should
-/// be read from.
-/// \return -1 if the byte cannot be read for any reason; 0 otherwise.
-typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address);
-
-/// Type for the logging function that the consumer can provide to
-/// get debugging output from the decoder.
-/// \param arg A baton that the consumer can associate with any internal
-/// state that it needs.
-/// \param log A string that contains the message. Will be reused after
-/// the logger returns.
-typedef void (*dlog_t)(void *arg, const char *log);
-
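For reference, a consumer-side sketch of the two callback types being removed here: a byteReader_t over a flat buffer plus a dlog_t that prints to stderr. BufferRegion is an assumption for the demo, not an LLVM type:

```c++
#include <cstdint>
#include <cstdio>

typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address);
typedef void (*dlog_t)(void *arg, const char *log);

struct BufferRegion {
  const uint8_t *base;
  uint64_t start, size; // the reader's address space: [start, start+size)
};

static int bufferReader(const void *arg, uint8_t *byte, uint64_t address) {
  const BufferRegion *r = static_cast<const BufferRegion *>(arg);
  if (address < r->start || address >= r->start + r->size)
    return -1; // out of range; the decoder treats nonzero as failure
  *byte = r->base[address - r->start];
  return 0;
}

static void stderrLogger(void *, const char *log) {
  std::fprintf(stderr, "decoder: %s\n", log);
}

int main() {
  const uint8_t code[] = {0xf3, 0x90}; // pause
  BufferRegion region{code, 0x1000, sizeof(code)};
  uint8_t b;
  if (!bufferReader(&region, &b, 0x1001))
    std::printf("byte at 0x1001 = 0x%02x\n", (unsigned)b);
  stderrLogger(nullptr, "example message");
}
```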
/// The specification for how to extract and interpret a full instruction and
/// its operands.
struct InstructionSpecifier {
@@ -529,18 +513,11 @@ struct InstructionSpecifier {
/// The x86 internal instruction, which is produced by the decoder.
struct InternalInstruction {
- // Reader interface (C)
- byteReader_t reader;
// Opaque value passed to the reader
- const void* readerArg;
+ llvm::ArrayRef<uint8_t> bytes;
// The address of the next byte to read via the reader
uint64_t readerCursor;
- // Logger interface (C)
- dlog_t dlog;
- // Opaque value passed to the logger
- void* dlogArg;
-
// General instruction information
// The mode to disassemble for (64-bit, protected, real)
@@ -616,11 +593,9 @@ struct InternalInstruction {
uint8_t modRM;
// The SIB byte, used for more complex 32- or 64-bit memory operands
- bool consumedSIB;
uint8_t sib;
// The displacement, used for memory operands
- bool consumedDisplacement;
int32_t displacement;
// Immediates. There can be two in some cases
@@ -657,38 +632,6 @@ struct InternalInstruction {
ArrayRef<OperandSpecifier> operands;
};
-/// Decode one instruction and store the decoding results in
-/// a buffer provided by the consumer.
-/// \param insn The buffer to store the instruction in. Allocated by the
-/// consumer.
-/// \param reader The byteReader_t for the bytes to be read.
-/// \param readerArg An argument to pass to the reader for storing context
-/// specific to the consumer. May be NULL.
-/// \param logger The dlog_t to be used in printing status messages from the
-/// disassembler. May be NULL.
-/// \param loggerArg An argument to pass to the logger for storing context
-/// specific to the logger. May be NULL.
-/// \param startLoc The address (in the reader's address space) of the first
-/// byte in the instruction.
-/// \param mode The mode (16-bit, 32-bit, 64-bit) to decode in.
-/// \return Nonzero if there was an error during decode, 0 otherwise.
-int decodeInstruction(InternalInstruction *insn,
- byteReader_t reader,
- const void *readerArg,
- dlog_t logger,
- void *loggerArg,
- const void *miiArg,
- uint64_t startLoc,
- DisassemblerMode mode);
-
-/// Print a message to debugs()
-/// \param file The name of the file printing the debug message.
-/// \param line The line number that printed the debug message.
-/// \param s The message to print.
-void Debug(const char *file, unsigned line, const char *s);
-
-StringRef GetInstrName(unsigned Opcode, const void *mii);
-
} // namespace X86Disassembler
} // namespace llvm
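
With the callback typedefs gone, the decoder now reads directly out of an ArrayRef. A minimal sketch of the consumption pattern the new bytes/readerCursor fields imply; consumeByte is illustrative and not part of this patch:

// Sketch: consuming a byte reduces to an ArrayRef bounds check plus a cursor
// bump (field names as in InternalInstruction above).
static bool consumeByte(InternalInstruction &Insn, uint8_t &Byte) {
  uint64_t Offset = Insn.readerCursor - Insn.startLocation;
  if (Offset >= Insn.bytes.size())
    return false; // ran off the end of the instruction bytes
  Byte = Insn.bytes[Offset];
  ++Insn.readerCursor;
  return true;
}
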
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index ed2ee55ff2a5..675a9c377b12 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -38,8 +38,9 @@ void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">");
}
-void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &OS) {
// If verbose assembly is enabled, we can print some informative comments.
if (CommentStream)
HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, MII);
@@ -69,7 +70,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
// Try to print any aliases first.
else if (!printAliasInstr(MI, OS) &&
!printVecCompareInstr(MI, OS))
- printInstruction(MI, OS);
+ printInstruction(MI, Address, OS);
// Next always print the annotation.
printAnnotation(OS, Annot);
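
The added Address parameter threads the instruction's location into the tblgen'd printers, so PC-relative operands can be rendered against a known address. A hedged sketch of a call site under the new signature; printDisassembled is a hypothetical helper:

// Illustrative call site: a disassembler that knows where the instruction
// lives passes that address straight through to the printer.
static void printDisassembled(MCInstPrinter &Printer, const MCInst &Inst,
                              uint64_t Address, const MCSubtargetInfo &STI,
                              raw_ostream &OS) {
  Printer.printInst(&Inst, Address, /*Annot=*/"", STI, OS);
}
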
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
index 747ddd30a2d9..3d5d384dc4a0 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
@@ -24,8 +24,8 @@ public:
: X86InstPrinterCommon(MAI, MII, MRI), HasCustomInstComment(false) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &OS) override;
bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS);
// Autogenerated by tblgen, returns true if we successfully printed an
@@ -35,7 +35,7 @@ public:
unsigned PrintMethodIdx, raw_ostream &O);
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &OS);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &OS);
static const char *getRegisterName(unsigned RegNo);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index f08fcb575bf0..dffda5217675 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -12,55 +12,95 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
-static unsigned getFixupKindSize(unsigned Kind) {
- switch (Kind) {
- default:
- llvm_unreachable("invalid fixup kind!");
- case FK_NONE:
- return 0;
- case FK_PCRel_1:
- case FK_SecRel_1:
- case FK_Data_1:
- return 1;
- case FK_PCRel_2:
- case FK_SecRel_2:
- case FK_Data_2:
- return 2;
- case FK_PCRel_4:
- case X86::reloc_riprel_4byte:
- case X86::reloc_riprel_4byte_relax:
- case X86::reloc_riprel_4byte_relax_rex:
- case X86::reloc_riprel_4byte_movq_load:
- case X86::reloc_signed_4byte:
- case X86::reloc_signed_4byte_relax:
- case X86::reloc_global_offset_table:
- case X86::reloc_branch_4byte_pcrel:
- case FK_SecRel_4:
- case FK_Data_4:
- return 4;
- case FK_PCRel_8:
- case FK_SecRel_8:
- case FK_Data_8:
- case X86::reloc_global_offset_table8:
- return 8;
+namespace {
+/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
+class X86AlignBranchKind {
+private:
+ uint8_t AlignBranchKind = 0;
+
+public:
+ void operator=(const std::string &Val) {
+ if (Val.empty())
+ return;
+ SmallVector<StringRef, 6> BranchTypes;
+ StringRef(Val).split(BranchTypes, '+', -1, false);
+ for (auto BranchType : BranchTypes) {
+ if (BranchType == "fused")
+ addKind(X86::AlignBranchFused);
+ else if (BranchType == "jcc")
+ addKind(X86::AlignBranchJcc);
+ else if (BranchType == "jmp")
+ addKind(X86::AlignBranchJmp);
+ else if (BranchType == "call")
+ addKind(X86::AlignBranchCall);
+ else if (BranchType == "ret")
+ addKind(X86::AlignBranchRet);
+ else if (BranchType == "indirect")
+ addKind(X86::AlignBranchIndirect);
+ else {
+ report_fatal_error(
+ "'-x86-align-branch 'The branches's type is combination of jcc, "
+ "fused, jmp, call, ret, indirect.(plus separated)",
+ false);
+ }
+ }
}
-}
-namespace {
+ operator uint8_t() const { return AlignBranchKind; }
+ void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
+};
+
+X86AlignBranchKind X86AlignBranchKindLoc;
+
+cl::opt<unsigned> X86AlignBranchBoundary(
+ "x86-align-branch-boundary", cl::init(0),
+ cl::desc(
+ "Control how the assembler should align branches with NOP. If the "
+ "boundary's size is not 0, it should be a power of 2 and no less "
+ "than 32. Branches will be aligned to prevent from being across or "
+ "against the boundary of specified size. The default value 0 does not "
+ "align branches."));
+
+cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
+ "x86-align-branch",
+ cl::desc("Specify types of branches to align (plus separated list of "
+ "types). The branches's types are combination of jcc, fused, "
+ "jmp, call, ret, indirect."),
+ cl::value_desc("jcc indicates conditional jumps, fused indicates fused "
+ "conditional jumps, jmp indicates unconditional jumps, call "
+ "indicates direct and indirect calls, ret indicates rets, "
+ "indirect indicates indirect jumps."),
+ cl::location(X86AlignBranchKindLoc));
+
+cl::opt<bool> X86AlignBranchWithin32BBoundaries(
+ "x86-branches-within-32B-boundaries", cl::init(false),
+ cl::desc(
+ "Align selected instructions to mitigate negative performance impact "
+ "of Intel's micro code update for errata skx102. May break "
+ "assumptions about labels corresponding to particular instructions, "
+ "and should be used with caution."));
class X86ELFObjectWriter : public MCELFObjectTargetWriter {
public:
@@ -71,9 +111,42 @@ public:
class X86AsmBackend : public MCAsmBackend {
const MCSubtargetInfo &STI;
+ std::unique_ptr<const MCInstrInfo> MCII;
+ X86AlignBranchKind AlignBranchType;
+ Align AlignBoundary;
+
+ bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
+
+ bool needAlign(MCObjectStreamer &OS) const;
+ bool needAlignInst(const MCInst &Inst) const;
+ MCBoundaryAlignFragment *
+ getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const;
+ MCInst PrevInst;
+
public:
X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
- : MCAsmBackend(support::little), STI(STI) {}
+ : MCAsmBackend(support::little), STI(STI),
+ MCII(T.createMCInstrInfo()) {
+ if (X86AlignBranchWithin32BBoundaries) {
+ // At the moment, this defaults to aligning fused branches, unconditional
+ // jumps, and (unfused) conditional jumps with nops. Both the
+ // instructions aligned and the alignment method (nop vs prefix) may
+ // change in the future.
+      AlignBoundary = assumeAligned(32);
+ AlignBranchType.addKind(X86::AlignBranchFused);
+ AlignBranchType.addKind(X86::AlignBranchJcc);
+ AlignBranchType.addKind(X86::AlignBranchJmp);
+ }
+ // Allow overriding defaults set by master flag
+ if (X86AlignBranchBoundary.getNumOccurrences())
+ AlignBoundary = assumeAligned(X86AlignBranchBoundary);
+ if (X86AlignBranch.getNumOccurrences())
+ AlignBranchType = X86AlignBranchKindLoc;
+ }
+
+ bool allowAutoPadding() const override;
+ void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
+ void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
unsigned getNumFixupKinds() const override {
return X86::NumTargetFixupKinds;
@@ -81,49 +154,15 @@ public:
Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
- const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
- {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
- {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
- {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
- {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
- {"reloc_signed_4byte", 0, 32, 0},
- {"reloc_signed_4byte_relax", 0, 32, 0},
- {"reloc_global_offset_table", 0, 32, 0},
- {"reloc_global_offset_table8", 0, 64, 0},
- {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
- };
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
- return Infos[Kind - FirstTargetFixupKind];
- }
-
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override;
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
- const MCSubtargetInfo *STI) const override {
- unsigned Size = getFixupKindSize(Fixup.getKind());
-
- assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
-
- // Check that uppper bits are either all zeros or all ones.
- // Specifically ignore overflow/underflow as long as the leakage is
- // limited to the lower bits. This is to remain compatible with
- // other assemblers.
- assert((Size == 0 || isIntN(Size * 8 + 1, Value)) &&
- "Value does not fit in the Fixup field");
-
- for (unsigned i = 0; i != Size; ++i)
- Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
- }
+ const MCSubtargetInfo *STI) const override;
bool mayNeedRelaxation(const MCInst &Inst,
const MCSubtargetInfo &STI) const override;
@@ -243,6 +282,200 @@ static unsigned getRelaxedOpcode(const MCInst &Inst, bool is16BitMode) {
return getRelaxedOpcodeBranch(Inst, is16BitMode);
}
+static X86::CondCode getCondFromBranch(const MCInst &MI,
+ const MCInstrInfo &MCII) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ return X86::COND_INVALID;
+ case X86::JCC_1: {
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ return static_cast<X86::CondCode>(
+ MI.getOperand(Desc.getNumOperands() - 1).getImm());
+ }
+ }
+}
+
+static X86::SecondMacroFusionInstKind
+classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
+ X86::CondCode CC = getCondFromBranch(MI, MCII);
+ return classifySecondCondCodeInMacroFusion(CC);
+}
+
+/// Check if the instruction uses RIP relative addressing.
+static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+ unsigned CurOp = X86II::getOperandBias(Desc);
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ if (MemoryOperand < 0)
+ return false;
+ unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
+ unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
+ return (BaseReg == X86::RIP);
+}
+
+/// Check if the instruction is valid as the first instruction in macro fusion.
+static bool isFirstMacroFusibleInst(const MCInst &Inst,
+ const MCInstrInfo &MCII) {
+ // An Intel instruction with RIP relative addressing is not macro fusible.
+ if (isRIPRelative(Inst, MCII))
+ return false;
+ X86::FirstMacroFusionInstKind FIK =
+ X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
+ return FIK != X86::FirstMacroFusionInstKind::Invalid;
+}
+
+/// Check if the two instructions will be macro-fused on the target cpu.
+bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
+ const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
+ if (!InstDesc.isConditionalBranch())
+ return false;
+ if (!isFirstMacroFusibleInst(Cmp, *MCII))
+ return false;
+ const X86::FirstMacroFusionInstKind CmpKind =
+ X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
+ const X86::SecondMacroFusionInstKind BranchKind =
+ classifySecondInstInMacroFusion(Jcc, *MCII);
+ return X86::isMacroFused(CmpKind, BranchKind);
+}
+
+/// Check if the instruction has a variant symbol operand.
+static bool hasVariantSymbol(const MCInst &MI) {
+ for (auto &Operand : MI) {
+ if (!Operand.isExpr())
+ continue;
+ const MCExpr &Expr = *Operand.getExpr();
+ if (Expr.getKind() == MCExpr::SymbolRef &&
+ cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
+ return true;
+ }
+ return false;
+}
+
+bool X86AsmBackend::allowAutoPadding() const {
+ return (AlignBoundary != Align::None() &&
+ AlignBranchType != X86::AlignBranchNone);
+}
+
+bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
+ if (!OS.getAllowAutoPadding())
+ return false;
+ assert(allowAutoPadding() && "incorrect initialization!");
+
+ MCAssembler &Assembler = OS.getAssembler();
+ MCSection *Sec = OS.getCurrentSectionOnly();
+  // TODO: Currently we don't deal with bundle cases.
+ if (Assembler.isBundlingEnabled() && Sec->isBundleLocked())
+ return false;
+
+ // Branches only need to be aligned in 32-bit or 64-bit mode.
+ if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit)))
+ return false;
+
+ return true;
+}
+
+/// Check if the instruction needs to be aligned. Padding is disabled before
+/// an instruction which may be rewritten by the linker (e.g. TLSCALL).
+bool X86AsmBackend::needAlignInst(const MCInst &Inst) const {
+ // Linker may rewrite the instruction with variant symbol operand.
+ if (hasVariantSymbol(Inst))
+ return false;
+
+ const MCInstrDesc &InstDesc = MCII->get(Inst.getOpcode());
+ return (InstDesc.isConditionalBranch() &&
+ (AlignBranchType & X86::AlignBranchJcc)) ||
+ (InstDesc.isUnconditionalBranch() &&
+ (AlignBranchType & X86::AlignBranchJmp)) ||
+ (InstDesc.isCall() &&
+ (AlignBranchType & X86::AlignBranchCall)) ||
+ (InstDesc.isReturn() &&
+ (AlignBranchType & X86::AlignBranchRet)) ||
+ (InstDesc.isIndirectBranch() &&
+ (AlignBranchType & X86::AlignBranchIndirect));
+}
+
+static bool canReuseBoundaryAlignFragment(const MCBoundaryAlignFragment &F) {
+  // If a MCBoundaryAlignFragment has not been used to emit NOPs, we can
+  // reuse it.
+ return !F.canEmitNops();
+}
+
+MCBoundaryAlignFragment *
+X86AsmBackend::getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const {
+ auto *F = dyn_cast_or_null<MCBoundaryAlignFragment>(OS.getCurrentFragment());
+ if (!F || !canReuseBoundaryAlignFragment(*F)) {
+ F = new MCBoundaryAlignFragment(AlignBoundary);
+ OS.insert(F);
+ }
+ return F;
+}
+
+/// Insert MCBoundaryAlignFragment before instructions to align branches.
+void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS,
+ const MCInst &Inst) {
+ if (!needAlign(OS))
+ return;
+
+ MCFragment *CF = OS.getCurrentFragment();
+ bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused;
+ if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) {
+ // Macro fusion actually happens and there is no other fragment inserted
+    // after the previous instruction. NOPs can be emitted in PF to align the
+    // fused jcc.
+ if (auto *PF =
+ dyn_cast_or_null<MCBoundaryAlignFragment>(CF->getPrevNode())) {
+ const_cast<MCBoundaryAlignFragment *>(PF)->setEmitNops(true);
+ const_cast<MCBoundaryAlignFragment *>(PF)->setFused(true);
+ }
+ } else if (needAlignInst(Inst)) {
+ // Note: When there is at least one fragment, such as MCAlignFragment,
+ // inserted after the previous instruction, e.g.
+ //
+ // \code
+ // cmp %rax %rcx
+ // .align 16
+ // je .Label0
+    // \endcode
+ //
+    // We will treat the JCC as an unfused branch although it may be fused
+ // with the CMP.
+ auto *F = getOrCreateBoundaryAlignFragment(OS);
+ F->setEmitNops(true);
+ F->setFused(false);
+ } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) {
+    // We don't know if macro fusion happens until reaching the next
+    // instruction, so a placeholder is put here if necessary.
+ getOrCreateBoundaryAlignFragment(OS);
+ }
+
+ PrevInst = Inst;
+}
+
+/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned
+/// if necessary.
+void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {
+ if (!needAlign(OS))
+ return;
+ // If the branch is emitted into a MCRelaxableFragment, we can determine the
+ // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the
+  // branch is fused, the fused branch (macro fusion pair) must be emitted
+  // into two fragments; when the branch is unfused, it must be emitted into
+  // one fragment. The MCRelaxableFragment naturally marks the end of the
+  // fused or unfused branch.
+  // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end
+  // of the branch. This MCBoundaryAlignFragment may be reused to emit NOPs
+  // to align another branch.
+ if (needAlignInst(Inst) && !isa<MCRelaxableFragment>(OS.getCurrentFragment()))
+ OS.insert(new MCBoundaryAlignFragment(AlignBoundary));
+
+ // Update the maximum alignment on the current section if necessary.
+ MCSection *Sec = OS.getCurrentSectionOnly();
+ if (AlignBoundary.value() > Sec->getAlignment())
+ Sec->setAlignment(AlignBoundary);
+}
+
Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
if (STI.getTargetTriple().isOSBinFormatELF()) {
if (STI.getTargetTriple().getArch() == Triple::x86_64) {
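
Conceptually, the machinery above pads a branch whenever it would cross or end against an aligned boundary. A self-contained sketch of that predicate; needPadding is a hypothetical name, and the real decision happens during fragment relaxation:

// Sketch: a branch occupying [Offset, Offset + Size) needs padding when it
// crosses the next multiple of Boundary or ends exactly on one.
static bool needPadding(uint64_t Offset, uint64_t Size, uint64_t Boundary) {
  // e.g. Offset = 30, Size = 4, Boundary = 32 -> true (crosses byte 32);
  //      Offset = 28, Size = 4 -> true (ends against byte 32).
  return Offset / Boundary != (Offset + Size) / Boundary;
}
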
@@ -256,12 +489,100 @@ Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
return MCAsmBackend::getFixupKind(Name);
}
+const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
+ {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"reloc_signed_4byte", 0, 32, 0},
+ {"reloc_signed_4byte_relax", 0, 32, 0},
+ {"reloc_global_offset_table", 0, 32, 0},
+ {"reloc_global_offset_table8", 0, 64, 0},
+ {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
+ return Infos[Kind - FirstTargetFixupKind];
+}
+
bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
const MCFixup &Fixup,
const MCValue &) {
return Fixup.getKind() == FK_NONE;
}
+static unsigned getFixupKindSize(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_NONE:
+ return 0;
+ case FK_PCRel_1:
+ case FK_SecRel_1:
+ case FK_Data_1:
+ return 1;
+ case FK_PCRel_2:
+ case FK_SecRel_2:
+ case FK_Data_2:
+ return 2;
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_relax:
+ case X86::reloc_riprel_4byte_relax_rex:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case X86::reloc_signed_4byte_relax:
+ case X86::reloc_global_offset_table:
+ case X86::reloc_branch_4byte_pcrel:
+ case FK_SecRel_4:
+ case FK_Data_4:
+ return 4;
+ case FK_PCRel_8:
+ case FK_SecRel_8:
+ case FK_Data_8:
+ case X86::reloc_global_offset_table8:
+ return 8;
+ }
+}
+
+void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const {
+ unsigned Size = getFixupKindSize(Fixup.getKind());
+
+ assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
+
+ int64_t SignedValue = static_cast<int64_t>(Value);
+ if ((Target.isAbsolute() || IsResolved) &&
+ getFixupKindInfo(Fixup.getKind()).Flags &
+ MCFixupKindInfo::FKF_IsPCRel) {
+      // Check that the PC-relative fixup fits into the fixup size.
+ if (Size > 0 && !isIntN(Size * 8, SignedValue))
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "value of " + Twine(SignedValue) +
+ " is too large for field of " + Twine(Size) +
+ ((Size == 1) ? " byte." : " bytes."));
+ } else {
+    // Check that upper bits are either all zeros or all ones.
+ // Specifically ignore overflow/underflow as long as the leakage is
+ // limited to the lower bits. This is to remain compatible with
+ // other assemblers.
+ assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
+ "Value does not fit in the Fixup field");
+ }
+
+ for (unsigned i = 0; i != Size; ++i)
+ Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+}
+
bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
const MCSubtargetInfo &STI) const {
// Branches can always be relaxed in either mode.
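
The tail of applyFixup above is a plain little-endian patch loop; the same idea as a standalone sketch, with patchLE as a hypothetical helper:

// Standalone illustration of the byte-patching loop in applyFixup: write the
// low Size bytes of Value little-endian at Offset in the fragment contents.
static void patchLE(MutableArrayRef<char> Data, unsigned Offset,
                    uint64_t Value, unsigned Size) {
  assert(Offset + Size <= Data.size() && "fixup runs past the fragment");
  for (unsigned i = 0; i != Size; ++i)
    Data[Offset + i] = uint8_t(Value >> (i * 8));
}
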
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 6bd6c6cac7df..a4f8dd669e1e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -101,6 +101,261 @@ namespace X86 {
COND_INVALID
};
+
+ // The classification for the first instruction in macro fusion.
+ enum class FirstMacroFusionInstKind {
+ // TEST
+ Test,
+ // CMP
+ Cmp,
+ // AND
+ And,
+ // ADD, SUB
+ AddSub,
+ // INC, DEC
+ IncDec,
+ // Not valid as a first macro fusion instruction
+ Invalid
+ };
+
+ enum class SecondMacroFusionInstKind {
+ // JA, JB and variants.
+ AB,
+ // JE, JL, JG and variants.
+ ELG,
+ // JS, JP, JO and variants
+ SPO,
+ // Not a fusible jump.
+ Invalid,
+ };
+
+ /// \returns the type of the first instruction in macro-fusion.
+ inline FirstMacroFusionInstKind
+ classifyFirstOpcodeInMacroFusion(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return FirstMacroFusionInstKind::Invalid;
+ // TEST
+ case X86::TEST16i16:
+ case X86::TEST16mr:
+ case X86::TEST16ri:
+ case X86::TEST16rr:
+ case X86::TEST32i32:
+ case X86::TEST32mr:
+ case X86::TEST32ri:
+ case X86::TEST32rr:
+ case X86::TEST64i32:
+ case X86::TEST64mr:
+ case X86::TEST64ri32:
+ case X86::TEST64rr:
+ case X86::TEST8i8:
+ case X86::TEST8mr:
+ case X86::TEST8ri:
+ case X86::TEST8rr:
+ return FirstMacroFusionInstKind::Test;
+ case X86::AND16i16:
+ case X86::AND16ri:
+ case X86::AND16ri8:
+ case X86::AND16rm:
+ case X86::AND16rr:
+ case X86::AND16rr_REV:
+ case X86::AND32i32:
+ case X86::AND32ri:
+ case X86::AND32ri8:
+ case X86::AND32rm:
+ case X86::AND32rr:
+ case X86::AND32rr_REV:
+ case X86::AND64i32:
+ case X86::AND64ri32:
+ case X86::AND64ri8:
+ case X86::AND64rm:
+ case X86::AND64rr:
+ case X86::AND64rr_REV:
+ case X86::AND8i8:
+ case X86::AND8ri:
+ case X86::AND8ri8:
+ case X86::AND8rm:
+ case X86::AND8rr:
+ case X86::AND8rr_REV:
+ return FirstMacroFusionInstKind::And;
+ // CMP
+ case X86::CMP16i16:
+ case X86::CMP16mr:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP16rm:
+ case X86::CMP16rr:
+ case X86::CMP16rr_REV:
+ case X86::CMP32i32:
+ case X86::CMP32mr:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP32rm:
+ case X86::CMP32rr:
+ case X86::CMP32rr_REV:
+ case X86::CMP64i32:
+ case X86::CMP64mr:
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP64rm:
+ case X86::CMP64rr:
+ case X86::CMP64rr_REV:
+ case X86::CMP8i8:
+ case X86::CMP8mr:
+ case X86::CMP8ri:
+ case X86::CMP8ri8:
+ case X86::CMP8rm:
+ case X86::CMP8rr:
+ case X86::CMP8rr_REV:
+ return FirstMacroFusionInstKind::Cmp;
+ // ADD
+ case X86::ADD16i16:
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16rm:
+ case X86::ADD16rr:
+ case X86::ADD16rr_REV:
+ case X86::ADD32i32:
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD32rm:
+ case X86::ADD32rr:
+ case X86::ADD32rr_REV:
+ case X86::ADD64i32:
+ case X86::ADD64ri32:
+ case X86::ADD64ri8:
+ case X86::ADD64rm:
+ case X86::ADD64rr:
+ case X86::ADD64rr_REV:
+ case X86::ADD8i8:
+ case X86::ADD8ri:
+ case X86::ADD8ri8:
+ case X86::ADD8rm:
+ case X86::ADD8rr:
+ case X86::ADD8rr_REV:
+ // SUB
+ case X86::SUB16i16:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB16rm:
+ case X86::SUB16rr:
+ case X86::SUB16rr_REV:
+ case X86::SUB32i32:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB32rm:
+ case X86::SUB32rr:
+ case X86::SUB32rr_REV:
+ case X86::SUB64i32:
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB64rm:
+ case X86::SUB64rr:
+ case X86::SUB64rr_REV:
+ case X86::SUB8i8:
+ case X86::SUB8ri:
+ case X86::SUB8ri8:
+ case X86::SUB8rm:
+ case X86::SUB8rr:
+ case X86::SUB8rr_REV:
+ return FirstMacroFusionInstKind::AddSub;
+ // INC
+ case X86::INC16r:
+ case X86::INC16r_alt:
+ case X86::INC32r:
+ case X86::INC32r_alt:
+ case X86::INC64r:
+ case X86::INC8r:
+ // DEC
+ case X86::DEC16r:
+ case X86::DEC16r_alt:
+ case X86::DEC32r:
+ case X86::DEC32r_alt:
+ case X86::DEC64r:
+ case X86::DEC8r:
+ return FirstMacroFusionInstKind::IncDec;
+ }
+ }
+
+ /// \returns the type of the second instruction in macro-fusion.
+ inline SecondMacroFusionInstKind
+ classifySecondCondCodeInMacroFusion(X86::CondCode CC) {
+ if (CC == X86::COND_INVALID)
+ return SecondMacroFusionInstKind::Invalid;
+
+ switch (CC) {
+ default:
+ return SecondMacroFusionInstKind::Invalid;
+ // JE,JZ
+ case X86::COND_E:
+ // JNE,JNZ
+ case X86::COND_NE:
+ // JL,JNGE
+ case X86::COND_L:
+ // JLE,JNG
+ case X86::COND_LE:
+ // JG,JNLE
+ case X86::COND_G:
+ // JGE,JNL
+ case X86::COND_GE:
+ return SecondMacroFusionInstKind::ELG;
+ // JB,JC
+ case X86::COND_B:
+ // JNA,JBE
+ case X86::COND_BE:
+ // JA,JNBE
+ case X86::COND_A:
+ // JAE,JNC,JNB
+ case X86::COND_AE:
+ return SecondMacroFusionInstKind::AB;
+ // JS
+ case X86::COND_S:
+ // JNS
+ case X86::COND_NS:
+ // JP,JPE
+ case X86::COND_P:
+ // JNP,JPO
+ case X86::COND_NP:
+ // JO
+ case X86::COND_O:
+ // JNO
+ case X86::COND_NO:
+ return SecondMacroFusionInstKind::SPO;
+ }
+ }
+
+ /// \param FirstKind kind of the first instruction in macro fusion.
+ /// \param SecondKind kind of the second instruction in macro fusion.
+ ///
+  /// \returns true if the two instructions can be macro fused.
+ inline bool isMacroFused(FirstMacroFusionInstKind FirstKind,
+ SecondMacroFusionInstKind SecondKind) {
+ switch (FirstKind) {
+ case X86::FirstMacroFusionInstKind::Test:
+ case X86::FirstMacroFusionInstKind::And:
+ return true;
+ case X86::FirstMacroFusionInstKind::Cmp:
+ case X86::FirstMacroFusionInstKind::AddSub:
+ return SecondKind == X86::SecondMacroFusionInstKind::AB ||
+ SecondKind == X86::SecondMacroFusionInstKind::ELG;
+ case X86::FirstMacroFusionInstKind::IncDec:
+ return SecondKind == X86::SecondMacroFusionInstKind::ELG;
+ case X86::FirstMacroFusionInstKind::Invalid:
+ return false;
+ }
+ llvm_unreachable("unknown fusion type");
+ }
+
+ /// Defines the possible values of the branch boundary alignment mask.
+ enum AlignBranchBoundaryKind : uint8_t {
+ AlignBranchNone = 0,
+ AlignBranchFused = 1U << 0,
+ AlignBranchJcc = 1U << 1,
+ AlignBranchJmp = 1U << 2,
+ AlignBranchCall = 1U << 3,
+ AlignBranchRet = 1U << 4,
+ AlignBranchIndirect = 1U << 5
+ };
} // end namespace X86;
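
A short illustration of how these classifiers compose; exampleCmpJeFuses is a hypothetical wrapper around the functions defined above:

// CMP64rr classifies as Cmp, COND_E as ELG, and per isMacroFused a Cmp/ELG
// pair is fusible, so this returns true.
static bool exampleCmpJeFuses() {
  auto First = X86::classifyFirstOpcodeInMacroFusion(X86::CMP64rr);
  auto Second = X86::classifySecondCondCodeInMacroFusion(X86::COND_E);
  return X86::isMacroFused(First, Second);
}
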
/// X86II - This namespace holds all of the target specific flags that
@@ -645,9 +900,8 @@ namespace X86II {
NOTRACK = 1ULL << NoTrackShift
};
- // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
- // specified machine instruction.
- //
+ /// \returns the "base" X86 opcode for the specified machine
+ /// instruction.
inline uint8_t getBaseOpcodeFor(uint64_t TSFlags) {
return TSFlags >> X86II::OpcodeShift;
}
@@ -656,8 +910,8 @@ namespace X86II {
return (TSFlags & X86II::ImmMask) != 0;
}
- /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
- /// of the specified instruction.
+ /// Decode the "size of immediate" field from the TSFlags field of the
+ /// specified instruction.
inline unsigned getSizeOfImm(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: llvm_unreachable("Unknown immediate size");
@@ -673,9 +927,9 @@ namespace X86II {
}
}
- /// isImmPCRel - Return true if the immediate of the specified instruction's
- /// TSFlags indicates that it is pc relative.
- inline unsigned isImmPCRel(uint64_t TSFlags) {
+ /// \returns true if the immediate of the specified instruction's TSFlags
+ /// indicates that it is pc relative.
+ inline bool isImmPCRel(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8PCRel:
@@ -692,9 +946,9 @@ namespace X86II {
}
}
- /// isImmSigned - Return true if the immediate of the specified instruction's
+ /// \returns true if the immediate of the specified instruction's
/// TSFlags indicates that it is signed.
- inline unsigned isImmSigned(uint64_t TSFlags) {
+ inline bool isImmSigned(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: llvm_unreachable("Unknown immediate signedness");
case X86II::Imm32S:
@@ -711,8 +965,8 @@ namespace X86II {
}
}
- /// getOperandBias - compute whether all of the def operands are repeated
- /// in the uses and therefore should be skipped.
+ /// Compute whether all of the def operands are repeated in the uses and
+ /// therefore should be skipped.
/// This determines the start of the unique operand list. We need to determine
/// if all of the defs have a corresponding tied operand in the uses.
/// Unfortunately, the tied operand information is encoded in the uses not
@@ -750,8 +1004,8 @@ namespace X86II {
}
}
- /// getMemoryOperandNo - The function returns the MCInst operand # for the
- /// first field of the memory operand. If the instruction doesn't have a
+ /// The function returns the MCInst operand # for the first field of the
+ /// memory operand. If the instruction doesn't have a
/// memory operand, this returns -1.
///
/// Note that this ignores tied operands. If there is a tied register which
@@ -837,8 +1091,8 @@ namespace X86II {
}
}
- /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
- /// higher) register? e.g. r8, xmm8, xmm13, etc.
+  /// \returns true if the MachineOperand is an x86-64 extended (r8 or
+ /// higher) register, e.g. r8, xmm8, xmm13, etc.
inline bool isX86_64ExtendedReg(unsigned RegNo) {
if ((RegNo >= X86::XMM8 && RegNo <= X86::XMM31) ||
(RegNo >= X86::YMM8 && RegNo <= X86::YMM31) ||
@@ -864,8 +1118,8 @@ namespace X86II {
return false;
}
- /// is32ExtendedReg - Is the MemoryOperand a 32 extended (zmm16 or higher)
- /// registers? e.g. zmm21, etc.
+ /// \returns true if the MemoryOperand is a 32 extended (zmm16 or higher)
+  /// register, e.g. zmm21, etc.
static inline bool is32ExtendedReg(unsigned RegNo) {
return ((RegNo >= X86::XMM16 && RegNo <= X86::XMM31) ||
(RegNo >= X86::YMM16 && RegNo <= X86::YMM31) ||
@@ -878,12 +1132,12 @@ namespace X86II {
reg == X86::SIL || reg == X86::DIL);
}
- /// isKMasked - Is this a masked instruction.
+ /// \returns true if this is a masked instruction.
inline bool isKMasked(uint64_t TSFlags) {
return (TSFlags & X86II::EVEX_K) != 0;
}
- /// isKMergedMasked - Is this a merge masked instruction.
+ /// \returns true if this is a merge masked instruction.
inline bool isKMergeMasked(uint64_t TSFlags) {
return isKMasked(TSFlags) && (TSFlags & X86II::EVEX_Z) == 0;
}
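
A small sketch of how these TSFlags helpers compose; describeImmediate is a hypothetical function, and the ImmMask test mirrors the hasImm-style context line earlier in this file:

// Print the immediate properties encoded in an instruction's TSFlags.
static void describeImmediate(const MCInstrDesc &Desc, raw_ostream &OS) {
  uint64_t TSFlags = Desc.TSFlags;
  if ((TSFlags & X86II::ImmMask) == 0)
    return; // no immediate operand
  OS << "imm size " << X86II::getSizeOfImm(TSFlags)
     << (X86II::isImmPCRel(TSFlags) ? ", pc-relative" : "")
     << (X86II::isImmSigned(TSFlags) ? ", signed" : "") << '\n';
}
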
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index ea28bef42569..f4bb0fbf62cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -36,9 +36,9 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
-void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot,
- const MCSubtargetInfo &STI) {
+void X86IntelInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &OS) {
printInstFlags(MI, OS);
// In 16-bit mode, print data16 as data32.
@@ -47,7 +47,7 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
OS << "\tdata32";
} else if (!printAliasInstr(MI, OS) &&
!printVecCompareInstr(MI, OS))
- printInstruction(MI, OS);
+ printInstruction(MI, Address, OS);
// Next always print the annotation.
printAnnotation(OS, Annot);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
index f32f49f7c417..b409b20cbea8 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
@@ -25,8 +25,8 @@ public:
: X86InstPrinterCommon(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &OS) override;
bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS);
// Autogenerated by tblgen, returns true if we successfully printed an
@@ -36,7 +36,7 @@ public:
unsigned PrintMethodIdx, raw_ostream &O);
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index ac36bf3a12fa..54a293702bd0 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -42,91 +42,68 @@ class X86MCCodeEmitter : public MCCodeEmitter {
public:
X86MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : MCII(mcii), Ctx(ctx) {
- }
+ : MCII(mcii), Ctx(ctx) {}
X86MCCodeEmitter(const X86MCCodeEmitter &) = delete;
X86MCCodeEmitter &operator=(const X86MCCodeEmitter &) = delete;
~X86MCCodeEmitter() override = default;
- bool is64BitMode(const MCSubtargetInfo &STI) const {
- return STI.getFeatureBits()[X86::Mode64Bit];
- }
-
- bool is32BitMode(const MCSubtargetInfo &STI) const {
- return STI.getFeatureBits()[X86::Mode32Bit];
- }
-
- bool is16BitMode(const MCSubtargetInfo &STI) const {
- return STI.getFeatureBits()[X86::Mode16Bit];
- }
-
- /// Is16BitMemOperand - Return true if the specified instruction has
- /// a 16-bit memory operand. Op specifies the operand # of the memoperand.
- bool Is16BitMemOperand(const MCInst &MI, unsigned Op,
- const MCSubtargetInfo &STI) const {
- const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
- const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
- const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp);
+ void emitPrefix(const MCInst &MI, raw_ostream &OS,
+ const MCSubtargetInfo &STI) const override;
- if (is16BitMode(STI) && BaseReg.getReg() == 0 &&
- Disp.isImm() && Disp.getImm() < 0x10000)
- return true;
- if ((BaseReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
- return true;
- return false;
- }
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
- unsigned GetX86RegNum(const MCOperand &MO) const {
+private:
+ unsigned getX86RegNum(const MCOperand &MO) const {
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()) & 0x7;
}
unsigned getX86RegEncoding(const MCInst &MI, unsigned OpNum) const {
return Ctx.getRegisterInfo()->getEncodingValue(
- MI.getOperand(OpNum).getReg());
+ MI.getOperand(OpNum).getReg());
}
- // Does this register require a bit to be set in REX prefix.
+ /// \param MI a single low-level machine instruction.
+ /// \param OpNum the operand #.
+  /// \returns true if the OpNumth operand of MI requires a bit to be set in
+  /// the REX prefix.
bool isREXExtendedReg(const MCInst &MI, unsigned OpNum) const {
return (getX86RegEncoding(MI, OpNum) >> 3) & 1;
}
- void EmitByte(uint8_t C, unsigned &CurByte, raw_ostream &OS) const {
+ void emitByte(uint8_t C, unsigned &CurByte, raw_ostream &OS) const {
OS << (char)C;
++CurByte;
}
- void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ void emitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
raw_ostream &OS) const {
// Output the constant in little endian byte order.
for (unsigned i = 0; i != Size; ++i) {
- EmitByte(Val & 255, CurByte, OS);
+ emitByte(Val & 255, CurByte, OS);
Val >>= 8;
}
}
- void EmitImmediate(const MCOperand &Disp, SMLoc Loc,
- unsigned ImmSize, MCFixupKind FixupKind,
- unsigned &CurByte, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- int ImmOffset = 0) const;
+ void emitImmediate(const MCOperand &Disp, SMLoc Loc, unsigned ImmSize,
+ MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const;
- static uint8_t ModRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
+ static uint8_t modRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
return RM | (RegOpcode << 3) | (Mod << 6);
}
- void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
+ void emitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
unsigned &CurByte, raw_ostream &OS) const {
- EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS);
+ emitByte(modRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)), CurByte, OS);
}
- void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base,
+ void emitSIBByte(unsigned SS, unsigned Index, unsigned Base,
unsigned &CurByte, raw_ostream &OS) const {
- // SIB byte is in the same format as the ModRMByte.
- EmitByte(ModRMByte(SS, Index, Base), CurByte, OS);
+ // SIB byte is in the same format as the modRMByte.
+ emitByte(modRMByte(SS, Index, Base), CurByte, OS);
}
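
The ModR/M byte packs mod into bits 7:6, reg/opcode into bits 5:3 and r/m into bits 2:0, exactly as modRMByte above computes. One worked instance as a compile-time check:

// modRMByte(3, 2, 1): register-direct form (Mod = 3), RegOpcode = 2, RM = 1.
static_assert((1 | (2 << 3) | (3 << 6)) == 0xD1, "ModRM packing example");
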
void emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField,
@@ -134,43 +111,39 @@ public:
raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
+ void emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp, unsigned &CurByte,
+ bool &Rex, const MCInst &MI, const MCInstrDesc &Desc,
+ const MCSubtargetInfo &STI, raw_ostream &OS) const;
- void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ void emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
const MCInst &MI, const MCInstrDesc &Desc,
raw_ostream &OS) const;
- void EmitSegmentOverridePrefix(unsigned &CurByte, unsigned SegOperand,
+ void emitSegmentOverridePrefix(unsigned &CurByte, unsigned SegOperand,
const MCInst &MI, raw_ostream &OS) const;
bool emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
const MCInst &MI, const MCInstrDesc &Desc,
const MCSubtargetInfo &STI, raw_ostream &OS) const;
- uint8_t DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
- int MemOperand, const MCInstrDesc &Desc) const;
-
- bool isPCRel32Branch(const MCInst &MI) const;
+ uint8_t determineREXPrefix(const MCInst &MI, uint64_t TSFlags, int MemOperand,
+ const MCInstrDesc &Desc) const;
};
} // end anonymous namespace
-/// isDisp8 - Return true if this signed displacement fits in a 8-bit
-/// sign-extended field.
-static bool isDisp8(int Value) {
- return Value == (int8_t)Value;
-}
+/// \returns true if this signed displacement fits in an 8-bit sign-extended
+/// field.
+static bool isDisp8(int Value) { return Value == (int8_t)Value; }
-/// isCDisp8 - Return true if this signed displacement fits in a 8-bit
-/// compressed dispacement field.
-static bool isCDisp8(uint64_t TSFlags, int Value, int& CValue) {
+/// \returns true if this signed displacement fits in an 8-bit compressed
+/// displacement field.
+static bool isCDisp8(uint64_t TSFlags, int Value, int &CValue) {
assert(((TSFlags & X86II::EncodingMask) == X86II::EVEX) &&
"Compressed 8-bit displacement is only valid for EVEX inst.");
unsigned CD8_Scale =
- (TSFlags & X86II::CD8_Scale_Mask) >> X86II::CD8_Scale_Shift;
+ (TSFlags & X86II::CD8_Scale_Mask) >> X86II::CD8_Scale_Shift;
if (CD8_Scale == 0) {
CValue = Value;
return isDisp8(Value);
@@ -188,26 +161,49 @@ static bool isCDisp8(uint64_t TSFlags, int Value, int& CValue) {
return Ret;
}
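
isCDisp8 implements the EVEX disp8*N compression; the scale extraction above is mask-driven, and the remainder of the function is elided from this hunk. A hedged sketch of the scheme with one worked value; compressesToDisp8 is illustrative only and does not match isCDisp8's exact output contract:

// Sketch: a displacement compresses to disp8 when it is a multiple of the
// CD8 scale and the quotient fits in a signed byte.
static bool compressesToDisp8(int Value, int Scale, int &CValue) {
  if (Scale == 0 || Value % Scale != 0)
    return false; // not a multiple: fall back to disp32
  CValue = Value / Scale; // e.g. 256 / 64 == 4 for a 64-byte-scaled access
  return CValue == (int8_t)CValue;
}
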
-/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
-/// in an instruction with the specified TSFlags.
+/// \returns the appropriate fixup kind to use for an immediate in an
+/// instruction with the specified TSFlags.
static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
unsigned Size = X86II::getSizeOfImm(TSFlags);
bool isPCRel = X86II::isImmPCRel(TSFlags);
if (X86II::isImmSigned(TSFlags)) {
switch (Size) {
- default: llvm_unreachable("Unsupported signed fixup size!");
- case 4: return MCFixupKind(X86::reloc_signed_4byte);
+ default:
+ llvm_unreachable("Unsupported signed fixup size!");
+ case 4:
+ return MCFixupKind(X86::reloc_signed_4byte);
}
}
return MCFixup::getKindForSize(Size, isPCRel);
}
-/// Is32BitMemOperand - Return true if the specified instruction has
-/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
-static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
- const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
- const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+/// \param Op operand # of the memory operand.
+///
+/// \returns true if the specified instruction has a 16-bit memory operand.
+static bool is16BitMemOperand(const MCInst &MI, unsigned Op,
+ const MCSubtargetInfo &STI) {
+ const MCOperand &BaseReg = MI.getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op + X86::AddrIndexReg);
+ const MCOperand &Disp = MI.getOperand(Op + X86::AddrDisp);
+
+ if (STI.hasFeature(X86::Mode16Bit) && BaseReg.getReg() == 0 && Disp.isImm() &&
+ Disp.getImm() < 0x10000)
+ return true;
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
+/// \param Op operand # of the memory operand.
+///
+/// \returns true if the specified instruction has a 32-bit memory operand.
+static bool is32BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op + X86::AddrIndexReg);
if ((BaseReg.getReg() != 0 &&
X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
@@ -223,12 +219,13 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
return false;
}
-/// Is64BitMemOperand - Return true if the specified instruction has
-/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
+/// \param Op operand # of the memory operand.
+///
+/// \returns true if the specified instruction has a 64-bit memory operand.
#ifndef NDEBUG
-static bool Is64BitMemOperand(const MCInst &MI, unsigned Op) {
- const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
- const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+static bool is64BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op + X86::AddrIndexReg);
if ((BaseReg.getReg() != 0 &&
X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
@@ -239,19 +236,15 @@ static bool Is64BitMemOperand(const MCInst &MI, unsigned Op) {
}
#endif
-/// StartsWithGlobalOffsetTable - Check if this expression starts with
-/// _GLOBAL_OFFSET_TABLE_ and if it is of the form
-/// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF
-/// i386 as _GLOBAL_OFFSET_TABLE_ is magical. We check only simple case that
-/// are know to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
-/// of a binary expression.
-enum GlobalOffsetTableExprKind {
- GOT_None,
- GOT_Normal,
- GOT_SymDiff
-};
+enum GlobalOffsetTableExprKind { GOT_None, GOT_Normal, GOT_SymDiff };
+
+/// Check if this expression starts with _GLOBAL_OFFSET_TABLE_ and if it is
+/// of the form _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on
+/// ELF i386 as _GLOBAL_OFFSET_TABLE_ is magical. We check only the simple
+/// cases that are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at
+/// the start of a binary expression.
static GlobalOffsetTableExprKind
-StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+startsWithGlobalOffsetTable(const MCExpr *Expr) {
const MCExpr *RHS = nullptr;
if (Expr->getKind() == MCExpr::Binary) {
const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
@@ -262,7 +255,7 @@ StartsWithGlobalOffsetTable(const MCExpr *Expr) {
if (Expr->getKind() != MCExpr::SymbolRef)
return GOT_None;
- const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
const MCSymbol &S = Ref->getSymbol();
if (S.getName() != "_GLOBAL_OFFSET_TABLE_")
return GOT_None;
@@ -271,15 +264,15 @@ StartsWithGlobalOffsetTable(const MCExpr *Expr) {
return GOT_Normal;
}
-static bool HasSecRelSymbolRef(const MCExpr *Expr) {
+static bool hasSecRelSymbolRef(const MCExpr *Expr) {
if (Expr->getKind() == MCExpr::SymbolRef) {
- const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
return Ref->getKind() == MCSymbolRefExpr::VK_SECREL;
}
return false;
}
-bool X86MCCodeEmitter::isPCRel32Branch(const MCInst &MI) const {
+static bool isPCRel32Branch(const MCInst &MI, const MCInstrInfo &MCII) {
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MCII.get(Opcode);
if ((Opcode != X86::CALL64pcrel32 && Opcode != X86::JMP_4) ||
@@ -295,18 +288,18 @@ bool X86MCCodeEmitter::isPCRel32Branch(const MCInst &MI) const {
return Ref && Ref->getKind() == MCSymbolRefExpr::VK_None;
}
-void X86MCCodeEmitter::
-EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
- MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
+void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
+ unsigned Size, MCFixupKind FixupKind,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int ImmOffset) const {
const MCExpr *Expr = nullptr;
if (DispOp.isImm()) {
// If this is a simple integer displacement that doesn't require a
// relocation, emit it now.
- if (FixupKind != FK_PCRel_1 &&
- FixupKind != FK_PCRel_2 &&
+ if (FixupKind != FK_PCRel_1 && FixupKind != FK_PCRel_2 &&
FixupKind != FK_PCRel_4) {
- EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
+ emitConstant(DispOp.getImm() + ImmOffset, Size, CurByte, OS);
return;
}
Expr = MCConstantExpr::create(DispOp.getImm(), Ctx);
@@ -315,10 +308,9 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
}
// If we have an immoffset, add it to the expression.
- if ((FixupKind == FK_Data_4 ||
- FixupKind == FK_Data_8 ||
+ if ((FixupKind == FK_Data_4 || FixupKind == FK_Data_8 ||
FixupKind == MCFixupKind(X86::reloc_signed_4byte))) {
- GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr);
+ GlobalOffsetTableExprKind Kind = startsWithGlobalOffsetTable(Expr);
if (Kind != GOT_None) {
assert(ImmOffset == 0);
@@ -332,13 +324,13 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
if (Kind == GOT_Normal)
ImmOffset = CurByte;
} else if (Expr->getKind() == MCExpr::SymbolRef) {
- if (HasSecRelSymbolRef(Expr)) {
+ if (hasSecRelSymbolRef(Expr)) {
FixupKind = MCFixupKind(FK_SecRel_4);
}
} else if (Expr->getKind() == MCExpr::Binary) {
- const MCBinaryExpr *Bin = static_cast<const MCBinaryExpr*>(Expr);
- if (HasSecRelSymbolRef(Bin->getLHS())
- || HasSecRelSymbolRef(Bin->getRHS())) {
+ const MCBinaryExpr *Bin = static_cast<const MCBinaryExpr *>(Expr);
+ if (hasSecRelSymbolRef(Bin->getLHS()) ||
+ hasSecRelSymbolRef(Bin->getRHS())) {
FixupKind = MCFixupKind(FK_SecRel_4);
}
}
@@ -356,7 +348,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
// If this is a pc-relative load off _GLOBAL_OFFSET_TABLE_:
// leaq _GLOBAL_OFFSET_TABLE_(%rip), %r15
// this needs to be a GOTPC32 relocation.
- if (StartsWithGlobalOffsetTable(Expr) != GOT_None)
+ if (startsWithGlobalOffsetTable(Expr) != GOT_None)
FixupKind = MCFixupKind(X86::reloc_global_offset_table);
}
if (FixupKind == FK_PCRel_2)
@@ -370,7 +362,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
// Emit a symbolic constant as a fixup and 4 zeros.
Fixups.push_back(MCFixup::create(CurByte, Expr, FixupKind, Loc));
- EmitConstant(0, Size, CurByte, OS);
+ emitConstant(0, Size, CurByte, OS);
}
void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
@@ -379,19 +371,20 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp);
- const MCOperand &Base = MI.getOperand(Op+X86::AddrBaseReg);
- const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt);
- const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+ const MCOperand &Disp = MI.getOperand(Op + X86::AddrDisp);
+ const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &Scale = MI.getOperand(Op + X86::AddrScaleAmt);
+ const MCOperand &IndexReg = MI.getOperand(Op + X86::AddrIndexReg);
unsigned BaseReg = Base.getReg();
bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
// Handle %rip relative addressing.
if (BaseReg == X86::RIP ||
- BaseReg == X86::EIP) { // [disp32+rIP] in X86-64 mode
- assert(is64BitMode(STI) && "Rip-relative addressing requires 64-bit mode");
+ BaseReg == X86::EIP) { // [disp32+rIP] in X86-64 mode
+ assert(STI.hasFeature(X86::Mode64Bit) &&
+ "Rip-relative addressing requires 64-bit mode");
assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
- EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, 5), CurByte, OS);
unsigned Opcode = MI.getOpcode();
// movq loads are handled with a special relocation form which allows the
@@ -432,20 +425,20 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
? X86II::getSizeOfImm(TSFlags)
: 0;
- EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind),
- CurByte, OS, Fixups, -ImmSize);
+ emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), CurByte, OS,
+ Fixups, -ImmSize);
return;
}
- unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U;
+ unsigned BaseRegNo = BaseReg ? getX86RegNum(Base) : -1U;
// 16-bit addressing forms of the ModR/M byte have a different encoding for
// the R/M field and are far more limited in which registers can be used.
- if (Is16BitMemOperand(MI, Op, STI)) {
+ if (is16BitMemOperand(MI, Op, STI)) {
if (BaseReg) {
// For 32-bit addressing, the row and column values in Table 2-2 are
// basically the same. It's AX/CX/DX/BX/SP/BP/SI/DI in that order, with
- // some special cases. And GetX86RegNum reflects that numbering.
+ // some special cases. And getX86RegNum reflects that numbering.
// For 16-bit addressing it's more fun, as shown in the SDM Vol 2A,
// Table 2-1 "16-Bit Addressing Forms with the ModR/M byte". We can only
// use SI/DI/BP/BX, which have "row" values 4-7 in no particular order,
@@ -454,13 +447,13 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
//
// R16Table[] is a lookup from the normal RegNo, to the row values from
// Table 2-1 for 16-bit addressing modes. Where zero means disallowed.
- static const unsigned R16Table[] = { 0, 0, 0, 7, 0, 6, 4, 5 };
+ static const unsigned R16Table[] = {0, 0, 0, 7, 0, 6, 4, 5};
unsigned RMfield = R16Table[BaseRegNo];
assert(RMfield && "invalid 16-bit base register");
if (IndexReg.getReg()) {
- unsigned IndexReg16 = R16Table[GetX86RegNum(IndexReg)];
+ unsigned IndexReg16 = R16Table[getX86RegNum(IndexReg)];
assert(IndexReg16 && "invalid 16-bit index register");
// We must have one of SI/DI (4,5), and one of BP/BX (6,7).
@@ -479,23 +472,23 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
if (Disp.isImm() && isDisp8(Disp.getImm())) {
if (Disp.getImm() == 0 && RMfield != 6) {
// There is no displacement; just the register.
- EmitByte(ModRMByte(0, RegOpcodeField, RMfield), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, RMfield), CurByte, OS);
return;
}
// Use the [REG]+disp8 form, including for [BP] which cannot be encoded.
- EmitByte(ModRMByte(1, RegOpcodeField, RMfield), CurByte, OS);
- EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
+ emitByte(modRMByte(1, RegOpcodeField, RMfield), CurByte, OS);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
return;
}
// This is the [REG]+disp16 case.
- EmitByte(ModRMByte(2, RegOpcodeField, RMfield), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, RMfield), CurByte, OS);
} else {
// There is no BaseReg; this is the plain [disp16] case.
- EmitByte(ModRMByte(0, RegOpcodeField, 6), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, 6), CurByte, OS);
}
// Emit 16-bit displacement for plain disp16 or [REG]+disp16 cases.
- EmitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, CurByte, OS, Fixups);
+ emitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, CurByte, OS, Fixups);
return;
}
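// For reference, a standalone sketch of SDM Vol 2A Table 2-1, from which
// the R16Table lookup above is derived (the helper name is illustrative;
// ~0u marks base/index pairs that 16-bit ModR/M cannot encode):
#include <string>

static unsigned rm16(const std::string &Base, const std::string &Index) {
  if (Base == "BX" && Index == "SI") return 0;
  if (Base == "BX" && Index == "DI") return 1;
  if (Base == "BP" && Index == "SI") return 2;
  if (Base == "BP" && Index == "DI") return 3;
  if (Index.empty()) {
    if (Base == "SI") return 4;
    if (Base == "DI") return 5;
    if (Base == "BP") return 6; // needs a disp; Mod=0, R/M=6 means [disp16]
    if (Base == "BX") return 7;
  }
  return ~0u; // e.g. [AX], [CX], or [SI+DI] are not encodable
}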
@@ -504,7 +497,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
// 2-7) and absolute references.
- if (// The SIB byte must be used if there is an index register.
+ if ( // The SIB byte must be used if there is an index register.
IndexReg.getReg() == 0 &&
// The SIB byte must be used if the base is ESP/RSP/R12, all of which
// encode to an R/M value of 4, which indicates that a SIB byte is
@@ -512,11 +505,11 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
BaseRegNo != N86::ESP &&
// If there is no base register and we're in 64-bit mode, we need a SIB
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
- (!is64BitMode(STI) || BaseReg != 0)) {
+ (!STI.hasFeature(X86::Mode64Bit) || BaseReg != 0)) {
- if (BaseReg == 0) { // [disp32] in X86-32 mode
- EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
- EmitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, CurByte, OS, Fixups);
+ if (BaseReg == 0) { // [disp32] in X86-32 mode
+ emitByte(modRMByte(0, RegOpcodeField, 5), CurByte, OS);
+ emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, CurByte, OS, Fixups);
return;
}
@@ -526,7 +519,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// by emitting a displacement of 0 below.
if (BaseRegNo != N86::EBP) {
if (Disp.isImm() && Disp.getImm() == 0) {
- EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
return;
}
@@ -537,7 +530,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// This is exclusively used by call *a@tlscall(base). The relocation
// (R_386_TLSCALL or R_X86_64_TLSCALL) applies to the beginning.
Fixups.push_back(MCFixup::create(0, Sym, FK_NONE, MI.getLoc()));
- EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
return;
}
}
@@ -546,70 +539,70 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
if (Disp.isImm()) {
if (!HasEVEX && isDisp8(Disp.getImm())) {
- EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
+ emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
return;
}
// Try EVEX compressed 8-bit displacement first; if failed, fall back to
// 32-bit displacement.
int CDisp8 = 0;
if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) {
- EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups,
+ emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups,
CDisp8 - Disp.getImm());
return;
}
}
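// A sketch of the EVEX disp8*N compression that isCDisp8 is assumed to
// implement: the displacement fits in one byte iff it is a multiple of
// the access-size scale N (derived from TSFlags) and the scaled quotient
// fits in a signed byte. Names are illustrative:
#include <cstdint>

static bool compressDisp8(int32_t Disp, int32_t N, int8_t &Out) {
  if (N <= 0 || Disp % N != 0)
    return false;
  int32_t Q = Disp / N;
  if (Q < -128 || Q > 127)
    return false;
  Out = int8_t(Q); // emitted byte; ImmOffset above adjusts the fixup value
  return true;
}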
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
- EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
unsigned Opcode = MI.getOpcode();
unsigned FixupKind = Opcode == X86::MOV32rm ? X86::reloc_signed_4byte_relax
: X86::reloc_signed_4byte;
- EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), CurByte, OS,
+ emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), CurByte, OS,
Fixups);
return;
}
// We need a SIB byte, so start by outputting the ModR/M byte first
- assert(IndexReg.getReg() != X86::ESP &&
- IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+ assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP &&
+ "Cannot use ESP as index reg!");
bool ForceDisp32 = false;
- bool ForceDisp8 = false;
+ bool ForceDisp8 = false;
int CDisp8 = 0;
int ImmOffset = 0;
if (BaseReg == 0) {
// If there is no base register, we emit the special case SIB byte with
// MOD=0, BASE=5, to JUST get the index, scale, and displacement.
- EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, 4), CurByte, OS);
ForceDisp32 = true;
} else if (!Disp.isImm()) {
// Emit the normal disp32 encoding.
- EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, 4), CurByte, OS);
ForceDisp32 = true;
} else if (Disp.getImm() == 0 &&
// Base reg can't be anything that ends up with '5' as the base
// reg, it is the magic [*] nomenclature that indicates no base.
BaseRegNo != N86::EBP) {
// Emit no displacement ModR/M byte
- EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, 4), CurByte, OS);
} else if (!HasEVEX && isDisp8(Disp.getImm())) {
// Emit the disp8 encoding.
- EmitByte(ModRMByte(1, RegOpcodeField, 4), CurByte, OS);
- ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ emitByte(modRMByte(1, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
} else if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) {
// Emit the disp8 encoding.
- EmitByte(ModRMByte(1, RegOpcodeField, 4), CurByte, OS);
- ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ emitByte(modRMByte(1, RegOpcodeField, 4), CurByte, OS);
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
ImmOffset = CDisp8 - Disp.getImm();
} else {
// Emit the normal disp32 encoding.
- EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, 4), CurByte, OS);
}
// Calculate what the SS field value should be...
- static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
+ static const unsigned SSTable[] = {~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3};
unsigned SS = SSTable[Scale.getImm()];
if (BaseReg == 0) {
@@ -617,30 +610,133 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// Manual 2A, table 2-7. The displacement has already been output.
unsigned IndexRegNo;
if (IndexReg.getReg())
- IndexRegNo = GetX86RegNum(IndexReg);
+ IndexRegNo = getX86RegNum(IndexReg);
else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
IndexRegNo = 4;
- EmitSIBByte(SS, IndexRegNo, 5, CurByte, OS);
+ emitSIBByte(SS, IndexRegNo, 5, CurByte, OS);
} else {
unsigned IndexRegNo;
if (IndexReg.getReg())
- IndexRegNo = GetX86RegNum(IndexReg);
+ IndexRegNo = getX86RegNum(IndexReg);
else
- IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
- EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS);
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ emitSIBByte(SS, IndexRegNo, getX86RegNum(Base), CurByte, OS);
}
// Do we need to output a displacement?
if (ForceDisp8)
- EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups, ImmOffset);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups,
+ ImmOffset);
else if (ForceDisp32 || Disp.getImm() != 0)
- EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte),
+ emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte),
CurByte, OS, Fixups);
}
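// For reference, a standalone sketch of the scale(2)|index(3)|base(3)
// packing behind the emitSIBByte calls above; scale immediates 1/2/4/8
// map to SS values 0/1/2/3 exactly as in SSTable:
#include <cassert>
#include <cstdint>

static uint8_t packSIB(unsigned ScaleImm, unsigned Index, unsigned Base) {
  static const unsigned SSTable[] = {~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3};
  unsigned SS = SSTable[ScaleImm];
  assert(SS != ~0U && Index < 8 && Base < 8 && "invalid SIB fields");
  return uint8_t((SS << 6) | (Index << 3) | Base);
}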
-/// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix
+void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp,
+ unsigned &CurByte, bool &Rex,
+ const MCInst &MI, const MCInstrDesc &Desc,
+ const MCSubtargetInfo &STI,
+ raw_ostream &OS) const {
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ if (MemoryOperand != -1)
+ MemoryOperand += CurOp;
+
+ // Emit segment override opcode prefix as needed.
+ if (MemoryOperand >= 0)
+ emitSegmentOverridePrefix(CurByte, MemoryOperand + X86::AddrSegmentReg, MI,
+ OS);
+
+ // Emit the repeat opcode prefix as needed.
+ unsigned Flags = MI.getFlags();
+ if (TSFlags & X86II::REP || Flags & X86::IP_HAS_REPEAT)
+ emitByte(0xF3, CurByte, OS);
+ if (Flags & X86::IP_HAS_REPEAT_NE)
+ emitByte(0xF2, CurByte, OS);
+
+ // Emit the address size opcode prefix as needed.
+ bool need_address_override;
+ uint64_t AdSize = TSFlags & X86II::AdSizeMask;
+ if ((STI.hasFeature(X86::Mode16Bit) && AdSize == X86II::AdSize32) ||
+ (STI.hasFeature(X86::Mode32Bit) && AdSize == X86II::AdSize16) ||
+ (STI.hasFeature(X86::Mode64Bit) && AdSize == X86II::AdSize32)) {
+ need_address_override = true;
+ } else if (MemoryOperand < 0) {
+ need_address_override = false;
+ } else if (STI.hasFeature(X86::Mode64Bit)) {
+ assert(!is16BitMemOperand(MI, MemoryOperand, STI));
+ need_address_override = is32BitMemOperand(MI, MemoryOperand);
+ } else if (STI.hasFeature(X86::Mode32Bit)) {
+ assert(!is64BitMemOperand(MI, MemoryOperand));
+ need_address_override = is16BitMemOperand(MI, MemoryOperand, STI);
+ } else {
+ assert(STI.hasFeature(X86::Mode16Bit));
+ assert(!is64BitMemOperand(MI, MemoryOperand));
+ need_address_override = !is16BitMemOperand(MI, MemoryOperand, STI);
+ }
+
+ if (need_address_override)
+ emitByte(0x67, CurByte, OS);
+
+ // Encoding type for this instruction.
+ uint64_t Encoding = TSFlags & X86II::EncodingMask;
+ if (Encoding == 0)
+ Rex = emitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, STI, OS);
+ else
+ emitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+
+ uint64_t Form = TSFlags & X86II::FormMask;
+ switch (Form) {
+ default:
+ break;
+ case X86II::RawFrmDstSrc: {
+ unsigned siReg = MI.getOperand(1).getReg();
+ assert(((siReg == X86::SI && MI.getOperand(0).getReg() == X86::DI) ||
+ (siReg == X86::ESI && MI.getOperand(0).getReg() == X86::EDI) ||
+ (siReg == X86::RSI && MI.getOperand(0).getReg() == X86::RDI)) &&
+ "SI and DI register sizes do not match");
+ // Emit segment override opcode prefix as needed (not for %ds).
+ if (MI.getOperand(2).getReg() != X86::DS)
+ emitSegmentOverridePrefix(CurByte, 2, MI, OS);
+ // Emit AdSize prefix as needed.
+ if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) ||
+ (STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI))
+ emitByte(0x67, CurByte, OS);
+ CurOp += 3; // Consume operands.
+ break;
+ }
+ case X86II::RawFrmSrc: {
+ unsigned siReg = MI.getOperand(0).getReg();
+ // Emit segment override opcode prefix as needed (not for %ds).
+ if (MI.getOperand(1).getReg() != X86::DS)
+ emitSegmentOverridePrefix(CurByte, 1, MI, OS);
+ // Emit AdSize prefix as needed.
+ if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) ||
+ (STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI))
+ emitByte(0x67, CurByte, OS);
+ CurOp += 2; // Consume operands.
+ break;
+ }
+ case X86II::RawFrmDst: {
+ unsigned siReg = MI.getOperand(0).getReg();
+ // Emit AdSize prefix as needed.
+ if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::EDI) ||
+ (STI.hasFeature(X86::Mode32Bit) && siReg == X86::DI))
+ emitByte(0x67, CurByte, OS);
+ ++CurOp; // Consume operand.
+ break;
+ }
+ case X86II::RawFrmMemOffs: {
+ // Emit segment override opcode prefix as needed.
+ emitSegmentOverridePrefix(CurByte, 1, MI, OS);
+ break;
+ }
+ }
+}
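// A compact restatement of the 0x67 address-size rule encoded by the
// mode checks above (a sketch only; the enum names are illustrative):
// the prefix is needed whenever the effective address size differs from
// the mode's default, and 16-bit addressing is unreachable in 64-bit
// mode.
enum class Mode { M16, M32, M64 };
enum class AddrSize { A16, A32 };

static bool needsAddrSizeOverride(Mode M, AddrSize A) {
  switch (M) {
  case Mode::M16: return A == AddrSize::A32;
  case Mode::M32: return A == AddrSize::A16;
  case Mode::M64: return A == AddrSize::A32; // A16 cannot be encoded
  }
  return false;
}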
+
+/// emitVEXOpcodePrefix - AVX instructions are encoded using an opcode prefix
/// called VEX.
-void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
int MemOperand, const MCInst &MI,
const MCInstrDesc &Desc,
raw_ostream &OS) const {
@@ -690,13 +786,26 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 0b01010: XOP map select - 0Ah instructions with imm dword
uint8_t VEX_5M;
switch (TSFlags & X86II::OpMapMask) {
- default: llvm_unreachable("Invalid prefix!");
- case X86II::TB: VEX_5M = 0x1; break; // 0F
- case X86II::T8: VEX_5M = 0x2; break; // 0F 38
- case X86II::TA: VEX_5M = 0x3; break; // 0F 3A
- case X86II::XOP8: VEX_5M = 0x8; break;
- case X86II::XOP9: VEX_5M = 0x9; break;
- case X86II::XOPA: VEX_5M = 0xA; break;
+ default:
+ llvm_unreachable("Invalid prefix!");
+ case X86II::TB:
+ VEX_5M = 0x1;
+ break; // 0F
+ case X86II::T8:
+ VEX_5M = 0x2;
+ break; // 0F 38
+ case X86II::TA:
+ VEX_5M = 0x3;
+ break; // 0F 3A
+ case X86II::XOP8:
+ VEX_5M = 0x8;
+ break;
+ case X86II::XOP9:
+ VEX_5M = 0x9;
+ break;
+ case X86II::XOPA:
+ VEX_5M = 0xA;
+ break;
}
// VEX_4V (VEX vvvv field): a register specifier
@@ -724,9 +833,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
//
uint8_t VEX_PP = 0;
switch (TSFlags & X86II::OpPrefixMask) {
- case X86II::PD: VEX_PP = 0x1; break; // 66
- case X86II::XS: VEX_PP = 0x2; break; // F3
- case X86II::XD: VEX_PP = 0x3; break; // F2
+ case X86II::PD:
+ VEX_PP = 0x1;
+ break; // 66
+ case X86II::XS:
+ VEX_PP = 0x2;
+ break; // F3
+ case X86II::XD:
+ VEX_PP = 0x3;
+ break; // F2
}
// EVEX_U
@@ -751,7 +866,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
unsigned CurOp = X86II::getOperandBias(Desc);
switch (TSFlags & X86II::FormMask) {
- default: llvm_unreachable("Unexpected form in EmitVEXOpcodePrefix!");
+ default:
+ llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!");
case X86II::RawFrm:
break;
case X86II::MRMDestMem: {
@@ -762,7 +878,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
//
unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
VEX_B = ~(BaseRegEnc >> 3) & 1;
- unsigned IndexRegEnc = getX86RegEncoding(MI, MemOperand+X86::AddrIndexReg);
+ unsigned IndexRegEnc =
+ getX86RegEncoding(MI, MemOperand + X86::AddrIndexReg);
VEX_X = ~(IndexRegEnc >> 3) & 1;
if (!HasVEX_4V) // Only needed with VSIB, which doesn't use VVVV.
EVEX_V2 = ~(IndexRegEnc >> 4) & 1;
@@ -807,7 +924,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
VEX_B = ~(BaseRegEnc >> 3) & 1;
- unsigned IndexRegEnc = getX86RegEncoding(MI, MemOperand+X86::AddrIndexReg);
+ unsigned IndexRegEnc =
+ getX86RegEncoding(MI, MemOperand + X86::AddrIndexReg);
VEX_X = ~(IndexRegEnc >> 3) & 1;
if (!HasVEX_4V) // Only needed with VSIB, which doesn't use VVVV.
EVEX_V2 = ~(IndexRegEnc >> 4) & 1;
@@ -822,7 +940,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
VEX_B = ~(BaseRegEnc >> 3) & 1;
- unsigned IndexRegEnc = getX86RegEncoding(MI, MemOperand+X86::AddrIndexReg);
+ unsigned IndexRegEnc =
+ getX86RegEncoding(MI, MemOperand + X86::AddrIndexReg);
VEX_X = ~(IndexRegEnc >> 3) & 1;
VEX_4V = ~getX86RegEncoding(MI, CurOp + X86::AddrNumOperands) & 0xf;
@@ -838,14 +957,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
VEX_B = ~(BaseRegEnc >> 3) & 1;
- unsigned IndexRegEnc = getX86RegEncoding(MI, MemOperand+X86::AddrIndexReg);
+ unsigned IndexRegEnc =
+ getX86RegEncoding(MI, MemOperand + X86::AddrIndexReg);
VEX_X = ~(IndexRegEnc >> 3) & 1;
break;
}
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
+ case X86II::MRM0m:
+ case X86II::MRM1m:
+ case X86II::MRM2m:
+ case X86II::MRM3m:
+ case X86II::MRM4m:
+ case X86II::MRM5m:
+ case X86II::MRM6m:
+ case X86II::MRM7m: {
// MRM[0-9]m instruction forms:
// MemAddr
// src1(VEX_4V), MemAddr
@@ -860,7 +984,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
VEX_B = ~(BaseRegEnc >> 3) & 1;
- unsigned IndexRegEnc = getX86RegEncoding(MI, MemOperand+X86::AddrIndexReg);
+ unsigned IndexRegEnc =
+ getX86RegEncoding(MI, MemOperand + X86::AddrIndexReg);
VEX_X = ~(IndexRegEnc >> 3) & 1;
if (!HasVEX_4V) // Only needed with VSIB, which doesn't use VVVV.
EVEX_V2 = ~(IndexRegEnc >> 4) & 1;
@@ -894,7 +1019,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (EVEX_b) {
if (HasEVEX_RC) {
- unsigned RcOperand = NumOps-1;
+ unsigned RcOperand = NumOps - 1;
assert(RcOperand >= CurOp);
EVEX_rc = MI.getOperand(RcOperand).getImm();
assert(EVEX_rc <= 3 && "Invalid rounding control!");
@@ -956,10 +1081,14 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
EncodeRC = true;
break;
}
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r: {
+ case X86II::MRM0r:
+ case X86II::MRM1r:
+ case X86II::MRM2r:
+ case X86II::MRM3r:
+ case X86II::MRM4r:
+ case X86II::MRM5r:
+ case X86II::MRM6r:
+ case X86II::MRM7r: {
// MRM0r-MRM7r instruction forms:
// dst(VEX_4V), src(ModR/M), imm8
if (HasVEX_4V) {
@@ -996,17 +1125,17 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
uint8_t LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
// Can we use the 2 byte VEX prefix?
- if (!(MI.getFlags() & X86::IP_USE_VEX3) &&
- Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) {
- EmitByte(0xC5, CurByte, OS);
- EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
+ if (!(MI.getFlags() & X86::IP_USE_VEX3) && Encoding == X86II::VEX &&
+ VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) {
+ emitByte(0xC5, CurByte, OS);
+ emitByte(LastByte | (VEX_R << 7), CurByte, OS);
return;
}
// 3 byte VEX prefix
- EmitByte(Encoding == X86II::XOP ? 0x8F : 0xC4, CurByte, OS);
- EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
- EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+ emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4, CurByte, OS);
+ emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
+ emitByte(LastByte | (VEX_W << 7), CurByte, OS);
} else {
assert(Encoding == X86II::EVEX && "unknown encoding!");
// EVEX opcode prefix can have 4 bytes
@@ -1014,39 +1143,30 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// +-----+ +--------------+ +-------------------+ +------------------------+
// | 62h | | RXBR' | 00mm | | W | vvvv | U | pp | | z | L'L | b | v' | aaa |
// +-----+ +--------------+ +-------------------+ +------------------------+
- assert((VEX_5M & 0x3) == VEX_5M
- && "More than 2 significant bits in VEX.m-mmmm fields for EVEX!");
-
- EmitByte(0x62, CurByte, OS);
- EmitByte((VEX_R << 7) |
- (VEX_X << 6) |
- (VEX_B << 5) |
- (EVEX_R2 << 4) |
- VEX_5M, CurByte, OS);
- EmitByte((VEX_W << 7) |
- (VEX_4V << 3) |
- (EVEX_U << 2) |
- VEX_PP, CurByte, OS);
+ assert((VEX_5M & 0x3) == VEX_5M &&
+ "More than 2 significant bits in VEX.m-mmmm fields for EVEX!");
+
+ emitByte(0x62, CurByte, OS);
+ emitByte((VEX_R << 7) | (VEX_X << 6) | (VEX_B << 5) | (EVEX_R2 << 4) |
+ VEX_5M,
+ CurByte, OS);
+ emitByte((VEX_W << 7) | (VEX_4V << 3) | (EVEX_U << 2) | VEX_PP, CurByte,
+ OS);
if (EncodeRC)
- EmitByte((EVEX_z << 7) |
- (EVEX_rc << 5) |
- (EVEX_b << 4) |
- (EVEX_V2 << 3) |
- EVEX_aaa, CurByte, OS);
+ emitByte((EVEX_z << 7) | (EVEX_rc << 5) | (EVEX_b << 4) | (EVEX_V2 << 3) |
+ EVEX_aaa,
+ CurByte, OS);
else
- EmitByte((EVEX_z << 7) |
- (EVEX_L2 << 6) |
- (VEX_L << 5) |
- (EVEX_b << 4) |
- (EVEX_V2 << 3) |
- EVEX_aaa, CurByte, OS);
+ emitByte((EVEX_z << 7) | (EVEX_L2 << 6) | (VEX_L << 5) | (EVEX_b << 4) |
+ (EVEX_V2 << 3) | EVEX_aaa,
+ CurByte, OS);
}
}
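// For reference, a sketch of the two-byte VEX form chosen above when the
// inverted B/X bits are both 1, W is 0, and the map is 0F: byte 0 is C5h
// and byte 1 packs R'|vvvv|L|pp, matching the LastByte arithmetic:
#include <cstdint>

static void pack2ByteVEX(uint8_t VEX_R, uint8_t VEX_4V, uint8_t VEX_L,
                         uint8_t VEX_PP, uint8_t Out[2]) {
  Out[0] = 0xC5;
  Out[1] = uint8_t((VEX_R << 7) | (VEX_4V << 3) | (VEX_L << 2) | VEX_PP);
}
// The three-byte form (C4h, or 8Fh for XOP) adds a middle byte holding
// R|X|B|m-mmmm and moves W into the top bit of the last byte.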
-/// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
-/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
-/// size, and 3) use of X86-64 extended registers.
-uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
+/// Determine if the MCInst has to be encoded with an X86-64 REX prefix which
+/// specifies 1) 64-bit instructions, 2) non-default operand size, and 3) use
+/// of X86-64 extended registers.
+uint8_t X86MCCodeEmitter::determineREXPrefix(const MCInst &MI, uint64_t TSFlags,
int MemOperand,
const MCInstrDesc &Desc) const {
uint8_t REX = 0;
@@ -1055,7 +1175,8 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
if (TSFlags & X86II::REX_W)
REX |= 1 << 3; // set REX.W
- if (MI.getNumOperands() == 0) return REX;
+ if (MI.getNumOperands() == 0)
+ return REX;
unsigned NumOps = MI.getNumOperands();
unsigned CurOp = X86II::getOperandBias(Desc);
@@ -1063,12 +1184,13 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
// If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
for (unsigned i = CurOp; i != NumOps; ++i) {
const MCOperand &MO = MI.getOperand(i);
- if (!MO.isReg()) continue;
+ if (!MO.isReg())
+ continue;
unsigned Reg = MO.getReg();
if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
UsesHighByteReg = true;
if (X86II::isX86_64NonExtLowByteReg(Reg))
- // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
+ // FIXME: The caller of determineREXPrefix slaps this prefix onto anything
// that returns non-zero.
REX |= 0x40; // REX fixed encoding prefix
}
@@ -1084,9 +1206,9 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
break;
case X86II::MRMSrcMem:
case X86II::MRMSrcMemCC:
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
CurOp += X86::AddrNumOperands;
break;
case X86II::MRMDestReg:
@@ -1094,57 +1216,82 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
break;
case X86II::MRMDestMem:
- REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
CurOp += X86::AddrNumOperands;
REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
break;
- case X86II::MRMXmCC: case X86II::MRMXm:
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
+ case X86II::MRMXmCC:
+ case X86II::MRMXm:
+ case X86II::MRM0m:
+ case X86II::MRM1m:
+ case X86II::MRM2m:
+ case X86II::MRM3m:
+ case X86II::MRM4m:
+ case X86II::MRM5m:
+ case X86II::MRM6m:
+ case X86II::MRM7m:
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
break;
- case X86II::MRMXrCC: case X86II::MRMXr:
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
+ case X86II::MRMXrCC:
+ case X86II::MRMXr:
+ case X86II::MRM0r:
+ case X86II::MRM1r:
+ case X86II::MRM2r:
+ case X86II::MRM3r:
+ case X86II::MRM4r:
+ case X86II::MRM5r:
+ case X86II::MRM6r:
+ case X86II::MRM7r:
REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
break;
}
if (REX && UsesHighByteReg)
- report_fatal_error("Cannot encode high byte register in REX-prefixed instruction");
+ report_fatal_error(
+ "Cannot encode high byte register in REX-prefixed instruction");
return REX;
}
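// A standalone sketch of the REX byte this function accumulates, in the
// 0100WRXB layout; the caller ORs the result with 0x40 before emitting:
#include <cstdint>

static uint8_t packREX(bool W, bool R, bool X, bool B) {
  return uint8_t(0x40 | (unsigned(W) << 3) | (unsigned(R) << 2) |
                 (unsigned(X) << 1) | unsigned(B));
}
// Example: packREX(true, false, false, true) == 0x49, i.e. REX.W+REX.B,
// as used when a 64-bit operation addresses an extended base register.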
-/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
-void X86MCCodeEmitter::EmitSegmentOverridePrefix(unsigned &CurByte,
+/// Emit segment override opcode prefix as needed.
+void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned &CurByte,
unsigned SegOperand,
const MCInst &MI,
raw_ostream &OS) const {
// Check for explicit segment override on memory operand.
switch (MI.getOperand(SegOperand).getReg()) {
- default: llvm_unreachable("Unknown segment register!");
- case 0: break;
- case X86::CS: EmitByte(0x2E, CurByte, OS); break;
- case X86::SS: EmitByte(0x36, CurByte, OS); break;
- case X86::DS: EmitByte(0x3E, CurByte, OS); break;
- case X86::ES: EmitByte(0x26, CurByte, OS); break;
- case X86::FS: EmitByte(0x64, CurByte, OS); break;
- case X86::GS: EmitByte(0x65, CurByte, OS); break;
+ default:
+ llvm_unreachable("Unknown segment register!");
+ case 0:
+ break;
+ case X86::CS:
+ emitByte(0x2E, CurByte, OS);
+ break;
+ case X86::SS:
+ emitByte(0x36, CurByte, OS);
+ break;
+ case X86::DS:
+ emitByte(0x3E, CurByte, OS);
+ break;
+ case X86::ES:
+ emitByte(0x26, CurByte, OS);
+ break;
+ case X86::FS:
+ emitByte(0x64, CurByte, OS);
+ break;
+ case X86::GS:
+ emitByte(0x65, CurByte, OS);
+ break;
}
}
/// Emit all instruction prefixes prior to the opcode.
///
-/// MemOperand is the operand # of the start of a memory operand if present. If
-/// Not present, it is -1.
+/// \param MemOperand the operand # of the start of a memory operand if present.
+/// If not present, it is -1.
///
-/// Returns true if a REX prefix was used.
+/// \returns true if a REX prefix was used.
bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
int MemOperand, const MCInst &MI,
const MCInstrDesc &Desc,
@@ -1152,35 +1299,35 @@ bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS) const {
bool Ret = false;
// Emit the operand size opcode prefix as needed.
- if ((TSFlags & X86II::OpSizeMask) == (is16BitMode(STI) ? X86II::OpSize32
- : X86II::OpSize16))
- EmitByte(0x66, CurByte, OS);
+ if ((TSFlags & X86II::OpSizeMask) ==
+ (STI.hasFeature(X86::Mode16Bit) ? X86II::OpSize32 : X86II::OpSize16))
+ emitByte(0x66, CurByte, OS);
// Emit the LOCK opcode prefix.
if (TSFlags & X86II::LOCK || MI.getFlags() & X86::IP_HAS_LOCK)
- EmitByte(0xF0, CurByte, OS);
+ emitByte(0xF0, CurByte, OS);
// Emit the NOTRACK opcode prefix.
if (TSFlags & X86II::NOTRACK || MI.getFlags() & X86::IP_HAS_NOTRACK)
- EmitByte(0x3E, CurByte, OS);
+ emitByte(0x3E, CurByte, OS);
switch (TSFlags & X86II::OpPrefixMask) {
- case X86II::PD: // 66
- EmitByte(0x66, CurByte, OS);
+ case X86II::PD: // 66
+ emitByte(0x66, CurByte, OS);
break;
- case X86II::XS: // F3
- EmitByte(0xF3, CurByte, OS);
+ case X86II::XS: // F3
+ emitByte(0xF3, CurByte, OS);
break;
- case X86II::XD: // F2
- EmitByte(0xF2, CurByte, OS);
+ case X86II::XD: // F2
+ emitByte(0xF2, CurByte, OS);
break;
}
// Handle REX prefix.
// FIXME: Can this come before F2 etc to simplify emission?
- if (is64BitMode(STI)) {
- if (uint8_t REX = DetermineREXPrefix(MI, TSFlags, MemOperand, Desc)) {
- EmitByte(0x40 | REX, CurByte, OS);
+ if (STI.hasFeature(X86::Mode64Bit)) {
+ if (uint8_t REX = determineREXPrefix(MI, TSFlags, MemOperand, Desc)) {
+ emitByte(0x40 | REX, CurByte, OS);
Ret = true;
}
} else {
@@ -1189,33 +1336,50 @@ bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 0x0F escape code must be emitted just before the opcode.
switch (TSFlags & X86II::OpMapMask) {
- case X86II::TB: // Two-byte opcode map
- case X86II::T8: // 0F 38
- case X86II::TA: // 0F 3A
- case X86II::ThreeDNow: // 0F 0F, second 0F emitted by caller.
- EmitByte(0x0F, CurByte, OS);
+ case X86II::TB: // Two-byte opcode map
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ case X86II::ThreeDNow: // 0F 0F, second 0F emitted by caller.
+ emitByte(0x0F, CurByte, OS);
break;
}
switch (TSFlags & X86II::OpMapMask) {
- case X86II::T8: // 0F 38
- EmitByte(0x38, CurByte, OS);
+ case X86II::T8: // 0F 38
+ emitByte(0x38, CurByte, OS);
break;
- case X86II::TA: // 0F 3A
- EmitByte(0x3A, CurByte, OS);
+ case X86II::TA: // 0F 3A
+ emitByte(0x3A, CurByte, OS);
break;
}
return Ret;
}
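// A worked example of the prefix order produced above (a sketch, not
// LLVM API): "lock cmpxchg %rcx,(%rdx)" encodes as F0 48 0F B1 0A, so
// the prefix bytes in emission order are LOCK, then REX.W, then the
// two-byte opcode escape:
#include <cstdint>
#include <vector>

static std::vector<uint8_t> lockCmpxchg64Prefixes() {
  return {0xF0 /*LOCK*/, 0x48 /*REX.W*/, 0x0F /*escape before B1*/};
}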
-void X86MCCodeEmitter::
-encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS,
+ const MCSubtargetInfo &STI) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Pseudo instructions don't get encoded.
+ if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return;
+
+ unsigned CurOp = X86II::getOperandBias(Desc);
+
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ bool Rex = false;
+ emitPrefixImpl(TSFlags, CurOp, CurByte, Rex, MI, Desc, STI, OS);
+}
+
+void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MCII.get(Opcode);
uint64_t TSFlags = Desc.TSFlags;
- unsigned Flags = MI.getFlags();
// Pseudo instructions don't get encoded.
if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
@@ -1227,8 +1391,8 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
// Keep track of the current byte being emitted.
unsigned CurByte = 0;
- // Encoding type for this instruction.
- uint64_t Encoding = TSFlags & X86II::EncodingMask;
+ bool Rex = false;
+ emitPrefixImpl(TSFlags, CurOp, CurByte, Rex, MI, Desc, STI, OS);
// It uses the VEX.VVVV field?
bool HasVEX_4V = TSFlags & X86II::VEX_4V;
@@ -1241,104 +1405,25 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
// Used if a register is encoded in 7:4 of immediate.
unsigned I8RegNum = 0;
- // Determine where the memory operand starts, if present.
- int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
- if (MemoryOperand != -1) MemoryOperand += CurOp;
-
- // Emit segment override opcode prefix as needed.
- if (MemoryOperand >= 0)
- EmitSegmentOverridePrefix(CurByte, MemoryOperand+X86::AddrSegmentReg,
- MI, OS);
-
- // Emit the repeat opcode prefix as needed.
- if (TSFlags & X86II::REP || Flags & X86::IP_HAS_REPEAT)
- EmitByte(0xF3, CurByte, OS);
- if (Flags & X86::IP_HAS_REPEAT_NE)
- EmitByte(0xF2, CurByte, OS);
-
- // Emit the address size opcode prefix as needed.
- bool need_address_override;
- uint64_t AdSize = TSFlags & X86II::AdSizeMask;
- if ((is16BitMode(STI) && AdSize == X86II::AdSize32) ||
- (is32BitMode(STI) && AdSize == X86II::AdSize16) ||
- (is64BitMode(STI) && AdSize == X86II::AdSize32)) {
- need_address_override = true;
- } else if (MemoryOperand < 0) {
- need_address_override = false;
- } else if (is64BitMode(STI)) {
- assert(!Is16BitMemOperand(MI, MemoryOperand, STI));
- need_address_override = Is32BitMemOperand(MI, MemoryOperand);
- } else if (is32BitMode(STI)) {
- assert(!Is64BitMemOperand(MI, MemoryOperand));
- need_address_override = Is16BitMemOperand(MI, MemoryOperand, STI);
- } else {
- assert(is16BitMode(STI));
- assert(!Is64BitMemOperand(MI, MemoryOperand));
- need_address_override = !Is16BitMemOperand(MI, MemoryOperand, STI);
- }
-
- if (need_address_override)
- EmitByte(0x67, CurByte, OS);
-
- bool Rex = false;
- if (Encoding == 0)
- Rex = emitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, STI, OS);
- else
- EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
-
uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
if ((TSFlags & X86II::OpMapMask) == X86II::ThreeDNow)
- BaseOpcode = 0x0F; // Weird 3DNow! encoding.
+ BaseOpcode = 0x0F; // Weird 3DNow! encoding.
unsigned OpcodeOffset = 0;
uint64_t Form = TSFlags & X86II::FormMask;
switch (Form) {
- default: errs() << "FORM: " << Form << "\n";
+ default:
+ errs() << "FORM: " << Form << "\n";
llvm_unreachable("Unknown FormMask value in X86MCCodeEmitter!");
case X86II::Pseudo:
llvm_unreachable("Pseudo instruction shouldn't be emitted");
- case X86II::RawFrmDstSrc: {
- unsigned siReg = MI.getOperand(1).getReg();
- assert(((siReg == X86::SI && MI.getOperand(0).getReg() == X86::DI) ||
- (siReg == X86::ESI && MI.getOperand(0).getReg() == X86::EDI) ||
- (siReg == X86::RSI && MI.getOperand(0).getReg() == X86::RDI)) &&
- "SI and DI register sizes do not match");
- // Emit segment override opcode prefix as needed (not for %ds).
- if (MI.getOperand(2).getReg() != X86::DS)
- EmitSegmentOverridePrefix(CurByte, 2, MI, OS);
- // Emit AdSize prefix as needed.
- if ((!is32BitMode(STI) && siReg == X86::ESI) ||
- (is32BitMode(STI) && siReg == X86::SI))
- EmitByte(0x67, CurByte, OS);
- CurOp += 3; // Consume operands.
- EmitByte(BaseOpcode, CurByte, OS);
- break;
- }
- case X86II::RawFrmSrc: {
- unsigned siReg = MI.getOperand(0).getReg();
- // Emit segment override opcode prefix as needed (not for %ds).
- if (MI.getOperand(1).getReg() != X86::DS)
- EmitSegmentOverridePrefix(CurByte, 1, MI, OS);
- // Emit AdSize prefix as needed.
- if ((!is32BitMode(STI) && siReg == X86::ESI) ||
- (is32BitMode(STI) && siReg == X86::SI))
- EmitByte(0x67, CurByte, OS);
- CurOp += 2; // Consume operands.
- EmitByte(BaseOpcode, CurByte, OS);
+ case X86II::RawFrmDstSrc:
+ case X86II::RawFrmSrc:
+ case X86II::RawFrmDst:
+ emitByte(BaseOpcode, CurByte, OS);
break;
- }
- case X86II::RawFrmDst: {
- unsigned siReg = MI.getOperand(0).getReg();
- // Emit AdSize prefix as needed.
- if ((!is32BitMode(STI) && siReg == X86::EDI) ||
- (is32BitMode(STI) && siReg == X86::DI))
- EmitByte(0x67, CurByte, OS);
- ++CurOp; // Consume operand.
- EmitByte(BaseOpcode, CurByte, OS);
- break;
- }
case X86II::AddCCFrm: {
// This will be added to the opcode in the fallthrough.
OpcodeOffset = MI.getOperand(NumOps - 1).getImm();
@@ -1346,49 +1431,47 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
--NumOps; // Drop the operand from the end.
LLVM_FALLTHROUGH;
case X86II::RawFrm:
- EmitByte(BaseOpcode + OpcodeOffset, CurByte, OS);
+ emitByte(BaseOpcode + OpcodeOffset, CurByte, OS);
- if (!is64BitMode(STI) || !isPCRel32Branch(MI))
+ if (!STI.hasFeature(X86::Mode64Bit) || !isPCRel32Branch(MI, MCII))
break;
const MCOperand &Op = MI.getOperand(CurOp++);
- EmitImmediate(Op, MI.getLoc(), X86II::getSizeOfImm(TSFlags),
+ emitImmediate(Op, MI.getLoc(), X86II::getSizeOfImm(TSFlags),
MCFixupKind(X86::reloc_branch_4byte_pcrel), CurByte, OS,
Fixups);
break;
}
case X86II::RawFrmMemOffs:
- // Emit segment override opcode prefix as needed.
- EmitSegmentOverridePrefix(CurByte, 1, MI, OS);
- EmitByte(BaseOpcode, CurByte, OS);
- EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
+ emitByte(BaseOpcode, CurByte, OS);
+ emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
CurByte, OS, Fixups);
++CurOp; // skip segment operand
break;
case X86II::RawFrmImm8:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
+ emitByte(BaseOpcode, CurByte, OS);
+ emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
CurByte, OS, Fixups);
- EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, CurByte,
+ emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, CurByte,
OS, Fixups);
break;
case X86II::RawFrmImm16:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
+ emitByte(BaseOpcode, CurByte, OS);
+ emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
CurByte, OS, Fixups);
- EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, CurByte,
+ emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, CurByte,
OS, Fixups);
break;
case X86II::AddRegFrm:
- EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
+ emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
break;
case X86II::MRMDestReg: {
- EmitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, CurByte, OS);
unsigned SrcRegNum = CurOp + 1;
if (HasEVEX_K) // Skip writemask
@@ -1397,13 +1480,13 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
++SrcRegNum;
- EmitRegModRMByte(MI.getOperand(CurOp),
- GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
+ emitRegModRMByte(MI.getOperand(CurOp),
+ getX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMDestMem: {
- EmitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, CurByte, OS);
unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
if (HasEVEX_K) // Skip writemask
@@ -1412,13 +1495,13 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
++SrcRegNum;
- emitMemModRMByte(MI, CurOp, GetX86RegNum(MI.getOperand(SrcRegNum)), TSFlags,
+ emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags,
Rex, CurByte, OS, Fixups, STI);
CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMSrcReg: {
- EmitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, CurByte, OS);
unsigned SrcRegNum = CurOp + 1;
if (HasEVEX_K) // Skip writemask
@@ -1427,8 +1510,8 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
++SrcRegNum;
- EmitRegModRMByte(MI.getOperand(SrcRegNum),
- GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ emitRegModRMByte(MI.getOperand(SrcRegNum),
+ getX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
CurOp = SrcRegNum + 1;
if (HasVEX_I8Reg)
I8RegNum = getX86RegEncoding(MI, CurOp++);
@@ -1438,17 +1521,17 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
break;
}
case X86II::MRMSrcReg4VOp3: {
- EmitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, CurByte, OS);
unsigned SrcRegNum = CurOp + 1;
- EmitRegModRMByte(MI.getOperand(SrcRegNum),
- GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ emitRegModRMByte(MI.getOperand(SrcRegNum),
+ getX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
CurOp = SrcRegNum + 1;
++CurOp; // Encoded in VEX.VVVV
break;
}
case X86II::MRMSrcRegOp4: {
- EmitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, CurByte, OS);
unsigned SrcRegNum = CurOp + 1;
// Skip 1st src (which is encoded in VEX_VVVV)
@@ -1458,8 +1541,8 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
assert(HasVEX_I8Reg && "MRMSrcRegOp4 should imply VEX_I8Reg");
I8RegNum = getX86RegEncoding(MI, SrcRegNum++);
- EmitRegModRMByte(MI.getOperand(SrcRegNum),
- GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ emitRegModRMByte(MI.getOperand(SrcRegNum),
+ getX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
CurOp = SrcRegNum + 1;
break;
}
@@ -1468,24 +1551,24 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned SecondOp = CurOp++;
unsigned CC = MI.getOperand(CurOp++).getImm();
- EmitByte(BaseOpcode + CC, CurByte, OS);
+ emitByte(BaseOpcode + CC, CurByte, OS);
- EmitRegModRMByte(MI.getOperand(SecondOp),
- GetX86RegNum(MI.getOperand(FirstOp)), CurByte, OS);
+ emitRegModRMByte(MI.getOperand(SecondOp),
+ getX86RegNum(MI.getOperand(FirstOp)), CurByte, OS);
break;
}
case X86II::MRMSrcMem: {
- unsigned FirstMemOp = CurOp+1;
+ unsigned FirstMemOp = CurOp + 1;
if (HasEVEX_K) // Skip writemask
++FirstMemOp;
if (HasVEX_4V)
- ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
- EmitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, CurByte, OS);
- emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
+ emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
TSFlags, Rex, CurByte, OS, Fixups, STI);
CurOp = FirstMemOp + X86::AddrNumOperands;
if (HasVEX_I8Reg)
@@ -1493,28 +1576,28 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
break;
}
case X86II::MRMSrcMem4VOp3: {
- unsigned FirstMemOp = CurOp+1;
+ unsigned FirstMemOp = CurOp + 1;
- EmitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, CurByte, OS);
- emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
+ emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
TSFlags, Rex, CurByte, OS, Fixups, STI);
CurOp = FirstMemOp + X86::AddrNumOperands;
++CurOp; // Encoded in VEX.VVVV.
break;
}
case X86II::MRMSrcMemOp4: {
- unsigned FirstMemOp = CurOp+1;
+ unsigned FirstMemOp = CurOp + 1;
- ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
// Capture second register source (encoded in Imm[7:4])
assert(HasVEX_I8Reg && "MRMSrcRegOp4 should imply VEX_I8Reg");
I8RegNum = getX86RegEncoding(MI, FirstMemOp++);
- EmitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, CurByte, OS);
- emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
+ emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
TSFlags, Rex, CurByte, OS, Fixups, STI);
CurOp = FirstMemOp + X86::AddrNumOperands;
break;
@@ -1525,9 +1608,9 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
CurOp = FirstMemOp + X86::AddrNumOperands;
unsigned CC = MI.getOperand(CurOp++).getImm();
- EmitByte(BaseOpcode + CC, CurByte, OS);
+ emitByte(BaseOpcode + CC, CurByte, OS);
- emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(RegOp)),
+ emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(RegOp)),
TSFlags, Rex, CurByte, OS, Fixups, STI);
break;
}
@@ -1536,24 +1619,28 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned RegOp = CurOp++;
unsigned CC = MI.getOperand(CurOp++).getImm();
- EmitByte(BaseOpcode + CC, CurByte, OS);
- EmitRegModRMByte(MI.getOperand(RegOp), 0, CurByte, OS);
+ emitByte(BaseOpcode + CC, CurByte, OS);
+ emitRegModRMByte(MI.getOperand(RegOp), 0, CurByte, OS);
break;
}
case X86II::MRMXr:
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
+ case X86II::MRM0r:
+ case X86II::MRM1r:
+ case X86II::MRM2r:
+ case X86II::MRM3r:
+ case X86II::MRM4r:
+ case X86II::MRM5r:
+ case X86II::MRM6r:
+ case X86II::MRM7r:
if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
++CurOp;
if (HasEVEX_K) // Skip writemask
++CurOp;
- EmitByte(BaseOpcode, CurByte, OS);
- EmitRegModRMByte(MI.getOperand(CurOp++),
- (Form == X86II::MRMXr) ? 0 : Form-X86II::MRM0r,
- CurByte, OS);
+ emitByte(BaseOpcode, CurByte, OS);
+ emitRegModRMByte(MI.getOperand(CurOp++),
+ (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, CurByte,
+ OS);
break;
case X86II::MRMXmCC: {
@@ -1561,52 +1648,98 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
CurOp = FirstMemOp + X86::AddrNumOperands;
unsigned CC = MI.getOperand(CurOp++).getImm();
- EmitByte(BaseOpcode + CC, CurByte, OS);
+ emitByte(BaseOpcode + CC, CurByte, OS);
emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, Rex, CurByte, OS, Fixups, STI);
break;
}
case X86II::MRMXm:
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRM0m:
+ case X86II::MRM1m:
+ case X86II::MRM2m:
+ case X86II::MRM3m:
+ case X86II::MRM4m:
+ case X86II::MRM5m:
+ case X86II::MRM6m:
+ case X86II::MRM7m:
if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
++CurOp;
if (HasEVEX_K) // Skip writemask
++CurOp;
- EmitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, CurByte, OS);
emitMemModRMByte(MI, CurOp,
(Form == X86II::MRMXm) ? 0 : Form - X86II::MRM0m, TSFlags,
Rex, CurByte, OS, Fixups, STI);
CurOp += X86::AddrNumOperands;
break;
- case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C5:
- case X86II::MRM_C6: case X86II::MRM_C7: case X86II::MRM_C8:
- case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB:
- case X86II::MRM_CC: case X86II::MRM_CD: case X86II::MRM_CE:
- case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D2: case X86II::MRM_D3: case X86II::MRM_D4:
- case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D7:
- case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA:
- case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD:
- case X86II::MRM_DE: case X86II::MRM_DF: case X86II::MRM_E0:
- case X86II::MRM_E1: case X86II::MRM_E2: case X86II::MRM_E3:
- case X86II::MRM_E4: case X86II::MRM_E5: case X86II::MRM_E6:
- case X86II::MRM_E7: case X86II::MRM_E8: case X86II::MRM_E9:
- case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC:
- case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_EF:
- case X86II::MRM_F0: case X86II::MRM_F1: case X86II::MRM_F2:
- case X86II::MRM_F3: case X86II::MRM_F4: case X86II::MRM_F5:
- case X86II::MRM_F6: case X86II::MRM_F7: case X86II::MRM_F8:
- case X86II::MRM_F9: case X86II::MRM_FA: case X86II::MRM_FB:
- case X86II::MRM_FC: case X86II::MRM_FD: case X86II::MRM_FE:
+ case X86II::MRM_C0:
+ case X86II::MRM_C1:
+ case X86II::MRM_C2:
+ case X86II::MRM_C3:
+ case X86II::MRM_C4:
+ case X86II::MRM_C5:
+ case X86II::MRM_C6:
+ case X86II::MRM_C7:
+ case X86II::MRM_C8:
+ case X86II::MRM_C9:
+ case X86II::MRM_CA:
+ case X86II::MRM_CB:
+ case X86II::MRM_CC:
+ case X86II::MRM_CD:
+ case X86II::MRM_CE:
+ case X86II::MRM_CF:
+ case X86II::MRM_D0:
+ case X86II::MRM_D1:
+ case X86II::MRM_D2:
+ case X86II::MRM_D3:
+ case X86II::MRM_D4:
+ case X86II::MRM_D5:
+ case X86II::MRM_D6:
+ case X86II::MRM_D7:
+ case X86II::MRM_D8:
+ case X86II::MRM_D9:
+ case X86II::MRM_DA:
+ case X86II::MRM_DB:
+ case X86II::MRM_DC:
+ case X86II::MRM_DD:
+ case X86II::MRM_DE:
+ case X86II::MRM_DF:
+ case X86II::MRM_E0:
+ case X86II::MRM_E1:
+ case X86II::MRM_E2:
+ case X86II::MRM_E3:
+ case X86II::MRM_E4:
+ case X86II::MRM_E5:
+ case X86II::MRM_E6:
+ case X86II::MRM_E7:
+ case X86II::MRM_E8:
+ case X86II::MRM_E9:
+ case X86II::MRM_EA:
+ case X86II::MRM_EB:
+ case X86II::MRM_EC:
+ case X86II::MRM_ED:
+ case X86II::MRM_EE:
+ case X86II::MRM_EF:
+ case X86II::MRM_F0:
+ case X86II::MRM_F1:
+ case X86II::MRM_F2:
+ case X86II::MRM_F3:
+ case X86II::MRM_F4:
+ case X86II::MRM_F5:
+ case X86II::MRM_F6:
+ case X86II::MRM_F7:
+ case X86II::MRM_F8:
+ case X86II::MRM_F9:
+ case X86II::MRM_FA:
+ case X86II::MRM_FB:
+ case X86II::MRM_FC:
+ case X86II::MRM_FD:
+ case X86II::MRM_FE:
case X86II::MRM_FF:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC0 + Form - X86II::MRM_C0, CurByte, OS);
+ emitByte(BaseOpcode, CurByte, OS);
+ emitByte(0xC0 + Form - X86II::MRM_C0, CurByte, OS);
break;
}
@@ -1620,21 +1753,21 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
assert(Val < 16 && "Immediate operand value out of range");
I8RegNum |= Val;
}
- EmitImmediate(MCOperand::createImm(I8RegNum), MI.getLoc(), 1, FK_Data_1,
+ emitImmediate(MCOperand::createImm(I8RegNum), MI.getLoc(), 1, FK_Data_1,
CurByte, OS, Fixups);
} else {
// If there is a remaining operand, it must be a trailing immediate. Emit it
// according to the right size for the instruction. Some instructions
// (SSE4a extrq and insertq) have two trailing immediates.
while (CurOp != NumOps && NumOps - CurOp <= 2) {
- EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
+ emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
CurByte, OS, Fixups);
}
}
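// For reference, a sketch of the imm8[7:4] register encoding handled
// above (HasVEX_I8Reg, e.g. vblendvps): the extra source register's
// encoding occupies the high nibble of the trailing immediate and any
// immediate payload the low nibble. The helper name is illustrative:
#include <cassert>
#include <cstdint>

static uint8_t packImm8Reg(unsigned RegEnc, unsigned Imm4) {
  assert(RegEnc < 16 && Imm4 < 16 && "value out of range");
  return uint8_t((RegEnc << 4) | Imm4);
}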
if ((TSFlags & X86II::OpMapMask) == X86II::ThreeDNow)
- EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+ emitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
#ifndef NDEBUG
// FIXME: Verify.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index ced9eacc8b97..049a3a815984 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -290,12 +290,9 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
std::string ArchFS = X86_MC::ParseX86Triple(TT);
- if (!FS.empty()) {
- if (!ArchFS.empty())
- ArchFS = (Twine(ArchFS) + "," + FS).str();
- else
- ArchFS = FS;
- }
+ assert(!ArchFS.empty() && "Failed to parse X86 triple");
+ if (!FS.empty())
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
std::string CPUName = CPU;
if (CPUName.empty())
@@ -323,7 +320,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(const Triple &TT) {
}
static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TheTriple) {
+ const Triple &TheTriple,
+ const MCTargetOptions &Options) {
bool is64Bit = TheTriple.getArch() == Triple::x86_64;
MCAsmInfo *MAI;
@@ -554,7 +552,7 @@ static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) {
}
// Force static initialization.
-extern "C" void LLVMInitializeX86TargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86TargetMC() {
for (Target *T : {&getTheX86_32Target(), &getTheX86_64Target()}) {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createX86MCAsmInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index 47c41626a666..18cda8f591c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -19,7 +19,7 @@ Target &llvm::getTheX86_64Target() {
return TheX86_64Target;
}
-extern "C" void LLVMInitializeX86TargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86TargetInfo() {
RegisterTarget<Triple::x86, /*HasJIT=*/true> X(
getTheX86_32Target(), "x86", "32-bit X86: Pentium-Pro and above", "X86");
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.h b/contrib/llvm-project/llvm/lib/Target/X86/X86.h
index 6840fc12751d..0481a40d462a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.h
@@ -150,6 +150,18 @@ void initializeX86ExpandPseudoPass(PassRegistry &);
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
void initializeX86OptimizeLEAPassPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
+
+namespace X86AS {
+enum : unsigned {
+ GS = 256,
+ FS = 257,
+ SS = 258,
+ PTR32_SPTR = 270,
+ PTR32_UPTR = 271,
+ PTR64 = 272
+};
+} // End X86AS namespace
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.td b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
index d8631aca2734..a2b11d55f650 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
@@ -304,12 +304,12 @@ def FeatureFastVariableShuffle
: SubtargetFeature<"fast-variable-shuffle",
"HasFastVariableShuffle",
"true", "Shuffles with variable masks are fast">;
-// On some X86 processors, there is no performance hazard to writing only the
-// lower parts of a YMM or ZMM register without clearing the upper part.
-def FeatureFastPartialYMMorZMMWrite
- : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
- "HasFastPartialYMMorZMMWrite",
- "true", "Partial writes to YMM/ZMM registers are fast">;
+// On some X86 processors, a vzeroupper instruction should be inserted
+// after code that uses ymm/zmm registers and before code that may use
+// SSE instructions.
+def FeatureInsertVZEROUPPER
+ : SubtargetFeature<"vzeroupper",
+ "InsertVZEROUPPER",
+ "true", "Should insert vzeroupper instructions">;
// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
@@ -386,6 +386,10 @@ def FeaturePrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
+def FeaturePreferMaskRegisters
+ : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
+ "Prefer AVX512 mask registers over PTEST/MOVMSK">;
+
// Lower indirect calls using a special construct called a `retpoline` to
// mitigate potential Spectre v2 attacks against them.
def FeatureRetpolineIndirectCalls
@@ -439,7 +443,7 @@ def FeatureFastHorizontalOps
: SubtargetFeature<
"fast-hops", "HasFastHorizontalOps", "true",
"Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
- "normal vector instructions with shuffles", [FeatureSSE3]>;
+ "normal vector instructions with shuffles">;
def FeatureFastScalarShiftMasks
: SubtargetFeature<
@@ -451,6 +455,10 @@ def FeatureFastVectorShiftMasks
"fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
"Prefer a left/right vector logical shift pair over a shift+and pair">;
+def FeatureUseGLMDivSqrtCosts
+ : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
+ "Use Goldmont specific floating point div/sqrt costs">;
+
// Merge branches using three-way conditional code.
def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
"ThreewayBranchProfitable", "true",
@@ -465,12 +473,6 @@ def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
// Silvermont
def ProcIntelSLM : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">;
-// Goldmont
-def ProcIntelGLM : SubtargetFeature<"", "X86ProcFamily", "IntelGLM", "">;
-// Goldmont Plus
-def ProcIntelGLP : SubtargetFeature<"", "X86ProcFamily", "IntelGLP", "">;
-// Tremont
-def ProcIntelTRM : SubtargetFeature<"", "X86ProcFamily", "IntelTRM", "">;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -499,6 +501,7 @@ include "X86SchedHaswell.td"
include "X86SchedBroadwell.td"
include "X86ScheduleSLM.td"
include "X86ScheduleZnver1.td"
+include "X86ScheduleZnver2.td"
include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
@@ -521,7 +524,8 @@ def ProcessorFeatures {
FeatureCMPXCHG16B,
FeaturePOPCNT,
FeatureLAHFSAHF,
- FeatureMacroFusion];
+ FeatureMacroFusion,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> NHMSpecificFeatures = [];
list<SubtargetFeature> NHMFeatures =
!listconcat(NHMInheritableFeatures, NHMSpecificFeatures);
@@ -701,7 +705,8 @@ def ProcessorFeatures {
FeatureCMPXCHG16B,
FeatureMOVBE,
FeatureSlowTwoMemOps,
- FeatureLAHFSAHF];
+ FeatureLAHFSAHF,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> AtomSpecificFeatures = [ProcIntelAtom,
FeatureSlowUAMem16,
FeatureLEAForSP,
@@ -739,7 +744,7 @@ def ProcessorFeatures {
FeatureXSAVES,
FeatureCLFLUSHOPT,
FeatureFSGSBase];
- list<SubtargetFeature> GLMSpecificFeatures = [ProcIntelGLM,
+ list<SubtargetFeature> GLMSpecificFeatures = [FeatureUseGLMDivSqrtCosts,
FeaturePOPCNTFalseDeps];
list<SubtargetFeature> GLMInheritableFeatures =
!listconcat(SLMInheritableFeatures, GLMAdditionalFeatures);
@@ -750,7 +755,7 @@ def ProcessorFeatures {
list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
FeatureRDPID,
FeatureSGX];
- list<SubtargetFeature> GLPSpecificFeatures = [ProcIntelGLP];
+ list<SubtargetFeature> GLPSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
list<SubtargetFeature> GLPInheritableFeatures =
!listconcat(GLMInheritableFeatures, GLPAdditionalFeatures);
list<SubtargetFeature> GLPFeatures =
@@ -762,7 +767,7 @@ def ProcessorFeatures {
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureWAITPKG];
- list<SubtargetFeature> TRMSpecificFeatures = [ProcIntelTRM];
+ list<SubtargetFeature> TRMSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
list<SubtargetFeature> TRMFeatures =
!listconcat(GLPInheritableFeatures, TRMAdditionalFeatures,
TRMSpecificFeatures);
@@ -801,8 +806,8 @@ def ProcessorFeatures {
FeatureBMI2,
FeatureFMA,
FeaturePRFCHW,
+ FeaturePreferMaskRegisters,
FeatureSlowTwoMemOps,
- FeatureFastPartialYMMorZMMWrite,
FeatureHasFastGather,
FeatureSlowPMADDWD];
// TODO Add AVX5124FMAPS/AVX5124VNNIW features
@@ -823,7 +828,8 @@ def ProcessorFeatures {
FeatureLAHFSAHF,
FeatureCMOV,
Feature64Bit,
- FeatureFastScalarShiftMasks];
+ FeatureFastScalarShiftMasks,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> BarcelonaFeatures = BarcelonaInheritableFeatures;
// Bobcat
@@ -845,7 +851,9 @@ def ProcessorFeatures {
FeatureFast15ByteNOP,
FeatureFastScalarShiftMasks,
FeatureFastVectorShiftMasks];
- list<SubtargetFeature> BtVer1Features = BtVer1InheritableFeatures;
+ list<SubtargetFeature> BtVer1SpecificFeatures = [FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> BtVer1Features =
+ !listconcat(BtVer1InheritableFeatures, BtVer1SpecificFeatures);
// Jaguar
list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
@@ -858,7 +866,6 @@ def ProcessorFeatures {
FeatureXSAVEOPT];
list<SubtargetFeature> BtVer2SpecificFeatures = [FeatureFastLZCNT,
FeatureFastBEXTR,
- FeatureFastPartialYMMorZMMWrite,
FeatureFastHorizontalOps];
list<SubtargetFeature> BtVer2InheritableFeatures =
!listconcat(BtVer1InheritableFeatures, BtVer2AdditionalFeatures);
@@ -886,7 +893,8 @@ def ProcessorFeatures {
FeatureLAHFSAHF,
FeatureFast11ByteNOP,
FeatureFastScalarShiftMasks,
- FeatureBranchFusion];
+ FeatureBranchFusion,
+ FeatureInsertVZEROUPPER];
list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;
// PileDriver
@@ -949,6 +957,7 @@ def ProcessorFeatures {
FeatureSHA,
FeatureSSE4A,
FeatureSlowSHLD,
+ FeatureInsertVZEROUPPER,
FeatureX87,
FeatureXSAVE,
FeatureXSAVEC,
@@ -971,28 +980,32 @@ class Proc<string Name, list<SubtargetFeature> Features>
// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16,
- FeatureCMPXCHG8B]>;
-def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16]>;
+ FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
+def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16,
+ FeatureInsertVZEROUPPER]>;
+def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16,
+ FeatureInsertVZEROUPPER]>;
def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16,
- FeatureCMPXCHG8B]>;
+ FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16,
- FeatureCMPXCHG8B]>;
+ FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16,
- FeatureCMPXCHG8B, FeatureMMX]>;
+ FeatureCMPXCHG8B, FeatureMMX,
+ FeatureInsertVZEROUPPER]>;
def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureCMOV]>;
+ FeatureCMOV, FeatureInsertVZEROUPPER]>;
def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureCMOV, FeatureNOPL]>;
+ FeatureCMOV, FeatureNOPL, FeatureInsertVZEROUPPER]>;
def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
FeatureMMX, FeatureCMOV, FeatureFXSR,
- FeatureNOPL]>;
+ FeatureNOPL, FeatureInsertVZEROUPPER]>;
foreach P = ["pentium3", "pentium3m"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,FeatureMMX,
- FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+ FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV,
+ FeatureInsertVZEROUPPER]>;
}
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -1008,29 +1021,29 @@ foreach P = ["pentium3", "pentium3m"] in {
def : ProcessorModel<"pentium-m", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
- FeatureCMOV]>;
+ FeatureCMOV, FeatureInsertVZEROUPPER]>;
foreach P = ["pentium4", "pentium4m"] in {
def : ProcessorModel<P, GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
- FeatureCMOV]>;
+ FeatureCMOV, FeatureInsertVZEROUPPER]>;
}
// Intel Quark.
-def : Proc<"lakemont", []>;
+def : Proc<"lakemont", [FeatureInsertVZEROUPPER]>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
[FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
- FeatureCMOV]>;
+ FeatureCMOV, FeatureInsertVZEROUPPER]>;
// NetBurst.
def : ProcessorModel<"prescott", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
- FeatureCMOV]>;
+ FeatureCMOV, FeatureInsertVZEROUPPER]>;
def : ProcessorModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureSlowUAMem16,
@@ -1041,7 +1054,8 @@ def : ProcessorModel<"nocona", GenericPostRAModel, [
FeatureFXSR,
FeatureNOPL,
Feature64Bit,
- FeatureCMPXCHG16B
+ FeatureCMPXCHG16B,
+ FeatureInsertVZEROUPPER
]>;
// Intel Core 2 Solo/Duo.
@@ -1057,7 +1071,8 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
Feature64Bit,
FeatureCMPXCHG16B,
FeatureLAHFSAHF,
- FeatureMacroFusion
+ FeatureMacroFusion,
+ FeatureInsertVZEROUPPER
]>;
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureX87,
@@ -1071,7 +1086,8 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [
Feature64Bit,
FeatureCMPXCHG16B,
FeatureLAHFSAHF,
- FeatureMacroFusion
+ FeatureMacroFusion,
+ FeatureInsertVZEROUPPER
]>;
// Atom CPUs.
@@ -1138,35 +1154,36 @@ def : ProcessorModel<"tigerlake", SkylakeServerModel,
// AMD CPUs.
def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- FeatureMMX]>;
+ FeatureMMX, FeatureInsertVZEROUPPER]>;
def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- Feature3DNow]>;
+ Feature3DNow, FeatureInsertVZEROUPPER]>;
def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- Feature3DNow]>;
+ Feature3DNow, FeatureInsertVZEROUPPER]>;
foreach P = ["athlon", "athlon-tbird"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
- Feature3DNowA, FeatureNOPL, FeatureSlowSHLD]>;
+ Feature3DNowA, FeatureNOPL, FeatureSlowSHLD,
+ FeatureInsertVZEROUPPER]>;
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL,
- FeatureSlowSHLD]>;
+ FeatureSlowSHLD, FeatureInsertVZEROUPPER]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL,
Feature64Bit, FeatureSlowSHLD, FeatureCMOV,
- FeatureFastScalarShiftMasks]>;
+ FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
FeatureSlowSHLD, FeatureCMOV, Feature64Bit,
- FeatureFastScalarShiftMasks]>;
+ FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER]>;
}
foreach P = ["amdfam10", "barcelona"] in {
@@ -1188,17 +1205,20 @@ def : Proc<"bdver3", ProcessorFeatures.BdVer3Features>;
def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>;
def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>;
-def : ProcessorModel<"znver2", Znver1Model, ProcessorFeatures.ZN2Features>;
+def : ProcessorModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features>;
def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
- Feature3DNowA]>;
-
-def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
-def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
+ Feature3DNowA, FeatureInsertVZEROUPPER]>;
+
+def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
+ FeatureInsertVZEROUPPER]>;
+def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow,
+ FeatureInsertVZEROUPPER]>;
+def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow,
+ FeatureInsertVZEROUPPER]>;
def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
FeatureMMX, FeatureSSE1, FeatureFXSR,
- FeatureCMOV]>;
+ FeatureCMOV, FeatureInsertVZEROUPPER]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -1221,7 +1241,8 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [
Feature64Bit,
FeatureSlow3OpsLEA,
FeatureSlowIncDec,
- FeatureMacroFusion
+ FeatureMacroFusion,
+ FeatureInsertVZEROUPPER
]>;
//===----------------------------------------------------------------------===//
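
With these X86.td changes, VZEROUPPER insertion stops being an implicit default: the old FeatureFastPartialYMMorZMMWrite suppressor disappears and every CPU that wants the AVX-to-SSE transition workaround now lists FeatureInsertVZEROUPPER explicitly, while the retired ProcIntelGLM/GLP/TRM family markers collapse into a shared FeatureUseGLMDivSqrtCosts tuning flag. A minimal, self-contained C++ sketch of the opt-in gating idea (the struct and function names are illustrative, not LLVM's subtarget API):

#include <cstdio>

struct SubtargetFlags {
  bool InsertVZEROUPPER = false; // opt-in bit, set per CPU as in X86.td
};

// The workaround for the AVX->SSE transition penalty runs only on CPUs that
// request it; KNL-class chips now prefer mask registers instead.
bool shouldInsertVZeroUpper(const SubtargetFlags &ST, bool FunctionUsesAVX) {
  return FunctionUsesAVX && ST.InsertVZEROUPPER;
}

int main() {
  SubtargetFlags Generic{true}, KNL{false};
  std::printf("%d %d\n", shouldInsertVZeroUpper(Generic, true),
              shouldInsertVZeroUpper(KNL, true));
}
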
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 8d27be30a277..39d16e7999cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -218,9 +218,16 @@ void X86AsmPrinter::PrintOperand(const MachineInstr *MI, unsigned OpNo,
O << MO.getImm();
return;
+ case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_GlobalAddress: {
- if (IsATT)
+ switch (MI->getInlineAsmDialect()) {
+ case InlineAsm::AD_ATT:
O << '$';
+ break;
+ case InlineAsm::AD_Intel:
+ O << "offset ";
+ break;
+ }
PrintSymbolOperand(MO, O);
break;
}
@@ -336,14 +343,22 @@ void X86AsmPrinter::PrintMemReference(const MachineInstr *MI, unsigned OpNo,
PrintLeaMemReference(MI, OpNo, O, Modifier);
}
+
void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
- unsigned OpNo, raw_ostream &O) {
+ unsigned OpNo, raw_ostream &O,
+ const char *Modifier) {
const MachineOperand &BaseReg = MI->getOperand(OpNo + X86::AddrBaseReg);
unsigned ScaleVal = MI->getOperand(OpNo + X86::AddrScaleAmt).getImm();
const MachineOperand &IndexReg = MI->getOperand(OpNo + X86::AddrIndexReg);
const MachineOperand &DispSpec = MI->getOperand(OpNo + X86::AddrDisp);
const MachineOperand &SegReg = MI->getOperand(OpNo + X86::AddrSegmentReg);
+ // If we really don't want to print out (rip), don't.
+ bool HasBaseReg = BaseReg.getReg() != 0;
+ if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
+ BaseReg.getReg() == X86::RIP)
+ HasBaseReg = false;
+
// If this has a segment register, print it.
if (SegReg.getReg()) {
PrintOperand(MI, OpNo + X86::AddrSegmentReg, O);
@@ -353,7 +368,7 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
O << '[';
bool NeedPlus = false;
- if (BaseReg.getReg()) {
+ if (HasBaseReg) {
PrintOperand(MI, OpNo + X86::AddrBaseReg, O);
NeedPlus = true;
}
@@ -371,7 +386,7 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
PrintOperand(MI, OpNo + X86::AddrDisp, O);
} else {
int64_t DispVal = DispSpec.getImm();
- if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (DispVal || (!IndexReg.getReg() && !HasBaseReg)) {
if (NeedPlus) {
if (DispVal > 0)
O << " + ";
@@ -524,11 +539,6 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode,
raw_ostream &O) {
- if (MI->getInlineAsmDialect() == InlineAsm::AD_Intel) {
- PrintIntelMemReference(MI, OpNo, O);
- return false;
- }
-
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
@@ -542,14 +552,26 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
// These only apply to registers, ignore on mem.
break;
case 'H':
- PrintMemReference(MI, OpNo, O, "H");
+ if (MI->getInlineAsmDialect() == InlineAsm::AD_Intel) {
+ return true; // Unsupported modifier in Intel inline assembly.
+ } else {
+ PrintMemReference(MI, OpNo, O, "H");
+ }
return false;
case 'P': // Don't print @PLT, but do print as memory.
- PrintMemReference(MI, OpNo, O, "no-rip");
+ if (MI->getInlineAsmDialect() == InlineAsm::AD_Intel) {
+ PrintIntelMemReference(MI, OpNo, O, "no-rip");
+ } else {
+ PrintMemReference(MI, OpNo, O, "no-rip");
+ }
return false;
}
}
- PrintMemReference(MI, OpNo, O, nullptr);
+ if (MI->getInlineAsmDialect() == InlineAsm::AD_Intel) {
+ PrintIntelMemReference(MI, OpNo, O, nullptr);
+ } else {
+ PrintMemReference(MI, OpNo, O, nullptr);
+ }
return false;
}
@@ -614,7 +636,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
Feat00Flags |= 1;
}
- if (M.getModuleFlag("cfguardtable"))
+ if (M.getModuleFlag("cfguard"))
Feat00Flags |= 0x800; // Object is CFG-aware.
OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
@@ -727,7 +749,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
//===----------------------------------------------------------------------===//
// Force static initialization.
-extern "C" void LLVMInitializeX86AsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmPrinter() {
RegisterAsmPrinter<X86AsmPrinter> X(getTheX86_32Target());
RegisterAsmPrinter<X86AsmPrinter> Y(getTheX86_64Target());
}
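
The PrintOperand hunk above makes symbolic immediates dialect-aware: AT&T syntax spells the address of a symbol as '$sym', while Intel syntax needs 'offset sym' so the assembler takes the address instead of loading from it. A standalone sketch of that decision (plain strings stand in for the real MCStreamer plumbing):

#include <iostream>
#include <string>

enum class Dialect { ATT, Intel };

// Mirrors the switch on MI->getInlineAsmDialect() above.
std::string printSymbolImmediate(Dialect D, const std::string &Sym) {
  return D == Dialect::ATT ? "$" + Sym : "offset " + Sym;
}

int main() {
  std::cout << printSymbolImmediate(Dialect::ATT, "foo") << '\n'    // $foo
            << printSymbolImmediate(Dialect::Intel, "foo") << '\n'; // offset foo
}
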
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
index a011310970b3..ee79401dc80d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -26,7 +26,7 @@ class MCStreamer;
class MCSymbol;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
- const X86Subtarget *Subtarget;
+ const X86Subtarget *Subtarget = nullptr;
StackMaps SM;
FaultMaps FM;
std::unique_ptr<MCCodeEmitter> CodeEmitter;
@@ -60,7 +60,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
// to emit any necessary padding-NOPs.
void emitShadowPadding(MCStreamer &OutStreamer, const MCSubtargetInfo &STI);
private:
- const MachineFunction *MF;
+ const MachineFunction *MF = nullptr;
bool InShadow = false;
// RequiredShadowSize holds the length of the shadow specified in the most
@@ -112,7 +112,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void PrintMemReference(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier);
void PrintIntelMemReference(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O);
+ raw_ostream &O, const char *Modifier);
public:
X86AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
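
A side note on the '= nullptr' header change above, a pattern this merge repeats across many passes: a default member initializer guarantees the member is determinate even when no constructor path assigns it, so a premature use fails as a clean null check rather than a read of garbage. A tiny illustrative sketch (the names are made up):

#include <cassert>

struct ExamplePass {
  const int *Subtarget = nullptr; // default member init: never indeterminate
  bool ready() const { return Subtarget != nullptr; }
};

int main() {
  ExamplePass P;        // even without a constructor, the pointer is null
  assert(!P.ready());
}
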
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index 69c6b3356cbb..0f1d4b51062e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -46,6 +46,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;
@@ -83,13 +84,13 @@ public:
}
private:
- MachineRegisterInfo *MRI;
- const X86InstrInfo *TII;
- const X86RegisterInfo *TRI;
+ MachineRegisterInfo *MRI = nullptr;
+ const X86InstrInfo *TII = nullptr;
+ const X86RegisterInfo *TRI = nullptr;
SmallVector<std::pair<MachineInstr *, MachineInstr *>, 2>
BlockedLoadsStoresPairs;
SmallVector<MachineInstr *, 2> ForRemoval;
- AliasAnalysis *AA;
+ AliasAnalysis *AA = nullptr;
/// Returns pairs of a Load followed by a Store to memory which look
/// like a memcpy.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index ad7e32b4efc8..f8faa572dffc 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -115,12 +115,12 @@ private:
StringRef getPassName() const override { return "X86 Optimize Call Frame"; }
- const X86InstrInfo *TII;
- const X86FrameLowering *TFL;
- const X86Subtarget *STI;
- MachineRegisterInfo *MRI;
- unsigned SlotSize;
- unsigned Log2SlotSize;
+ const X86InstrInfo *TII = nullptr;
+ const X86FrameLowering *TFL = nullptr;
+ const X86Subtarget *STI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ unsigned SlotSize = 0;
+ unsigned Log2SlotSize = 0;
};
} // end anonymous namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp
index 7ee637cfd523..57bf799cf89c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -115,7 +115,7 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
MIRBuilder.buildConstant(OffsetReg, Offset);
Register AddrReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
+ MIRBuilder.buildPtrAdd(AddrReg, SPReg, OffsetReg);
MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
return AddrReg;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
index 4c49d68bec99..db1aef2fd09d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
@@ -346,6 +346,10 @@ def RetCC_X86_Win64_C : CallingConv<[
// The X86-Win64 calling convention always returns __m64 values in RAX.
CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+ // GCC returns FP values in RAX on Win64.
+ CCIfType<[f32], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i32>>>,
+ CCIfType<[f64], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i64>>>,
+
// Otherwise, everything is the same as 'normal' X86-64 C CC.
CCDelegateTo<RetCC_X86_64_C>
]>;
@@ -434,6 +438,7 @@ def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
CCIfCC<"CallingConv::Tail", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // CFGuard_Check never returns a value so does not need a RetCC.
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
@@ -606,10 +611,12 @@ def CC_X86_Win64_C : CallingConv<[
// A SwiftError is passed in R12.
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R12]>>>,
+ // The 'CFGuardTarget' parameter, if any, is passed in RAX.
+ CCIfCFGuardTarget<CCAssignToReg<[RAX]>>,
+
// 128 bit vectors are passed by pointer
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
-
// 256 bit vectors are passed by pointer
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
@@ -622,6 +629,16 @@ def CC_X86_Win64_C : CallingConv<[
// The first 4 MMX vector arguments are passed in GPRs.
CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+ // If SSE was disabled, pass FP values smaller than 64-bits as integers in
+ // GPRs or on the stack.
+ CCIfType<[f32], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i32>>>,
+ CCIfType<[f64], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i64>>>,
+
+ // The first 4 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64],
+ CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
+ [RCX , RDX , R8 , R9 ]>>,
+
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i8 ], CCAssignToRegWithShadow<[CL , DL , R8B , R9B ],
[XMM0, XMM1, XMM2, XMM3]>>,
@@ -639,11 +656,6 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
[XMM0, XMM1, XMM2, XMM3]>>,
- // The first 4 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
- [RCX , RDX , R8 , R9 ]>>,
-
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i8, i16, i32, i64, f32, f64], CCAssignToStack<8, 8>>
@@ -936,6 +948,12 @@ def CC_X86_32_FastCC : CallingConv<[
CCDelegateTo<CC_X86_32_Common>
]>;
+def CC_X86_Win32_CFGuard_Check : CallingConv<[
+ // The CFGuard check call takes exactly one integer argument
+ // (i.e. the target function address), which is passed in ECX.
+ CCIfType<[i32], CCAssignToReg<[ECX]>>
+]>;
+
def CC_X86_32_GHC : CallingConv<[
// Promote i8/i16 arguments to i32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
@@ -1000,6 +1018,7 @@ def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
+ CCIfCC<"CallingConv::CFGuard_Check", CCDelegateTo<CC_X86_Win32_CFGuard_Check>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::Tail", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
@@ -1136,7 +1155,9 @@ def CSR_64_HHVM : CalleeSavedRegs<(add R12)>;
// Register calling convention preserves few GPR and XMM8-15
def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP, ESP)>;
def CSR_32_RegCall : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE,
- (sequence "XMM%u", 4, 7))>;
+ (sequence "XMM%u", 4, 7))>;
+def CSR_Win32_CFGuard_Check_NoSSE : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE, ECX)>;
+def CSR_Win32_CFGuard_Check : CalleeSavedRegs<(add CSR_32_RegCall, ECX)>;
def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
(sequence "R%u", 10, 15))>;
def CSR_Win64_RegCall : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE,
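
The RetCC_X86_Win64_C and CC_X86_Win64_C hunks above teach the Win64 convention what GCC already does without SSE: f32/f64 values are bit-converted to i32/i64 and carried in GPRs, with returns landing in RAX. The sketch below shows the bit-for-bit reinterpretation both sides would perform; it illustrates the ABI idea, not compiler code:

#include <cassert>
#include <cstdint>
#include <cstring>

// Callee side: the f64 result travels bit-for-bit in RAX.
uint64_t doubleBitsForRAX(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return Bits;
}

// Caller side: reinterpret the integer register back into an f64.
double doubleFromRAXBits(uint64_t Bits) {
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D;
}

int main() { assert(doubleFromRAXBits(doubleBitsForRAX(1.5)) == 1.5); }
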
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CmovConversion.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CmovConversion.cpp
index 5123853f5455..fe43bf4cbbce 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -61,6 +61,7 @@
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -111,9 +112,9 @@ public:
static char ID;
private:
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
TargetSchedModel TSchedModel;
/// List of consecutive CMOV instructions.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CondBrFolding.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CondBrFolding.cpp
index 1bf2d5ba7b8f..7ede94664bf6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CondBrFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CondBrFolding.cpp
@@ -115,8 +115,6 @@ private:
void optimizeCondBr(MachineBasicBlock &MBB,
SmallVectorImpl<MachineBasicBlock *> &BranchPath);
- void fixBranchProb(MachineBasicBlock *NextMBB, MachineBasicBlock *RootMBB,
- SmallVectorImpl<MachineBasicBlock *> &BranchPath);
void replaceBrDest(MachineBasicBlock *MBB, MachineBasicBlock *OrigDest,
MachineBasicBlock *NewDest);
void fixupModifiedCond(MachineBasicBlock *MBB);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
index b4cf5cafbc6e..438b9fd8eebb 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -373,9 +373,9 @@ public:
};
class X86DomainReassignment : public MachineFunctionPass {
- const X86Subtarget *STI;
- MachineRegisterInfo *MRI;
- const X86InstrInfo *TII;
+ const X86Subtarget *STI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const X86InstrInfo *TII = nullptr;
/// All edges that are included in some closure
DenseSet<unsigned> EnclosedEdges;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
index 24c8e6d6f6eb..f1cf9b94c9e5 100755
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -84,7 +84,7 @@ public:
private:
/// Machine instruction info used throughout the class.
- const X86InstrInfo *TII;
+ const X86InstrInfo *TII = nullptr;
};
} // end anonymous namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 9126a1fbea52..d35d65914b34 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -41,11 +41,11 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
- const X86Subtarget *STI;
- const X86InstrInfo *TII;
- const X86RegisterInfo *TRI;
- const X86MachineFunctionInfo *X86FI;
- const X86FrameLowering *X86FL;
+ const X86Subtarget *STI = nullptr;
+ const X86InstrInfo *TII = nullptr;
+ const X86RegisterInfo *TRI = nullptr;
+ const X86MachineFunctionInfo *X86FI = nullptr;
+ const X86FrameLowering *X86FL = nullptr;
bool runOnMachineFunction(MachineFunction &Fn) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
index e5e089d07d55..1dbf40683564 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -3218,6 +3219,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
case CallingConv::X86_ThisCall:
case CallingConv::Win64:
case CallingConv::X86_64_SysV:
+ case CallingConv::CFGuard_Check:
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index 9f7c4afde760..f8c4a2adb851 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -48,11 +48,14 @@
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
@@ -113,6 +116,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>(); // Machine loop info is used to
// guide some heuristics.
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -127,19 +132,22 @@ public:
}
private:
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
/// Machine instruction info used throughout the class.
- const X86InstrInfo *TII;
+ const X86InstrInfo *TII = nullptr;
/// Local member for function's OptForSize attribute.
- bool OptForSize;
+ bool OptForSize = false;
  /// Machine loop info used for guiding some heuristics.
- MachineLoopInfo *MLI;
+ MachineLoopInfo *MLI = nullptr;
/// Register Liveness information after the current instruction.
LivePhysRegs LiveRegs;
+
+ ProfileSummaryInfo *PSI;
+ MachineBlockFrequencyInfo *MBFI;
};
char FixupBWInstPass::ID = 0;
}
@@ -154,8 +162,11 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
- OptForSize = MF.getFunction().hasOptSize();
MLI = &getAnalysis<MachineLoopInfo>();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ MBFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
LiveRegs.init(TII->getRegisterInfo());
LLVM_DEBUG(dbgs() << "Start X86FixupBWInsts\n";);
@@ -426,6 +437,9 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
// We run after PEI, so we need to AddPristinesAndCSRs.
LiveRegs.addLiveOuts(MBB);
+ OptForSize = MF.getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
+
for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) {
MachineInstr *MI = &*I;
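
The X86FixupBWInsts hunks above turn the whole-function hasOptSize() test into a per-block decision that consults profile data when a summary is present, so only provably cold blocks get the size-oriented fixups under PGO. A rough sketch of that decision (field names are illustrative; the real pass calls llvm::shouldOptimizeForSize()):

struct BlockProfile { bool HasProfileSummary; bool BlockIsCold; };

bool optimizeBlockForSize(bool FnHasOptSize, const BlockProfile *PSI) {
  if (FnHasOptSize)
    return true;                 // explicit optsize/minsize attribute wins
  if (PSI && PSI->HasProfileSummary)
    return PSI->BlockIsCold;     // with PGO, shrink only the cold blocks
  return false;                  // no profile: keep the speed-oriented default
}

int main() {
  BlockProfile Cold{true, true};
  return optimizeBlockForSize(false, &Cold) ? 0 : 1;
}
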
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 543dc8b00fa0..9ac401bb0253 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -113,8 +113,8 @@ public:
private:
TargetSchedModel TSM;
- const X86InstrInfo *TII;
- const X86RegisterInfo *TRI;
+ const X86InstrInfo *TII = nullptr;
+ const X86RegisterInfo *TRI = nullptr;
};
}
@@ -650,6 +650,9 @@ void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
.addReg(DestReg)
.add(Index);
LLVM_DEBUG(NewMI->dump(););
+
+ MBB.erase(I);
+ I = NewMI;
return;
}
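
The FixupLEAs fix above closes a silent bug: after building NewMI, the old LEA is now erased and the iterator re-pointed at the replacement, so the pass neither revisits a dead instruction nor leaves it in the block. The same iterator discipline, sketched on std::list (machine instruction lists behave analogously for this purpose):

#include <cassert>
#include <iterator>
#include <list>

// Insert the replacement, drop the original, and leave the iterator on the
// new element so the enclosing walk continues correctly.
void replaceCurrent(std::list<int> &L, std::list<int>::iterator &I, int New) {
  I = L.insert(I, New);    // new element goes before the old one
  L.erase(std::next(I));   // old element removed; I remains valid
}

int main() {
  std::list<int> L{1, 2, 3};
  auto I = std::next(L.begin()); // "current instruction" is the 2
  replaceCurrent(L, I, 9);
  assert(*I == 9 && L.size() == 3);
}
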
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp
index cbde280aa280..924f429fc138 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp
@@ -43,19 +43,8 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- // Find the preceding instruction that imp-defs eflags.
- MachineInstr *findFlagsImpDef(MachineBasicBlock *MBB,
- MachineBasicBlock::reverse_iterator MI);
-
- // Return true if MI imp-uses eflags.
- bool impUsesFlags(MachineInstr *MI);
-
- // Return true if this is the opcode of a SetCC instruction with a register
- // output.
- bool isSetCCr(unsigned Opode);
-
- MachineRegisterInfo *MRI;
- const X86InstrInfo *TII;
+ MachineRegisterInfo *MRI = nullptr;
+ const X86InstrInfo *TII = nullptr;
enum { SearchBound = 16 };
@@ -67,31 +56,6 @@ char X86FixupSetCCPass::ID = 0;
FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }
-// We expect the instruction *immediately* before the setcc to imp-def
-// EFLAGS (because of scheduling glue). To make this less brittle w.r.t
-// scheduling, look backwards until we hit the beginning of the
-// basic-block, or a small bound (to avoid quadratic behavior).
-MachineInstr *
-X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB,
- MachineBasicBlock::reverse_iterator MI) {
- // FIXME: Should this be instr_rend(), and MI be reverse_instr_iterator?
- auto MBBStart = MBB->rend();
- for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI)
- for (auto &Op : MI->implicit_operands())
- if (Op.isReg() && (Op.getReg() == X86::EFLAGS) && Op.isDef())
- return &*MI;
-
- return nullptr;
-}
-
-bool X86FixupSetCCPass::impUsesFlags(MachineInstr *MI) {
- for (auto &Op : MI->implicit_operands())
- if (Op.isReg() && (Op.getReg() == X86::EFLAGS) && Op.isUse())
- return true;
-
- return false;
-}
-
bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
MRI = &MF.getRegInfo();
@@ -100,7 +64,12 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineInstr*, 4> ToErase;
for (auto &MBB : MF) {
+ MachineInstr *FlagsDefMI = nullptr;
for (auto &MI : MBB) {
+ // Remember the most recent preceding eflags defining instruction.
+ if (MI.definesRegister(X86::EFLAGS))
+ FlagsDefMI = &MI;
+
// Find a setcc that is used by a zext.
// This doesn't have to be the only use, the transformation is safe
// regardless.
@@ -115,9 +84,6 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
if (!ZExt)
continue;
- // Find the preceding instruction that imp-defs eflags.
- MachineInstr *FlagsDefMI = findFlagsImpDef(
- MI.getParent(), MachineBasicBlock::reverse_iterator(&MI));
if (!FlagsDefMI)
continue;
@@ -126,7 +92,7 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
// it, itself, by definition, clobbers eflags. But it may happen that
// FlagsDefMI also *uses* eflags, in which case the transformation is
// invalid.
- if (impUsesFlags(FlagsDefMI))
+ if (FlagsDefMI->readsRegister(X86::EFLAGS))
continue;
++NumSubstZexts;
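
The X86FixupSetCC rewrite above replaces a bounded backward scan (findFlagsImpDef with SearchBound) by simply remembering the most recent EFLAGS-defining instruction during the existing forward walk, which is both simpler and no longer limited to 16 instructions. A self-contained sketch of the single-pass pairing (Instr is a stand-in for MachineInstr):

#include <cassert>
#include <cstddef>
#include <optional>
#include <vector>

struct Instr { bool DefsEFLAGS; bool IsSetCC; };

// One forward walk: remember the latest EFLAGS-defining instruction and pair
// each setcc with it, instead of scanning backward up to SearchBound.
std::vector<std::optional<std::size_t>>
pairSetCCWithFlagsDef(const std::vector<Instr> &Block) {
  std::vector<std::optional<std::size_t>> Pairs;
  std::optional<std::size_t> LastDef;
  for (std::size_t I = 0; I != Block.size(); ++I) {
    if (Block[I].DefsEFLAGS)
      LastDef = I;               // most recent preceding def so far
    if (Block[I].IsSetCC)
      Pairs.push_back(LastDef);  // may be empty if no def seen yet
  }
  return Pairs;
}

int main() {
  std::vector<Instr> B{{true, false}, {false, true}, {false, true}};
  auto P = pairSetCCWithFlagsDef(B);
  assert(P.size() == 2 && *P[0] == 0 && *P[1] == 0);
}
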
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index daa0196c6e04..b1d2de29c896 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -87,12 +87,12 @@ public:
static char ID;
private:
- MachineRegisterInfo *MRI;
- const X86Subtarget *Subtarget;
- const X86InstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const TargetRegisterClass *PromoteRC;
- MachineDominatorTree *MDT;
+ MachineRegisterInfo *MRI = nullptr;
+ const X86Subtarget *Subtarget = nullptr;
+ const X86InstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetRegisterClass *PromoteRC = nullptr;
+ MachineDominatorTree *MDT = nullptr;
CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
MachineBasicBlock::iterator CopyDefI);
@@ -702,6 +702,9 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
}
Blocks.push_back(SuccMBB);
+
+ // After this, EFLAGS will be recreated before each use.
+ SuccMBB->removeLiveIn(X86::EFLAGS);
}
} while (!Blocks.empty());
@@ -807,10 +810,10 @@ void X86FlagsCopyLoweringPass::rewriteArithmetic(
CondRegArray &CondRegs) {
// Arithmetic is either reading CF or OF. Figure out which condition we need
// to preserve in a register.
- X86::CondCode Cond;
+ X86::CondCode Cond = X86::COND_INVALID;
// The addend to use to reset CF or OF when added to the flag value.
- int Addend;
+ int Addend = 0;
switch (getMnemonicFromOpcode(MI.getOpcode())) {
case FlagArithMnemonic::ADC:
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
index fcfb5bc91314..13bbd6ccfce4 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -83,7 +84,7 @@ namespace {
StringRef getPassName() const override { return "X86 FP Stackifier"; }
private:
- const TargetInstrInfo *TII; // Machine instruction info.
+ const TargetInstrInfo *TII = nullptr; // Machine instruction info.
// Two CFG edges are related if they leave the same block, or enter the same
// block. The transitive closure of an edge under this relation is a
@@ -119,7 +120,7 @@ namespace {
SmallVector<LiveBundle, 8> LiveBundles;
// The edge bundle analysis provides indices into the LiveBundles vector.
- EdgeBundles *Bundles;
+ EdgeBundles *Bundles = nullptr;
// Return a bitmask of FP registers in block's live-in list.
static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
@@ -143,14 +144,14 @@ namespace {
// Partition all the CFG edges into LiveBundles.
void bundleCFGRecomputeKillFlags(MachineFunction &MF);
- MachineBasicBlock *MBB; // Current basic block
+ MachineBasicBlock *MBB = nullptr; // Current basic block
// The hardware keeps track of how many FP registers are live, so we have
// to model that exactly. Usually, each live register corresponds to an
// FP<n> register, but when dealing with calls, returns, and inline
// assembly, it is sometimes necessary to have live scratch registers.
unsigned Stack[8]; // FP<n> Registers in each stack slot...
- unsigned StackTop; // The current top of the FP stack.
+ unsigned StackTop = 0; // The current top of the FP stack.
enum {
NumFPRegs = 8 // Including scratch pseudo-registers.
@@ -666,9 +667,12 @@ static const TableEntry OpcodeTable[] = {
{ X86::CMOVP_Fp32 , X86::CMOVP_F },
{ X86::CMOVP_Fp64 , X86::CMOVP_F },
{ X86::CMOVP_Fp80 , X86::CMOVP_F },
- { X86::COS_Fp32 , X86::COS_F },
- { X86::COS_Fp64 , X86::COS_F },
- { X86::COS_Fp80 , X86::COS_F },
+ { X86::COM_FpIr32 , X86::COM_FIr },
+ { X86::COM_FpIr64 , X86::COM_FIr },
+ { X86::COM_FpIr80 , X86::COM_FIr },
+ { X86::COM_Fpr32 , X86::COM_FST0r },
+ { X86::COM_Fpr64 , X86::COM_FST0r },
+ { X86::COM_Fpr80 , X86::COM_FST0r },
{ X86::DIVR_Fp32m , X86::DIVR_F32m },
{ X86::DIVR_Fp64m , X86::DIVR_F64m },
{ X86::DIVR_Fp64m32 , X86::DIVR_F32m },
@@ -741,9 +745,6 @@ static const TableEntry OpcodeTable[] = {
{ X86::MUL_FpI32m32 , X86::MUL_FI32m },
{ X86::MUL_FpI32m64 , X86::MUL_FI32m },
{ X86::MUL_FpI32m80 , X86::MUL_FI32m },
- { X86::SIN_Fp32 , X86::SIN_F },
- { X86::SIN_Fp64 , X86::SIN_F },
- { X86::SIN_Fp80 , X86::SIN_F },
{ X86::SQRT_Fp32 , X86::SQRT_F },
{ X86::SQRT_Fp64 , X86::SQRT_F },
{ X86::SQRT_Fp80 , X86::SQRT_F },
@@ -803,6 +804,10 @@ static unsigned getConcreteOpcode(unsigned Opcode) {
static const TableEntry PopTable[] = {
{ X86::ADD_FrST0 , X86::ADD_FPrST0 },
+ { X86::COMP_FST0r, X86::FCOMPP },
+ { X86::COM_FIr , X86::COM_FIPr },
+ { X86::COM_FST0r , X86::COMP_FST0r },
+
{ X86::DIVR_FrST0, X86::DIVR_FPrST0 },
{ X86::DIV_FrST0 , X86::DIV_FPrST0 },
@@ -841,7 +846,7 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
int Opcode = Lookup(PopTable, I->getOpcode());
if (Opcode != -1) {
I->setDesc(TII->get(Opcode));
- if (Opcode == X86::UCOM_FPPr)
+ if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
I->RemoveOperand(0);
} else { // Insert an explicit pop
I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
@@ -971,22 +976,23 @@ void FPS::shuffleStackTop(const unsigned char *FixStack,
//===----------------------------------------------------------------------===//
void FPS::handleCall(MachineBasicBlock::iterator &I) {
+ MachineInstr &MI = *I;
unsigned STReturns = 0;
- const MachineFunction* MF = I->getParent()->getParent();
- for (const auto &MO : I->operands()) {
- if (!MO.isReg())
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI.getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
continue;
- unsigned R = MO.getReg() - X86::FP0;
+ assert(Op.isImplicit() && "Expected implicit def/use");
- if (R < 8) {
- if (MF->getFunction().getCallingConv() != CallingConv::X86_RegCall) {
- assert(MO.isDef() && MO.isImplicit());
- }
+ if (Op.isDef())
+ STReturns |= 1 << getFPReg(Op);
- STReturns |= 1 << R;
- }
+ // Remove the operand so that later passes don't see it.
+ MI.RemoveOperand(i);
+ --i;
+ --e;
}
unsigned N = countTrailingOnes(STReturns);
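
In the rewritten FPS::handleCall above, each implicit FP0..FP6 def on the call is folded into the STReturns bitmask and then removed from the instruction; since FP return values occupy ST0 upward contiguously, countTrailingOnes(STReturns) recovers how many stack slots the call produces. A small worked sketch of that bit trick (countTrailingOnes is re-derived here, under the contiguity assumption just described):

#include <cassert>
#include <cstdint>

// Same idea as llvm::countTrailingOnes, written out for clarity.
unsigned countTrailingOnes(uint32_t X) {
  unsigned N = 0;
  for (; X & 1; X >>= 1)
    ++N;
  return N;
}

int main() {
  uint32_t STReturns = 0;
  STReturns |= 1u << 0;          // call defines ST0
  STReturns |= 1u << 1;          // ...and ST1
  assert(countTrailingOnes(STReturns) == 2); // two stack slots to materialize
}
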
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
index 1b469a814adc..799c1f5d1285 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -92,7 +92,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
MFI.hasCopyImplyingStackAdjustment());
}
-static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
+static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
if (IsLP64) {
if (isInt<8>(Imm))
return X86::SUB64ri8;
@@ -104,7 +104,7 @@ static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
}
}
-static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
+static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) {
if (IsLP64) {
if (isInt<8>(Imm))
return X86::ADD64ri8;
@@ -116,12 +116,12 @@ static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
}
}
-static unsigned getSUBrrOpcode(unsigned isLP64) {
- return isLP64 ? X86::SUB64rr : X86::SUB32rr;
+static unsigned getSUBrrOpcode(bool IsLP64) {
+ return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}
-static unsigned getADDrrOpcode(unsigned isLP64) {
- return isLP64 ? X86::ADD64rr : X86::ADD32rr;
+static unsigned getADDrrOpcode(bool IsLP64) {
+ return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
@@ -135,7 +135,7 @@ static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
return X86::AND32ri;
}
-static unsigned getLEArOpcode(unsigned IsLP64) {
+static unsigned getLEArOpcode(bool IsLP64) {
return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
@@ -993,8 +993,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
bool NeedsWinFPO =
!IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
- bool NeedsDwarfCFI =
- !IsWin64Prologue && (MMI.hasDebugInfo() || Fn.needsUnwindTableEntry());
+ bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves();
Register FramePtr = TRI->getFrameRegister(MF);
const Register MachineFramePtr =
STI.isTarget64BitILP32()
@@ -1262,7 +1261,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (Is64Bit) {
// Handle the 64-bit Windows ABI case where we need to call __chkstk.
// Function prologue is responsible for adjusting the stack pointer.
- int Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
+ int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
if (isUInt<32>(Alloc)) {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(Alloc)
@@ -1614,10 +1613,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
bool HasFP = hasFP(MF);
uint64_t NumBytes = 0;
- bool NeedsDwarfCFI =
- (!MF.getTarget().getTargetTriple().isOSDarwin() &&
- !MF.getTarget().getTargetTriple().isOSWindows()) &&
- (MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry());
+ bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
+ !MF.getTarget().getTargetTriple().isOSWindows()) &&
+ MF.needsFrameMoves();
if (IsFunclet) {
assert(HasFP && "EH funclets without FP not yet implemented");
@@ -1862,7 +1860,7 @@ int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF,
return getFrameIndexReference(MF, FI, FrameReg);
FrameReg = TRI->getStackRegister();
- return alignTo(MFI.getMaxCallFrameSize(), getStackAlignment()) + it->second;
+ return alignDown(MFI.getMaxCallFrameSize(), getStackAlignment()) + it->second;
}
int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF,
@@ -2812,11 +2810,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
unsigned StackAlign = getStackAlignment();
Amount = alignTo(Amount, StackAlign);
- MachineModuleInfo &MMI = MF.getMMI();
const Function &F = MF.getFunction();
bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
- bool DwarfCFI = !WindowsCFI &&
- (MMI.hasDebugInfo() || F.needsUnwindTableEntry());
+ bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
// If we have any exception handlers in this function, and we adjust
// the SP before calls, we may need to indicate this to the unwinder
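
Among the X86FrameLowering cleanups above (the unsigned-to-bool opcode helpers, the 64-bit Alloc, the MF.needsFrameMoves() consolidation), the getWin64EHFrameIndexRef change swaps alignTo for alignDown when offsetting past the maximum call frame. The arithmetic difference is shown below with illustrative numbers; the surrounding frame-layout reasoning is inferred from the diff, not stated in it:

#include <cassert>
#include <cstdint>

uint64_t alignTo(uint64_t V, uint64_t A)   { return (V + A - 1) / A * A; }
uint64_t alignDown(uint64_t V, uint64_t A) { return V / A * A; }

int main() {
  // Illustrative numbers only: a 40-byte max call frame, 16-byte stack align.
  assert(alignTo(40, 16) == 48);   // rounds past the allocated region
  assert(alignDown(40, 16) == 32); // stays within it
}
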
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5b546d42d98a..bf33f399db28 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -335,7 +336,7 @@ namespace {
// Do not want to hoist if we're not optimizing for size.
// TODO: We'd like to remove this restriction.
// See the comment in X86InstrInfo.td for more info.
- if (!OptForSize)
+ if (!CurDAG->shouldOptForSize())
return false;
// Walk all the users of the immediate.
@@ -536,12 +537,17 @@ namespace {
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
unsigned Opcode = N->getOpcode();
- if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
- Opcode == X86ISD::CMPM_SAE || Opcode == X86ISD::VFPCLASS) {
+ if (Opcode == X86ISD::CMPM || Opcode == X86ISD::STRICT_CMPM ||
+ Opcode == ISD::SETCC || Opcode == X86ISD::CMPM_SAE ||
+ Opcode == X86ISD::VFPCLASS) {
// We can get 256-bit 8 element types here without VLX being enabled. When
// this happens we will use 512-bit operations and the mask will not be
// zero extended.
EVT OpVT = N->getOperand(0).getValueType();
+ // The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the
+ // second operand.
+ if (Opcode == X86ISD::STRICT_CMPM)
+ OpVT = N->getOperand(1).getValueType();
if (OpVT.is256BitVector() || OpVT.is128BitVector())
return Subtarget->hasVLX();
@@ -575,6 +581,12 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
if (!N.hasOneUse())
return false;
+ // FIXME: Temporary hack to prevent strict floating point nodes from
+ // folding into masked operations illegally.
+ if (U == Root && Root->getOpcode() == ISD::VSELECT &&
+ N.getOpcode() != ISD::LOAD && N.getOpcode() != X86ISD::VBROADCAST_LOAD)
+ return false;
+
if (N.getOpcode() != ISD::LOAD)
return true;
@@ -804,8 +816,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
}
switch (N->getOpcode()) {
+ case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: {
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT: {
// Replace vector fp_to_s/uint with their X86 specific equivalent so we
// don't need 2 sets of patterns.
if (!N->getSimpleValueType(0).isVector())
@@ -814,13 +830,24 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
unsigned NewOpc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
- case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
- case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
+ case ISD::FP_ROUND: NewOpc = X86ISD::VFPROUND; break;
+ case ISD::STRICT_FP_ROUND: NewOpc = X86ISD::STRICT_VFPROUND; break;
+ case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break;
+ case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
+ case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break;
+ case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
}
- SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
- N->getOperand(0));
+ SDValue Res;
+ if (N->isStrictFPOpcode())
+ Res =
+ CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
+ else
+ Res =
+ CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
+ N->getOperand(0));
--I;
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ CurDAG->ReplaceAllUsesWith(N, Res.getNode());
++I;
CurDAG->DeleteNode(N);
continue;
@@ -869,27 +896,45 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
continue;
}
case ISD::FCEIL:
+ case ISD::STRICT_FCEIL:
case ISD::FFLOOR:
+ case ISD::STRICT_FFLOOR:
case ISD::FTRUNC:
+ case ISD::STRICT_FTRUNC:
case ISD::FNEARBYINT:
- case ISD::FRINT: {
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::FRINT:
+ case ISD::STRICT_FRINT: {
// Replace fp rounding with their X86 specific equivalent so we don't
// need 2 sets of patterns.
unsigned Imm;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
+ case ISD::STRICT_FCEIL:
case ISD::FCEIL: Imm = 0xA; break;
+ case ISD::STRICT_FFLOOR:
case ISD::FFLOOR: Imm = 0x9; break;
+ case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: Imm = 0xB; break;
+ case ISD::STRICT_FNEARBYINT:
case ISD::FNEARBYINT: Imm = 0xC; break;
+ case ISD::STRICT_FRINT:
case ISD::FRINT: Imm = 0x4; break;
}
SDLoc dl(N);
- SDValue Res = CurDAG->getNode(
- X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0),
- CurDAG->getTargetConstant(Imm, dl, MVT::i8));
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Res;
+ if (IsStrict)
+ Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl,
+ {N->getValueType(0), MVT::Other},
+ {N->getOperand(0), N->getOperand(1),
+ CurDAG->getTargetConstant(Imm, dl, MVT::i8)});
+ else
+ Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0),
+ N->getOperand(0),
+ CurDAG->getTargetConstant(Imm, dl, MVT::i8));
--I;
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ CurDAG->ReplaceAllUsesWith(N, Res.getNode());
++I;
CurDAG->DeleteNode(N);
continue;
@@ -1017,12 +1062,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
// FPStack has extload and truncstore. SSE can fold direct loads into other
// operations. Based on this, decide what we want to do.
- MVT MemVT;
- if (N->getOpcode() == ISD::FP_ROUND)
- MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
- else
- MemVT = SrcIsSSE ? SrcVT : DstVT;
-
+ MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT;
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
SDLoc dl(N);
@@ -1075,22 +1115,47 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
// FPStack has extload and truncstore. SSE can fold direct loads into other
// operations. Based on this, decide what we want to do.
- MVT MemVT;
- if (N->getOpcode() == ISD::STRICT_FP_ROUND)
- MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
- else
- MemVT = SrcIsSSE ? SrcVT : DstVT;
-
+ MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT;
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
SDLoc dl(N);
// FIXME: optimize the case where the src/dest is a load or store?
// Since the operation is StrictFP, use the preexisting chain.
- SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl, N->getOperand(1),
- MemTmp, MachinePointerInfo(), MemVT);
- SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
- MachinePointerInfo(), MemVT);
+ SDValue Store, Result;
+ if (!SrcIsSSE) {
+ SDVTList VTs = CurDAG->getVTList(MVT::Other);
+ SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp};
+ Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT,
+ MachinePointerInfo(), 0,
+ MachineMemOperand::MOStore);
+ if (N->getFlags().hasNoFPExcept()) {
+ SDNodeFlags Flags = Store->getFlags();
+ Flags.setNoFPExcept(true);
+ Store->setFlags(Flags);
+ }
+ } else {
+ assert(SrcVT == MemVT && "Unexpected VT!");
+ Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp,
+ MachinePointerInfo());
+ }
+
+ if (!DstIsSSE) {
+ SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other);
+ SDValue Ops[] = {Store, MemTmp};
+ Result = CurDAG->getMemIntrinsicNode(X86ISD::FLD, dl, VTs, Ops, MemVT,
+ MachinePointerInfo(), 0,
+ MachineMemOperand::MOLoad);
+ if (N->getFlags().hasNoFPExcept()) {
+ SDNodeFlags Flags = Result->getFlags();
+ Flags.setNoFPExcept(true);
+ Result->setFlags(Flags);
+ }
+ } else {
+ assert(DstVT == MemVT && "Unexpected VT!");
+ Result =
+ CurDAG->getLoad(DstVT, dl, Store, MemTmp, MachinePointerInfo());
+ }
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
// extload we created. This will cause general havok on the dag because
@@ -2224,12 +2289,11 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
- // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
- if (AddrSpace == 256)
+ if (AddrSpace == X86AS::GS)
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
- if (AddrSpace == 257)
+ if (AddrSpace == X86AS::FS)
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
- if (AddrSpace == 258)
+ if (AddrSpace == X86AS::SS)
AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
SDLoc DL(N);
@@ -3019,7 +3083,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
LLVM_FALLTHROUGH;
case X86ISD::ADD:
// Try to match inc/dec.
- if (!Subtarget->slowIncDec() || OptForSize) {
+ if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) {
bool IsOne = isOneConstant(StoredVal.getOperand(1));
bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
// ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec.
@@ -4410,6 +4474,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, CNode);
return;
}
+
+ break;
}
}
@@ -5094,6 +5160,17 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
MachineSDNode *NewNode;
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ if (auto *LoadN = dyn_cast<LoadSDNode>(N0.getOperand(0).getNode())) {
+ if (!LoadN->isSimple()) {
+ unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits();
+ if (MOpc == X86::TEST8mi && NumVolBits != 8)
+ break;
+ else if (MOpc == X86::TEST16mi && NumVolBits != 16)
+ break;
+ else if (MOpc == X86::TEST32mi && NumVolBits != 32)
+ break;
+ }
+ }
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
Reg.getOperand(0) };
NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
@@ -5190,34 +5267,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
if (foldLoadStoreIntoMemOperand(Node))
return;
break;
- case ISD::FCEIL:
- case ISD::FFLOOR:
- case ISD::FTRUNC:
- case ISD::FNEARBYINT:
- case ISD::FRINT: {
- // Replace fp rounding with their X86 specific equivalent so we don't
- // need 2 sets of patterns.
- // FIXME: This can only happen when the nodes started as STRICT_* and have
- // been mutated into their non-STRICT equivalents. Eventually this
- // mutation will be removed and we should switch the STRICT_ nodes to a
- // strict version of RNDSCALE in PreProcessISelDAG.
- unsigned Imm;
- switch (Node->getOpcode()) {
- default: llvm_unreachable("Unexpected opcode!");
- case ISD::FCEIL: Imm = 0xA; break;
- case ISD::FFLOOR: Imm = 0x9; break;
- case ISD::FTRUNC: Imm = 0xB; break;
- case ISD::FNEARBYINT: Imm = 0xC; break;
- case ISD::FRINT: Imm = 0x4; break;
- }
- SDLoc dl(Node);
- SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, Node->getValueType(0),
- Node->getOperand(0),
- CurDAG->getTargetConstant(Imm, dl, MVT::i8));
- ReplaceNode(Node, Res.getNode());
- SelectCode(Res.getNode());
- return;
- }
}
SelectCode(Node);
@@ -5230,10 +5279,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
switch (ConstraintID) {
default:
llvm_unreachable("Unexpected asm memory constraint");
- case InlineAsm::Constraint_i:
- // FIXME: It seems strange that 'i' is needed here since it's supposed to
- // be an immediate and not a memory constraint.
- LLVM_FALLTHROUGH;
case InlineAsm::Constraint_o: // offsetable ??
case InlineAsm::Constraint_v: // not offsetable ??
case InlineAsm::Constraint_m: // memory
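
The selectVectorAddr hunk above replaces magic numbers with named X86AS enumerators; the deleted comment itself records the mapping LLVM reserves on x86, address spaces 256, 257, and 258 for GS-, FS-, and SS-relative addressing. A sketch of the same mapping as a standalone table (segmentForAddrSpace is a made-up helper):

#include <cassert>
#include <cstring>

enum X86AS { GS = 256, FS = 257, SS = 258 };

const char *segmentForAddrSpace(unsigned AS) {
  switch (AS) {
  case X86AS::GS: return "gs";
  case X86AS::FS: return "fs";
  case X86AS::SS: return "ss";
  default:        return nullptr; // flat address space: no segment override
  }
}

int main() { assert(std::strcmp(segmentForAddrSpace(257), "fs") == 0); }
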
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index ed975e9248a8..0f152968ddfd 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25,7 +25,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -154,17 +156,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
}
- if (Subtarget.isTargetDarwin()) {
- // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
- setUseUnderscoreSetJmp(false);
- setUseUnderscoreLongJmp(false);
- } else if (Subtarget.isTargetWindowsGNU()) {
- // MS runtime is weird: it exports _setjmp, but longjmp!
- setUseUnderscoreSetJmp(true);
- setUseUnderscoreLongJmp(false);
- } else {
- setUseUnderscoreSetJmp(true);
- setUseUnderscoreLongJmp(true);
+ if (Subtarget.getTargetTriple().isOSMSVCRT()) {
+ // MSVCRT doesn't have powi; fall back to pow
+ setLibcallName(RTLIB::POWI_F32, nullptr);
+ setLibcallName(RTLIB::POWI_F64, nullptr);
}
// If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
@@ -217,72 +212,69 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ShiftOp , MVT::i64 , Custom);
}
- // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
- // operation.
- setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
- setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
- setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
-
if (!Subtarget.useSoftFloat()) {
- // We have an algorithm for SSE2->double, and we turn this into a
- // 64-bit FILD followed by conditional FADD for other targets.
- setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
+ // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
+ // operation.
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD or VCVTUSI2SS/SD for other targets.
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
- } else {
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
- }
-
- // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
- // this operation.
- setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
- setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
-
- if (!Subtarget.useSoftFloat()) {
- // SSE has no i16 to fp conversion, only i32.
- if (X86ScalarSSEf32) {
- setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
- // f32 and f64 cases are Legal, f80 case is not
- setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
- } else {
- setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
- setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
- }
- } else {
- setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
- setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Expand);
- }
-
- // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
- // this operation.
- setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
- setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
-
- if (!Subtarget.useSoftFloat()) {
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
+ // We have an algorithm for SSE2->double, and we turn this into a
+ // 64-bit FILD followed by conditional FADD for other targets.
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
+
+  // Promote i8 SINT_TO_FP to a larger SINT_TO_FP, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
+    // SSE has no i16-to-fp conversion, only i32. We promote in the custom
+    // handler so that f80 can use i16, and f64 can use i16 with SSE1 only.
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
+ // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
- setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
- setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
-
- setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
- setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
- } else {
- setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
- setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
- setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
- }
-
- // Handle FP_TO_UINT by promoting the destination to a larger signed
- // conversion.
- setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
- setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
- setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
-
- if (!Subtarget.useSoftFloat()) {
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
- }
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
+
+  // Promote i8 FP_TO_SINT to a larger FP_TO_SINT, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ // FIXME: This doesn't generate invalid exception when it should. PR44019.
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+ // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
+ // are Legal, f80 is custom lowered.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
+
+ // Handle FP_TO_UINT by promoting the destination to a larger signed
+ // conversion.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ // FIXME: This doesn't generate invalid exception when it should. PR44019.
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ // FIXME: This doesn't generate invalid exception when it should. PR44019.
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
+ }
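Promoting FP_TO_UINT's destination works for the same range reason in reverse: every in-range u8/u16 result fits in the non-negative half of the wider signed type. A sketch of the identity (assuming in-range inputs; out-of-range conversions are undefined either way):

    #include <cstdint>

    uint8_t f32_to_u8(float f) {
      // For defined inputs in [0, 255], fptosi to i32 plus truncation
      // agrees with a direct fptoui.
      return static_cast<uint8_t>(static_cast<int32_t>(f));
    }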
+
+  // Handle address space casts between mixed-sized pointers.
+ setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
+ setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!X86ScalarSSEf64) {
@@ -409,12 +401,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.hasMOVBE())
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
- // These should be promoted to a larger select which is supported.
- setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
@@ -619,6 +611,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
} else // SSE immediates.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
}
+ // Handle constrained floating-point operations of scalar.
+ setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
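Marking the STRICT_* forms Legal asserts that the ordinary scalar instructions already provide the exception and rounding behavior the constrained operations demand. A small host-side sketch of the observable contract (illustrative only; the pragma is honored only by conforming compilers):

    #include <cfenv>
    #pragma STDC FENV_ACCESS ON

    bool inexactRaised(float a, float b) {
      std::feclearexcept(FE_ALL_EXCEPT);
      volatile float c = a + b; // the same machine add STRICT_FADD selects
      (void)c;
      return std::fetestexcept(FE_INEXACT) != 0;
    }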
// We don't support FMA.
setOperationAction(ISD::FMA, MVT::f64, Expand);
@@ -659,6 +665,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LLROUND, MVT::f80, Expand);
setOperationAction(ISD::LRINT, MVT::f80, Expand);
setOperationAction(ISD::LLRINT, MVT::f80, Expand);
+
+ // Handle constrained floating-point operations of scalar.
+ setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
+ // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
+ // as Custom.
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
}
// f128 uses xmm registers, but most operations require libcalls.
@@ -668,22 +685,32 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
- setOperationAction(ISD::FADD, MVT::f128, Custom);
- setOperationAction(ISD::FSUB, MVT::f128, Custom);
- setOperationAction(ISD::FDIV, MVT::f128, Custom);
- setOperationAction(ISD::FMUL, MVT::f128, Custom);
- setOperationAction(ISD::FMA, MVT::f128, Expand);
+ setOperationAction(ISD::FADD, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
+ setOperationAction(ISD::FSUB, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
+ setOperationAction(ISD::FDIV, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
+ setOperationAction(ISD::FMUL, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
+ setOperationAction(ISD::FMA, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
setOperationAction(ISD::FABS, MVT::f128, Custom);
setOperationAction(ISD::FNEG, MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
- setOperationAction(ISD::FSIN, MVT::f128, Expand);
- setOperationAction(ISD::FCOS, MVT::f128, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
- setOperationAction(ISD::FSQRT, MVT::f128, Expand);
-
- setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ setOperationAction(ISD::FSIN, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
+ setOperationAction(ISD::FCOS, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
+ setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
+ // No STRICT_FSINCOS
+ setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
+
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
// We need to custom handle any FP_ROUND with an f128 input, but
// LegalizeDAG uses the result type to know when to run a custom handler.
// So we have to list all legal floating point result types here.
@@ -820,12 +847,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Custom);
+ setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -895,6 +925,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
@@ -933,37 +965,38 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
// Custom legalize these to avoid over promotion or custom promotion.
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
-
- // By marking FP_TO_SINT v8i16 as Custom, will trick type legalization into
- // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
- // split again based on the input type, this will cause an AssertSExt i16 to
- // be emitted instead of an AssertZExt. This will allow packssdw followed by
- // packuswb to be used to truncate to v8i8. This is necessary since packusdw
- // isn't available until sse4.1.
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
+ for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
+ }
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
// We want to legalize this to an f64 load rather than an i64 load on
// 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
@@ -1008,6 +1041,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// With AVX512, expanding (and promoting the shifts) is better.
if (!Subtarget.hasAVX512())
setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
@@ -1029,11 +1068,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
- setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
- setOperationAction(ISD::FCEIL, RoundedTy, Legal);
- setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
- setOperationAction(ISD::FRINT, RoundedTy, Legal);
- setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
+ setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
+ setOperationAction(ISD::FCEIL, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
+ setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
+ setOperationAction(ISD::FRINT, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
+ setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
}
setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
@@ -1072,6 +1116,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// i8 vectors are custom because the source register and source
// source memory operand types are not the same width.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+
+ if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
+      // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
+ // do the pre and post work in the vector domain.
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
+ // We need to mark SINT_TO_FP as Custom even though we want to expand it
+ // so that DAG combine doesn't try to turn it into uint_to_fp.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
+ }
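The scalarized conversion mentioned above reduces, per element, to the classic unsigned-to-float adjustment around a signed cvtsi2ss. A scalar model (illustrative, not the DAG lowering):

    #include <cstdint>

    float u64_to_f32(uint64_t x) {
      if (static_cast<int64_t>(x) >= 0)
        return static_cast<float>(static_cast<int64_t>(x)); // signed convert suffices
      // Halve with a sticky low bit, convert signed, then double the result;
      // the sticky bit keeps round-to-nearest-even correct.
      uint64_t half = (x >> 1) | (x & 1);
      return 2.0f * static_cast<float>(static_cast<int64_t>(half));
    }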
}
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
@@ -1105,25 +1160,45 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
: &X86::VR256RegClass);
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
- setOperationAction(ISD::FFLOOR, VT, Legal);
- setOperationAction(ISD::FCEIL, VT, Legal);
- setOperationAction(ISD::FTRUNC, VT, Legal);
- setOperationAction(ISD::FRINT, VT, Legal);
- setOperationAction(ISD::FNEARBYINT, VT, Legal);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
-
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
+
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
@@ -1169,6 +1244,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
@@ -1180,8 +1257,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasAnyFMA()) {
for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
- MVT::v2f64, MVT::v4f64 })
+ MVT::v2f64, MVT::v4f64 }) {
setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::STRICT_FMA, VT, Legal);
+ }
}
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
@@ -1233,6 +1312,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// The custom lowering for UINT_TO_FP for v8i32 becomes interesting
// when we have a 256bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
// AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
@@ -1299,12 +1379,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
// There is no byte sized k-register load or store without AVX512DQ.
if (!Subtarget.hasDQI()) {
@@ -1331,6 +1417,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Custom);
@@ -1372,21 +1460,37 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
- setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
- setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
-
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Custom);
+ for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
+ setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
+ }
+ setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
+
+ setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
@@ -1420,11 +1524,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
- setOperationAction(ISD::FFLOOR, VT, Legal);
- setOperationAction(ISD::FCEIL, VT, Legal);
- setOperationAction(ISD::FTRUNC, VT, Legal);
- setOperationAction(ISD::FRINT, VT, Legal);
- setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
setOperationAction(ISD::SELECT, VT, Custom);
}
@@ -1459,6 +1568,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
@@ -1470,8 +1581,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasDQI()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}
@@ -1532,13 +1647,25 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
- // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
- setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
+ Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
+ Subtarget.hasVLX() ? Legal : Custom);
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
@@ -1563,12 +1690,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasDQI()) {
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
- setOperationAction(ISD::SINT_TO_FP, VT, Legal);
- setOperationAction(ISD::UINT_TO_FP, VT, Legal);
- setOperationAction(ISD::FP_TO_SINT, VT, Legal);
- setOperationAction(ISD::FP_TO_UINT, VT, Legal);
-
- setOperationAction(ISD::MUL, VT, Legal);
+ setOperationAction(ISD::SINT_TO_FP, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::MUL, VT, Legal);
}
}
@@ -1739,12 +1877,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasDQI()) {
// Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
// v2f32 UINT_TO_FP is already custom under SSE2.
- setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
+ isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
"Unexpected operation action!");
// v2i64 FP_TO_S/UINT(v2f32) custom conversion.
- setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
}
if (Subtarget.hasBWI()) {
@@ -1828,8 +1968,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.is32Bit() &&
(Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
for (ISD::NodeType Op :
- {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
- ISD::FLOG10, ISD::FPOW, ISD::FSIN})
+ {ISD::FCEIL, ISD::STRICT_FCEIL,
+ ISD::FCOS, ISD::STRICT_FCOS,
+ ISD::FEXP, ISD::STRICT_FEXP,
+ ISD::FFLOOR, ISD::STRICT_FFLOOR,
+ ISD::FREM, ISD::STRICT_FREM,
+ ISD::FLOG, ISD::STRICT_FLOG,
+ ISD::FLOG10, ISD::STRICT_FLOG10,
+ ISD::FPOW, ISD::STRICT_FPOW,
+ ISD::FSIN, ISD::STRICT_FSIN})
if (isOperationExpand(Op, MVT::f32))
setOperationAction(Op, MVT::f32, Promote);
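The Promote action here widens the f32 libm operations to their f64 counterparts, since these 32-bit Windows runtimes only export the double-precision entry points. A sketch of the resulting call shape (illustrative):

    #include <cmath>

    float sinf_promoted(float x) {
      return static_cast<float>(std::sin(static_cast<double>(x)));
    }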
@@ -1870,6 +2017,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
+ setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
+ setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
@@ -1901,6 +2050,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setPrefFunctionAlignment(Align(16));
verifyIntrinsicTables();
+
+  // Default to behaving as if -disable-strictnode-mutation were passed, i.e.
+  // strict FP nodes are handled natively rather than mutated to normal nodes.
+ IsStrictFPEnabled = true;
}
// This has so far only been implemented for 64-bit MachO.
@@ -1910,7 +2062,7 @@ bool X86TargetLowering::useLoadStackGuardNode() const {
bool X86TargetLowering::useStackGuardXorFP() const {
// Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
- return Subtarget.getTargetTriple().isOSMSVCRT();
+ return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
}
SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
@@ -1946,9 +2098,13 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
(VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
(VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
return MVT::i8;
+ // Split v64i1 vectors if we don't have v64i8 available.
+ if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+ CC != CallingConv::X86_RegCall)
+ return MVT::v32i1;
// FIXME: Should we just make these types legal and custom split operations?
- if ((VT == MVT::v32i16 || VT == MVT::v64i8) &&
- Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI)
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
+ Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
return MVT::v16i32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
@@ -1966,9 +2122,13 @@ unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
(VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
(VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
return VT.getVectorNumElements();
+ // Split v64i1 vectors if we don't have v64i8 available.
+ if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+ CC != CallingConv::X86_RegCall)
+ return 2;
// FIXME: Should we just make these types legal and custom split operations?
- if ((VT == MVT::v32i16 || VT == MVT::v64i8) &&
- Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI)
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
+ Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
return 1;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
@@ -1988,6 +2148,15 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
return NumIntermediates;
}
+ // Split v64i1 vectors if we don't have v64i8 available.
+ if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+ CC != CallingConv::X86_RegCall) {
+ RegisterVT = MVT::v32i1;
+ IntermediateVT = MVT::v32i1;
+ NumIntermediates = 2;
+ return 2;
+ }
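Taken together, the three hunks above make a v64i1 value travel through the calling convention as two v32i1 halves when 512-bit registers are unavailable. A scalar model of the split (hypothetical names, not patch code):

    #include <cstdint>

    struct SplitMask { uint32_t Lo, Hi; };

    SplitMask splitV64i1(uint64_t Bits) {
      return {static_cast<uint32_t>(Bits), static_cast<uint32_t>(Bits >> 32)};
    }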
+
return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
NumIntermediates, RegisterVT);
}
@@ -2383,6 +2552,10 @@ bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
assert(SrcAS != DestAS && "Expected different address spaces!");
+ const TargetMachine &TM = getTargetMachine();
+ if (TM.getPointerSize(SrcAS) != TM.getPointerSize(DestAS))
+ return false;
+
return SrcAS < 256 && DestAS < 256;
}
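A standalone restatement of the tightened predicate (illustrative): the cast is droppable only when the pointer widths match and neither side lives in a target-reserved address space (256 and up).

    bool isNoopCast(unsigned SrcBits, unsigned DstBits,
                    unsigned SrcAS, unsigned DstAS) {
      return SrcBits == DstBits && SrcAS < 256 && DstAS < 256;
    }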
@@ -2520,18 +2693,16 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(VA.getLocInfo() != CCValAssign::FPExt &&
"Unexpected FP-extend for return value.");
- // If this is x86-64, and we disabled SSE, we can't return FP values,
- // or SSE or MMX vectors.
- if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
- VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
- (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
+ // Report an error if we have attempted to return a value via an XMM
+ // register and SSE was disabled.
+ if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
- } else if (ValVT == MVT::f64 &&
- (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
- // Likewise we can't return F64 values with SSE1 only. gcc does so, but
- // llvm-gcc has never done it right and no one has noticed, so this
- // should be OK for now.
+ } else if (!Subtarget.hasSSE2() &&
+ X86::FR64XRegClass.contains(VA.getLocReg()) &&
+ ValVT == MVT::f64) {
+ // When returning a double via an XMM register, report an error if SSE2 is
+ // not enabled.
errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
}
@@ -2826,7 +2997,6 @@ SDValue X86TargetLowering::LowerCallResult(
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- bool Is64Bit = Subtarget.is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
@@ -2845,15 +3015,22 @@ SDValue X86TargetLowering::LowerCallResult(
RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
}
- // If this is x86-64, and we disabled SSE, we can't return FP values
- if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
- ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
+ // Report an error if there was an attempt to return FP values via XMM
+ // registers.
+ if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
- VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
- } else if (CopyVT == MVT::f64 &&
- (Is64Bit && !Subtarget.hasSSE2())) {
+ if (VA.getLocReg() == X86::XMM1)
+ VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
+ else
+ VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
+ } else if (!Subtarget.hasSSE2() &&
+ X86::FR64XRegClass.contains(VA.getLocReg()) &&
+ CopyVT == MVT::f64) {
errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
- VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
+ if (VA.getLocReg() == X86::XMM1)
+ VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
+ else
+ VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
}
// If we prefer to use the value in xmm registers, copy it out as f80 and
@@ -2895,6 +3072,9 @@ SDValue X86TargetLowering::LowerCallResult(
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
}
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ Val = DAG.getBitcast(VA.getValVT(), Val);
+
InVals.push_back(Val);
}
@@ -2993,9 +3173,7 @@ static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
}
bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
- auto Attr =
- CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
- if (!CI->isTailCall() || Attr.getValueAsString() == "true")
+ if (!CI->isTailCall())
return false;
ImmutableCallSite CS(CI);
@@ -3464,8 +3642,8 @@ SDValue X86TargetLowering::LowerFormalArguments(
FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
- // Conservatively forward AL on x86_64, since it might be used for varargs.
- if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
+ // Forward AL for SysV x86_64 targets, since it is used for varargs.
+ if (Is64Bit && !IsWin64 && !CCInfo.isAllocated(X86::AL)) {
unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
}
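The register being forwarded exists because of the SysV x86-64 varargs contract: the caller leaves the count of vector registers used in AL, and the callee's va_start spill sequence reads it. A sketch of the caller-side obligation (nothing here is patch code):

    extern "C" int printf(const char *, ...);

    void demo() {
      printf("%f\n", 1.0); // the compiler materializes AL = 1 before the call
    }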
@@ -3618,7 +3796,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
CallConv == CallingConv::Tail;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
- auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
@@ -3634,9 +3811,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (CallConv == CallingConv::X86_INTR)
report_fatal_error("X86 interrupts may not be called directly");
- if (Attr.getValueAsString() == "true")
- isTailCall = false;
-
if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) {
// If we are using a GOT, disable tail calls to external symbols with
// default visibility. Tail calling such a symbol requires using a GOT
@@ -3728,7 +3902,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
"the only memory argument");
}
- if (!IsSibcall)
+ if (!IsSibcall && !IsMustTail)
Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
NumBytes - NumBytesToPush, dl);
@@ -4013,7 +4187,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
- if (!IsSibcall && isTailCall) {
+ if (!IsSibcall && isTailCall && !IsMustTail) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytesToPop, dl, true),
DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
@@ -4183,23 +4357,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// Make the stack size a value of the form 16n + 12, e.g., so that a 16-byte
/// alignment requirement is met once the slot-sized return address is pushed.
unsigned
-X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
- SelectionDAG& DAG) const {
- const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
- unsigned StackAlignment = TFI.getStackAlignment();
- uint64_t AlignMask = StackAlignment - 1;
- int64_t Offset = StackSize;
- unsigned SlotSize = RegInfo->getSlotSize();
- if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
- // Number smaller than 12 so just add the difference.
- Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
- } else {
- // Mask out lower bits, add stackalignment once plus the 12 bytes.
- Offset = ((~AlignMask) & Offset) + StackAlignment +
- (StackAlignment-SlotSize);
- }
- return Offset;
+X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
+ SelectionDAG &DAG) const {
+ const Align StackAlignment(Subtarget.getFrameLowering()->getStackAlignment());
+ const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
+ assert(StackSize % SlotSize == 0 &&
+ "StackSize must be a multiple of SlotSize");
+ return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
}
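The rewritten helper rounds StackSize + SlotSize up to the alignment and subtracts the slot back out, which yields the 16n + 12 shape from the doc comment when SlotSize is 4. A standalone re-derivation with a worked value (assumes the same semantics):

    #include <cassert>
    #include <cstdint>

    uint64_t alignedArgStackSize(uint64_t StackSize, uint64_t SlotSize,
                                 uint64_t Align) {
      assert(StackSize % SlotSize == 0 && "StackSize must be a multiple of SlotSize");
      uint64_t Aligned = (StackSize + SlotSize + Align - 1) / Align * Align;
      return Aligned - SlotSize; // e.g. (20, 4, 16) -> 28 == 16*2 - 4
    }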
/// Return true if the given stack call argument is already available in the
@@ -4643,8 +4807,8 @@ bool X86::isCalleePop(CallingConv::ID CallingConv,
}
}
-/// Return true if the condition is an unsigned comparison operation.
-static bool isX86CCUnsigned(unsigned X86CC) {
+/// Return true if the condition is a signed comparison operation.
+static bool isX86CCSigned(unsigned X86CC) {
switch (X86CC) {
default:
llvm_unreachable("Invalid integer condition!");
@@ -4654,12 +4818,12 @@ static bool isX86CCUnsigned(unsigned X86CC) {
case X86::COND_A:
case X86::COND_BE:
case X86::COND_AE:
- return true;
+ return false;
case X86::COND_G:
case X86::COND_GE:
case X86::COND_L:
case X86::COND_LE:
- return false;
+ return true;
}
}
@@ -4700,7 +4864,7 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
// X >= 0 -> X == 0, jump on !sign.
return X86::COND_NS;
}
- if (SetCCOpcode == ISD::SETLT && RHSC->getAPIntValue() == 1) {
+ if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_LE;
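The isOne() rewrite leans on a plain signed-integer identity; a two-line check (illustrative):

    bool lt1(int x) { return x < 1; }  // true for exactly the same inputs ...
    bool le0(int x) { return x <= 0; } // ... as this, for every int x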
@@ -4949,12 +5113,6 @@ bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
(1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
}
-bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
- bool IsSigned) const {
- // f80 UINT_TO_FP is more efficient using Strict code if FCMOV is available.
- return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
-}
-
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
@@ -5334,15 +5492,18 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
static bool canWidenShuffleElements(ArrayRef<int> Mask,
const APInt &Zeroable,
+ bool V2IsZero,
SmallVectorImpl<int> &WidenedMask) {
- SmallVector<int, 32> TargetMask(Mask.begin(), Mask.end());
- for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
- if (TargetMask[i] == SM_SentinelUndef)
- continue;
- if (Zeroable[i])
- TargetMask[i] = SM_SentinelZero;
+ // Create an alternative mask with info about zeroable elements.
+ // Here we do not set undef elements as zeroable.
+ SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
+ if (V2IsZero) {
+ assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
+ for (int i = 0, Size = Mask.size(); i != Size; ++i)
+ if (Mask[i] != SM_SentinelUndef && Zeroable[i])
+ ZeroableMask[i] = SM_SentinelZero;
}
- return canWidenShuffleElements(TargetMask, WidenedMask);
+ return canWidenShuffleElements(ZeroableMask, WidenedMask);
}
static bool canWidenShuffleElements(ArrayRef<int> Mask) {
@@ -5764,11 +5925,29 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// Widen the vector if needed.
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- // Clear the upper bits of the subvector and move it to its insert position.
unsigned ShiftLeft = NumElems - SubVecNumElems;
+ unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
+
+  // Do an optimization for the most frequently used types.
+ if (WideOpVT != MVT::v64i1 || Subtarget.is64Bit()) {
+ APInt Mask0 = APInt::getBitsSet(NumElems, IdxVal, IdxVal + SubVecNumElems);
+ Mask0.flipAllBits();
+ SDValue CMask0 = DAG.getConstant(Mask0, dl, MVT::getIntegerVT(NumElems));
+ SDValue VMask0 = DAG.getNode(ISD::BITCAST, dl, WideOpVT, CMask0);
+ Vec = DAG.getNode(ISD::AND, dl, WideOpVT, Vec, VMask0);
+ SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
+ SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
+ DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
+ Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
+
+ // Reduce to original width if needed.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
+ }
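A scalar model of the new fast path, treating the widened k-register as a 64-bit integer (NumElems fixed at 64, SubLen < 64; names hypothetical): AND clears the destination field, the shift pair places the subvector, OR merges.

    #include <cstdint>

    uint64_t insertBits(uint64_t Vec, uint64_t Sub, unsigned Idx, unsigned SubLen) {
      uint64_t Field = ((1ULL << SubLen) - 1) << Idx;  // bits [Idx, Idx + SubLen)
      uint64_t Cleared = Vec & ~Field;                 // Vec AND flipped Mask0
      uint64_t Placed = (Sub << (64 - SubLen))         // KSHIFTL: drop high garbage
                        >> (64 - SubLen - Idx);        // KSHIFTR: land at Idx
      return Cleared | Placed;                         // final OR
    }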
+
+ // Clear the upper bits of the subvector and move it to its insert position.
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
- unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
@@ -5850,7 +6029,7 @@ static SDValue getExtendInVec(unsigned Opcode, const SDLoc &DL, EVT VT,
"Expected VTs to be the same size!");
unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
In = extractSubVector(In, 0, DAG, DL,
- std::max(128U, VT.getSizeInBits() / Scale));
+ std::max(128U, (unsigned)VT.getSizeInBits() / Scale));
InVT = In.getValueType();
}
@@ -6719,9 +6898,97 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
return true;
}
+/// Compute whether each element of a shuffle is zeroable.
+///
+/// A "zeroable" vector shuffle element is one which can be lowered to zero.
+/// Either it is an undef element in the shuffle mask, the element of the input
+/// referenced is undef, or the element of the input referenced is known to be
+/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
+/// as many lanes with this technique as possible to simplify the remaining
+/// shuffle.
+static void computeZeroableShuffleElements(ArrayRef<int> Mask,
+ SDValue V1, SDValue V2,
+ APInt &KnownUndef, APInt &KnownZero) {
+ int Size = Mask.size();
+ KnownUndef = KnownZero = APInt::getNullValue(Size);
+
+ V1 = peekThroughBitcasts(V1);
+ V2 = peekThroughBitcasts(V2);
+
+ bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ int VectorSizeInBits = V1.getValueSizeInBits();
+ int ScalarSizeInBits = VectorSizeInBits / Size;
+ assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
+
+ for (int i = 0; i < Size; ++i) {
+ int M = Mask[i];
+ // Handle the easy cases.
+ if (M < 0) {
+ KnownUndef.setBit(i);
+ continue;
+ }
+ if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
+ KnownZero.setBit(i);
+ continue;
+ }
+
+ // Determine shuffle input and normalize the mask.
+ SDValue V = M < Size ? V1 : V2;
+ M %= Size;
+
+ // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
+ continue;
+
+  // If the BUILD_VECTOR has fewer elements, then the bitcasted portion of
+ // the (larger) source element must be UNDEF/ZERO.
+ if ((Size % V.getNumOperands()) == 0) {
+ int Scale = Size / V->getNumOperands();
+ SDValue Op = V.getOperand(M / Scale);
+ if (Op.isUndef())
+ KnownUndef.setBit(i);
+ if (X86::isZeroNode(Op))
+ KnownZero.setBit(i);
+ else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
+ APInt Val = Cst->getAPIntValue();
+ Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
+ if (Val == 0)
+ KnownZero.setBit(i);
+ } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
+ APInt Val = Cst->getValueAPF().bitcastToAPInt();
+ Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
+ if (Val == 0)
+ KnownZero.setBit(i);
+ }
+ continue;
+ }
+
+  // If the BUILD_VECTOR has more elements, then all the (smaller) source
+ // elements must be UNDEF or ZERO.
+ if ((V.getNumOperands() % Size) == 0) {
+ int Scale = V->getNumOperands() / Size;
+ bool AllUndef = true;
+ bool AllZero = true;
+ for (int j = 0; j < Scale; ++j) {
+ SDValue Op = V.getOperand((M * Scale) + j);
+ AllUndef &= Op.isUndef();
+ AllZero &= X86::isZeroNode(Op);
+ }
+ if (AllUndef)
+ KnownUndef.setBit(i);
+ if (AllZero)
+ KnownZero.setBit(i);
+ continue;
+ }
+ }
+}
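A toy model of the function's easy path, covering only sentinel-undef mask entries and an all-zeros V2 (the real code above also looks through bitcasts and constant BUILD_VECTORs):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    void zeroable(const std::vector<int> &Mask, bool V2IsZero,
                  uint64_t &KnownUndef, uint64_t &KnownZero) {
      int Size = static_cast<int>(Mask.size());
      KnownUndef = KnownZero = 0;
      for (int i = 0; i < Size; ++i)
        if (Mask[i] < 0)
          KnownUndef |= 1ULL << i;           // SM_SentinelUndef
        else if (Mask[i] >= Size && V2IsZero)
          KnownZero |= 1ULL << i;            // reads a known-zero V2 lane
    }

    int main() {
      uint64_t U = 0, Z = 0;
      zeroable({0, -1, 4, 2}, /*V2IsZero=*/true, U, Z);
      assert(U == 0b0010 && Z == 0b0100);
    }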
+
/// Decode a target shuffle mask and inputs and see if any values are
/// known to be undef or zero from their inputs.
/// Returns true if the target shuffle mask was decoded.
+/// FIXME: Merge this with computeZeroableShuffleElements?
static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
SmallVectorImpl<SDValue> &Ops,
APInt &KnownUndef, APInt &KnownZero) {
@@ -6741,7 +7008,7 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
- assert((VT.getSizeInBits() % Mask.size()) == 0 &&
+ assert((VT.getSizeInBits() % Size) == 0 &&
"Illegal split of shuffle value type");
unsigned EltSizeInBits = VT.getSizeInBits() / Size;
@@ -6810,7 +7077,8 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
// Replace target shuffle mask elements with known undef/zero sentinels.
static void resolveTargetShuffleFromZeroables(SmallVectorImpl<int> &Mask,
const APInt &KnownUndef,
- const APInt &KnownZero) {
+ const APInt &KnownZero,
+ bool ResolveKnownZeros= true) {
unsigned NumElts = Mask.size();
assert(KnownUndef.getBitWidth() == NumElts &&
KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch");
@@ -6818,7 +7086,7 @@ static void resolveTargetShuffleFromZeroables(SmallVectorImpl<int> &Mask,
for (unsigned i = 0; i != NumElts; ++i) {
if (KnownUndef[i])
Mask[i] = SM_SentinelUndef;
- else if (KnownZero[i])
+ else if (ResolveKnownZeros && KnownZero[i])
Mask[i] = SM_SentinelZero;
}
}
@@ -8306,7 +8574,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
// TODO: If multiple splats are generated to load the same constant,
// it may be detrimental to overall size. There needs to be a way to detect
// that condition to know if this is truly a size win.
- bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool OptForSize = DAG.shouldOptForSize();
// Handle broadcasting a single constant scalar from the constant pool
// into a vector.
@@ -8552,7 +8820,7 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
ImmH = DAG.getBitcast(MVT::v32i1, ImmH);
DstVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, ImmL, ImmH);
} else {
- MVT ImmVT = MVT::getIntegerVT(std::max(VT.getSizeInBits(), 8U));
+ MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));
SDValue Imm = DAG.getConstant(Immediate, dl, ImmVT);
MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
DstVec = DAG.getBitcast(VecVT, Imm);
@@ -10130,13 +10398,18 @@ static bool isNoopShuffleMask(ArrayRef<int> Mask) {
return true;
}
-/// Test whether there are elements crossing 128-bit lanes in this
+/// Test whether there are elements crossing LaneSizeInBits lanes in this
/// shuffle mask.
///
/// X86 divides up its shuffles into in-lane and cross-lane shuffle operations
/// and we routinely test for these.
-static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
- int LaneSize = 128 / VT.getScalarSizeInBits();
+static bool isLaneCrossingShuffleMask(unsigned LaneSizeInBits,
+ unsigned ScalarSizeInBits,
+ ArrayRef<int> Mask) {
+ assert(LaneSizeInBits && ScalarSizeInBits &&
+ (LaneSizeInBits % ScalarSizeInBits) == 0 &&
+ "Illegal shuffle lane size");
+ int LaneSize = LaneSizeInBits / ScalarSizeInBits;
int Size = Mask.size();
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
@@ -10144,6 +10417,12 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
return false;
}
+/// Test whether there are elements crossing 128-bit lanes in this
+/// shuffle mask.
+static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
+ return isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), Mask);
+}
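Worked instance of the generalized check (illustrative): for v8f32 with 128-bit lanes, LaneSize is 4, so swapping the vector's two halves crosses lanes while reversing within each lane does not.

    #include <cassert>
    #include <vector>

    bool crossesLanes(unsigned LaneBits, unsigned ScalarBits,
                      const std::vector<int> &Mask) {
      int LaneSize = static_cast<int>(LaneBits / ScalarBits);
      int Size = static_cast<int>(Mask.size());
      for (int i = 0; i < Size; ++i)
        if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
          return true;
      return false;
    }

    int main() {
      assert(crossesLanes(128, 32, {4, 5, 6, 7, 0, 1, 2, 3}));  // halves swapped
      assert(!crossesLanes(128, 32, {3, 2, 1, 0, 7, 6, 5, 4})); // in-lane reverse
    }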
+
/// Test whether a shuffle mask is equivalent within each sub-lane.
///
/// This checks a shuffle mask to see if it is performing the same
@@ -10424,84 +10703,6 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}
-/// Compute whether each element of a shuffle is zeroable.
-///
-/// A "zeroable" vector shuffle element is one which can be lowered to zero.
-/// Either it is an undef element in the shuffle mask, the element of the input
-/// referenced is undef, or the element of the input referenced is known to be
-/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
-/// as many lanes with this technique as possible to simplify the remaining
-/// shuffle.
-static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
- SDValue V1, SDValue V2) {
- APInt Zeroable(Mask.size(), 0);
- V1 = peekThroughBitcasts(V1);
- V2 = peekThroughBitcasts(V2);
-
- bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
- bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
-
- int VectorSizeInBits = V1.getValueSizeInBits();
- int ScalarSizeInBits = VectorSizeInBits / Mask.size();
- assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
-
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- int M = Mask[i];
- // Handle the easy cases.
- if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
- Zeroable.setBit(i);
- continue;
- }
-
- // Determine shuffle input and normalize the mask.
- SDValue V = M < Size ? V1 : V2;
- M %= Size;
-
- // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
- if (V.getOpcode() != ISD::BUILD_VECTOR)
- continue;
-
- // If the BUILD_VECTOR has fewer elements then the bitcasted portion of
- // the (larger) source element must be UNDEF/ZERO.
- if ((Size % V.getNumOperands()) == 0) {
- int Scale = Size / V->getNumOperands();
- SDValue Op = V.getOperand(M / Scale);
- if (Op.isUndef() || X86::isZeroNode(Op))
- Zeroable.setBit(i);
- else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
- APInt Val = Cst->getAPIntValue();
- Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
- Val = Val.getLoBits(ScalarSizeInBits);
- if (Val == 0)
- Zeroable.setBit(i);
- } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
- APInt Val = Cst->getValueAPF().bitcastToAPInt();
- Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
- Val = Val.getLoBits(ScalarSizeInBits);
- if (Val == 0)
- Zeroable.setBit(i);
- }
- continue;
- }
-
- // If the BUILD_VECTOR has more elements then all the (smaller) source
- // elements must be UNDEF or ZERO.
- if ((V.getNumOperands() % Size) == 0) {
- int Scale = V->getNumOperands() / Size;
- bool AllZeroable = true;
- for (int j = 0; j < Scale; ++j) {
- SDValue Op = V.getOperand((M * Scale) + j);
- AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
- }
- if (AllZeroable)
- Zeroable.setBit(i);
- continue;
- }
- }
-
- return Zeroable;
-}
-
// The Shuffle result is as follows:
// 0*a[0]0*a[1]...0*a[n], n >= 0, where the a[] elements are in ascending order.
// Each Zeroable element corresponds to a particular Mask element.
@@ -10616,11 +10817,11 @@ static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT,
return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask);
}
-static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
- unsigned &UnpackOpcode, bool IsUnary,
- ArrayRef<int> TargetMask,
- const SDLoc &DL, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
+ unsigned &UnpackOpcode, bool IsUnary,
+ ArrayRef<int> TargetMask, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
int NumElts = VT.getVectorNumElements();
bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true;
@@ -10728,8 +10929,8 @@ static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT,
return SDValue();
}
-static bool matchVectorShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
- int Delta) {
+static bool matchShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
+ int Delta) {
int Size = (int)Mask.size();
int Split = Size / Delta;
int TruncatedVectorStart = SwappedOps ? Size : 0;
@@ -10814,8 +11015,8 @@ static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, ArrayRef<int> Mask,
// The first half/quarter of the mask should refer to every second/fourth
// element of the vector truncated and bitcasted.
- if (!matchVectorShuffleAsVPMOV(Mask, SwappedOps, 2) &&
- !matchVectorShuffleAsVPMOV(Mask, SwappedOps, 4))
+ if (!matchShuffleAsVPMOV(Mask, SwappedOps, 2) &&
+ !matchShuffleAsVPMOV(Mask, SwappedOps, 4))
return SDValue();
return DAG.getNode(X86ISD::VTRUNC, DL, VT, Src);
@@ -10823,11 +11024,10 @@ static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, ArrayRef<int> Mask,
// X86 has dedicated pack instructions that can handle specific truncation
// operations: PACKSS and PACKUS.
-static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1,
- SDValue &V2, unsigned &PackOpcode,
- ArrayRef<int> TargetMask,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
+ unsigned &PackOpcode, ArrayRef<int> TargetMask,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
unsigned NumElts = VT.getVectorNumElements();
unsigned BitSize = VT.getScalarSizeInBits();
MVT PackSVT = MVT::getIntegerVT(BitSize * 2);
@@ -10880,8 +11080,8 @@ static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
const X86Subtarget &Subtarget) {
MVT PackVT;
unsigned PackOpcode;
- if (matchVectorShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG,
- Subtarget))
+ if (matchShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG,
+ Subtarget))
return DAG.getNode(PackOpcode, DL, VT, DAG.getBitcast(PackVT, V1),
DAG.getBitcast(PackVT, V2));
@@ -10972,10 +11172,10 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG);
-static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
- MutableArrayRef<int> Mask,
- const APInt &Zeroable, bool &ForceV1Zero,
- bool &ForceV2Zero, uint64_t &BlendMask) {
+static bool matchShuffleAsBlend(SDValue V1, SDValue V2,
+ MutableArrayRef<int> Mask,
+ const APInt &Zeroable, bool &ForceV1Zero,
+ bool &ForceV2Zero, uint64_t &BlendMask) {
bool V1IsZeroOrUndef =
V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZeroOrUndef =
@@ -11038,8 +11238,8 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
SmallVector<int, 64> Mask(Original.begin(), Original.end());
- if (!matchVectorShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero,
- BlendMask))
+ if (!matchShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero,
+ BlendMask))
return SDValue();
// Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
@@ -11161,7 +11361,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
case MVT::v32i16:
case MVT::v64i8: {
// Attempt to lower to a bitmask if we can. Only if not optimizing for size.
- bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool OptForSize = DAG.shouldOptForSize();
if (!OptForSize) {
if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
Subtarget, DAG))
@@ -11609,9 +11809,11 @@ static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1,
}
/// Try to lower a vector shuffle as a byte shift sequence.
-static SDValue lowerVectorShuffleAsByteShiftMask(
- const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
+static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
assert(VT.is128BitVector() && "Only 128-bit vectors supported");
@@ -14056,8 +14258,8 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return BitBlend;
// Try to use byte shift instructions to mask.
- if (SDValue V = lowerVectorShuffleAsByteShiftMask(
- DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return V;
// Try to lower by permuting the inputs into an unpack instruction.
@@ -14318,8 +14520,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
// Try to use byte shift instructions to mask.
- if (SDValue V = lowerVectorShuffleAsByteShiftMask(
- DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v16i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return V;
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
@@ -14686,6 +14888,36 @@ static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
DAG);
}
+// Lower as SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)).
+// TODO: Extend to support v8f32 (+ 512-bit shuffles).
+static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(VT == MVT::v4f64 && "Only for v4f64 shuffles");
+
+ int LHSMask[4] = {-1, -1, -1, -1};
+ int RHSMask[4] = {-1, -1, -1, -1};
+ unsigned SHUFPMask = 0;
+
+ // As SHUFPD uses a single LHS/RHS element per lane, we can always
+ // perform the shuffle once the lanes have been shuffled in place.
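+ // Illustrative example: for Mask = {2,1,3,3} the loop below builds
+ // LHSMask = {2,-1,-1,3}, RHSMask = {-1,1,-1,3} and SHUFPMask = 0b1110,
+ // so the final SHUFPD takes the low element of LHS in lane 0 and the
+ // high element of the selected source everywhere else.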
+ for (int i = 0; i != 4; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ int LaneBase = i & ~1;
+ auto &LaneMask = (i & 1) ? RHSMask : LHSMask;
+ LaneMask[LaneBase + (M & 1)] = M;
+ SHUFPMask |= (M & 1) << i;
+ }
+
+ SDValue LHS = DAG.getVectorShuffle(VT, DL, V1, V2, LHSMask);
+ SDValue RHS = DAG.getVectorShuffle(VT, DL, V1, V2, RHSMask);
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, LHS, RHS,
+ DAG.getTargetConstant(SHUFPMask, DL, MVT::i8));
+}
+
/// Lower a vector shuffle crossing multiple 128-bit lanes as
/// a lane permutation followed by a per-lane permutation.
///
@@ -14764,13 +14996,22 @@ static SDValue lowerShuffleAsLanePermuteAndShuffle(
int Size = Mask.size();
int LaneSize = Size / 2;
+ // Fold to SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)).
+ // Only do this if the elements aren't all from the lower lane,
+ // otherwise we're (probably) better off doing a split.
+ if (VT == MVT::v4f64 &&
+ !all_of(Mask, [LaneSize](int M) { return M < LaneSize; }))
+ if (SDValue V =
+ lowerShuffleAsLanePermuteAndSHUFP(DL, VT, V1, V2, Mask, DAG))
+ return V;
+
// If there are only inputs from one 128-bit lane, splitting will in fact be
// less expensive. The flags track whether the given lane contains an element
// that crosses to another lane.
if (!Subtarget.hasAVX2()) {
bool LaneCrossing[2] = {false, false};
for (int i = 0; i < Size; ++i)
- if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
+ if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
if (!LaneCrossing[0] || !LaneCrossing[1])
return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
@@ -14778,7 +15019,7 @@ static SDValue lowerShuffleAsLanePermuteAndShuffle(
bool LaneUsed[2] = {false, false};
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0)
- LaneUsed[(Mask[i] / LaneSize)] = true;
+ LaneUsed[(Mask[i] % Size) / LaneSize] = true;
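+ // Note: indices >= Size refer to V2 and must be normalized with "% Size"
+ // before computing the lane; the previous "Mask[i] / LaneSize" form could
+ // index past the two-entry LaneUsed array for V2 elements.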
if (!LaneUsed[0] || !LaneUsed[1])
return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
}
@@ -14817,8 +15058,10 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
if (Subtarget.hasAVX2() && V2.isUndef())
return SDValue();
+ bool V2IsZero = !V2.isUndef() && ISD::isBuildVectorAllZeros(V2.getNode());
+
SmallVector<int, 4> WidenedMask;
- if (!canWidenShuffleElements(Mask, Zeroable, WidenedMask))
+ if (!canWidenShuffleElements(Mask, Zeroable, V2IsZero, WidenedMask))
return SDValue();
bool IsLowZero = (Zeroable & 0x3) == 0x3;
@@ -15637,6 +15880,18 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return Op;
+ // If we have lane crossing shuffles AND they don't all come from the lower
+ // lane elements, lower to SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)).
+ // TODO: Handle BUILD_VECTOR sources, which getVectorShuffle currently
+ // canonicalizes to a blend of splats that isn't necessary for this combine.
+ if (is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask) &&
+ !all_of(Mask, [](int M) { return M < 2 || (4 <= M && M < 6); }) &&
+ (V1.getOpcode() != ISD::BUILD_VECTOR) &&
+ (V2.getOpcode() != ISD::BUILD_VECTOR))
+ if (SDValue Op = lowerShuffleAsLanePermuteAndSHUFP(DL, MVT::v4f64, V1, V2,
+ Mask, DAG))
+ return Op;
+
// If we have one input in place, then we can permute the other input and
// blend the result.
if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
@@ -16950,6 +17205,10 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
ExtVT = Subtarget.canExtendTo512BW() ? MVT::v32i16 : MVT::v32i8;
break;
case MVT::v64i1:
+ // Fall back to scalarization. FIXME: We can do better if the shuffle
+ // can be partitioned cleanly.
+ if (!Subtarget.useBWIRegs())
+ return SDValue();
ExtVT = MVT::v64i8;
break;
}
@@ -17039,8 +17298,8 @@ static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
/// above in helper routines. The canonicalization attempts to widen shuffles
/// to involve fewer lanes of wider elements, consolidate symmetric patterns
/// s.t. only one of the two inputs needs to be tested, etc.
-static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> OrigMask = SVOp->getMask();
SDValue V1 = Op.getOperand(0);
@@ -17086,29 +17345,22 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
// We actually see shuffles that are entirely re-arrangements of a set of
// zero inputs. This mostly happens while decomposing complex shuffles into
// simple ones. Directly lower these as a buildvector of zeros.
- APInt Zeroable = computeZeroableShuffleElements(OrigMask, V1, V2);
+ APInt KnownUndef, KnownZero;
+ computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero);
+
+ APInt Zeroable = KnownUndef | KnownZero;
if (Zeroable.isAllOnesValue())
return getZeroVector(VT, Subtarget, DAG, DL);
bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode());
- // Create an alternative mask with info about zeroable elements.
- // Here we do not set undef elements as zeroable.
- SmallVector<int, 64> ZeroableMask(OrigMask.begin(), OrigMask.end());
- if (V2IsZero) {
- assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
- for (int i = 0; i != NumElements; ++i)
- if (OrigMask[i] != SM_SentinelUndef && Zeroable[i])
- ZeroableMask[i] = SM_SentinelZero;
- }
-
// Try to collapse shuffles into using a vector type with fewer elements but
// wider element types. We cap this to not form integers or floating point
// elements wider than 64 bits, but it might be interesting to form i128
// integers to handle flipping the low and high halves of AVX 256-bit vectors.
SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
- canWidenShuffleElements(ZeroableMask, WidenedMask)) {
+ canWidenShuffleElements(OrigMask, Zeroable, V2IsZero, WidenedMask)) {
// Shuffle mask widening should not interfere with a broadcast opportunity
// by obfuscating the operands with bitcasts.
// TODO: Avoid lowering directly from this top-level function: make this
@@ -18307,7 +18559,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
"Unexpected funnel shift type!");
// Expand slow SHLD/SHRD cases if we are not optimizing for size.
- bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool OptForSize = DAG.shouldOptForSize();
if (!OptForSize && Subtarget.isSHLDSlow())
return SDValue();
@@ -18328,8 +18580,13 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert((Op.getOpcode() == ISD::SINT_TO_FP ||
- Op.getOpcode() == ISD::UINT_TO_FP) && "Unexpected opcode!");
- SDValue Src = Op.getOperand(0);
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
+ Op.getOpcode() == ISD::UINT_TO_FP) &&
+ "Unexpected opcode!");
+ bool IsStrict = Op->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
+ SDValue Src = Op.getOperand(OpNo);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
@@ -18346,7 +18603,17 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
+ if (IsStrict) {
+ SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
+ {Op.getOperand(0), InVec});
+ SDValue Chain = CvtVec.getValue(1);
+ SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getMergeValues({Value, Chain}, dl);
+ }
+
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec);
+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
DAG.getIntPtrConstant(0, dl));
}
@@ -18415,44 +18682,157 @@ static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, DL));
}
+static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDLoc DL(Op);
+ bool IsStrict = Op->isStrictFPOpcode();
+ MVT VT = Op->getSimpleValueType(0);
+ SDValue Src = Op->getOperand(IsStrict ? 1 : 0);
+
+ if (Subtarget.hasDQI()) {
+ assert(!Subtarget.hasVLX() && "Unexpected features");
+
+ assert((Src.getSimpleValueType() == MVT::v2i64 ||
+ Src.getSimpleValueType() == MVT::v4i64) &&
+ "Unsupported custom type");
+
+ // With AVX512DQ but not VLX, we need to widen to get a 512-bit result type.
+ assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) &&
+ "Unexpected VT!");
+ MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
+
+ // Need to concat with zero vector for strict fp to avoid spurious
+ // exceptions.
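+ // (Zero padding, unlike undef, converts exactly and cannot raise an FP
+ // exception on garbage upper elements under constrained FP.)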
+ SDValue Tmp = IsStrict ? DAG.getConstant(0, DL, MVT::v8i64)
+ : DAG.getUNDEF(MVT::v8i64);
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i64, Tmp, Src,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(Op.getOpcode(), DL, {WideVT, MVT::Other},
+ {Op->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(Op.getOpcode(), DL, WideVT, Src);
+ }
+
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, DL);
+ return Res;
+ }
+
+ bool IsSigned = Op->getOpcode() == ISD::SINT_TO_FP ||
+ Op->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ if (VT != MVT::v4f32 || IsSigned)
+ return SDValue();
+
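+ // Inputs with the sign bit set are >= 2^63 and out of signed-i64 range, so
+ // halve them with the low bit preserved (round-to-odd: (Src >> 1)|(Src & 1)),
+ // convert with a scalar signed conversion, then double the result with the
+ // FADD below.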
+ SDValue Zero = DAG.getConstant(0, DL, MVT::v4i64);
+ SDValue One = DAG.getConstant(1, DL, MVT::v4i64);
+ SDValue Sign = DAG.getNode(ISD::OR, DL, MVT::v4i64,
+ DAG.getNode(ISD::SRL, DL, MVT::v4i64, Src, One),
+ DAG.getNode(ISD::AND, DL, MVT::v4i64, Src, One));
+ SDValue IsNeg = DAG.getSetCC(DL, MVT::v4i64, Src, Zero, ISD::SETLT);
+ SDValue SignSrc = DAG.getSelect(DL, MVT::v4i64, IsNeg, Sign, Src);
+ SmallVector<SDValue, 4> SignCvts(4);
+ SmallVector<SDValue, 4> Chains(4);
+ for (int i = 0; i != 4; ++i) {
+ SDValue Src = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, SignSrc,
+ DAG.getIntPtrConstant(i, DL));
+ if (IsStrict) {
+ SignCvts[i] =
+ DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {MVT::f32, MVT::Other},
+ {Op.getOperand(0), Src});
+ Chains[i] = SignCvts[i].getValue(1);
+ } else {
+ SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, DL, MVT::f32, Src);
+ }
+ }
+ SDValue SignCvt = DAG.getBuildVector(VT, DL, SignCvts);
+
+ SDValue Slow, Chain;
+ if (IsStrict) {
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ Slow = DAG.getNode(ISD::STRICT_FADD, DL, {MVT::v4f32, MVT::Other},
+ {Chain, SignCvt, SignCvt});
+ Chain = Slow.getValue(1);
+ } else {
+ Slow = DAG.getNode(ISD::FADD, DL, MVT::v4f32, SignCvt, SignCvt);
+ }
+
+ IsNeg = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i32, IsNeg);
+ SDValue Cvt = DAG.getSelect(DL, MVT::v4f32, IsNeg, Slow, SignCvt);
+
+ if (IsStrict)
+ return DAG.getMergeValues({Cvt, Chain}, DL);
+
+ return Cvt;
+}
+
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- SDValue Src = Op.getOperand(0);
+ bool IsStrict = Op->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
+ SDValue Src = Op.getOperand(OpNo);
+ SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- if (VT == MVT::f128)
- return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
-
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
+ // Note: Since v2f64 is a legal type, we don't need to zero extend the
+ // source for strict FP.
+ if (IsStrict)
+ return DAG.getNode(
+ X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},
+ {Chain, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
+ DAG.getUNDEF(SrcVT))});
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT)));
}
+ if (SrcVT == MVT::v2i64 || SrcVT == MVT::v4i64)
+ return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget);
+
return SDValue();
}
assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
+ bool UseSSEReg = isScalarFPTypeInSSEReg(VT);
+
// These are really Legal; return the operand so the caller accepts it as
// Legal.
- if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(VT))
+ if (SrcVT == MVT::i32 && UseSSEReg)
return Op;
- if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && Subtarget.is64Bit())
+ if (SrcVT == MVT::i64 && UseSSEReg && Subtarget.is64Bit())
return Op;
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
- SDValue ValueToStore = Op.getOperand(0);
- if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) &&
- !Subtarget.is64Bit())
+ // SSE doesn't have an i16 conversion so we need to promote.
+ if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+ {Chain, Ext});
+
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
+ }
+
+ if (VT == MVT::f128)
+ return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
+
+ SDValue ValueToStore = Src;
+ if (SrcVT == MVT::i64 && UseSSEReg && !Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
@@ -18463,13 +18843,18 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- SDValue Chain = DAG.getStore(
- DAG.getEntryNode(), dl, ValueToStore, StackSlot,
+ Chain = DAG.getStore(
+ Chain, dl, ValueToStore, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
- return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+ std::pair<SDValue, SDValue> Tmp = BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+
+ if (IsStrict)
+ return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
+
+ return Tmp.first;
}
-SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
+std::pair<SDValue, SDValue>
+X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue StackSlot,
SelectionDAG &DAG) const {
// Build the FILD
@@ -18498,9 +18883,9 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue Result =
DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, DL,
Tys, FILDOps, SrcVT, LoadMMO);
+ Chain = Result.getValue(1);
if (useSSE) {
- Chain = Result.getValue(1);
SDValue InFlag = Result.getValue(2);
// FIXME: Currently the FST is glued to the FILD_FLAG. This
@@ -18522,9 +18907,10 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
Result = DAG.getLoad(
Op.getValueType(), DL, Chain, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
+ Chain = Result.getValue(1);
}
- return Result;
+ return { Result, Chain };
}
/// Horizontal vector math instructions may be slower than normal math with
@@ -18532,7 +18918,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
/// implementation, and likely shuffle complexity of the alternate sequence.
static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool IsOptimizingSize = DAG.shouldOptForSize();
bool HasFastHOps = Subtarget.hasFastHorizontalOps();
return !IsSingleSource || IsOptimizingSize || HasFastHOps;
}
@@ -18553,6 +18939,8 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
#endif
*/
+ bool IsStrict = Op->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
SDLoc dl(Op);
LLVMContext *Context = DAG.getContext();
@@ -18573,8 +18961,8 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16);
// Load the 64-bit value into an XMM register.
- SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
- Op.getOperand(0));
+ SDValue XR1 =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(OpNo));
SDValue CLod0 =
DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
@@ -18587,51 +18975,81 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
/* Alignment = */ 16);
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
+ SDValue Sub;
+ SDValue Chain;
// TODO: Are there any fast-math-flags to propagate here?
- SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+ if (IsStrict) {
+ Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
+ {Op.getOperand(0), XR2F, CLod1});
+ Chain = Sub.getValue(1);
+ } else
+ Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
- if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
+ if (!IsStrict && Subtarget.hasSSE3() &&
+ shouldUseHorizontalOp(true, DAG, Subtarget)) {
+ // FIXME: Do we need a STRICT version of FHADD?
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
- Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
+ if (IsStrict) {
+ Result = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v2f64, MVT::Other},
+ {Chain, Shuffle, Sub});
+ Chain = Result.getValue(1);
+ } else
+ Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
+ DAG.getIntPtrConstant(0, dl));
+ if (IsStrict)
+ return DAG.getMergeValues({Result, Chain}, dl);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
- DAG.getIntPtrConstant(0, dl));
+ return Result;
}
/// 32-bit unsigned integer to float expansion.
static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
SDLoc dl(Op);
// FP constant to bias correct the final result.
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
MVT::f64);
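+ // This is the 2^52 mantissa trick: 0x4330000000000000 is 2^52 as a double,
+ // so OR-ing the 32-bit input into its low mantissa bits yields the double
+ // 2^52 + x, and the FSUB of Bias below recovers (double)x exactly.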
// Load the 32-bit value into an XMM register.
- SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
- Op.getOperand(0));
+ SDValue Load =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(OpNo));
// Zero out the upper parts of the register.
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
- Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- DAG.getBitcast(MVT::v2f64, Load),
- DAG.getIntPtrConstant(0, dl));
-
// Or the load with the bias.
SDValue Or = DAG.getNode(
ISD::OR, dl, MVT::v2i64,
- DAG.getBitcast(MVT::v2i64,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)),
+ DAG.getBitcast(MVT::v2i64, Load),
DAG.getBitcast(MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias)));
Or =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
+ if (Op.getNode()->isStrictFPOpcode()) {
+ // Subtract the bias.
+ // TODO: Are there any fast-math-flags to propagate here?
+ SDValue Chain = Op.getOperand(0);
+ SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
+ {Chain, Or, Bias});
+
+ if (Op.getValueType() == Sub.getValueType())
+ return Sub;
+
+ // Handle final rounding.
+ std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
+ Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
+
+ return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
+ }
+
// Subtract the bias.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
@@ -18646,38 +19064,123 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
if (Op.getSimpleValueType() != MVT::v2f64)
return SDValue();
- SDValue N0 = Op.getOperand(0);
+ bool IsStrict = Op->isStrictFPOpcode();
+
+ SDValue N0 = Op.getOperand(IsStrict ? 1 : 0);
assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");
- // Legalize to v4i32 type.
- N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
- DAG.getUNDEF(MVT::v2i32));
+ if (Subtarget.hasAVX512()) {
+ if (!Subtarget.hasVLX()) {
+ // Let generic type legalization widen this.
+ if (!IsStrict)
+ return SDValue();
+ // Otherwise pad the integer input with 0s and widen the operation.
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
+ DAG.getConstant(0, DL, MVT::v2i32));
+ SDValue Res = DAG.getNode(Op->getOpcode(), DL, {MVT::v4f64, MVT::Other},
+ {Op.getOperand(0), N0});
+ SDValue Chain = Res.getValue(1);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2f64, Res,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getMergeValues({Res, Chain}, DL);
+ }
- if (Subtarget.hasAVX512())
+ // Legalize to v4i32 type.
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
+ DAG.getUNDEF(MVT::v2i32));
+ if (IsStrict)
+ return DAG.getNode(X86ISD::STRICT_CVTUI2P, DL, {MVT::v2f64, MVT::Other},
+ {Op.getOperand(0), N0});
return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0);
+ }
- // Same implementation as VectorLegalizer::ExpandUINT_TO_FLOAT,
- // but using v2i32 to v2f64 with X86ISD::CVTSI2P.
- SDValue HalfWord = DAG.getConstant(16, DL, MVT::v4i32);
- SDValue HalfWordMask = DAG.getConstant(0x0000FFFF, DL, MVT::v4i32);
-
- // Two to the power of half-word-size.
- SDValue TWOHW = DAG.getConstantFP((double)(1 << 16), DL, MVT::v2f64);
-
- // Clear upper part of LO, lower HI.
- SDValue HI = DAG.getNode(ISD::SRL, DL, MVT::v4i32, N0, HalfWord);
- SDValue LO = DAG.getNode(ISD::AND, DL, MVT::v4i32, N0, HalfWordMask);
-
- SDValue fHI = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, HI);
- fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW);
- SDValue fLO = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, LO);
+ // Zero extend to 2i64, OR with the floating point representation of 2^52.
+ // This gives us the floating point equivalent of 2^52 + the i32 integer
+ // since double has 52 bits of mantissa. Then subtract 2^52 in floating
+ // point leaving just our i32 integers in double format.
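+ // Scalar sketch of the trick (illustrative only, C++20 std::bit_cast):
+ //   uint64_t bits = 0x4330000000000000ULL | (uint64_t)x; // double 2^52 + x
+ //   double d = std::bit_cast<double>(bits) - 0x1.0p52;   // == (double)x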
+ SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i64, N0);
+ SDValue VBias =
+ DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), DL, MVT::v2f64);
+ SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v2i64, ZExtIn,
+ DAG.getBitcast(MVT::v2i64, VBias));
+ Or = DAG.getBitcast(MVT::v2f64, Or);
- // Add the two halves.
- return DAG.getNode(ISD::FADD, DL, MVT::v2f64, fHI, fLO);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v2f64, MVT::Other},
+ {Op.getOperand(0), Or, VBias});
+ return DAG.getNode(ISD::FSUB, DL, MVT::v2f64, Or, VBias);
}
static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ SDLoc DL(Op);
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue V = Op->getOperand(IsStrict ? 1 : 0);
+ MVT VecIntVT = V.getSimpleValueType();
+ assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
+ "Unsupported custom type");
+
+ if (Subtarget.hasAVX512()) {
+ // With AVX512 but not VLX, we need to widen to get a 512-bit result type.
+ assert(!Subtarget.hasVLX() && "Unexpected features");
+ MVT VT = Op->getSimpleValueType(0);
+
+ // v8i32->v8f64 is legal with AVX512 so just return it.
+ if (VT == MVT::v8f64)
+ return Op;
+
+ assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) &&
+ "Unexpected VT!");
+ MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
+ MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
+ // Need to concat with zero vector for strict fp to avoid spurious
+ // exceptions.
+ SDValue Tmp =
+ IsStrict ? DAG.getConstant(0, DL, WideIntVT) : DAG.getUNDEF(WideIntVT);
+ V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideIntVT, Tmp, V,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {WideVT, MVT::Other},
+ {Op->getOperand(0), V});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(ISD::UINT_TO_FP, DL, WideVT, V);
+ }
+
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, DL);
+ return Res;
+ }
+
+ if (Subtarget.hasAVX() && VecIntVT == MVT::v4i32 &&
+ Op->getSimpleValueType(0) == MVT::v4f64) {
+ SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i64, V);
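+ // Same 2^52 mantissa trick as the v2i32 path above, with the f64 bias
+ // broadcast-loaded from the constant pool rather than materialized inline.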
+ Constant *Bias = ConstantFP::get(
+ *DAG.getContext(),
+ APFloat(APFloat::IEEEdouble(), APInt(64, 0x4330000000000000ULL)));
+ auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue CPIdx = DAG.getConstantPool(Bias, PtrVT, /*Alignment*/ 8);
+ SDVTList Tys = DAG.getVTList(MVT::v4f64, MVT::Other);
+ SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
+ SDValue VBias = DAG.getMemIntrinsicNode(
+ X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ /*Alignment*/ 8, MachineMemOperand::MOLoad);
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v4i64, ZExtIn,
+ DAG.getBitcast(MVT::v4i64, VBias));
+ Or = DAG.getBitcast(MVT::v4f64, Or);
+
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v4f64, MVT::Other},
+ {Op.getOperand(0), Or, VBias});
+ return DAG.getNode(ISD::FSUB, DL, MVT::v4f64, Or, VBias);
+ }
+
// The algorithm is the following:
// #ifdef __SSE4_1__
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
@@ -18690,18 +19193,6 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
// return (float4) lo + fhi;
- // We shouldn't use it when unsafe-fp-math is enabled though: we might later
- // reassociate the two FADDs, and if we do that, the algorithm fails
- // spectacularly (PR24512).
- // FIXME: If we ever have some kind of Machine FMF, this should be marked
- // as non-fast and always be enabled. Why isn't SDAG FMF enough? Because
- // there's also the MachineCombiner reassociations happening on Machine IR.
- if (DAG.getTarget().Options.UnsafeFPMath)
- return SDValue();
-
- SDLoc DL(Op);
- SDValue V = Op->getOperand(0);
- MVT VecIntVT = V.getSimpleValueType();
bool Is128 = VecIntVT == MVT::v4i32;
MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
// If we convert to something else than the supported type, e.g., to v4f64,
@@ -18709,9 +19200,6 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
if (VecFloatVT != Op->getSimpleValueType(0))
return SDValue();
- assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
- "Unsupported custom type");
-
// In the #idef/#else code, we have in common:
// - The vector of constants:
// -- 0x4b000000
@@ -18756,23 +19244,35 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh);
}
- // Create the vector constant for -(0x1.0p39f + 0x1.0p23f).
- SDValue VecCstFAdd = DAG.getConstantFP(
- APFloat(APFloat::IEEEsingle(), APInt(32, 0xD3000080)), DL, VecFloatVT);
+ // Create the vector constant for (0x1.0p39f + 0x1.0p23f).
+ SDValue VecCstFSub = DAG.getConstantFP(
+ APFloat(APFloat::IEEEsingle(), APInt(32, 0x53000080)), DL, VecFloatVT);
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
+ // NOTE: By using fsub of a positive constant instead of fadd of a negative
+ // constant, we avoid reassociation in MachineCombiner when unsafe-fp-math is
+ // enabled. See PR24512.
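+ // (0x53000080 encodes exactly 0x1.0p39f + 0x1.0p23f: the exponent field
+ // 0xA6 gives 2^39 and the single mantissa bit 0x80 contributes
+ // 2^39 * 2^-16 = 2^23.)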
SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
// TODO: Are there any fast-math-flags to propagate here?
- SDValue FHigh =
- DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
- // return (float4) lo + fhi;
+ // (float4) lo;
SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low);
+ // return (float4) lo + fhi;
+ if (IsStrict) {
+ SDValue FHigh = DAG.getNode(ISD::STRICT_FSUB, DL, {VecFloatVT, MVT::Other},
+ {Op.getOperand(0), HighBitcast, VecCstFSub});
+ return DAG.getNode(ISD::STRICT_FADD, DL, {VecFloatVT, MVT::Other},
+ {FHigh.getValue(1), LowBitcast, FHigh});
+ }
+
+ SDValue FHigh =
+ DAG.getNode(ISD::FSUB, DL, VecFloatVT, HighBitcast, VecCstFSub);
return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
}
static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- SDValue N0 = Op.getOperand(0);
+ unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
+ SDValue N0 = Op.getOperand(OpNo);
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
@@ -18783,18 +19283,23 @@ static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl);
case MVT::v4i32:
case MVT::v8i32:
- assert(!Subtarget.hasAVX512());
return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget);
+ case MVT::v2i64:
+ case MVT::v4i64:
+ return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget);
}
}
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- SDValue N0 = Op.getOperand(0);
+ bool IsStrict = Op->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
+ SDValue Src = Op.getOperand(OpNo);
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
- MVT SrcVT = N0.getSimpleValueType();
- MVT DstVT = Op.getSimpleValueType();
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT DstVT = Op->getSimpleValueType(0);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
if (DstVT == MVT::f128)
return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
@@ -18814,8 +19319,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Promote i32 to i64 and use a signed conversion on 64-bit targets.
if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N0);
- return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, N0);
+ Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
+ {Chain, Src});
+ return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
}
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
@@ -18823,7 +19331,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
- if (SrcVT == MVT::i32 && X86ScalarSSEf64)
+ if (SrcVT == MVT::i32 && X86ScalarSSEf64 && DstVT != MVT::f80)
return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
return SDValue();
@@ -18832,23 +19340,28 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
if (SrcVT == MVT::i32) {
SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
- SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot, MachinePointerInfo());
+ SDValue Store1 =
+ DAG.getStore(Chain, dl, Src, StackSlot, MachinePointerInfo());
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
OffsetSlot, MachinePointerInfo());
- SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
- return Fild;
+ std::pair<SDValue, SDValue> Tmp =
+ BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ if (IsStrict)
+ return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
+
+ return Tmp.first;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
- SDValue ValueToStore = Op.getOperand(0);
- if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit())
+ SDValue ValueToStore = Src;
+ if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore, StackSlot,
- MachinePointerInfo());
+ }
+ SDValue Store =
+ DAG.getStore(Chain, dl, ValueToStore, StackSlot, MachinePointerInfo());
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
// DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
@@ -18863,32 +19376,42 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue Ops[] = { Store, StackSlot };
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
MVT::i64, MMO);
+ Chain = Fild.getValue(1);
- APInt FF(32, 0x5F800000ULL);
// Check whether the sign bit is set.
SDValue SignSet = DAG.getSetCC(
dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
- Op.getOperand(0), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
+ Op.getOperand(OpNo), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
- // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ // Build a 64-bit pair (FF, 0) in the constant pool, with FF in the hi bits.
+ APInt FF(64, 0x5F80000000000000ULL);
SDValue FudgePtr = DAG.getConstantPool(
- ConstantInt::get(*DAG.getContext(), FF.zext(64)), PtrVT);
+ ConstantInt::get(*DAG.getContext(), FF), PtrVT);
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
SDValue Zero = DAG.getIntPtrConstant(0, dl);
SDValue Four = DAG.getIntPtrConstant(4, dl);
- SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four);
+ SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero);
FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset);
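+ // In little-endian memory the pair is {0.0f, 0x5F800000}, and 0x5F800000 is
+ // 2^64 as an f32, so the FADD below adds back the 2^64 that the signed FILD
+ // interpretation dropped when the sign bit was set, and adds 0.0 otherwise.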
// Load the value out, extending it from f32 to f80.
- // FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(
- ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), FudgePtr,
+ ISD::EXTLOAD, dl, MVT::f80, Chain, FudgePtr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
/* Alignment = */ 4);
+ Chain = Fudge.getValue(1);
// Extend everything to 80 bits to force it to be done on x87.
// TODO: Are there any fast-math-flags to propagate here?
+ if (IsStrict) {
+ SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
+ {Chain, Fild, Fudge});
+ // STRICT_FP_ROUND can't handle equal types.
+ if (DstVT == MVT::f80)
+ return Add;
+ return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
+ {Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)});
+ }
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl));
@@ -18902,11 +19425,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// result.
SDValue
X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
- bool IsSigned) const {
+ bool IsSigned, SDValue &Chain) const {
+ bool IsStrict = Op->isStrictFPOpcode();
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
- EVT TheVT = Op.getOperand(0).getValueType();
+ SDValue Value = Op.getOperand(IsStrict ? 1 : 0);
+ EVT TheVT = Value.getValueType();
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
@@ -18920,6 +19445,8 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
// used for the 32-bit subtarget, but also for f80 on a 64-bit target.
bool UnsignedFixup = !IsSigned && DstTy == MVT::i64;
+ // FIXME: This does not generate an invalid exception if the input does not
+ // fit in i32. PR44019
if (!IsSigned && DstTy != MVT::i64) {
// Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
// The low 32 bits of the fist result will have the correct uint32 result.
@@ -18938,8 +19465,8 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- SDValue Chain = DAG.getEntryNode();
- SDValue Value = Op.getOperand(0);
+ Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
+
SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
if (UnsignedFixup) {
@@ -18949,8 +19476,9 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
// of a signed i64. Let Thresh be the FP equivalent of
// 0x8000000000000000ULL.
//
- // Adjust i32 = (Value < Thresh) ? 0 : 0x80000000;
- // FistSrc = (Value < Thresh) ? Value : (Value - Thresh);
+ // Adjust = (Value < Thresh) ? 0 : 0x80000000;
+ // FltOfs = (Value < Thresh) ? 0 : 0x80000000;
+ // FistSrc = (Value - FltOfs);
// Fist-to-mem64 FistSrc
// Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent
// to XOR'ing the high 32 bits with Adjust.
@@ -18975,19 +19503,31 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);
- SDValue Cmp = DAG.getSetCC(DL,
- getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), TheVT),
- Value, ThreshVal, ISD::SETLT);
+ EVT ResVT = getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TheVT);
+ SDValue Cmp;
+ if (IsStrict) {
+ Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT,
+ Chain, /*IsSignaling*/ true);
+ Chain = Cmp.getValue(1);
+ } else {
+ Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT);
+ }
+
Adjust = DAG.getSelect(DL, MVT::i64, Cmp,
DAG.getConstant(0, DL, MVT::i64),
DAG.getConstant(APInt::getSignMask(64),
DL, MVT::i64));
- SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
- Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), TheVT),
- Value, ThreshVal, ISD::SETLT);
- Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
+ SDValue FltOfs = DAG.getSelect(DL, TheVT, Cmp,
+ DAG.getConstantFP(0.0, DL, TheVT),
+ ThreshVal);
+
+ if (IsStrict) {
+ Value = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other},
+ { Chain, Value, FltOfs });
+ Chain = Value.getValue(1);
+ } else
+ Value = DAG.getNode(ISD::FSUB, DL, TheVT, Value, FltOfs);
}
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);
@@ -19017,6 +19557,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
Ops, DstTy, MMO);
SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MPI);
+ Chain = Res.getValue(1);
// If we need an unsigned fixup, XOR the result with adjust.
if (UnsignedFixup)
@@ -19036,7 +19577,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
assert(VT.isVector() && InVT.isVector() && "Expected vector type");
assert((Opc == ISD::ANY_EXTEND || Opc == ISD::ZERO_EXTEND) &&
"Unexpected extension opcode");
- assert(VT.getVectorNumElements() == VT.getVectorNumElements() &&
+ assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Expected same number of elements");
assert((VT.getVectorElementType() == MVT::i16 ||
VT.getVectorElementType() == MVT::i32 ||
@@ -19512,48 +20053,137 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
}
SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
- bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
- MVT VT = Op.getSimpleValueType();
- SDValue Src = Op.getOperand(0);
+ bool IsStrict = Op->isStrictFPOpcode();
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
+ Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
+ MVT VT = Op->getSimpleValueType(0);
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
MVT SrcVT = Src.getSimpleValueType();
SDLoc dl(Op);
- if (SrcVT == MVT::f128) {
- RTLIB::Libcall LC;
- if (Op.getOpcode() == ISD::FP_TO_SINT)
- LC = RTLIB::getFPTOSINT(SrcVT, VT);
- else
- LC = RTLIB::getFPTOUINT(SrcVT, VT);
-
- MakeLibCallOptions CallOptions;
- return makeLibCall(DAG, LC, VT, Src, CallOptions, SDLoc(Op)).first;
- }
-
if (VT.isVector()) {
if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
MVT TruncVT = MVT::v4i1;
- unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+ unsigned Opc;
+ if (IsStrict)
+ Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
+ else
+ Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+
if (!IsSigned && !Subtarget.hasVLX()) {
+ assert(Subtarget.useAVX512Regs() && "Unexpected features!");
// Widen to 512-bits.
ResVT = MVT::v8i32;
TruncVT = MVT::v8i1;
- Opc = ISD::FP_TO_UINT;
- Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
- DAG.getUNDEF(MVT::v8f64),
- Src, DAG.getIntPtrConstant(0, dl));
+ Opc = Op.getOpcode();
+ // Need to concat with zero vector for strict fp to avoid spurious
+ // exceptions.
+ // TODO: Should we just do this for non-strict as well?
+ SDValue Tmp = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v8f64)
+ : DAG.getUNDEF(MVT::v8f64);
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, Tmp, Src,
+ DAG.getIntPtrConstant(0, dl));
+ }
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res =
+ DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Op->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(Opc, dl, ResVT, Src);
}
- SDValue Res = DAG.getNode(Opc, dl, ResVT, Src);
+
Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
- DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
+ DAG.getIntPtrConstant(0, dl));
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
+ }
+
+ // v8f64->v8i32 is legal, but we need v8i32 to be custom for v8f32.
+ if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
+ assert(!IsSigned && "Expected unsigned conversion!");
+ assert(Subtarget.useAVX512Regs() && "Requires avx512f");
+ return Op;
+ }
+
+ // Widen vXi32 fp_to_uint with avx512f to 512-bit source.
+ if ((VT == MVT::v4i32 || VT == MVT::v8i32) &&
+ (SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v8f32)) {
+ assert(!IsSigned && "Expected unsigned conversion!");
+ assert(Subtarget.useAVX512Regs() && !Subtarget.hasVLX() &&
+ "Unexpected features!");
+ MVT WideVT = SrcVT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
+ MVT ResVT = SrcVT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
+ // Need to concat with zero vector for strict fp to avoid spurious
+ // exceptions.
+ // TODO: Should we just do this for non-strict as well?
+ SDValue Tmp =
+ IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT);
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
+ DAG.getIntPtrConstant(0, dl));
+
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other},
+ {Op->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src);
+ }
+
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
+ }
+
+ // Widen vXi64 fp_to_uint/fp_to_sint with avx512dq to 512-bit source.
+ if ((VT == MVT::v2i64 || VT == MVT::v4i64) &&
+ (SrcVT == MVT::v2f64 || SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32)) {
+ assert(Subtarget.useAVX512Regs() && Subtarget.hasDQI() &&
+ !Subtarget.hasVLX() && "Unexpected features!");
+ MVT WideVT = SrcVT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
+ // Need to concat with zero vector for strict fp to avoid spurious
+ // exceptions.
+ // TODO: Should we just do this for non-strict as well?
+ SDValue Tmp =
+ IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT);
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
+ DAG.getIntPtrConstant(0, dl));
+
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
+ {Op->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src);
+ }
+
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
}
- assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {
- return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
- DAG.getUNDEF(MVT::v2f32)));
+ assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL");
+ SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
+ DAG.getUNDEF(MVT::v2f32));
+ if (IsStrict) {
+ unsigned Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI
+ : X86ISD::STRICT_CVTTP2UI;
+ return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp});
+ }
+ unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+ return DAG.getNode(Opc, dl, VT, Tmp);
}
return SDValue();
@@ -19575,9 +20205,21 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
assert(VT == MVT::i32 && "Unexpected VT!");
// Promote i32 to i64 and use a signed operation on 64-bit targets.
+ // FIXME: This does not generate an invalid exception if the input does not
+ // fit in i32. PR44019
if (Subtarget.is64Bit()) {
- SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src);
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i64, MVT::Other},
+ { Op.getOperand(0), Src });
+ Chain = Res.getValue(1);
+ } else
+ Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src);
+
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ if (IsStrict)
+ return DAG.getMergeValues({ Res, Chain }, dl);
+ return Res;
}
// Use default expansion for SSE1/2 targets without SSE3. With SSE3 we can
@@ -19586,28 +20228,65 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
- // Promote i16 to i32 if we can use a SSE operation.
- if (VT == MVT::i16 && UseSSEReg) {
+ // Promote i16 to i32 if we can use an SSE operation or the type is f128.
+ // FIXME: This does not generate an invalid exception if the input does not
+ // fit in i16. PR44019
+ if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {
assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!");
- SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i32, MVT::Other},
+ { Op.getOperand(0), Src });
+ Chain = Res.getValue(1);
+ } else
+ Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
+
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ if (IsStrict)
+ return DAG.getMergeValues({ Res, Chain }, dl);
+ return Res;
}
- // If this is a SINT_TO_FP using SSEReg we're done.
+ // If this is a FP_TO_SINT using SSEReg we're done.
if (UseSSEReg && IsSigned)
return Op;
+ // fp128 needs to use a libcall.
+ if (SrcVT == MVT::f128) {
+ RTLIB::Libcall LC;
+ if (IsSigned)
+ LC = RTLIB::getFPTOSINT(SrcVT, VT);
+ else
+ LC = RTLIB::getFPTOUINT(SrcVT, VT);
+
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+ MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions,
+ SDLoc(Op), Chain);
+
+ if (IsStrict)
+ return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
+
+ return Tmp.first;
+ }
+
// Fall back to X87.
- if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned))
+ SDValue Chain;
+ if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
+ if (IsStrict)
+ return DAG.getMergeValues({V, Chain}, dl);
return V;
+ }
llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
}
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ bool IsStrict = Op->isStrictFPOpcode();
+
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
- SDValue In = Op.getOperand(0);
+ SDValue In = Op.getOperand(IsStrict ? 1 : 0);
MVT SVT = In.getSimpleValueType();
if (VT == MVT::f128) {
@@ -19617,14 +20296,19 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
- return DAG.getNode(X86ISD::VFPEXT, DL, VT,
- DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
- In, DAG.getUNDEF(SVT)));
+ SDValue Res =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, In, DAG.getUNDEF(SVT));
+ if (IsStrict)
+ return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
+ {Op->getOperand(0), Res});
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
}
SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ bool IsStrict = Op->isStrictFPOpcode();
+
MVT VT = Op.getSimpleValueType();
- SDValue In = Op.getOperand(0);
+ SDValue In = Op.getOperand(IsStrict ? 1 : 0);
MVT SVT = In.getSimpleValueType();
// It's legal except when f128 is involved
@@ -19636,17 +20320,17 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
// FP_ROUND node has a second operand indicating whether it is known to be
// precise. That doesn't take part in the LibCall so we can't directly use
// LowerF128Call.
+
+ SDLoc dl(Op);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
MakeLibCallOptions CallOptions;
- return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first;
-}
+ std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, In, CallOptions,
+ dl, Chain);
-// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking
-// the default expansion of STRICT_FP_ROUND.
-static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) {
- // FIXME: Need to form a libcall with an input chain for f128.
- assert(Op.getOperand(0).getValueType() != MVT::f128 &&
- "Don't know how to handle f128 yet!");
- return Op;
+ if (IsStrict)
+ return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
+
+ return Tmp.first;
}
/// Depending on uarch and/or optimizing for size, we might prefer to use a
@@ -19724,12 +20408,6 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
- if (Op.getValueType() == MVT::f128) {
- RTLIB::Libcall LC = Op.getOpcode() == ISD::FADD ? RTLIB::ADD_F128
- : RTLIB::SUB_F128;
- return LowerF128Call(Op, DAG, LC);
- }
-
assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
"Only expecting float/double");
return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
@@ -20013,6 +20691,19 @@ static bool hasNonFlagsUse(SDValue Op) {
return false;
}
+// Transform to an x86-specific ALU node with flags if there is a chance of
+// using an RMW op or only the flags are used. Otherwise, leave
+// the node alone and emit a 'cmp' or 'test' instruction.
+static bool isProfitableToUseFlagOp(SDValue Op) {
+ for (SDNode *U : Op->uses())
+ if (U->getOpcode() != ISD::CopyToReg &&
+ U->getOpcode() != ISD::SETCC &&
+ U->getOpcode() != ISD::STORE)
+ return false;
+
+ return true;
+}
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
@@ -20076,15 +20767,8 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
- // Transform to an x86-specific ALU node with flags if there is a chance of
- // using an RMW op or only the flags are used. Otherwise, leave
- // the node alone and emit a 'test' instruction.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg &&
- UI->getOpcode() != ISD::SETCC &&
- UI->getOpcode() != ISD::STORE)
- goto default_case;
+ if (!isProfitableToUseFlagOp(Op))
+ break;
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
@@ -20112,7 +20796,6 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
Op->getOperand(1)).getValue(1);
}
default:
- default_case:
break;
}
@@ -20131,15 +20814,26 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent.
-SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
- const SDLoc &dl, SelectionDAG &DAG) const {
+static std::pair<SDValue, SDValue> EmitCmp(SDValue Op0, SDValue Op1,
+ unsigned X86CC, const SDLoc &dl,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ SDValue Chain, bool IsSignaling) {
if (isNullConstant(Op1))
- return EmitTest(Op0, X86CC, dl, DAG, Subtarget);
+ return std::make_pair(EmitTest(Op0, X86CC, dl, DAG, Subtarget), Chain);
EVT CmpVT = Op0.getValueType();
- if (CmpVT.isFloatingPoint())
- return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+ if (CmpVT.isFloatingPoint()) {
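+ // A non-null Chain marks a strict FP comparison; emit the chained
+ // X86ISD::STRICT_FCMP(S) node and hand back its output chain.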
+ if (Chain) {
+ SDValue Res =
+ DAG.getNode(IsSignaling ? X86ISD::STRICT_FCMPS : X86ISD::STRICT_FCMP,
+ dl, {MVT::i32, MVT::Other}, {Chain, Op0, Op1});
+ return std::make_pair(Res, Res.getValue(1));
+ }
+ return std::make_pair(DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1),
+ SDValue());
+ }
assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");
@@ -20154,7 +20848,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
(COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
unsigned ExtendOp =
- isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+ isX86CCSigned(X86CC) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
if (X86CC == X86::COND_E || X86CC == X86::COND_NE) {
// For equality comparisons try to use SIGN_EXTEND if the input was
// truncate from something with enough sign bits.
@@ -20178,10 +20872,22 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
Op1 = DAG.getNode(ExtendOp, dl, CmpVT, Op1);
}
}
+
+ // Try to shrink i64 compares if the input has enough zero bits.
+ // FIXME: Also do this for non-constant compares or a constant on the LHS?
+ if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) &&
+ Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
+ cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 &&
+ DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) {
+ CmpVT = MVT::i32;
+ Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
+ Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
+ }
+
// Use SUB instead of CMP to enable CSE between SUB and CMP.
SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
- return Sub.getValue(1);
+ return std::make_pair(Sub.getValue(1), SDValue());
}
/// Convert a comparison if required by the subtarget.
@@ -20189,16 +20895,19 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
SelectionDAG &DAG) const {
// If the subtarget does not support the FUCOMI instruction, floating-point
// comparisons have to be converted.
- if (Subtarget.hasCMov() ||
- Cmp.getOpcode() != X86ISD::CMP ||
- !Cmp.getOperand(0).getValueType().isFloatingPoint() ||
- !Cmp.getOperand(1).getValueType().isFloatingPoint())
+ bool IsCmp = Cmp.getOpcode() == X86ISD::CMP;
+ bool IsStrictCmp = Cmp.getOpcode() == X86ISD::STRICT_FCMP ||
+ Cmp.getOpcode() == X86ISD::STRICT_FCMPS;
+
+ if (Subtarget.hasCMov() || (!IsCmp && !IsStrictCmp) ||
+ !Cmp.getOperand(IsStrictCmp ? 1 : 0).getValueType().isFloatingPoint() ||
+ !Cmp.getOperand(IsStrictCmp ? 2 : 1).getValueType().isFloatingPoint())
return Cmp;
// The instruction selector will select an FUCOM instruction instead of
// FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
// build an SDNode sequence that transfers the result from FPSW into EFLAGS:
- // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8))))
+ // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86any_fcmp ...)), 8))))
SDLoc dl(Cmp);
SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
@@ -20399,7 +21108,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
} else {
// Use BT if the immediate can't be encoded in a TEST instruction or we
// are optimizing for size and the immediate won't fit in a byte.
- bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool OptForSize = DAG.shouldOptForSize();
if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
isPowerOf2_64(AndRHSVal)) {
Src = AndLHS;
@@ -20442,7 +21151,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
/// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
/// CMPs.
static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
- SDValue &Op1) {
+ SDValue &Op1, bool &IsAlwaysSignaling) {
unsigned SSECC;
bool Swap = false;
@@ -20481,6 +21190,22 @@ static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
if (Swap)
std::swap(Op0, Op1);
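+ // Classify the condition: every predicate except the equality and
+ // (un)ordered tests maps to an SSE/AVX encoding that signals on quiet NaNs.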
+ switch (SetCCOpcode) {
+ default:
+ IsAlwaysSignaling = true;
+ break;
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ case ISD::SETUEQ:
+ case ISD::SETNE:
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ IsAlwaysSignaling = false;
+ break;
+ }
+
return SSECC;
}
@@ -20625,12 +21350,14 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- SDValue CC = Op.getOperand(2);
- MVT VT = Op.getSimpleValueType();
+ bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC ||
+ Op.getOpcode() == ISD::STRICT_FSETCCS;
+ SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
+ SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
+ SDValue CC = Op.getOperand(IsStrict ? 3 : 2);
+ MVT VT = Op->getSimpleValueType(0);
ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
- bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
+ bool isFP = Op1.getSimpleValueType().isFloatingPoint();
SDLoc dl(Op);
if (isFP) {
@@ -20639,57 +21366,119 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
+ bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+
unsigned Opc;
if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) {
assert(VT.getVectorNumElements() <= 16);
- Opc = X86ISD::CMPM;
+ Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM;
} else {
- Opc = X86ISD::CMPP;
+ Opc = IsStrict ? X86ISD::STRICT_CMPP : X86ISD::CMPP;
// The SSE/AVX packed FP comparison nodes are defined with a
// floating-point vector result that matches the operand type. This allows
// them to work with an SSE1 target (integer vector types are not legal).
VT = Op0.getSimpleValueType();
}
- // In the two cases not handled by SSE compare predicates (SETUEQ/SETONE),
- // emit two comparisons and a logic op to tie them together.
SDValue Cmp;
- unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1);
- if (SSECC >= 8 && !Subtarget.hasAVX()) {
- // LLVM predicate is SETUEQ or SETONE.
- unsigned CC0, CC1;
- unsigned CombineOpc;
- if (Cond == ISD::SETUEQ) {
- CC0 = 3; // UNORD
- CC1 = 0; // EQ
- CombineOpc = X86ISD::FOR;
+ bool IsAlwaysSignaling;
+ unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1, IsAlwaysSignaling);
+ if (!Subtarget.hasAVX()) {
+ // TODO: We could use following steps to handle a quiet compare with
+ // signaling encodings.
+ // 1. Get ordered masks from a quiet ISD::SETO
+ // 2. Use the masks to mask potential unordered elements in operands A and B
+ // 3. Get the compare results of masked A, B
+ // 4. Calculate the final result using the mask and result from 3
+ // But currently, we just fall back to scalar operations.
+ if (IsStrict && IsAlwaysSignaling && !IsSignaling)
+ return SDValue();
+
+ // Insert an extra signaling instruction to raise exception.
+ if (IsStrict && !IsAlwaysSignaling && IsSignaling) {
+ SDValue SignalCmp = DAG.getNode(
+ Opc, dl, {VT, MVT::Other},
+ {Chain, Op0, Op1, DAG.getTargetConstant(1, dl, MVT::i8)}); // LT_OS
+ // FIXME: It seems we need to update the flags of all new strict nodes.
+ // Otherwise, mayRaiseFPException in MI will return false due to
+ // NoFPExcept = false by default. However, I didn't find this being done
+ // in other patches.
+ SignalCmp->setFlags(Op->getFlags());
+ Chain = SignalCmp.getValue(1);
+ }
+
+ // In the two cases not handled by SSE compare predicates (SETUEQ/SETONE),
+ // emit two comparisons and a logic op to tie them together.
+ if (SSECC >= 8) {
+ // LLVM predicate is SETUEQ or SETONE.
+ unsigned CC0, CC1;
+ unsigned CombineOpc;
+ if (Cond == ISD::SETUEQ) {
+ CC0 = 3; // UNORD
+ CC1 = 0; // EQ
+ CombineOpc = X86ISD::FOR;
+ } else {
+ assert(Cond == ISD::SETONE);
+ CC0 = 7; // ORD
+ CC1 = 4; // NEQ
+ CombineOpc = X86ISD::FAND;
+ }
+
+ SDValue Cmp0, Cmp1;
+ if (IsStrict) {
+ Cmp0 = DAG.getNode(
+ Opc, dl, {VT, MVT::Other},
+ {Chain, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8)});
+ Cmp1 = DAG.getNode(
+ Opc, dl, {VT, MVT::Other},
+ {Chain, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)});
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Cmp0.getValue(1),
+ Cmp1.getValue(1));
+ } else {
+ Cmp0 = DAG.getNode(
+ Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8));
+ Cmp1 = DAG.getNode(
+ Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8));
+ }
+ Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
} else {
- assert(Cond == ISD::SETONE);
- CC0 = 7; // ORD
- CC1 = 4; // NEQ
- CombineOpc = X86ISD::FAND;
+ if (IsStrict) {
+ Cmp = DAG.getNode(
+ Opc, dl, {VT, MVT::Other},
+ {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
+ Chain = Cmp.getValue(1);
+ } else
+ Cmp = DAG.getNode(
+ Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
-
- SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
- DAG.getTargetConstant(CC0, dl, MVT::i8));
- SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
- DAG.getTargetConstant(CC1, dl, MVT::i8));
- Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
} else {
// Handle all other FP comparisons here.
- Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
- DAG.getTargetConstant(SSECC, dl, MVT::i8));
+ if (IsStrict) {
+ // Bit 4 of the AVX CC immediate flips the signaling behavior of the base
+ // predicate, so set it whenever the requested behavior differs from the
+ // predicate's default.
+ SSECC |= (IsAlwaysSignaling ^ IsSignaling) << 4;
+ Cmp = DAG.getNode(
+ Opc, dl, {VT, MVT::Other},
+ {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
+ Chain = Cmp.getValue(1);
+ } else
+ Cmp = DAG.getNode(
+ Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
// If this is SSE/AVX CMPP, bitcast the result back to integer to match the
// result type of SETCC. The bitcast is expected to be optimized away
// during combining/isel.
- if (Opc == X86ISD::CMPP)
- Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
+ Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
+
+ if (IsStrict)
+ return DAG.getMergeValues({Cmp, Chain}, dl);
return Cmp;
}
+ assert(!IsStrict && "Strict SETCC only handles FP operands.");
+
MVT VTOp0 = Op0.getSimpleValueType();
(void)VTOp0;
assert(VTOp0 == Op1.getSimpleValueType() &&
@@ -20860,6 +21649,30 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
assert(Subtarget.hasSSE2() && "Don't know how to lower!");
+ // Special case for sign bit test. We can use a v4i32 PCMPGT and shuffle
+ // the odd elements over the even elements.
+ if (!FlipSigns && !Invert && ISD::isBuildVectorAllZeros(Op0.getNode())) {
+ Op0 = DAG.getConstant(0, dl, MVT::v4i32);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
+
+ SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
+ static const int MaskHi[] = { 1, 1, 3, 3 };
+ SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
+
+ return DAG.getBitcast(VT, Result);
+ }
+
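+ // Likewise for the inverse sign bit test: X > -1 holds exactly when the
+ // sign bit of each i64 element is clear.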
+ if (!FlipSigns && !Invert && ISD::isBuildVectorAllOnes(Op1.getNode())) {
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op1 = DAG.getConstant(-1, dl, MVT::v4i32);
+
+ SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
+ static const int MaskHi[] = { 1, 1, 3, 3 };
+ SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
+
+ return DAG.getBitcast(VT, Result);
+ }
+
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
// compare is always unsigned.
@@ -20999,8 +21812,9 @@ static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC,
/// corresponding X86 condition code constant in X86CC.
SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
ISD::CondCode CC, const SDLoc &dl,
- SelectionDAG &DAG,
- SDValue &X86CC) const {
+ SelectionDAG &DAG, SDValue &X86CC,
+ SDValue &Chain,
+ bool IsSignaling) const {
// Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
@@ -21043,12 +21857,32 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
}
}
+ // Try to use the carry flag from the add in place of a separate CMP for:
+ // (seteq (add X, -1), -1). Similar for setne.
+ if (isAllOnesConstant(Op1) && Op0.getOpcode() == ISD::ADD &&
+ Op0.getOperand(1) == Op1 && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ if (isProfitableToUseFlagOp(Op0)) {
+ SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
+
+ SDValue New = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(0),
+ Op0.getOperand(1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op0.getNode(), 0), New);
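+ // X + (-1) sets the carry flag exactly when X != 0, so X == 0 (seteq)
+ // corresponds to carry-clear (AE) and setne to carry-set (B).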
+ X86::CondCode CCode = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
+ return SDValue(New.getNode(), 1);
+ }
+ }
+
bool IsFP = Op1.getSimpleValueType().isFloatingPoint();
X86::CondCode CondCode = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
if (CondCode == X86::COND_INVALID)
return SDValue();
- SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG);
+ std::pair<SDValue, SDValue> Tmp =
+ EmitCmp(Op0, Op1, CondCode, dl, DAG, Subtarget, Chain, IsSignaling);
+ SDValue EFLAGS = Tmp.first;
+ if (Chain)
+ Chain = Tmp.second;
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
return EFLAGS;
@@ -21056,35 +21890,48 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- MVT VT = Op.getSimpleValueType();
+ bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC ||
+ Op.getOpcode() == ISD::STRICT_FSETCCS;
+ MVT VT = Op->getSimpleValueType(0);
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+ SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
+ SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
SDLoc dl(Op);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ ISD::CondCode CC =
+ cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets handled by emitFlagsForSetcc.
if (Op0.getValueType() == MVT::f128) {
- softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1);
+ softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1, Chain,
+ Op.getOpcode() == ISD::STRICT_FSETCCS);
// If softenSetCCOperands returned a scalar, use it.
if (!Op1.getNode()) {
assert(Op0.getValueType() == Op.getValueType() &&
"Unexpected setcc expansion!");
+ if (IsStrict)
+ return DAG.getMergeValues({Op0, Chain}, dl);
return Op0;
}
}
SDValue X86CC;
- SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
+ SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC, Chain,
+ Op.getOpcode() == ISD::STRICT_FSETCCS);
if (!EFLAGS)
return SDValue();
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
+ SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+
+ return Res;
}
SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
@@ -21215,8 +22062,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
(Subtarget.hasSSE1() && VT == MVT::f32)) &&
VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
- unsigned SSECC = translateX86FSETCC(
- cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
+ bool IsAlwaysSignaling;
+ unsigned SSECC =
+ translateX86FSETCC(cast<CondCodeSDNode>(Cond.getOperand(2))->get(),
+ CondOp0, CondOp1, IsAlwaysSignaling);
if (Subtarget.hasAVX512()) {
SDValue Cmp =
@@ -21454,8 +22303,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (AddTest) {
CC = DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8);
- Cond = EmitCmp(Cond, DAG.getConstant(0, DL, Cond.getValueType()),
- X86::COND_NE, DL, DAG);
+ Cond = EmitTest(Cond, X86::COND_NE, DL, DAG, Subtarget);
}
// a < b ? -1 : 0 -> RES = ~setcc_carry
@@ -21711,7 +22559,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG);
assert(VT.isVector() && InVT.isVector() && "Expected vector type");
- assert(VT.getVectorNumElements() == VT.getVectorNumElements() &&
+ assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Expected same number of elements");
assert((VT.getVectorElementType() == MVT::i16 ||
VT.getVectorElementType() == MVT::i32 ||
@@ -21765,12 +22613,14 @@ static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
"Expecting 256/512-bit op");
// Splitting volatile memory ops is not allowed unless the operation was not
- // legal to begin with. We are assuming the input op is legal (this transform
- // is only used for targets with AVX).
+ // legal to begin with. Assume the input store is legal (this transform is
+ // only used for targets with AVX). Note: It is possible that we have an
+ // illegal type like v2i128, and so we could allow splitting a volatile store
+ // in that case if that is important.
if (!Store->isSimple())
return SDValue();
- MVT StoreVT = StoredVal.getSimpleValueType();
+ EVT StoreVT = StoredVal.getValueType();
unsigned NumElems = StoreVT.getVectorNumElements();
unsigned HalfSize = StoredVal.getValueSizeInBits() / 2;
unsigned HalfAlign = (128 == HalfSize ? 16 : 32);
@@ -22174,8 +23024,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (addTest) {
X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
- Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()),
- X86Cond, dl, DAG);
+ Cond = EmitTest(Cond, X86Cond, dl, DAG, Subtarget);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
@@ -22201,7 +23050,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- unsigned Align = Op.getConstantOperandVal(2);
+ MaybeAlign Alignment(Op.getConstantOperandVal(2));
EVT VT = Node->getValueType(0);
// Chain the dynamic stack allocation so that it doesn't modify the stack
@@ -22221,11 +23070,12 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
- unsigned StackAlign = TFI.getStackAlignment();
+ const Align StackAlign(TFI.getStackAlignment());
Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
- if (Align > StackAlign)
- Result = DAG.getNode(ISD::AND, dl, VT, Result,
- DAG.getConstant(-(uint64_t)Align, dl, VT));
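+ // Round the allocation down to the requested alignment by masking off
+ // the low bits.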
+ if (Alignment && Alignment > StackAlign)
+ Result =
+ DAG.getNode(ISD::AND, dl, VT, Result,
+ DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
} else if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -22256,9 +23106,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
- if (Align) {
+ if (Alignment) {
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
- DAG.getConstant(-(uint64_t)Align, dl, VT));
+ DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
}
@@ -22777,6 +23627,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned IntNo = Op.getConstantOperandVal(0);
MVT VT = Op.getSimpleValueType();
const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
+
if (IntrData) {
switch(IntrData->Type) {
case INTR_TYPE_1OP: {
@@ -22794,7 +23645,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
- return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
+ Op.getOperand(1));
}
case INTR_TYPE_1OP_SAE: {
SDValue Sae = Op.getOperand(2);
@@ -22866,7 +23718,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
- Src1, Src2, Src3);
+ {Src1, Src2, Src3});
}
case INTR_TYPE_4OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
@@ -22890,8 +23742,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
- Mask, PassThru, Subtarget, DAG);
+ return getVectorMaskingNode(
+ DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru,
+ Subtarget, DAG);
}
case INTR_TYPE_1OP_MASK_SAE: {
SDValue Src = Op.getOperand(1);
@@ -22907,8 +23760,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else
return SDValue();
- return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src),
- Mask, PassThru, Subtarget, DAG);
+ return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), Mask, PassThru,
+ Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK: {
SDValue Src1 = Op.getOperand(1);
@@ -23114,8 +23967,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return SDValue();
}
// Default rounding mode.
- return DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2), CC);
+ return DAG.getNode(IntrData->Opc0, dl, MaskVT,
+ {Op.getOperand(1), Op.getOperand(2), CC});
}
case CMP_MASK_SCALAR_CC: {
SDValue Src1 = Op.getOperand(1);
@@ -23315,8 +24168,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
MVT SrcVT = Src.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
- Mask);
+ return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(),
+ {Src, PassThru, Mask});
}
case CVTPS2PH_MASK: {
SDValue Src = Op.getOperand(1);
@@ -23622,9 +24475,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue ShAmt = Op.getOperand(2);
// If the argument is a constant, convert it to a target constant.
if (auto *C = dyn_cast<ConstantSDNode>(ShAmt)) {
- ShAmt = DAG.getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ // Clamp out of bounds shift amounts since they will otherwise be masked
+ // to 8 bits, which may make them no longer out of bounds.
+ unsigned ShiftAmount = C->getAPIntValue().getLimitedValue(255);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
- Op.getOperand(0), Op.getOperand(1), ShAmt);
+ Op.getOperand(0), Op.getOperand(1),
+ DAG.getTargetConstant(ShiftAmount, DL, MVT::i32));
}
unsigned NewIntrinsic;
@@ -23977,7 +24833,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
MFI.setHasCopyImplyingStackAdjustment(true);
// Don't do anything here, we will expand these intrinsics out later
// during FinalizeISel in EmitInstrWithCustomInserter.
- return SDValue();
+ return Op;
}
case Intrinsic::x86_lwpins32:
case Intrinsic::x86_lwpins64:
@@ -24152,9 +25008,11 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ SDValue Offset = DAG.getUNDEF(VMask.getValueType());
- return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT,
- MemIntr->getMemOperand(), true /* truncating */);
+ return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, Offset, VMask,
+ MemVT, MemIntr->getMemOperand(), ISD::UNINDEXED,
+ true /* truncating */);
}
case X86ISD::VTRUNCUS:
case X86ISD::VTRUNCS: {
@@ -24249,7 +25107,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
+Register X86TargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
@@ -24538,12 +25396,13 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
- unsigned StackAlignment = TFI.getStackAlignment();
+ const Align StackAlignment(TFI.getStackAlignment());
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
// Save FP Control Word to stack slot
- int SSFI = MF.getFrameInfo().CreateStackObject(2, StackAlignment, false);
+ int SSFI =
+ MF.getFrameInfo().CreateStackObject(2, StackAlignment.value(), false);
SDValue StackSlot =
DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));
@@ -27464,12 +28323,11 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode()))
return Op;
- SDValue NewLoad = DAG.getMaskedLoad(VT, dl, N->getChain(),
- N->getBasePtr(), Mask,
- getZeroVector(VT, Subtarget, DAG, dl),
- N->getMemoryVT(), N->getMemOperand(),
- N->getExtensionType(),
- N->isExpandingLoad());
+ SDValue NewLoad = DAG.getMaskedLoad(
+ VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
+ getZeroVector(VT, Subtarget, DAG, dl), N->getMemoryVT(),
+ N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(),
+ N->isExpandingLoad());
// Emit a blend.
SDValue Select = DAG.getNode(ISD::VSELECT, dl, MaskVT, Mask, NewLoad,
PassThru);
@@ -27503,11 +28361,10 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
- SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
- N->getBasePtr(), Mask, PassThru,
- N->getMemoryVT(), N->getMemOperand(),
- N->getExtensionType(),
- N->isExpandingLoad());
+ SDValue NewLoad = DAG.getMaskedLoad(
+ WideDataVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
+ PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
+ N->getExtensionType(), N->isExpandingLoad());
SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
NewLoad.getValue(0),
@@ -27553,7 +28410,8 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
- Mask, N->getMemoryVT(), N->getMemOperand(),
+ N->getOffset(), Mask, N->getMemoryVT(),
+ N->getMemOperand(), N->getAddressingMode(),
N->isTruncatingStore(), N->isCompressingStore());
}
@@ -27607,29 +28465,31 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getMergeValues({Extract, NewGather.getValue(2)}, dl);
}
-SDValue X86TargetLowering::LowerGC_TRANSITION_START(SDValue Op,
- SelectionDAG &DAG) const {
- // TODO: Eventually, the lowering of these nodes should be informed by or
- // deferred to the GC strategy for the function in which they appear. For
- // now, however, they must be lowered to something. Since they are logically
- // no-ops in the case of a null GC strategy (or a GC strategy which does not
- // require special handling for these nodes), lower them as literal NOOPs for
- // the time being.
- SmallVector<SDValue, 2> Ops;
+static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ SDValue Src = Op.getOperand(0);
+ MVT DstVT = Op.getSimpleValueType();
- Ops.push_back(Op.getOperand(0));
- if (Op->getGluedNode())
- Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));
+ AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
+ unsigned SrcAS = N->getSrcAddressSpace();
- SDLoc OpDL(Op);
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
+ assert(SrcAS != N->getDestAddressSpace() &&
+ "addrspacecast must be between different address spaces");
- return NOOP;
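+ // 32-bit unsigned pointers (ptr32_uptr) zero-extend to 64 bits; any other
+ // widening cast sign-extends, and narrowing casts truncate.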
+ if (SrcAS == X86AS::PTR32_UPTR && DstVT == MVT::i64) {
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Src);
+ } else if (DstVT == MVT::i64) {
+ Op = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Src);
+ } else if (DstVT == MVT::i32) {
+ Op = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src);
+ } else {
+ report_fatal_error("Bad address space in addrspacecast");
+ }
+ return Op;
}
-SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerGC_TRANSITION(SDValue Op,
+ SelectionDAG &DAG) const {
// TODO: Eventually, the lowering of these nodes should be informed by or
// deferred to the GC strategy for the function in which they appear. For
// now, however, they must be lowered to something. Since they are logically
@@ -27651,9 +28511,21 @@ SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const {
- SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
+
+ bool IsStrict = Op->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
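+ // For strict nodes, operand 0 is the chain: skip it when collecting the
+ // libcall arguments and pass it to makeLibCall instead.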
+ SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end());
+
+ SDLoc dl(Op);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
MakeLibCallOptions CallOptions;
- return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
+ std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, Call, MVT::f128, Ops,
+ CallOptions, dl, Chain);
+
+ if (IsStrict)
+ return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
+
+ return Tmp.first;
}
/// Provide custom lowering hooks for some operations.
@@ -27673,7 +28545,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
- case ISD::VECTOR_SHUFFLE: return lowerVectorShuffle(Op, Subtarget, DAG);
+ case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, Subtarget, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
@@ -27690,7 +28562,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
+ case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::STRICT_UINT_TO_FP:
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
@@ -27700,21 +28574,24 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SIGN_EXTEND_VECTOR_INREG:
return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
- case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
- case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
- case ISD::STRICT_FP_ROUND: return LowerSTRICT_FP_ROUND(Op, DAG);
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FP_EXTEND:
+ case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
case ISD::FADD:
case ISD::FSUB: return lowerFaddFsub(Op, DAG);
- case ISD::FMUL: return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
- case ISD::FDIV: return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
- case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: return LowerSETCC(Op, DAG);
case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
@@ -27778,8 +28655,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG);
case ISD::GC_TRANSITION_START:
- return LowerGC_TRANSITION_START(Op, DAG);
- case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION_END(Op, DAG);
+ case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION(Op, DAG);
+ case ISD::ADDRSPACECAST:
+ return LowerADDRSPACECAST(Op, DAG);
}
}
@@ -27865,8 +28743,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
case X86ISD::VPMADDWD:
case X86ISD::AVG: {
- // Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
- // X86ISD::AVG/VPMADDWD by widening.
+ // Legalize types for X86ISD::AVG/VPMADDWD by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT VT = N->getValueType(0);
@@ -28114,10 +28991,14 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: {
- bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT: {
+ bool IsStrict = N->isStrictFPOpcode();
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
EVT VT = N->getValueType(0);
- SDValue Src = N->getOperand(0);
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
@@ -28128,13 +29009,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth),
VT.getVectorNumElements());
- SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
+ SDValue Res;
+ SDValue Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {PromoteVT, MVT::Other},
+ {N->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else
+ Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
// Preserve what we know about the size of the original result, except
// when the result is v2i32, since we can't widen the assert.
if (PromoteVT != MVT::v2i32)
- Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
- : ISD::AssertSext,
+ Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext,
dl, PromoteVT, Res,
DAG.getValueType(VT.getVectorElementType()));
@@ -28149,6 +29036,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
ConcatOps[0] = Res;
Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
Results.push_back(Res);
+ if (IsStrict)
+ Results.push_back(Chain);
return;
}
@@ -28160,16 +29049,49 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
"Unexpected type action!");
if (Src.getValueType() == MVT::v2f64) {
+ unsigned Opc;
+ if (IsStrict)
+ Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
+ else
+ Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+
+ // If we have VLX we can emit a target specific FP_TO_UINT node.
if (!IsSigned && !Subtarget.hasVLX()) {
- // If we have VLX we can emit a target specific FP_TO_UINT node,
- // otherwise we can defer to the generic legalizer which will widen
+ // Otherwise we can defer to the generic legalizer which will widen
// the input as well. This will be further widened during op
// legalization to v8i32<-v8f64.
- return;
+ // For strict nodes we'll need to widen ourselves.
+ // FIXME: Fix the type legalizer to safely widen strict nodes?
+ if (!IsStrict)
+ return;
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, Src,
+ DAG.getConstantFP(0.0, dl, MVT::v2f64));
+ Opc = N->getOpcode();
+ }
+ SDValue Res;
+ SDValue Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(Opc, dl, {MVT::v4i32, MVT::Other},
+ {N->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
}
- unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
- SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
Results.push_back(Res);
+ if (IsStrict)
+ Results.push_back(Chain);
+ return;
+ }
+
+ // Custom widen strict v2f32->v2i32 by padding with zeros.
+ // FIXME: Should generic type legalizer do this?
+ if (Src.getValueType() == MVT::v2f32 && IsStrict) {
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
+ DAG.getConstantFP(0.0, dl, MVT::v2f32));
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4i32, MVT::Other},
+ {N->getOperand(0), Src});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
return;
}
@@ -28183,64 +29105,168 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (Subtarget.hasDQI() && VT == MVT::i64 &&
(SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
assert(!Subtarget.is64Bit() && "i64 should be legal");
- unsigned NumElts = Subtarget.hasVLX() ? 4 : 8;
- // Using a 256-bit input here to guarantee 128-bit input for f32 case.
- // TODO: Use 128-bit vectors for f64 case?
- // TODO: Use 128-bit vectors for f32 by using CVTTP2SI/CVTTP2UI.
+ unsigned NumElts = Subtarget.hasVLX() ? 2 : 8;
+ // If we use a 128-bit result we might need to use a target specific node.
+ unsigned SrcElts =
+ std::max(NumElts, 128U / (unsigned)SrcVT.getSizeInBits());
MVT VecVT = MVT::getVectorVT(MVT::i64, NumElts);
- MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), NumElts);
+ MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), SrcElts);
+ unsigned Opc = N->getOpcode();
+ if (NumElts != SrcElts) {
+ if (IsStrict)
+ Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
+ else
+ Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+ }
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT,
DAG.getConstantFP(0.0, dl, VecInVT), Src,
ZeroIdx);
- Res = DAG.getNode(N->getOpcode(), SDLoc(N), VecVT, Res);
+ SDValue Chain;
+ if (IsStrict) {
+ SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
+ Res = DAG.getNode(Opc, SDLoc(N), Tys, N->getOperand(0), Res);
+ Chain = Res.getValue(1);
+ } else
+ Res = DAG.getNode(Opc, SDLoc(N), VecVT, Res);
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx);
Results.push_back(Res);
+ if (IsStrict)
+ Results.push_back(Chain);
return;
}
- if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned))
+ SDValue Chain;
+ if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, Chain)) {
Results.push_back(V);
+ if (IsStrict)
+ Results.push_back(Chain);
+ }
return;
}
- case ISD::SINT_TO_FP: {
- assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
- SDValue Src = N->getOperand(0);
- if (N->getValueType(0) != MVT::v2f32 || Src.getValueType() != MVT::v2i64)
- return;
- Results.push_back(DAG.getNode(X86ISD::CVTSI2P, dl, MVT::v4f32, Src));
- return;
- }
- case ISD::UINT_TO_FP: {
- assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
+ case ISD::SINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP: {
+ bool IsStrict = N->isStrictFPOpcode();
+ bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
EVT VT = N->getValueType(0);
if (VT != MVT::v2f32)
return;
- SDValue Src = N->getOperand(0);
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
- Results.push_back(DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v4f32, Src));
+ if (IsStrict) {
+ unsigned Opc = IsSigned ? X86ISD::STRICT_CVTSI2P
+ : X86ISD::STRICT_CVTUI2P;
+ SDValue Res = DAG.getNode(Opc, dl, {MVT::v4f32, MVT::Other},
+ {N->getOperand(0), Src});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ } else {
+ unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P;
+ Results.push_back(DAG.getNode(Opc, dl, MVT::v4f32, Src));
+ }
return;
}
+ if (SrcVT == MVT::v2i64 && !IsSigned && Subtarget.is64Bit() &&
+ Subtarget.hasSSE41() && !Subtarget.hasAVX512()) {
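+ // Convert unsigned i64 elements with signed scalar converts: inputs with
+ // the sign bit set are halved with a sticky low bit (round to odd),
+ // converted, then doubled with an FADD; a final select keeps the doubled
+ // value only for those elements.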
+ SDValue Zero = DAG.getConstant(0, dl, SrcVT);
+ SDValue One = DAG.getConstant(1, dl, SrcVT);
+ SDValue Sign = DAG.getNode(ISD::OR, dl, SrcVT,
+ DAG.getNode(ISD::SRL, dl, SrcVT, Src, One),
+ DAG.getNode(ISD::AND, dl, SrcVT, Src, One));
+ SDValue IsNeg = DAG.getSetCC(dl, MVT::v2i64, Src, Zero, ISD::SETLT);
+ SDValue SignSrc = DAG.getSelect(dl, SrcVT, IsNeg, Sign, Src);
+ SmallVector<SDValue, 4> SignCvts(4, DAG.getConstantFP(0.0, dl, MVT::f32));
+ for (int i = 0; i != 2; ++i) {
+ SDValue Src = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
+ SignSrc, DAG.getIntPtrConstant(i, dl));
+ if (IsStrict)
+ SignCvts[i] =
+ DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {MVT::f32, MVT::Other},
+ {N->getOperand(0), Src});
+ else
+ SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Src);
+ }
+ SDValue SignCvt = DAG.getBuildVector(MVT::v4f32, dl, SignCvts);
+ SDValue Slow, Chain;
+ if (IsStrict) {
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ SignCvts[0].getValue(1), SignCvts[1].getValue(1));
+ Slow = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v4f32, MVT::Other},
+ {Chain, SignCvt, SignCvt});
+ Chain = Slow.getValue(1);
+ } else {
+ Slow = DAG.getNode(ISD::FADD, dl, MVT::v4f32, SignCvt, SignCvt);
+ }
+ IsNeg = DAG.getBitcast(MVT::v4i32, IsNeg);
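+ // Each v2i64 compare lane is all-ones or all-zeros; pull the high words
+ // into lanes 0 and 1 to form the v4f32 select mask.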
+ IsNeg =
+ DAG.getVectorShuffle(MVT::v4i32, dl, IsNeg, IsNeg, {1, 3, -1, -1});
+ SDValue Cvt = DAG.getSelect(dl, MVT::v4f32, IsNeg, Slow, SignCvt);
+ Results.push_back(Cvt);
+ if (IsStrict)
+ Results.push_back(Chain);
+ return;
+ }
+
if (SrcVT != MVT::v2i32)
return;
+
+ if (IsSigned || Subtarget.hasAVX512()) {
+ if (!IsStrict)
+ return;
+
+ // Custom widen strict v2i32->v2f32 to avoid scalarization.
+ // FIXME: Should generic type legalizer do this?
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
+ DAG.getConstant(0, dl, MVT::v2i32));
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
+ {N->getOperand(0), Src});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ return;
+ }
+
+ assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
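+ // Standard uint32 -> f64 trick: OR the zero-extended input into the
+ // mantissa of 2^52 and subtract 2^52, leaving the exact value, which
+ // VFPROUND then narrows to f32.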
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
SDValue VBias =
DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::v2f64);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);
- // TODO: Are there any fast-math-flags to propagate here?
- SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
- Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
+ if (IsStrict) {
+ SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
+ {N->getOperand(0), Or, VBias});
+ SDValue Res = DAG.getNode(X86ISD::STRICT_VFPROUND, dl,
+ {MVT::v4f32, MVT::Other},
+ {Sub.getValue(1), Sub});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ } else {
+ // TODO: Are there any fast-math-flags to propagate here?
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
+ Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
+ }
return;
}
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: {
- if (!isTypeLegal(N->getOperand(0).getValueType()))
- return;
- SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ if (!isTypeLegal(Src.getValueType()))
+ return;
+ SDValue V;
+ if (IsStrict)
+ V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
+ else
+ V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
+ if (IsStrict)
+ Results.push_back(V.getValue(1));
return;
}
case ISD::FP_EXTEND: {
@@ -28543,6 +29569,28 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Res.getValue(1));
return;
}
+ case ISD::ADDRSPACECAST: {
+ SDValue Src = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
+ unsigned SrcAS = CastN->getSrcAddressSpace();
+
+ assert(SrcAS != CastN->getDestAddressSpace() &&
+ "addrspacecast must be between different address spaces");
+
+ SDValue Res;
+ if (SrcAS == X86AS::PTR32_UPTR && DstVT == MVT::i64)
+ Res = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Src);
+ else if (DstVT == MVT::i64)
+ Res = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Src);
+ else if (DstVT == MVT::i32)
+ Res = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src);
+ else
+ report_fatal_error("Unrecognized addrspacecast type legalization");
+
+ Results.push_back(Res);
+ return;
+ }
}
}
@@ -28566,9 +29614,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::BT: return "X86ISD::BT";
case X86ISD::CMP: return "X86ISD::CMP";
+ case X86ISD::STRICT_FCMP: return "X86ISD::STRICT_FCMP";
+ case X86ISD::STRICT_FCMPS: return "X86ISD::STRICT_FCMPS";
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::CMPM: return "X86ISD::CMPM";
+ case X86ISD::STRICT_CMPM: return "X86ISD::STRICT_CMPM";
case X86ISD::CMPM_SAE: return "X86ISD::CMPM_SAE";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
@@ -28653,10 +29704,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
+ case X86ISD::STRICT_VFPEXT: return "X86ISD::STRICT_VFPEXT";
case X86ISD::VFPEXT_SAE: return "X86ISD::VFPEXT_SAE";
case X86ISD::VFPEXTS: return "X86ISD::VFPEXTS";
case X86ISD::VFPEXTS_SAE: return "X86ISD::VFPEXTS_SAE";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
+ case X86ISD::STRICT_VFPROUND: return "X86ISD::STRICT_VFPROUND";
case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS: return "X86ISD::VFPROUNDS";
@@ -28676,6 +29729,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VROTRI: return "X86ISD::VROTRI";
case X86ISD::VPPERM: return "X86ISD::VPPERM";
case X86ISD::CMPP: return "X86ISD::CMPP";
+ case X86ISD::STRICT_CMPP: return "X86ISD::STRICT_CMPP";
case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
case X86ISD::PHMINPOS: return "X86ISD::PHMINPOS";
@@ -28776,6 +29830,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
+ case X86ISD::STRICT_VRNDSCALE: return "X86ISD::STRICT_VRNDSCALE";
case X86ISD::VRNDSCALE_SAE: return "X86ISD::VRNDSCALE_SAE";
case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES";
case X86ISD::VRNDSCALES_SAE: return "X86ISD::VRNDSCALES_SAE";
@@ -28837,6 +29892,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI";
case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI";
+ case X86ISD::STRICT_CVTTP2SI: return "X86ISD::STRICT_CVTTP2SI";
+ case X86ISD::STRICT_CVTTP2UI: return "X86ISD::STRICT_CVTTP2UI";
case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI";
case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI";
case X86ISD::CVTTP2SI_SAE: return "X86ISD::CVTTP2SI_SAE";
@@ -28847,6 +29904,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CVTTS2UI_SAE: return "X86ISD::CVTTS2UI_SAE";
case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P";
case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P";
+ case X86ISD::STRICT_CVTSI2P: return "X86ISD::STRICT_CVTSI2P";
+ case X86ISD::STRICT_CVTUI2P: return "X86ISD::STRICT_CVTUI2P";
case X86ISD::MCVTSI2P: return "X86ISD::MCVTSI2P";
case X86ISD::MCVTUI2P: return "X86ISD::MCVTUI2P";
case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS";
@@ -29099,8 +30158,8 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return true;
}
-bool
-X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const {
if (!Subtarget.hasAnyFMA())
return false;
@@ -31518,28 +32577,26 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case X86ISD::VSRAI:
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
- if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (ShiftImm->getAPIntValue().uge(VT.getScalarSizeInBits())) {
- Known.setAllZero();
- break;
- }
+ unsigned ShAmt = Op.getConstantOperandVal(1);
+ if (ShAmt >= VT.getScalarSizeInBits()) {
+ Known.setAllZero();
+ break;
+ }
- Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- unsigned ShAmt = ShiftImm->getZExtValue();
- if (Opc == X86ISD::VSHLI) {
- Known.Zero <<= ShAmt;
- Known.One <<= ShAmt;
- // Low bits are known zero.
- Known.Zero.setLowBits(ShAmt);
- } else if (Opc == X86ISD::VSRLI) {
- Known.Zero.lshrInPlace(ShAmt);
- Known.One.lshrInPlace(ShAmt);
- // High bits are known zero.
- Known.Zero.setHighBits(ShAmt);
- } else {
- Known.Zero.ashrInPlace(ShAmt);
- Known.One.ashrInPlace(ShAmt);
- }
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Opc == X86ISD::VSHLI) {
+ Known.Zero <<= ShAmt;
+ Known.One <<= ShAmt;
+ // Low bits are known zero.
+ Known.Zero.setLowBits(ShAmt);
+ } else if (Opc == X86ISD::VSRLI) {
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
+ // High bits are known zero.
+ Known.Zero.setHighBits(ShAmt);
+ } else {
+ Known.Zero.ashrInPlace(ShAmt);
+ Known.One.ashrInPlace(ShAmt);
}
break;
}
@@ -32103,8 +33160,8 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) ||
((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) ||
((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
- if (matchVectorShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
- Subtarget)) {
+ if (matchShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
+ Subtarget)) {
DstVT = MaskVT;
return true;
}
@@ -32116,8 +33173,8 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
(MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
- if (matchVectorShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL,
- DAG, Subtarget)) {
+ if (matchShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL, DAG,
+ Subtarget)) {
SrcVT = DstVT = MaskVT;
if (MaskVT.is256BitVector() && !Subtarget.hasAVX2())
SrcVT = DstVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
@@ -32155,8 +33212,8 @@ static bool matchBinaryPermuteShuffle(
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end());
- if (matchVectorShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero,
- ForceV2Zero, BlendMask)) {
+ if (matchShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero,
+ ForceV2Zero, BlendMask)) {
if (MaskVT == MVT::v16i16) {
// We can only use v16i16 PBLENDW if the lanes are repeated.
SmallVector<int, 8> RepeatedMask;
@@ -32410,10 +33467,9 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
// Determine zeroable mask elements.
- APInt Zeroable(NumMaskElts, 0);
- for (unsigned i = 0; i != NumMaskElts; ++i)
- if (isUndefOrZero(Mask[i]))
- Zeroable.setBit(i);
+ APInt KnownUndef, KnownZero;
+ resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero);
+ APInt Zeroable = KnownUndef | KnownZero;
if (UnaryShuffle) {
// If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
@@ -32834,7 +33890,8 @@ static SDValue combineX86ShuffleChainWithExtract(
Offset += Src.getConstantOperandVal(1);
Src = Src.getOperand(0);
}
- WideSizeInBits = std::max(WideSizeInBits, Src.getValueSizeInBits());
+ WideSizeInBits = std::max(WideSizeInBits,
+ (unsigned)Src.getValueSizeInBits());
assert((Offset % BaseVT.getVectorNumElements()) == 0 &&
"Unexpected subvector extraction");
Offset /= BaseVT.getVectorNumElements();
@@ -33026,6 +34083,10 @@ static SDValue combineX86ShufflesRecursively(
ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ assert(RootMask.size() > 0 &&
+ (RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
+ "Illegal shuffle root mask");
+
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
const unsigned MaxRecursionDepth = 8;
@@ -33056,106 +34117,137 @@ static SDValue combineX86ShufflesRecursively(
OpZero, DAG, Depth, false))
return SDValue();
- resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero);
-
- // Add the inputs to the Ops list, avoiding duplicates.
- SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
-
- auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {
- // Attempt to find an existing match.
- SDValue InputBC = peekThroughBitcasts(Input);
- for (int i = 0, e = Ops.size(); i < e; ++i)
- if (InputBC == peekThroughBitcasts(Ops[i]))
- return i;
- // Match failed - should we replace an existing Op?
- if (InsertionPoint >= 0) {
- Ops[InsertionPoint] = Input;
- return InsertionPoint;
+ SmallVector<int, 64> Mask;
+ SmallVector<SDValue, 16> Ops;
+
+ // We don't need to merge masks if the root is empty.
+ bool EmptyRoot = (Depth == 0) && (RootMask.size() == 1);
+ if (EmptyRoot) {
+ // Only resolve zeros if it will remove an input, otherwise we might end
+ // up in an infinite loop.
+ bool ResolveKnownZeros = true;
+ if (!OpZero.isNullValue()) {
+ APInt UsedInputs = APInt::getNullValue(OpInputs.size());
+ for (int i = 0, e = OpMask.size(); i != e; ++i) {
+ int M = OpMask[i];
+ if (OpUndef[i] || OpZero[i] || isUndefOrZero(M))
+ continue;
+ UsedInputs.setBit(M / OpMask.size());
+ if (UsedInputs.isAllOnesValue()) {
+ ResolveKnownZeros = false;
+ break;
+ }
+ }
}
- // Add to the end of the Ops list.
- Ops.push_back(Input);
- return Ops.size() - 1;
- };
+ resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero,
+ ResolveKnownZeros);
- SmallVector<int, 2> OpInputIdx;
- for (SDValue OpInput : OpInputs)
- OpInputIdx.push_back(AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1));
-
- assert(((RootMask.size() > OpMask.size() &&
- RootMask.size() % OpMask.size() == 0) ||
- (OpMask.size() > RootMask.size() &&
- OpMask.size() % RootMask.size() == 0) ||
- OpMask.size() == RootMask.size()) &&
- "The smaller number of elements must divide the larger.");
-
- // This function can be performance-critical, so we rely on the power-of-2
- // knowledge that we have about the mask sizes to replace div/rem ops with
- // bit-masks and shifts.
- assert(isPowerOf2_32(RootMask.size()) && "Non-power-of-2 shuffle mask sizes");
- assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes");
- unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size());
- unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size());
-
- unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
- unsigned RootRatio = std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
- unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
- assert((RootRatio == 1 || OpRatio == 1) &&
- "Must not have a ratio for both incoming and op masks!");
-
- assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes");
- assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes");
- assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes");
- unsigned RootRatioLog2 = countTrailingZeros(RootRatio);
- unsigned OpRatioLog2 = countTrailingZeros(OpRatio);
-
- SmallVector<int, 64> Mask(MaskWidth, SM_SentinelUndef);
-
- // Merge this shuffle operation's mask into our accumulated mask. Note that
- // this shuffle's mask will be the first applied to the input, followed by the
- // root mask to get us all the way to the root value arrangement. The reason
- // for this order is that we are recursing up the operation chain.
- for (unsigned i = 0; i < MaskWidth; ++i) {
- unsigned RootIdx = i >> RootRatioLog2;
- if (RootMask[RootIdx] < 0) {
- // This is a zero or undef lane, we're done.
- Mask[i] = RootMask[RootIdx];
- continue;
- }
+ Mask = OpMask;
+ Ops.append(OpInputs.begin(), OpInputs.end());
+ } else {
+ resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero);
+
+ // Add the inputs to the Ops list, avoiding duplicates.
+ Ops.append(SrcOps.begin(), SrcOps.end());
+
+ auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {
+ // Attempt to find an existing match.
+ SDValue InputBC = peekThroughBitcasts(Input);
+ for (int i = 0, e = Ops.size(); i < e; ++i)
+ if (InputBC == peekThroughBitcasts(Ops[i]))
+ return i;
+ // Match failed - should we replace an existing Op?
+ if (InsertionPoint >= 0) {
+ Ops[InsertionPoint] = Input;
+ return InsertionPoint;
+ }
+ // Add to the end of the Ops list.
+ Ops.push_back(Input);
+ return Ops.size() - 1;
+ };
- unsigned RootMaskedIdx =
- RootRatio == 1
- ? RootMask[RootIdx]
- : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));
+ SmallVector<int, 2> OpInputIdx;
+ for (SDValue OpInput : OpInputs)
+ OpInputIdx.push_back(
+ AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1));
+
+ assert(((RootMask.size() > OpMask.size() &&
+ RootMask.size() % OpMask.size() == 0) ||
+ (OpMask.size() > RootMask.size() &&
+ OpMask.size() % RootMask.size() == 0) ||
+ OpMask.size() == RootMask.size()) &&
+ "The smaller number of elements must divide the larger.");
+
+ // This function can be performance-critical, so we rely on the power-of-2
+ // knowledge that we have about the mask sizes to replace div/rem ops with
+ // bit-masks and shifts.
+ assert(isPowerOf2_32(RootMask.size()) &&
+ "Non-power-of-2 shuffle mask sizes");
+ assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes");
+ unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size());
+ unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size());
+
+ unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
+ unsigned RootRatio =
+ std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
+ unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
+ assert((RootRatio == 1 || OpRatio == 1) &&
+ "Must not have a ratio for both incoming and op masks!");
+
+ assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes");
+ assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes");
+ assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes");
+ unsigned RootRatioLog2 = countTrailingZeros(RootRatio);
+ unsigned OpRatioLog2 = countTrailingZeros(OpRatio);
+
+ Mask.resize(MaskWidth, SM_SentinelUndef);
+
+ // Merge this shuffle operation's mask into our accumulated mask. Note that
+ // this shuffle's mask will be the first applied to the input, followed by
+ // the root mask to get us all the way to the root value arrangement. The
+ // reason for this order is that we are recursing up the operation chain.
+ for (unsigned i = 0; i < MaskWidth; ++i) {
+ unsigned RootIdx = i >> RootRatioLog2;
+ if (RootMask[RootIdx] < 0) {
+ // This is a zero or undef lane, we're done.
+ Mask[i] = RootMask[RootIdx];
+ continue;
+ }
- // Just insert the scaled root mask value if it references an input other
- // than the SrcOp we're currently inserting.
- if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
- (((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
- Mask[i] = RootMaskedIdx;
- continue;
- }
+ unsigned RootMaskedIdx =
+ RootRatio == 1
+ ? RootMask[RootIdx]
+ : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));
- RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
- unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
- if (OpMask[OpIdx] < 0) {
- // The incoming lanes are zero or undef, it doesn't matter which ones we
- // are using.
- Mask[i] = OpMask[OpIdx];
- continue;
- }
+ // Just insert the scaled root mask value if it references an input other
+ // than the SrcOp we're currently inserting.
+ if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
+ (((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
+ Mask[i] = RootMaskedIdx;
+ continue;
+ }
- // Ok, we have non-zero lanes, map them through to one of the Op's inputs.
- unsigned OpMaskedIdx =
- OpRatio == 1
- ? OpMask[OpIdx]
- : (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1));
+ RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
+ unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
+ if (OpMask[OpIdx] < 0) {
+ // The incoming lanes are zero or undef, it doesn't matter which ones we
+ // are using.
+ Mask[i] = OpMask[OpIdx];
+ continue;
+ }
- OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
- int InputIdx = OpMask[OpIdx] / (int)OpMask.size();
- assert(0 <= OpInputIdx[InputIdx] && "Unknown target shuffle input");
- OpMaskedIdx += OpInputIdx[InputIdx] * MaskWidth;
+ // Ok, we have non-zero lanes, map them through to one of the Op's inputs.
+ unsigned OpMaskedIdx = OpRatio == 1 ? OpMask[OpIdx]
+ : (OpMask[OpIdx] << OpRatioLog2) +
+ (RootMaskedIdx & (OpRatio - 1));
- Mask[i] = OpMaskedIdx;
+ OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
+ int InputIdx = OpMask[OpIdx] / (int)OpMask.size();
+ assert(0 <= OpInputIdx[InputIdx] && "Unknown target shuffle input");
+ OpMaskedIdx += OpInputIdx[InputIdx] * MaskWidth;
+
+ Mask[i] = OpMaskedIdx;
+ }
}
// Remove unused/repeated shuffle source ops.
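[Editor's note: the ratio arithmetic in the merge loop above is dense, so here is a minimal standalone C++ sketch of the same mask merge. All names (mergeMasks, Root, Op) are illustrative, both widths are assumed to be powers of two (which is why the LLVM code can turn the div/rem below into shifts and AND masks), and negative entries stand for the undef/zero sentinels.]

#include <algorithm>
#include <cstdio>
#include <vector>

// Merge an inner (op) shuffle mask into an outer (root) mask. The op mask
// is applied to the inputs first, then the root mask on top of it.
static std::vector<int> mergeMasks(const std::vector<int> &Root,
                                   const std::vector<int> &Op) {
  size_t Width = std::max(Root.size(), Op.size());
  size_t RootRatio = Width / Root.size(); // one of the two ratios is 1
  size_t OpRatio = Width / Op.size();
  std::vector<int> Mask(Width);
  for (size_t i = 0; i != Width; ++i) {
    int R = Root[i / RootRatio];
    if (R < 0) { Mask[i] = R; continue; } // zero/undef lane wins outright
    size_t RIdx = (size_t)R * RootRatio + i % RootRatio;
    int O = Op[RIdx / OpRatio];
    Mask[i] = O < 0 ? O : (int)((size_t)O * OpRatio + RIdx % OpRatio);
  }
  return Mask;
}

int main() {
  // Root swaps the halves of a v4; op reverses a v8. The merged v8 mask
  // reverses each half in place.
  std::vector<int> Root{2, 3, 0, 1}, Op{7, 6, 5, 4, 3, 2, 1, 0};
  for (int M : mergeMasks(Root, Op))
    std::printf("%d ", M); // prints: 3 2 1 0 7 6 5 4
  std::printf("\n");
}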
@@ -33189,13 +34281,18 @@ static SDValue combineX86ShufflesRecursively(
// the remaining recursion depth.
if (Ops.size() < (MaxRecursionDepth - Depth)) {
for (int i = 0, e = Ops.size(); i < e; ++i) {
+ // For empty roots, we need to resolve zeroable elements before combining
+ // them with other shuffles.
+ SmallVector<int, 64> ResolvedMask = Mask;
+ if (EmptyRoot)
+ resolveTargetShuffleFromZeroables(ResolvedMask, OpUndef, OpZero);
bool AllowVar = false;
if (Ops[i].getNode()->hasOneUse() ||
SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
AllowVar = AllowVariableMask;
if (SDValue Res = combineX86ShufflesRecursively(
- Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask,
- AllowVar, DAG, Subtarget))
+ Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1,
+ HasVariableMask, AllowVar, DAG, Subtarget))
return Res;
}
}
@@ -34207,6 +35304,15 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
In.getOperand(0).getValueType() == MVT::v2i64)
return N->getOperand(0); // return the bitcast
break;
+ case X86ISD::STRICT_CVTTP2SI:
+ case X86ISD::STRICT_CVTTP2UI:
+ case X86ISD::STRICT_CVTSI2P:
+ case X86ISD::STRICT_CVTUI2P:
+ case X86ISD::STRICT_VFPROUND:
+ if (In.getOperand(1).getValueType() == MVT::v2f64 ||
+ In.getOperand(1).getValueType() == MVT::v2i64)
+ return N->getOperand(0);
+ break;
}
}
@@ -34698,6 +35804,23 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return true;
}
+ // If we don't demand all elements, then attempt to combine to a simpler
+ // shuffle.
+ // TODO: Handle other depths, but first we need to handle the fact that
+ // it might combine to the same shuffle.
+ if (!DemandedElts.isAllOnesValue() && Depth == 0) {
+ SmallVector<int, 64> DemandedMask(NumElts, SM_SentinelUndef);
+ for (int i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ DemandedMask[i] = i;
+
+ SDValue NewShuffle = combineX86ShufflesRecursively(
+ {Op}, 0, Op, DemandedMask, {}, Depth, /*HasVarMask*/ false,
+ /*AllowVarMask*/ true, TLO.DAG, Subtarget);
+ if (NewShuffle)
+ return TLO.CombineTo(Op, NewShuffle);
+ }
+
return false;
}
@@ -34739,117 +35862,110 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
case X86ISD::VSHLI: {
SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op1)) {
- if (ShiftImm->getAPIntValue().uge(BitWidth))
- break;
+ unsigned ShAmt = Op.getConstantOperandVal(1);
+ if (ShAmt >= BitWidth)
+ break;
- unsigned ShAmt = ShiftImm->getZExtValue();
- APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
-
- // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
- // single shift. We can do this if the bottom bits (which are shifted
- // out) are never demanded.
- if (Op0.getOpcode() == X86ISD::VSRLI &&
- OriginalDemandedBits.countTrailingZeros() >= ShAmt) {
- if (auto *Shift2Imm = dyn_cast<ConstantSDNode>(Op0.getOperand(1))) {
- if (Shift2Imm->getAPIntValue().ult(BitWidth)) {
- int Diff = ShAmt - Shift2Imm->getZExtValue();
- if (Diff == 0)
- return TLO.CombineTo(Op, Op0.getOperand(0));
-
- unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI;
- SDValue NewShift = TLO.DAG.getNode(
- NewOpc, SDLoc(Op), VT, Op0.getOperand(0),
- TLO.DAG.getTargetConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
- return TLO.CombineTo(Op, NewShift);
- }
- }
+ APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (Op0.getOpcode() == X86ISD::VSRLI &&
+ OriginalDemandedBits.countTrailingZeros() >= ShAmt) {
+ unsigned Shift2Amt = Op0.getConstantOperandVal(1);
+ if (Shift2Amt < BitWidth) {
+ int Diff = ShAmt - Shift2Amt;
+ if (Diff == 0)
+ return TLO.CombineTo(Op, Op0.getOperand(0));
+
+ unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI;
+ SDValue NewShift = TLO.DAG.getNode(
+ NewOpc, SDLoc(Op), VT, Op0.getOperand(0),
+ TLO.DAG.getTargetConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
+ return TLO.CombineTo(Op, NewShift);
}
+ }
- if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
- TLO, Depth + 1))
- return true;
+ if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
+ TLO, Depth + 1))
+ return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- Known.Zero <<= ShAmt;
- Known.One <<= ShAmt;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero <<= ShAmt;
+ Known.One <<= ShAmt;
- // Low bits known zero.
- Known.Zero.setLowBits(ShAmt);
- }
+ // Low bits known zero.
+ Known.Zero.setLowBits(ShAmt);
break;
}
case X86ISD::VSRLI: {
- if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (ShiftImm->getAPIntValue().uge(BitWidth))
- break;
+ unsigned ShAmt = Op.getConstantOperandVal(1);
+ if (ShAmt >= BitWidth)
+ break;
- unsigned ShAmt = ShiftImm->getZExtValue();
- APInt DemandedMask = OriginalDemandedBits << ShAmt;
+ APInt DemandedMask = OriginalDemandedBits << ShAmt;
- if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
- OriginalDemandedElts, Known, TLO, Depth + 1))
- return true;
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
+ OriginalDemandedElts, Known, TLO, Depth + 1))
+ return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- Known.Zero.lshrInPlace(ShAmt);
- Known.One.lshrInPlace(ShAmt);
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
- // High bits known zero.
- Known.Zero.setHighBits(ShAmt);
- }
+ // High bits known zero.
+ Known.Zero.setHighBits(ShAmt);
break;
}
case X86ISD::VSRAI: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op1)) {
- if (ShiftImm->getAPIntValue().uge(BitWidth))
- break;
+ unsigned ShAmt = cast<ConstantSDNode>(Op1)->getZExtValue();
+ if (ShAmt >= BitWidth)
+ break;
- unsigned ShAmt = ShiftImm->getZExtValue();
- APInt DemandedMask = OriginalDemandedBits << ShAmt;
+ APInt DemandedMask = OriginalDemandedBits << ShAmt;
- // If we just want the sign bit then we don't need to shift it.
- if (OriginalDemandedBits.isSignMask())
- return TLO.CombineTo(Op, Op0);
+ // If we just want the sign bit then we don't need to shift it.
+ if (OriginalDemandedBits.isSignMask())
+ return TLO.CombineTo(Op, Op0);
- // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
- if (Op0.getOpcode() == X86ISD::VSHLI && Op1 == Op0.getOperand(1)) {
- SDValue Op00 = Op0.getOperand(0);
- unsigned NumSignBits =
- TLO.DAG.ComputeNumSignBits(Op00, OriginalDemandedElts);
- if (ShAmt < NumSignBits)
- return TLO.CombineTo(Op, Op00);
- }
+ // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
+ if (Op0.getOpcode() == X86ISD::VSHLI &&
+ Op.getOperand(1) == Op0.getOperand(1)) {
+ SDValue Op00 = Op0.getOperand(0);
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op00, OriginalDemandedElts);
+ if (ShAmt < NumSignBits)
+ return TLO.CombineTo(Op, Op00);
+ }
- // If any of the demanded bits are produced by the sign extension, we also
- // demand the input sign bit.
- if (OriginalDemandedBits.countLeadingZeros() < ShAmt)
- DemandedMask.setSignBit();
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ if (OriginalDemandedBits.countLeadingZeros() < ShAmt)
+ DemandedMask.setSignBit();
- if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
- TLO, Depth + 1))
- return true;
+ if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
+ TLO, Depth + 1))
+ return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- Known.Zero.lshrInPlace(ShAmt);
- Known.One.lshrInPlace(ShAmt);
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
- // If the input sign bit is known to be zero, or if none of the top bits
- // are demanded, turn this into an unsigned shift right.
- if (Known.Zero[BitWidth - ShAmt - 1] ||
- OriginalDemandedBits.countLeadingZeros() >= ShAmt)
- return TLO.CombineTo(
- Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1));
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (Known.Zero[BitWidth - ShAmt - 1] ||
+ OriginalDemandedBits.countLeadingZeros() >= ShAmt)
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1));
- // High bits are known one.
- if (Known.One[BitWidth - ShAmt - 1])
- Known.One.setHighBits(ShAmt);
- }
+ // High bits are known one.
+ if (Known.One[BitWidth - ShAmt - 1])
+ Known.One.setHighBits(ShAmt);
break;
}
case X86ISD::PEXTRB:
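[Editor's note: a hedged scalar sketch of the ((X >>u C1) << ShAmt) fold in the VSHLI case above; foldShiftPair is an illustrative name, and the fold is only legal because the demanded-bits mask excludes the low bits on which the two forms differ.]

#include <cassert>
#include <cstdint>

// Collapse (X >> C1) << C2 into a single shift by |C2 - C1|. Valid whenever
// the caller never demands the low C2 bits of the result.
static uint32_t foldShiftPair(uint32_t X, unsigned C1, unsigned C2) {
  int Diff = (int)C2 - (int)C1;
  if (Diff == 0)
    return X;
  return Diff > 0 ? X << Diff : X >> -Diff;
}

int main() {
  uint32_t X = 0xDEADBEEF;
  uint32_t Demanded = ~0u << 7; // the low 7 bits are never demanded
  assert((((X >> 3) << 7) & Demanded) == (foldShiftPair(X, 3, 7) & Demanded));
  assert((((X >> 7) << 3) & Demanded) == (foldShiftPair(X, 7, 3) & Demanded));
  return 0;
}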
@@ -35005,6 +36121,13 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
return Vec;
break;
}
+ case X86ISD::PCMPGT:
+ // icmp sgt(0, R) == ashr(R, BitWidth-1).
+    // If we only need the sign bit, we can use R directly.
+ if (DemandedBits.isSignMask() &&
+ ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
+ return Op.getOperand(1);
+ break;
}
APInt ShuffleUndef, ShuffleZero;
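[Editor's note: a quick scalar check of the PCMPGT identity noted above; illustrative code, not part of the patch. Signed right shift is assumed to be arithmetic, which holds on all mainstream compilers.]

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t R : {5, -5, 0, INT32_MIN, INT32_MAX}) {
    int32_t Mask = (0 > R) ? -1 : 0; // what pcmpgt(0, R) yields per lane
    assert(Mask == (R >> 31));       // == ashr(R, BitWidth - 1)
    assert((Mask < 0) == (R < 0));   // sign bit only: R can stand in
  }
  return 0;
}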
@@ -35053,123 +36176,6 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth);
}
-/// Check if a vector extract from a target-specific shuffle of a load can be
-/// folded into a single element load.
-/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
-/// shuffles have been custom lowered so we need to handle those here.
-static SDValue
-XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
- SDValue InVec = N->getOperand(0);
- SDValue EltNo = N->getOperand(1);
- EVT EltVT = N->getValueType(0);
-
- if (!isa<ConstantSDNode>(EltNo))
- return SDValue();
-
- EVT OriginalVT = InVec.getValueType();
- unsigned NumOriginalElts = OriginalVT.getVectorNumElements();
-
- // Peek through bitcasts, don't duplicate a load with other uses.
- InVec = peekThroughOneUseBitcasts(InVec);
-
- EVT CurrentVT = InVec.getValueType();
- if (!CurrentVT.isVector())
- return SDValue();
-
- unsigned NumCurrentElts = CurrentVT.getVectorNumElements();
- if ((NumOriginalElts % NumCurrentElts) != 0)
- return SDValue();
-
- if (!isTargetShuffle(InVec.getOpcode()))
- return SDValue();
-
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- SmallVector<int, 16> ShuffleMask;
- SmallVector<SDValue, 2> ShuffleOps;
- bool UnaryShuffle;
- if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true,
- ShuffleOps, ShuffleMask, UnaryShuffle))
- return SDValue();
-
- unsigned Scale = NumOriginalElts / NumCurrentElts;
- if (Scale > 1) {
- SmallVector<int, 16> ScaledMask;
- scaleShuffleMask<int>(Scale, ShuffleMask, ScaledMask);
- ShuffleMask = std::move(ScaledMask);
- }
- assert(ShuffleMask.size() == NumOriginalElts && "Shuffle mask size mismatch");
-
- // Select the input vector, guarding against out of range extract vector.
- int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- int Idx = (Elt > (int)NumOriginalElts) ? SM_SentinelUndef : ShuffleMask[Elt];
-
- if (Idx == SM_SentinelZero)
- return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
- : DAG.getConstantFP(+0.0, SDLoc(N), EltVT);
- if (Idx == SM_SentinelUndef)
- return DAG.getUNDEF(EltVT);
-
- // Bail if any mask element is SM_SentinelZero - getVectorShuffle below
- // won't handle it.
- if (llvm::any_of(ShuffleMask, [](int M) { return M == SM_SentinelZero; }))
- return SDValue();
-
- assert(0 <= Idx && Idx < (int)(2 * NumOriginalElts) &&
- "Shuffle index out of range");
- SDValue LdNode = (Idx < (int)NumOriginalElts) ? ShuffleOps[0] : ShuffleOps[1];
-
- // If inputs to shuffle are the same for both ops, then allow 2 uses
- unsigned AllowedUses =
- (ShuffleOps.size() > 1 && ShuffleOps[0] == ShuffleOps[1]) ? 2 : 1;
-
- if (LdNode.getOpcode() == ISD::BITCAST) {
- // Don't duplicate a load with other uses.
- if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
- return SDValue();
-
- AllowedUses = 1; // only allow 1 load use if we have a bitcast
- LdNode = LdNode.getOperand(0);
- }
-
- if (!ISD::isNormalLoad(LdNode.getNode()))
- return SDValue();
-
- LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
-
- if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || !LN0->isSimple())
- return SDValue();
-
- // If there's a bitcast before the shuffle, check if the load type and
- // alignment is valid.
- unsigned Align = LN0->getAlignment();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
- EltVT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT))
- return SDValue();
-
- // All checks match so transform back to vector_shuffle so that DAG combiner
- // can finish the job
- SDLoc dl(N);
-
- // Create shuffle node taking into account the case that its a unary shuffle
- SDValue Shuffle = UnaryShuffle ? DAG.getUNDEF(OriginalVT)
- : DAG.getBitcast(OriginalVT, ShuffleOps[1]);
- Shuffle = DAG.getVectorShuffle(OriginalVT, dl,
- DAG.getBitcast(OriginalVT, ShuffleOps[0]),
- Shuffle, ShuffleMask);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
- EltNo);
-}
-
// Helper to peek through bitops/setcc to determine size of source vector.
// Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) {
@@ -35714,7 +36720,7 @@ static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
const X86Subtarget &Subtarget) {
// Find the appropriate width for the PSADBW.
EVT InVT = Zext0.getOperand(0).getValueType();
- unsigned RegSize = std::max(128u, InVT.getSizeInBits());
+ unsigned RegSize = std::max(128u, (unsigned)InVT.getSizeInBits());
// "Zero-extend" the i8 vectors. This is not a per-element zext, rather we
// fill in the missing vector elements with 0.
@@ -36263,6 +37269,10 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller");
+  // We need at least SSE2 to do anything here.
+ if (!Subtarget.hasSSE2())
+ return SDValue();
+
ISD::NodeType Opc;
SDValue Rdx =
DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD, ISD::FADD}, true);
@@ -36382,8 +37392,9 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDLoc dl(InputVector);
bool IsPextr = N->getOpcode() != ISD::EXTRACT_VECTOR_ELT;
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
- if (CIdx && CIdx->getAPIntValue().uge(SrcVT.getVectorNumElements()))
+ if (CIdx && CIdx->getAPIntValue().uge(NumSrcElts))
return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
// Integer Constant Folding.
@@ -36419,14 +37430,11 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
}
// TODO - Remove this once we can handle the implicit zero-extension of
- // X86ISD::PEXTRW/X86ISD::PEXTRB in XFormVExtractWithShuffleIntoLoad,
- // combineHorizontalPredicateResult and combineBasicSADPattern.
+ // X86ISD::PEXTRW/X86ISD::PEXTRB in combineHorizontalPredicateResult and
+ // combineBasicSADPattern.
return SDValue();
}
- if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
- return NewOp;
-
// Detect mmx extraction of all bits as a i64. It works better as a bitcast.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
@@ -36482,7 +37490,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
};
if (all_of(InputVector->uses(), IsBoolExtract) &&
BoolExtracts.size() > 1) {
- unsigned NumSrcElts = SrcVT.getVectorNumElements();
EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts);
if (SDValue BC =
combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) {
@@ -36568,9 +37575,8 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
if (TValIsAllZeros || FValIsAllOnes) {
SDValue CC = Cond.getOperand(2);
- ISD::CondCode NewCC =
- ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
- Cond.getOperand(0).getValueType().isInteger());
+ ISD::CondCode NewCC = ISD::getSetCCInverse(
+ cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
NewCC);
std::swap(LHS, RHS);
@@ -36761,37 +37767,117 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
if (VT.is512BitVector())
return SDValue();
- // TODO: Add other opcodes eventually lowered into BLEND.
- for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
- UI != UE; ++UI)
- if ((UI->getOpcode() != ISD::VSELECT &&
- UI->getOpcode() != X86ISD::BLENDV) ||
- UI.getOperandNo() != 0)
+ auto OnlyUsedAsSelectCond = [](SDValue Cond) {
+ for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
+ UI != UE; ++UI)
+ if ((UI->getOpcode() != ISD::VSELECT &&
+ UI->getOpcode() != X86ISD::BLENDV) ||
+ UI.getOperandNo() != 0)
+ return false;
+
+ return true;
+ };
+
+ if (OnlyUsedAsSelectCond(Cond)) {
+ APInt DemandedMask(APInt::getSignMask(BitWidth));
+ KnownBits Known;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ if (!TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO, 0, true))
return SDValue();
+ // If we changed the computation somewhere in the DAG, this change will
+ // affect all users of Cond. Update all the nodes so that we do not use
+ // the generic VSELECT anymore. Otherwise, we may perform wrong
+ // optimizations as we messed with the actual expectation for the vector
+ // boolean values.
+ for (SDNode *U : Cond->uses()) {
+ if (U->getOpcode() == X86ISD::BLENDV)
+ continue;
+
+ SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0),
+ Cond, U->getOperand(1), U->getOperand(2));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
+ DCI.AddToWorklist(U);
+ }
+ DCI.CommitTargetLoweringOpt(TLO);
+ return SDValue(N, 0);
+ }
+
+ // Otherwise we can still at least try to simplify multiple use bits.
APInt DemandedMask(APInt::getSignMask(BitWidth));
+ APInt DemandedElts(APInt::getAllOnesValue(VT.getVectorNumElements()));
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
- if (!TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO, 0, true))
+ if (SDValue V = TLI.SimplifyMultipleUseDemandedBits(Cond, DemandedMask,
+ DemandedElts, DAG, 0))
+ return DAG.getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
+ V, N->getOperand(1), N->getOperand(2));
+
+ return SDValue();
+}
+
+// Try to match:
+// (or (and (M, (sub 0, X)), (pandn M, X)))
+// which is a special case of:
+// (select M, (sub 0, X), X)
+// Per:
+// http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
+// We know that, if fNegate is 0 or 1:
+// (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
+//
+// Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
+// ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
+// ( M ? -X : X) == ((X ^ M ) + (M & 1))
+// This lets us transform our vselect to:
+// (add (xor X, M), (and M, 1))
+// And further to:
+// (sub (xor X, M), M)
+static SDValue combineLogicBlendIntoConditionalNegate(
+ EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget) {
+ EVT MaskVT = Mask.getValueType();
+ assert(MaskVT.isInteger() &&
+ DAG.ComputeNumSignBits(Mask) == MaskVT.getScalarSizeInBits() &&
+ "Mask must be zero/all-bits");
+
+ if (X.getValueType() != MaskVT || Y.getValueType() != MaskVT)
+ return SDValue();
+ if (!DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT))
return SDValue();
- // If we changed the computation somewhere in the DAG, this change will
- // affect all users of Cond. Update all the nodes so that we do not use
- // the generic VSELECT anymore. Otherwise, we may perform wrong
- // optimizations as we messed with the actual expectation for the vector
- // boolean values.
- for (SDNode *U : Cond->uses()) {
- if (U->getOpcode() == X86ISD::BLENDV)
- continue;
+ auto IsNegV = [](SDNode *N, SDValue V) {
+ return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
+ ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
+ };
- SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0),
- Cond, U->getOperand(1), U->getOperand(2));
- DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
- DCI.AddToWorklist(U);
- }
- DCI.CommitTargetLoweringOpt(TLO);
- return SDValue(N, 0);
+ SDValue V;
+ if (IsNegV(Y.getNode(), X))
+ V = X;
+ else if (IsNegV(X.getNode(), Y))
+ V = Y;
+ else
+ return SDValue();
+
+ SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
+ SDValue SubOp2 = Mask;
+
+ // If the negate was on the false side of the select, then
+ // the operands of the SUB need to be swapped. PR 27251.
+ // This is because the pattern being matched above is
+  // (vselect M, (sub 0, X), X) -> (sub (xor X, M), M)
+ // but if the pattern matched was
+ // (vselect M, X, (sub (0, X))), that is really negation of the pattern
+ // above, -(vselect M, (sub 0, X), X), and therefore the replacement
+ // pattern also needs to be a negation of the replacement pattern above.
+ // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the
+ // sub accomplishes the negation of the replacement pattern.
+ if (V == Y)
+ std::swap(SubOp1, SubOp2);
+
+ SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
+ return DAG.getBitcast(VT, Res);
}
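[Editor's note: a minimal scalar check of the conditional-negate identity the function above relies on; condNegate is an illustrative name, and the mask must be all-ones or all-zeros, matching the assert in the code.]

#include <cassert>
#include <cstdint>

static int32_t condNegate(int32_t X, int32_t M) {
  return (X ^ M) - M; // (sub (xor X, M), M)
}

int main() {
  assert(condNegate(5, 0) == 5);   // zero mask keeps X
  assert(condNegate(5, -1) == -5); // all-ones mask negates X
  assert(condNegate(-7, -1) == 7);
  return 0;
}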
/// Do target-specific dag combines on SELECT and VSELECT nodes.
@@ -36811,10 +37897,21 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool CondConstantVector = ISD::isBuildVectorOfConstantSDNodes(Cond.getNode());
+
+ // Attempt to combine (select M, (sub 0, X), X) -> (sub (xor X, M), M).
+ // Limit this to cases of non-constant masks that createShuffleMaskFromVSELECT
+ // can't catch, plus vXi8 cases where we'd likely end up with BLENDV.
+ if (CondVT.isVector() && CondVT.isInteger() &&
+ CondVT.getScalarSizeInBits() == VT.getScalarSizeInBits() &&
+ (!CondConstantVector || CondVT.getScalarType() == MVT::i8) &&
+ DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits())
+ if (SDValue V = combineLogicBlendIntoConditionalNegate(VT, Cond, RHS, LHS,
+ DL, DAG, Subtarget))
+ return V;
// Convert vselects with constant condition into shuffles.
- if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
- DCI.isBeforeLegalizeOps()) {
+ if (CondConstantVector && DCI.isBeforeLegalizeOps()) {
SmallVector<int, 64> Mask;
if (createShuffleMaskFromVSELECT(Mask, Cond))
return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
@@ -36843,7 +37940,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!DAG.getTarget().Options.UnsafeFPMath &&
+ if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!(DAG.isKnownNeverZeroFloat(LHS) ||
DAG.isKnownNeverZeroFloat(RHS)))
break;
@@ -36854,7 +37951,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
- if (!DAG.getTarget().Options.UnsafeFPMath &&
+ if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS))
break;
Opcode = X86ISD::FMIN;
@@ -36873,7 +37970,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
- if (!DAG.getTarget().Options.UnsafeFPMath &&
+ if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS))
break;
Opcode = X86ISD::FMAX;
@@ -36883,7 +37980,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!DAG.getTarget().Options.UnsafeFPMath &&
+ if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!(DAG.isKnownNeverZeroFloat(LHS) ||
DAG.isKnownNeverZeroFloat(RHS)))
break;
@@ -36911,7 +38008,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!DAG.getTarget().Options.UnsafeFPMath &&
+ if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!(DAG.isKnownNeverZeroFloat(LHS) ||
DAG.isKnownNeverZeroFloat(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
@@ -36922,8 +38019,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
break;
case ISD::SETUGT:
// Converting this to a min would handle NaNs incorrectly.
- if (!DAG.getTarget().Options.UnsafeFPMath &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
Opcode = X86ISD::FMIN;
break;
@@ -36948,7 +38044,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!DAG.getTarget().Options.UnsafeFPMath &&
+ if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
!DAG.isKnownNeverZeroFloat(LHS) &&
!DAG.isKnownNeverZeroFloat(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
@@ -37093,7 +38189,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
SDValue Other;
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
Other = RHS;
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, VT.getVectorElementType());
} else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
Other = LHS;
}
@@ -37165,7 +38261,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
SDValue Other;
if (ISD::isBuildVectorAllOnes(LHS.getNode())) {
Other = RHS;
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, VT.getVectorElementType());
} else if (ISD::isBuildVectorAllOnes(RHS.getNode())) {
Other = LHS;
}
@@ -37788,7 +38884,7 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
}
/// Different mul shrinking modes.
-enum ShrinkMode { MULS8, MULU8, MULS16, MULU16 };
+enum class ShrinkMode { MULS8, MULU8, MULS16, MULU16 };
static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) {
EVT VT = N->getOperand(0).getValueType();
@@ -37809,16 +38905,16 @@ static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) {
unsigned MinSignBits = std::min(SignBits[0], SignBits[1]);
// When ranges are from -128 ~ 127, use MULS8 mode.
if (MinSignBits >= 25)
- Mode = MULS8;
+ Mode = ShrinkMode::MULS8;
// When ranges are from 0 ~ 255, use MULU8 mode.
else if (AllPositive && MinSignBits >= 24)
- Mode = MULU8;
+ Mode = ShrinkMode::MULU8;
// When ranges are from -32768 ~ 32767, use MULS16 mode.
else if (MinSignBits >= 17)
- Mode = MULS16;
+ Mode = ShrinkMode::MULS16;
// When ranges are from 0 ~ 65535, use MULU16 mode.
else if (AllPositive && MinSignBits >= 16)
- Mode = MULU16;
+ Mode = ShrinkMode::MULU16;
else
return false;
return true;
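[Editor's note: a hedged sketch of why the sign-bit thresholds above imply the narrower multiply modes — 25 or more sign bits in an i32 leave only 8 significant bits, 17 or more leave only 16.]

#include <cassert>
#include <cstdint>

int main() {
  // >= 25 sign bits: truncating to i8 and sign-extending is lossless (MULS8).
  int32_t V = -100; // any value in [-128, 127]
  assert((int32_t)(int8_t)V == V);
  // >= 17 sign bits: the low 16 bits suffice (MULS16).
  int32_t W = -30000; // any value in [-32768, 32767]
  assert((int32_t)(int16_t)W == W);
  return 0;
}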
@@ -37888,15 +38984,17 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
// Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
// lower part is needed.
SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
- if (Mode == MULU8 || Mode == MULS8)
- return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
+ if (Mode == ShrinkMode::MULU8 || Mode == ShrinkMode::MULS8)
+ return DAG.getNode((Mode == ShrinkMode::MULU8) ? ISD::ZERO_EXTEND
+ : ISD::SIGN_EXTEND,
DL, VT, MulLo);
MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
// Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
// the higher part is also needed.
- SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
- ReducedVT, NewN0, NewN1);
+ SDValue MulHi =
+ DAG.getNode(Mode == ShrinkMode::MULS16 ? ISD::MULHS : ISD::MULHU, DL,
+ ReducedVT, NewN0, NewN1);
// Repack the lower part and higher part result of mul into a wider
// result.
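[Editor's note: a per-lane scalar sketch of the pmullw/pmulhw repack described above; illustrative code for a single lane, not part of the patch.]

#include <cassert>
#include <cstdint>

int main() {
  int16_t A = -1234, B = 5678;
  uint16_t Lo = (uint16_t)(A * B);                // pmullw: low 16 bits
  int16_t Hi = (int16_t)(((int32_t)A * B) >> 16); // pmulhw: high 16 bits
  uint32_t Repacked = ((uint32_t)(uint16_t)Hi << 16) | Lo;
  assert((int32_t)Repacked == (int32_t)A * B);    // full 16x16->32 product
  return 0;
}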
@@ -38294,7 +39392,7 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
// We shift all of the values by one. In many cases we do not have
// hardware support for this operation. This is better expressed as an ADD
// of two values.
- if (N1SplatC->getAPIntValue() == 1)
+ if (N1SplatC->isOne())
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
@@ -38546,15 +39644,15 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode;
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
"Unexpected value type");
- assert(N1.getValueType() == MVT::i8 && "Unexpected shift amount type");
+ assert(N->getOperand(1).getValueType() == MVT::i8 &&
+ "Unexpected shift amount type");
// Out of range logical bit shifts are guaranteed to be zero.
// Out of range arithmetic bit shifts splat the sign bit.
- unsigned ShiftVal = cast<ConstantSDNode>(N1)->getZExtValue();
+ unsigned ShiftVal = N->getConstantOperandVal(1);
if (ShiftVal >= NumBitsPerElt) {
if (LogicalShift)
return DAG.getConstant(0, SDLoc(N), VT);
@@ -39094,6 +40192,71 @@ static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), Setnp);
}
+
+// Look for (and (bitcast (vXi1 (concat_vectors (vYi1 setcc), undef, ...))), C)
+// where C is a mask containing the same number of bits as the setcc and
+// where the setcc will freely zero the upper bits of the k-register. We can
+// replace the undef in the concat with 0s and remove the AND. This mainly
+// helps with v2i1/v4i1 setcc being cast to scalar.
+static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
+
+ EVT VT = N->getValueType(0);
+
+ // Make sure this is an AND with constant. We will check the value of the
+ // constant later.
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ // This is implied by the ConstantSDNode.
+ assert(!VT.isVector() && "Expected scalar VT!");
+
+ if (N->getOperand(0).getOpcode() != ISD::BITCAST ||
+ !N->getOperand(0).hasOneUse() ||
+ !N->getOperand(0).getOperand(0).hasOneUse())
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Src = N->getOperand(0).getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::i1 ||
+ !TLI.isTypeLegal(SrcVT))
+ return SDValue();
+
+ if (Src.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+
+ // We only care about the first subvector of the concat, we expect the
+ // other subvectors to be ignored due to the AND if we make the change.
+ SDValue SubVec = Src.getOperand(0);
+ EVT SubVecVT = SubVec.getValueType();
+
+ // First subvector should be a setcc with a legal result type. The RHS of the
+ // AND should be a mask with this many bits.
+ if (SubVec.getOpcode() != ISD::SETCC || !TLI.isTypeLegal(SubVecVT) ||
+ !N->getConstantOperandAPInt(1).isMask(SubVecVT.getVectorNumElements()))
+ return SDValue();
+
+ EVT SetccVT = SubVec.getOperand(0).getValueType();
+ if (!TLI.isTypeLegal(SetccVT) ||
+ !(Subtarget.hasVLX() || SetccVT.is512BitVector()))
+ return SDValue();
+
+ if (!(Subtarget.hasBWI() || SetccVT.getScalarSizeInBits() >= 32))
+ return SDValue();
+
+ // We passed all the checks. Rebuild the concat_vectors with zeroes
+ // and cast it back to VT.
+ SDLoc dl(N);
+ SmallVector<SDValue, 4> Ops(Src.getNumOperands(),
+ DAG.getConstant(0, dl, SubVecVT));
+ Ops[0] = SubVec;
+  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, Ops);
+ return DAG.getBitcast(VT, Concat);
+}
+
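[Editor's note: an integer-level illustration of the rewrite above, with illustrative names; once the undef upper lanes of the concat are rebuilt as zeros, the AND against the low-lane mask is redundant.]

#include <cassert>
#include <cstdint>

int main() {
  uint8_t SetccLanes = 0x0A;                 // v4i1 setcc result, one bit/lane
  uint8_t WithGarbage = 0xF0 | SetccLanes;   // concat with undef upper lanes
  uint8_t WithZeros = SetccLanes;            // concat with zeroed upper lanes
  assert((WithGarbage & 0x0F) == WithZeros); // the AND is now unnecessary
  return 0;
}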
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -39132,9 +40295,12 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps) &&
SrcOps.size() == 1) {
SDLoc dl(N);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
+ if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
+ Mask = DAG.getBitcast(MaskVT, SrcOps[0]);
if (Mask) {
APInt AllBits = APInt::getAllOnesValue(NumElts);
return DAG.getSetCC(dl, MVT::i1, Mask,
@@ -39143,6 +40309,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
}
}
+ if (SDValue V = combineScalarAndWithMaskSetcc(N, DAG, Subtarget))
+ return V;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -39290,68 +40459,6 @@ static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) {
return true;
}
-// Try to match:
-// (or (and (M, (sub 0, X)), (pandn M, X)))
-// which is a special case of vselect:
-// (vselect M, (sub 0, X), X)
-// Per:
-// http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
-// We know that, if fNegate is 0 or 1:
-// (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
-//
-// Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
-// ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
-// ( M ? -X : X) == ((X ^ M ) + (M & 1))
-// This lets us transform our vselect to:
-// (add (xor X, M), (and M, 1))
-// And further to:
-// (sub (xor X, M), M)
-static SDValue combineLogicBlendIntoConditionalNegate(
- EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL,
- SelectionDAG &DAG, const X86Subtarget &Subtarget) {
- EVT MaskVT = Mask.getValueType();
- assert(MaskVT.isInteger() &&
- DAG.ComputeNumSignBits(Mask) == MaskVT.getScalarSizeInBits() &&
- "Mask must be zero/all-bits");
-
- if (X.getValueType() != MaskVT || Y.getValueType() != MaskVT)
- return SDValue();
- if (!DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT))
- return SDValue();
-
- auto IsNegV = [](SDNode *N, SDValue V) {
- return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
- ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
- };
-
- SDValue V;
- if (IsNegV(Y.getNode(), X))
- V = X;
- else if (IsNegV(X.getNode(), Y))
- V = Y;
- else
- return SDValue();
-
- SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
- SDValue SubOp2 = Mask;
-
- // If the negate was on the false side of the select, then
- // the operands of the SUB need to be swapped. PR 27251.
- // This is because the pattern being matched above is
- // (vselect M, (sub (0, X), X) -> (sub (xor X, M), M)
- // but if the pattern matched was
- // (vselect M, X, (sub (0, X))), that is really negation of the pattern
- // above, -(vselect M, (sub 0, X), X), and therefore the replacement
- // pattern also needs to be a negation of the replacement pattern above.
- // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the
- // sub accomplishes the negation of the replacement pattern.
- if (V == Y)
- std::swap(SubOp1, SubOp2);
-
- SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
- return DAG.getBitcast(VT, Res);
-}
-
// Try to fold:
// (or (and (m, y), (pandn m, x)))
// into:
@@ -39512,66 +40619,20 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
return Ret;
}
-static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+static SDValue combineOrShiftToFunnelShift(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert(N->getOpcode() == ISD::OR && "Expected ISD::OR node");
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // If this is SSE1 only convert to FOR to avoid scalarization.
- if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
- return DAG.getBitcast(MVT::v4i32,
- DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32,
- DAG.getBitcast(MVT::v4f32, N0),
- DAG.getBitcast(MVT::v4f32, N1)));
- }
-
- // Match any-of bool scalar reductions into a bitcast/movmsk + cmp.
- // TODO: Support multiple SrcOps.
- if (VT == MVT::i1) {
- SmallVector<SDValue, 2> SrcOps;
- if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) &&
- SrcOps.size() == 1) {
- SDLoc dl(N);
- unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
- EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
- SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
- if (Mask) {
- APInt AllBits = APInt::getNullValue(NumElts);
- return DAG.getSetCC(dl, MVT::i1, Mask,
- DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE);
- }
- }
- }
-
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
- if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
- return R;
-
- if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
- return FPLogic;
-
- if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
- return R;
-
- if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
- return R;
-
- // Attempt to recursively combine an OR of shuffles.
- if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
- SDValue Op(N, 0);
- if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
- return Res;
- }
-
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ if (!TLI.isOperationLegalOrCustom(ISD::FSHL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::FSHR, VT))
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
- bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool OptForSize = DAG.shouldOptForSize();
unsigned Bits = VT.getScalarSizeInBits();
// SHLD/SHRD instructions have lower register pressure, but on some
@@ -39589,11 +40650,13 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
+ EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+
SDValue ShAmt0 = N0.getOperand(1);
- if (ShAmt0.getValueType() != MVT::i8)
+ if (ShAmt0.getValueType() != ShiftVT)
return SDValue();
SDValue ShAmt1 = N1.getOperand(1);
- if (ShAmt1.getValueType() != MVT::i8)
+ if (ShAmt1.getValueType() != ShiftVT)
return SDValue();
// Peek through any modulo shift masks.
@@ -39628,12 +40691,12 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
std::swap(ShMsk0, ShMsk1);
}
- auto GetFunnelShift = [&DAG, &DL, VT, Opc](SDValue Op0, SDValue Op1,
- SDValue Amt) {
+ auto GetFunnelShift = [&DAG, &DL, VT, Opc, &ShiftVT](SDValue Op0, SDValue Op1,
+ SDValue Amt) {
if (Opc == ISD::FSHR)
std::swap(Op0, Op1);
return DAG.getNode(Opc, DL, VT, Op0, Op1,
- DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Amt));
+ DAG.getNode(ISD::TRUNCATE, DL, ShiftVT, Amt));
};
// OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> FSHL( X, Y, C )
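[Editor's note: background for the FSHL/FSHR matching above — a hedged sketch of 64-bit funnel-shift semantics. fshl64 is an illustrative name, and the sketch relies on the GCC/Clang unsigned __int128 extension.]

#include <cassert>
#include <cstdint>

// fshl(X, Y, C): concatenate X:Y into a 128-bit value, shift left by
// C mod 64, and keep the high 64 bits.
static uint64_t fshl64(uint64_t X, uint64_t Y, unsigned C) {
  unsigned __int128 Cat = ((unsigned __int128)X << 64) | Y;
  return (uint64_t)((Cat << (C & 63)) >> 64);
}

int main() {
  uint64_t X = 0x0123456789ABCDEFULL, Y = 0xFEDCBA9876543210ULL;
  assert(fshl64(X, Y, 8) == ((X << 8) | (Y >> 56))); // the OR-of-shifts form
  assert(fshl64(X, Y, 0) == X);
  return 0;
}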
@@ -39674,7 +40737,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
(ShAmt1Op0 == ShAmt0 || ShAmt1Op0 == ShMsk0)) {
if (Op1.getOpcode() == InnerShift &&
isa<ConstantSDNode>(Op1.getOperand(1)) &&
- Op1.getConstantOperandAPInt(1) == 1) {
+ Op1.getConstantOperandAPInt(1).isOneValue()) {
return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0);
}
// Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ).
@@ -39689,6 +40752,70 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // If this is SSE1 only convert to FOR to avoid scalarization.
+ if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
+ return DAG.getBitcast(MVT::v4i32,
+ DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32,
+ DAG.getBitcast(MVT::v4f32, N0),
+ DAG.getBitcast(MVT::v4f32, N1)));
+ }
+
+ // Match any-of bool scalar reductions into a bitcast/movmsk + cmp.
+ // TODO: Support multiple SrcOps.
+ if (VT == MVT::i1) {
+ SmallVector<SDValue, 2> SrcOps;
+ if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) &&
+ SrcOps.size() == 1) {
+ SDLoc dl(N);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
+ EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+ SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
+ if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
+ Mask = DAG.getBitcast(MaskVT, SrcOps[0]);
+ if (Mask) {
+ APInt AllBits = APInt::getNullValue(NumElts);
+ return DAG.getSetCC(dl, MVT::i1, Mask,
+ DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE);
+ }
+ }
+ }
+
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
+ return R;
+
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
+ return FPLogic;
+
+ if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
+ return R;
+
+ if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
+ return R;
+
+ if (SDValue R = combineOrShiftToFunnelShift(N, DAG, Subtarget))
+ return R;
+
+ // Attempt to recursively combine an OR of shuffles.
+ if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+ SDValue Op(N, 0);
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
+ return Res;
+ }
+
+ return SDValue();
+}
+
/// Try to turn tests against the signbit in the form of:
/// XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
/// into:
@@ -39758,8 +40885,8 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
default: return SDValue();
case MVT::v16i8:
case MVT::v8i16:
- case MVT::v4i32: if (!Subtarget.hasSSE2()) return SDValue(); break;
- case MVT::v2i64: if (!Subtarget.hasSSE42()) return SDValue(); break;
+ case MVT::v4i32:
+ case MVT::v2i64: if (!Subtarget.hasSSE2()) return SDValue(); break;
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
@@ -39783,7 +40910,7 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
// Create a greater-than comparison against -1. We don't use the more obvious
// greater-than-or-equal-to-zero because SSE/AVX don't have that instruction.
- return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
+ return DAG.getSetCC(SDLoc(N), VT, Shift.getOperand(0), Ones, ISD::SETGT);
}
/// Detect patterns of truncation with unsigned saturation:
@@ -39950,7 +41077,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isTypeLegal(InVT) && InVT.isVector() && SVT != MVT::i1 &&
Subtarget.hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI())) {
- unsigned TruncOpc;
+ unsigned TruncOpc = 0;
SDValue SatVal;
if (auto SSatVal = detectSSatPattern(In, VT)) {
SatVal = SSatVal;
@@ -40252,6 +41379,7 @@ static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
static SDValue
reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
+ assert(ML->isUnindexed() && "Unexpected indexed masked load!");
// TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
// However, some target hooks may need to be added to know when the transform
// is profitable. Endianness would also have to be considered.
@@ -40279,6 +41407,7 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
static SDValue
combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
+ assert(ML->isUnindexed() && "Unexpected indexed masked load!");
if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode()))
return SDValue();
@@ -40314,10 +41443,10 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
// The new masked load has an undef pass-through operand. The select uses the
// original pass-through operand.
- SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
- ML->getMask(), DAG.getUNDEF(VT),
- ML->getMemoryVT(), ML->getMemOperand(),
- ML->getExtensionType());
+ SDValue NewML = DAG.getMaskedLoad(
+ VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(),
+ DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(),
+ ML->getAddressingMode(), ML->getExtensionType());
SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
ML->getPassThru());
@@ -40403,8 +41532,9 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
Mst->getMemoryVT())) {
return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
- Mst->getBasePtr(), Mask,
- Mst->getMemoryVT(), Mst->getMemOperand(), true);
+ Mst->getBasePtr(), Mst->getOffset(), Mask,
+ Mst->getMemoryVT(), Mst->getMemOperand(),
+ Mst->getAddressingMode(), true);
}
return SDValue();
@@ -40593,59 +41723,24 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
cast<LoadSDNode>(St->getValue())->isSimple() &&
St->getChain().hasOneUse() && St->isSimple()) {
LoadSDNode *Ld = cast<LoadSDNode>(St->getValue().getNode());
- SmallVector<SDValue, 8> Ops;
if (!ISD::isNormalLoad(Ld))
return SDValue();
- // If this is not the MMX case, i.e. we are just turning i64 load/store
- // into f64 load/store, avoid the transformation if there are multiple
- // uses of the loaded value.
- if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
+ // Avoid the transformation if there are multiple uses of the loaded value.
+ if (!Ld->hasNUsesOfValue(1, 0))
return SDValue();
SDLoc LdDL(Ld);
SDLoc StDL(N);
- // If we are a 64-bit capable x86, lower to a single movq load/store pair.
- // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
- // pair instead.
- if (Subtarget.is64Bit() || F64IsLegal) {
- MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
- SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
- Ld->getMemOperand());
-
- // Make sure new load is placed in same chain order.
- DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
- return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(),
- St->getMemOperand());
- }
-
- // Otherwise, lower to two pairs of 32-bit loads / stores.
- SDValue LoAddr = Ld->getBasePtr();
- SDValue HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, LdDL);
-
- SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
- Ld->getPointerInfo(), Ld->getAlignment(),
- Ld->getMemOperand()->getFlags());
- SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
- Ld->getPointerInfo().getWithOffset(4),
- MinAlign(Ld->getAlignment(), 4),
- Ld->getMemOperand()->getFlags());
- // Make sure new loads are placed in same chain order.
- DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
- DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
-
- LoAddr = St->getBasePtr();
- HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, StDL);
-
- SDValue LoSt =
- DAG.getStore(St->getChain(), StDL, LoLd, LoAddr, St->getPointerInfo(),
- St->getAlignment(), St->getMemOperand()->getFlags());
- SDValue HiSt = DAG.getStore(St->getChain(), StDL, HiLd, HiAddr,
- St->getPointerInfo().getWithOffset(4),
- MinAlign(St->getAlignment(), 4),
- St->getMemOperand()->getFlags());
- return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
+ // Lower to a single movq load/store pair.
+ SDValue NewLd = DAG.getLoad(MVT::f64, LdDL, Ld->getChain(),
+ Ld->getBasePtr(), Ld->getMemOperand());
+
+ // Make sure new load is placed in same chain order.
+ DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
+ return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(),
+ St->getMemOperand());
}
// This is similar to the above case, but here we handle a scalar 64-bit
@@ -41351,23 +42446,25 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) {
SDValue Op = peekThroughBitcasts(SDValue(N, 0));
EVT VT = Op->getValueType(0);
- // Make sure the element size does't change.
+
+ // Make sure the element size doesn't change.
if (VT.getScalarSizeInBits() != ScalarSize)
return SDValue();
- if (auto SVOp = dyn_cast<ShuffleVectorSDNode>(Op.getNode())) {
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ case ISD::VECTOR_SHUFFLE: {
// For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate
// of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.
- if (!SVOp->getOperand(1).isUndef())
+ if (!Op.getOperand(1).isUndef())
return SDValue();
- if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode(), Depth + 1))
+ if (SDValue NegOp0 = isFNEG(DAG, Op.getOperand(0).getNode(), Depth + 1))
if (NegOp0.getValueType() == VT) // FIXME: Can we do better?
- return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
- SVOp->getMask());
- return SDValue();
+ return DAG.getVectorShuffle(VT, SDLoc(Op), NegOp0, DAG.getUNDEF(VT),
+ cast<ShuffleVectorSDNode>(Op)->getMask());
+ break;
}
- unsigned Opc = Op.getOpcode();
- if (Opc == ISD::INSERT_VECTOR_ELT) {
+ case ISD::INSERT_VECTOR_ELT: {
// Negate of INSERT_VECTOR_ELT(UNDEF, V, INDEX) is INSERT_VECTOR_ELT(UNDEF,
// -V, INDEX).
SDValue InsVector = Op.getOperand(0);
@@ -41378,34 +42475,35 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) {
if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
NegInsVal, Op.getOperand(2));
- return SDValue();
+ break;
}
+ case ISD::FSUB:
+ case ISD::XOR:
+ case X86ISD::FXOR: {
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op0 = Op.getOperand(0);
- if (Opc != X86ISD::FXOR && Opc != ISD::XOR && Opc != ISD::FSUB)
- return SDValue();
-
- SDValue Op1 = Op.getOperand(1);
- SDValue Op0 = Op.getOperand(0);
-
- // For XOR and FXOR, we want to check if constant bits of Op1 are sign bit
- // masks. For FSUB, we have to check if constant bits of Op0 are sign bit
- // masks and hence we swap the operands.
- if (Opc == ISD::FSUB)
- std::swap(Op0, Op1);
+      // For XOR and FXOR, we want to check if constant bits of Op1 are sign
+      // bit masks. For FSUB, we have to check if constant bits of Op0 are
+      // sign bit masks and hence we swap the operands.
+ if (Opc == ISD::FSUB)
+ std::swap(Op0, Op1);
- APInt UndefElts;
- SmallVector<APInt, 16> EltBits;
- // Extract constant bits and see if they are all sign bit masks. Ignore the
- // undef elements.
- if (getTargetConstantBitsFromNode(Op1, ScalarSize,
- UndefElts, EltBits,
- /* AllowWholeUndefs */ true,
- /* AllowPartialUndefs */ false)) {
- for (unsigned I = 0, E = EltBits.size(); I < E; I++)
- if (!UndefElts[I] && !EltBits[I].isSignMask())
- return SDValue();
+ APInt UndefElts;
+ SmallVector<APInt, 16> EltBits;
+      // Extract constant bits and see if they are all sign bit masks. Ignore
+      // the undef elements.
+ if (getTargetConstantBitsFromNode(Op1, ScalarSize, UndefElts, EltBits,
+ /* AllowWholeUndefs */ true,
+ /* AllowPartialUndefs */ false)) {
+ for (unsigned I = 0, E = EltBits.size(); I < E; I++)
+ if (!UndefElts[I] && !EltBits[I].isSignMask())
+ return SDValue();
- return peekThroughBitcasts(Op0);
+ return peekThroughBitcasts(Op0);
+ }
+ }
}
return SDValue();
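A hedged standalone illustration (plain C++, not from the patch) of the sign-bit-mask identity the FSUB/XOR/FXOR cases above test for:

#include <cstdint>
#include <cstring>

// XOR with the sign-bit mask flips only the sign bit, which is exactly an
// IEEE fneg; this is why isFNEG() accepts XOR/FXOR whose constant operand
// consists entirely of sign masks.
static float fnegViaXor(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof Bits);
  Bits ^= 0x80000000u;                 // the f32 sign-mask bit pattern
  std::memcpy(&X, &Bits, sizeof X);
  return X;                            // behaves as -X for all inputs
}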
@@ -41642,8 +42740,7 @@ static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) {
return SDValue();
SDValue LHS = N->getOperand(0);
- auto *RHSC = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!RHSC || RHSC->getZExtValue() != 1 || LHS->getOpcode() != X86ISD::SETCC)
+ if (!isOneConstant(N->getOperand(1)) || LHS->getOpcode() != X86ISD::SETCC)
return SDValue();
X86::CondCode NewCC = X86::GetOppositeBranchCondition(
@@ -41817,8 +42914,9 @@ static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
- // Only perform optimizations if UnsafeMath is used.
- if (!DAG.getTarget().Options.UnsafeFPMath)
+ // FMIN/FMAX are commutative if no NaNs and no negative zeros are allowed.
+ if (!DAG.getTarget().Options.NoNaNsFPMath ||
+ !DAG.getTarget().Options.NoSignedZerosFPMath)
return SDValue();
// If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
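For context, a hedged scalar model (an assumption about x86 MINPS/MINSS semantics, not code from the patch) of why the commutation above is gated on both flags:

// x86 MINPS/MINSS behave like (a < b) ? a : b, returning the *second*
// operand on NaN (unordered compare) and on the +0.0/-0.0 tie, so the
// operation is only commutative once NaNs and signed zeros are excluded.
static float minpsScalar(float A, float B) { return (A < B) ? A : B; }
// minpsScalar(0.0f, -0.0f) yields -0.0f, but minpsScalar(-0.0f, 0.0f)
// yields +0.0f; with a NaN in either slot the result is always B.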
@@ -41943,6 +43041,7 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
+ // FIXME: Handle strict fp nodes.
EVT VT = N->getValueType(0);
// Convert a full vector load into vzload when not all bits are needed.
@@ -41951,7 +43050,7 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
assert(InVT.is128BitVector() && "Expected 128-bit input vector");
- LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+ LoadSDNode *LN = cast<LoadSDNode>(In);
// Unless the load is volatile or atomic.
if (LN->isSimple()) {
SDLoc dl(N);
@@ -42569,6 +43668,44 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a
+/// recognizable memcmp expansion.
+static bool isOrXorXorTree(SDValue X, bool Root = true) {
+ if (X.getOpcode() == ISD::OR)
+ return isOrXorXorTree(X.getOperand(0), false) &&
+ isOrXorXorTree(X.getOperand(1), false);
+ if (Root)
+ return false;
+ return X.getOpcode() == ISD::XOR;
+}
+
+/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp
+/// expansion.
+template<typename F>
+static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG,
+ EVT VecVT, EVT CmpVT, bool HasPT, F SToV) {
+ SDValue Op0 = X.getOperand(0);
+ SDValue Op1 = X.getOperand(1);
+ if (X.getOpcode() == ISD::OR) {
+ SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT, HasPT, SToV);
+ SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT, HasPT, SToV);
+ if (VecVT != CmpVT)
+ return DAG.getNode(ISD::OR, DL, CmpVT, A, B);
+ if (HasPT)
+ return DAG.getNode(ISD::OR, DL, VecVT, A, B);
+ return DAG.getNode(ISD::AND, DL, CmpVT, A, B);
+ } else if (X.getOpcode() == ISD::XOR) {
+ SDValue A = SToV(Op0);
+ SDValue B = SToV(Op1);
+ if (VecVT != CmpVT)
+ return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE);
+ if (HasPT)
+ return DAG.getNode(ISD::XOR, DL, VecVT, A, B);
+ return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
+ }
+ llvm_unreachable("Impossible");
+}
+
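A minimal standalone sketch (simplified Node type, an assumption; not the real SDValue API) of the OR-of-XORs shape the two helpers above walk: every interior node is an OR, every leaf is an XOR, and the root itself must be an OR.

// Accepts e.g. (or (xor A, B), (or (xor C, D), (xor E, F))).
struct Node { unsigned Opc; const Node *L, *R; };
enum { OP_OR = 1, OP_XOR = 2 };
static bool isOrXorXorTreeSketch(const Node *N, bool Root = true) {
  if (N->Opc == OP_OR)
    return isOrXorXorTreeSketch(N->L, false) &&
           isOrXorXorTreeSketch(N->R, false);
  if (Root)                  // a bare XOR at the root is rejected
    return false;
  return N->Opc == OP_XOR;
}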
/// Try to map a 128-bit or larger integer comparison to vector instructions
/// before type legalization splits it up into chunks.
static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
@@ -42589,10 +43726,8 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
// logically-combined vector-sized operands compared to zero. This pattern may
// be generated by the memcmp expansion pass with oversized integer compares
// (see PR33325).
- bool IsOrXorXorCCZero = isNullConstant(Y) && X.getOpcode() == ISD::OR &&
- X.getOperand(0).getOpcode() == ISD::XOR &&
- X.getOperand(1).getOpcode() == ISD::XOR;
- if (isNullConstant(Y) && !IsOrXorXorCCZero)
+ bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
+ if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
return SDValue();
// Don't perform this combine if constructing the vector will be expensive.
@@ -42602,66 +43737,102 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
X.getOpcode() == ISD::LOAD;
};
if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
- !IsOrXorXorCCZero)
+ !IsOrXorXorTreeCCZero)
return SDValue();
EVT VT = SetCC->getValueType(0);
SDLoc DL(SetCC);
bool HasAVX = Subtarget.hasAVX();
- // Use XOR (plus OR) and PTEST after SSE4.1 and before AVX512.
+ // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
+ // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
// Otherwise use PCMPEQ (plus AND) and mask testing.
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
(OpSize == 256 && HasAVX) ||
(OpSize == 512 && Subtarget.useAVX512Regs())) {
bool HasPT = Subtarget.hasSSE41();
+
+    // PTEST and MOVMSK are slow on Knights Landing and Knights Mill, and
+    // widened vector registers are essentially free. (Technically, widening
+    // registers prevents load folding, but the tradeoff is worth it.)
+ bool PreferKOT = Subtarget.preferMaskRegisters();
+ bool NeedZExt = PreferKOT && !Subtarget.hasVLX() && OpSize != 512;
+
EVT VecVT = MVT::v16i8;
- EVT CmpVT = MVT::v16i8;
- if (OpSize == 256)
- VecVT = CmpVT = MVT::v32i8;
- if (OpSize == 512) {
+ EVT CmpVT = PreferKOT ? MVT::v16i1 : VecVT;
+ if (OpSize == 256) {
+ VecVT = MVT::v32i8;
+ CmpVT = PreferKOT ? MVT::v32i1 : VecVT;
+ }
+ EVT CastVT = VecVT;
+ bool NeedsAVX512FCast = false;
+ if (OpSize == 512 || NeedZExt) {
if (Subtarget.hasBWI()) {
VecVT = MVT::v64i8;
CmpVT = MVT::v64i1;
+ if (OpSize == 512)
+ CastVT = VecVT;
} else {
VecVT = MVT::v16i32;
CmpVT = MVT::v16i1;
+ CastVT = OpSize == 512 ? VecVT :
+ OpSize == 256 ? MVT::v8i32 : MVT::v4i32;
+ NeedsAVX512FCast = true;
+ }
+ }
+
+ auto ScalarToVector = [&](SDValue X) -> SDValue {
+ bool TmpZext = false;
+ EVT TmpCastVT = CastVT;
+ if (X.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue OrigX = X.getOperand(0);
+ unsigned OrigSize = OrigX.getScalarValueSizeInBits();
+ if (OrigSize < OpSize) {
+ if (OrigSize == 128) {
+ TmpCastVT = NeedsAVX512FCast ? MVT::v4i32 : MVT::v16i8;
+ X = OrigX;
+ TmpZext = true;
+ } else if (OrigSize == 256) {
+ TmpCastVT = NeedsAVX512FCast ? MVT::v8i32 : MVT::v32i8;
+ X = OrigX;
+ TmpZext = true;
+ }
+ }
}
- }
+ X = DAG.getBitcast(TmpCastVT, X);
+ if (!NeedZExt && !TmpZext)
+ return X;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT VecIdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT,
+ DAG.getConstant(0, DL, VecVT), X,
+ DAG.getConstant(0, DL, VecIdxVT));
+ };
SDValue Cmp;
- if (IsOrXorXorCCZero) {
+ if (IsOrXorXorTreeCCZero) {
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
// setcc i128 (or (xor A, B), (xor C, D)), 0, eq|ne
// Use 2 vector equality compares and 'and' the results before doing a
// MOVMSK.
- SDValue A = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(0));
- SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1));
- SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0));
- SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1));
- if (VecVT == CmpVT && HasPT) {
- SDValue Cmp1 = DAG.getNode(ISD::XOR, DL, VecVT, A, B);
- SDValue Cmp2 = DAG.getNode(ISD::XOR, DL, VecVT, C, D);
- Cmp = DAG.getNode(ISD::OR, DL, VecVT, Cmp1, Cmp2);
- } else {
- SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
- SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ);
- Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2);
- }
+ Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT, HasPT, ScalarToVector);
} else {
- SDValue VecX = DAG.getBitcast(VecVT, X);
- SDValue VecY = DAG.getBitcast(VecVT, Y);
- if (VecVT == CmpVT && HasPT) {
+ SDValue VecX = ScalarToVector(X);
+ SDValue VecY = ScalarToVector(Y);
+ if (VecVT != CmpVT) {
+ Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
+ } else if (HasPT) {
Cmp = DAG.getNode(ISD::XOR, DL, VecVT, VecX, VecY);
} else {
Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
}
}
- // For 512-bits we want to emit a setcc that will lower to kortest.
+ // AVX512 should emit a setcc that will lower to kortest.
if (VecVT != CmpVT) {
- EVT KRegVT = CmpVT == MVT::v64i1 ? MVT::i64 : MVT::i16;
- SDValue Mask = DAG.getAllOnesConstant(DL, KRegVT);
- return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp), Mask, CC);
+ EVT KRegVT = CmpVT == MVT::v64i1 ? MVT::i64 :
+ CmpVT == MVT::v32i1 ? MVT::i32 : MVT::i16;
+ return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp),
+ DAG.getConstant(0, DL, KRegVT), CC);
}
if (HasPT) {
SDValue BCCmp = DAG.getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64,
@@ -42687,9 +43858,9 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
+ const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ const SDValue LHS = N->getOperand(0);
+ const SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT OpVT = LHS.getValueType();
SDLoc DL(N);
@@ -42716,30 +43887,35 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
(CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
- // Put build_vectors on the right.
- if (LHS.getOpcode() == ISD::BUILD_VECTOR) {
- std::swap(LHS, RHS);
- CC = ISD::getSetCCSwappedOperands(CC);
+ // Using temporaries to avoid messing up operand ordering for later
+ // transformations if this doesn't work.
+ SDValue Op0 = LHS;
+ SDValue Op1 = RHS;
+ ISD::CondCode TmpCC = CC;
+ // Put build_vector on the right.
+ if (Op0.getOpcode() == ISD::BUILD_VECTOR) {
+ std::swap(Op0, Op1);
+ TmpCC = ISD::getSetCCSwappedOperands(TmpCC);
}
bool IsSEXT0 =
- (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
- (LHS.getOperand(0).getValueType().getVectorElementType() == MVT::i1);
- bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
+ (Op0.getOpcode() == ISD::SIGN_EXTEND) &&
+ (Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1);
+ bool IsVZero1 = ISD::isBuildVectorAllZeros(Op1.getNode());
if (IsSEXT0 && IsVZero1) {
- assert(VT == LHS.getOperand(0).getValueType() &&
+ assert(VT == Op0.getOperand(0).getValueType() &&
"Uexpected operand type");
- if (CC == ISD::SETGT)
+ if (TmpCC == ISD::SETGT)
return DAG.getConstant(0, DL, VT);
- if (CC == ISD::SETLE)
+ if (TmpCC == ISD::SETLE)
return DAG.getConstant(1, DL, VT);
- if (CC == ISD::SETEQ || CC == ISD::SETGE)
- return DAG.getNOT(DL, LHS.getOperand(0), VT);
+ if (TmpCC == ISD::SETEQ || TmpCC == ISD::SETGE)
+ return DAG.getNOT(DL, Op0.getOperand(0), VT);
- assert((CC == ISD::SETNE || CC == ISD::SETLT) &&
+ assert((TmpCC == ISD::SETNE || TmpCC == ISD::SETLT) &&
"Unexpected condition code!");
- return LHS.getOperand(0);
+ return Op0.getOperand(0);
}
}
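A hedged scalar check (plain C++, not from the patch) of the four identities used above for a sign-extended i1 compared against zero:

#include <cstdint>
// sext(i1 X) is 0 or -1, so each compare against zero collapses:
static bool foldSetccSextI1(bool X, char CC) {
  int64_t S = X ? -1 : 0;            // the sign-extended i1
  switch (CC) {
  case '>': return S > 0;            // SETGT: always false -> constant 0
  case 'L': return S <= 0;           // SETLE: always true  -> constant 1
  case '=': return S == 0;           // SETEQ/SETGE: equals !X -> getNOT(...)
  default : return S != 0;           // SETNE/SETLT: equals X  -> the operand
  }
}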
@@ -42752,8 +43928,7 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
VT.getVectorElementType() == MVT::i1 &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
- SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS,
- N->getOperand(2));
+ SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
}
@@ -42985,16 +44160,18 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
// unary operation isn't a bitwise AND, or if the sizes of the operations
// aren't the same.
EVT VT = N->getValueType(0);
- if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
- N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
- VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
+ if (!VT.isVector() || Op0->getOpcode() != ISD::AND ||
+ Op0->getOperand(0)->getOpcode() != ISD::SETCC ||
+ VT.getSizeInBits() != Op0.getValueSizeInBits())
return SDValue();
// Now check that the other operand of the AND is a constant. We could
// make the transformation for non-constant splats as well, but it's unclear
// that would be a benefit as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
- if (auto *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(0).getOperand(1))) {
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(Op0.getOperand(1))) {
// Bail out if the vector isn't a constant.
if (!BV->isConstant())
return SDValue();
@@ -43004,12 +44181,19 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
EVT IntVT = BV->getValueType(0);
// Create a new constant of the appropriate type for the transformed
// DAG.
- SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
+ SDValue SourceConst;
+ if (IsStrict)
+ SourceConst = DAG.getNode(N->getOpcode(), DL, {VT, MVT::Other},
+ {N->getOperand(0), SDValue(BV, 0)});
+ else
+ SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst);
- SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
- N->getOperand(0)->getOperand(0), MaskConst);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0),
+ MaskConst);
SDValue Res = DAG.getBitcast(VT, NewAnd);
+ if (IsStrict)
+ return DAG.getMergeValues({Res, SourceConst.getValue(1)}, DL);
return Res;
}
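A hedged scalar analogue (not LLVM code) of the identity this combine relies on, now extended to the strict variants: with an all-ones-or-all-zeros setcc mask, converting (Mask & C) equals masking the pre-converted constant.

#include <cstdint>
#include <cstring>
// sint_to_fp(Mask & C) == bitcast(Mask & bitcast(sint_to_fp C)) whenever
// Mask is all-ones or all-zeros, so the conversion folds onto the constant.
static float convertThenMask(uint32_t Mask /* 0 or ~0u */, int32_t C) {
  float FC = static_cast<float>(C);   // the conversion now hits a constant
  uint32_t Bits;
  std::memcpy(&Bits, &FC, sizeof Bits);
  Bits &= Mask;                       // the AND stays in the integer domain
  float R;
  std::memcpy(&R, &Bits, sizeof R);
  return R;                           // == (float)(Mask ? C : 0)
}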
@@ -43053,7 +44237,8 @@ static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG) {
static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- SDValue Op0 = N->getOperand(0);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
@@ -43067,14 +44252,21 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
// UINT_TO_FP isn't legal without AVX512 so use SINT_TO_FP.
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+ {N->getOperand(0), P});
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
- if (DAG.SignBitIsZero(Op0))
+ if (DAG.SignBitIsZero(Op0)) {
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(N), {VT, MVT::Other},
+ {N->getOperand(0), Op0});
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
+ }
return SDValue();
}
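A hedged one-liner (plain C++, not from the patch) of the sign-bit argument used above:

#include <cstdint>
// With the sign bit known zero, signed and unsigned conversion agree, so
// UINT_TO_FP can be lowered as SINT_TO_FP (native cvtsi2ss/cvtsi2sd).
static float uitofpAsSitofp(uint32_t X) {
  // Precondition mirrored from DAG.SignBitIsZero(Op0): X < 0x80000000u.
  return static_cast<float>(static_cast<int32_t>(X));
}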
@@ -43084,11 +44276,12 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
+ bool IsStrict = N->isStrictFPOpcode();
if (SDValue Res = combineVectorCompareAndMaskUnaryOp(N, DAG))
return Res;
// Now move on to more general possibilities.
- SDValue Op0 = N->getOperand(0);
+ SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
@@ -43100,6 +44293,9 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+ {N->getOperand(0), P});
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
@@ -43117,6 +44313,9 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
SDLoc dl(N);
if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+ {N->getOperand(0), Trunc});
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
}
// If we're after legalize and the type is v2i32 we need to shuffle and
@@ -43125,6 +44324,9 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0);
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast,
{ 0, 2, -1, -1 });
+ if (IsStrict)
+ return DAG.getNode(X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},
+ {N->getOperand(0), Shuf});
return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf);
}
}
@@ -43148,13 +44350,16 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
if (Ld->isSimple() && !VT.isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!Subtarget.is64Bit() && LdVT == MVT::i64) {
- SDValue FILDChain = Subtarget.getTargetLowering()->BuildFILD(
+ std::pair<SDValue, SDValue> Tmp = Subtarget.getTargetLowering()->BuildFILD(
SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG);
- DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
- return FILDChain;
+ DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Tmp.second);
+ return Tmp.first;
}
}
+ if (IsStrict)
+ return SDValue();
+
if (SDValue V = combineToFPTruncExtElt(N, DAG))
return V;
@@ -43579,7 +44784,8 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
auto UsePMADDWD = [&](SDValue Op) {
ShrinkMode Mode;
return Op.getOpcode() == ISD::MUL &&
- canReduceVMulWidth(Op.getNode(), DAG, Mode) && Mode != MULU16 &&
+ canReduceVMulWidth(Op.getNode(), DAG, Mode) &&
+ Mode != ShrinkMode::MULU16 &&
(!Subtarget.hasSSE41() ||
(Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
Op->isOnlyUserOf(Op.getOperand(1).getNode())));
@@ -43784,7 +44990,8 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
// Check if the Mul source can be safely shrunk.
ShrinkMode Mode;
- if (!canReduceVMulWidth(Mul.getNode(), DAG, Mode) || Mode == MULU16)
+ if (!canReduceVMulWidth(Mul.getNode(), DAG, Mode) ||
+ Mode == ShrinkMode::MULU16)
return SDValue();
auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
@@ -44468,7 +45675,6 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
SDValue InVec = N->getOperand(0);
SDValue InVecBC = peekThroughBitcasts(InVec);
EVT InVecVT = InVec.getValueType();
- EVT InVecBCVT = InVecBC.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (Subtarget.hasAVX() && !Subtarget.hasAVX2() &&
@@ -44512,31 +45718,6 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
VT, SDLoc(N),
InVec.getNode()->ops().slice(IdxVal, VT.getVectorNumElements()));
- // Try to move vector bitcast after extract_subv by scaling extraction index:
- // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
- // TODO: Move this to DAGCombiner::visitEXTRACT_SUBVECTOR
- if (InVec != InVecBC && InVecBCVT.isVector()) {
- unsigned SrcNumElts = InVecBCVT.getVectorNumElements();
- unsigned DestNumElts = InVecVT.getVectorNumElements();
- if ((DestNumElts % SrcNumElts) == 0) {
- unsigned DestSrcRatio = DestNumElts / SrcNumElts;
- if ((VT.getVectorNumElements() % DestSrcRatio) == 0) {
- unsigned NewExtNumElts = VT.getVectorNumElements() / DestSrcRatio;
- EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
- InVecBCVT.getScalarType(), NewExtNumElts);
- if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 &&
- TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
- unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
- SDLoc DL(N);
- SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
- SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
- InVecBC, NewIndex);
- return DAG.getBitcast(VT, NewExtract);
- }
- }
- }
- }
-
// If we are extracting from an insert into a zero vector, replace with a
// smaller insert into zero if we don't access less than the original
// subvector. Don't do this for i1 vectors.
@@ -44583,7 +45764,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), VT, InVec.getOperand(0));
}
// v2f64 CVTUDQ2PD(v4i32).
- if (InOpcode == ISD::UINT_TO_FP &&
+ if (InOpcode == ISD::UINT_TO_FP && Subtarget.hasVLX() &&
InVec.getOperand(0).getValueType() == MVT::v4i32) {
return DAG.getNode(X86ISD::CVTUI2P, SDLoc(N), VT, InVec.getOperand(0));
}
@@ -44751,6 +45932,9 @@ static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
+ if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
@@ -44802,8 +45986,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
- case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, DCI, Subtarget);
- case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
+ case ISD::SINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
+ return combineSIntToFP(N, DAG, DCI, Subtarget);
+ case ISD::UINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
index 6f7e90008de4..3a17099da38f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
@@ -598,6 +598,34 @@ namespace llvm {
// For avx512-vp2intersect
VP2INTERSECT,
+ /// X86 strict FP compare instructions.
+ STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
+ STRICT_FCMPS,
+
+ // Vector packed double/float comparison.
+ STRICT_CMPP,
+
+ /// Vector comparison generating mask bits for fp and
+ /// integer signed and unsigned data types.
+ STRICT_CMPM,
+
+ // Vector float/double to signed/unsigned integer with truncation.
+ STRICT_CVTTP2SI, STRICT_CVTTP2UI,
+
+ // Vector FP extend.
+ STRICT_VFPEXT,
+
+ // Vector FP round.
+ STRICT_VFPROUND,
+
+ // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
+ // Also used by the legacy (V)ROUND intrinsics where we mask out the
+ // scaling part of the immediate.
+ STRICT_VRNDSCALE,
+
+ // Vector signed/unsigned integer to float/double.
+ STRICT_CVTSI2P, STRICT_CVTUI2P,
+
// Compare and swap.
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
@@ -969,9 +997,7 @@ namespace llvm {
unsigned
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
- if (ConstraintCode == "i")
- return InlineAsm::Constraint_i;
- else if (ConstraintCode == "o")
+ if (ConstraintCode == "o")
return InlineAsm::Constraint_o;
else if (ConstraintCode == "v")
return InlineAsm::Constraint_v;
@@ -1056,7 +1082,8 @@ namespace llvm {
/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
/// method returns true, otherwise fmuladd is expanded to fmul + fadd.
- bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
/// Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
@@ -1125,9 +1152,6 @@ namespace llvm {
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const override;
- bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
- bool IsSigned) const override;
-
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
/// with this index.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
@@ -1165,7 +1189,7 @@ namespace llvm {
return nullptr; // nothing to do, move along.
}
- Register getRegisterByName(const char* RegName, EVT VT,
+ Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
/// If a physical register, this returns the register that receives the
@@ -1203,8 +1227,9 @@ namespace llvm {
/// offset as appropriate.
Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
- SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
- SelectionDAG &DAG) const;
+ std::pair<SDValue, SDValue> BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
+ SDValue StackSlot,
+ SelectionDAG &DAG) const;
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
@@ -1315,7 +1340,8 @@ namespace llvm {
unsigned getAddressSpace(void) const;
- SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
+ SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned,
+ SDValue &Chain) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -1340,6 +1366,7 @@ namespace llvm {
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
@@ -1358,8 +1385,7 @@ namespace llvm {
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
@@ -1477,20 +1503,15 @@ namespace llvm {
MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- /// Emit nodes that will be selected as "cmp Op0,Op1", or something
- /// equivalent, for use with the given x86 condition code.
- SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
- SelectionDAG &DAG) const;
-
/// Convert a comparison if required by the subtarget.
SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
/// Emit flags for the given setcc condition and operands. Also returns the
/// corresponding X86 condition code constant in X86CC.
- SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
- ISD::CondCode CC, const SDLoc &dl,
- SelectionDAG &DAG,
- SDValue &X86CC) const;
+ SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG,
+ SDValue &X86CC, SDValue &Chain,
+ bool IsSignaling) const;
/// Check if replacement of SQRT with RSQRT should be disabled.
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
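For context on the isFMAFasterThanFMulAndFAdd() hook amended above, a hedged sketch (plain C++, not LLVM code) of the two expansions it chooses between for llvm.fmuladd:

#include <cmath>
// llvm.fmuladd(a, b, c): one rounding step when fused, two when split.
static double expandFused(double A, double B, double C) {
  return std::fma(A, B, C);  // hook returned true: a single FMA
}
static double expandSplit(double A, double B, double C) {
  return A * B + C;          // hook returned false: fmul + fadd
}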
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
index cc0f59ab329d..48d0d8a35704 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -48,12 +48,12 @@ private:
static char ID;
/// Machine instruction info used throughout the class.
- const X86InstrInfo *TII;
+ const X86InstrInfo *TII = nullptr;
/// Endbr opcode for the current machine function.
- unsigned int EndbrOpcode;
+ unsigned int EndbrOpcode = 0;
- /// Adds a new ENDBR instruction to the begining of the MBB.
+ /// Adds a new ENDBR instruction to the beginning of the MBB.
   /// The function will not add it if one already exists.
/// It will add ENDBR32 or ENDBR64 opcode, depending on the target.
/// \returns true if the ENDBR was added and false otherwise.
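A hedged sketch (hypothetical helper, not the pass's real interface) of the choice the comment above describes:

// The pass inserts ENDBR64 on 64-bit targets and ENDBR32 otherwise, and
// only when the block does not already begin with one.
static const char *pickEndbr(bool Is64Bit, bool AlreadyPresent) {
  if (AlreadyPresent)
    return nullptr;                    // nothing to add
  return Is64Bit ? "endbr64" : "endbr32";
}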
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index 9b5de59430a5..32f012033fb0 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2078,7 +2078,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
+ timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
@@ -2089,8 +2089,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
timm:$cc),
(OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+ let Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
@@ -2111,7 +2112,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
_.FRC:$src2,
timm:$cc))]>,
- EVEX_4V, VEX_LIG, Sched<[sched]>;
+ EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
def rm : AVX512Ii8<0xC2, MRMSrcMem,
(outs _.KRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
@@ -2121,7 +2122,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
(_.ScalarLdFrag addr:$src2),
timm:$cc))]>,
EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
@@ -2522,11 +2523,12 @@ def X86cmpm_imm_commute : SDNodeXForm<timm, [{
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
- (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
+ (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1>, Sched<[sched]>;
@@ -2534,8 +2536,8 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
- (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
- timm:$cc),
+ (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+ timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
timm:$cc)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -2546,17 +2548,18 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"vcmp"#_.Suffix,
"$cc, ${src2}"#_.BroadcastStr#", $src1",
"$src1, ${src2}"#_.BroadcastStr#", $cc",
- (X86cmpm (_.VT _.RC:$src1),
- (_.VT (_.BroadcastLdFrag addr:$src2)),
- timm:$cc),
+ (X86any_cmpm (_.VT _.RC:$src1),
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1),
(_.VT (_.BroadcastLdFrag addr:$src2)),
timm:$cc)>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ }
// Patterns for selecting with loads in other operand.
- def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
- timm:$cc),
+ def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
+ timm:$cc),
(!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
@@ -2567,8 +2570,8 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
_.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
- def : Pat<(X86cmpm (_.BroadcastLdFrag addr:$src2),
- (_.VT _.RC:$src1), timm:$cc),
+ def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
+ (_.VT _.RC:$src1), timm:$cc),
(!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
(X86cmpm_imm_commute timm:$cc))>;
@@ -2582,6 +2585,7 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
   // comparison code form (VCMP[EQ/LT/LE/...])
+ let Uses = [MXCSR] in
defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
@@ -2639,7 +2643,7 @@ def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
Predicate prd> {
- let Predicates = [prd], ExeDomain = _.ExeDomain in {
+ let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -2679,7 +2683,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
string mem>{
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -3197,8 +3201,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
-def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2), timm:$cc)),
+def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT Narrow.RC:$src2), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrri")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3215,8 +3219,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
timm:$cc), Narrow.KRC)>;
// Broadcast load.
-def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
+def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3231,8 +3235,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
addr:$src2, timm:$cc), Narrow.KRC)>;
// Commuted with broadcast load.
-def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
- (Narrow.VT Narrow.RC:$src1), timm:$cc)),
+def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
+ (Narrow.VT Narrow.RC:$src1), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3928,6 +3932,17 @@ def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
(VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
+// Conversions between masks and scalar fp.
+def : Pat<(v32i1 (bitconvert FR32X:$src)),
+ (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
+def : Pat<(f32 (bitconvert VK32:$src)),
+ (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
+
+def : Pat<(v64i1 (bitconvert FR64X:$src)),
+ (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
+def : Pat<(f64 (bitconvert VK64:$src)),
+ (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
+
//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
@@ -5278,7 +5293,7 @@ defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode VecNode,
X86FoldableSchedWrite sched, bit IsCommutable> {
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -5312,7 +5327,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode VecNode, X86FoldableSchedWrite sched,
bit IsCommutable = 0> {
- let ExeDomain = _.ExeDomain in
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -5329,16 +5344,17 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
- Sched<[sched]>;
+ Sched<[sched]>, SIMD_EXC;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2))>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
- let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
+ let isCodeGenOnly = 1, Predicates = [HasAVX512],
+ Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -5356,6 +5372,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
}
+ let Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
@@ -5391,13 +5408,13 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
NAME#"SD">,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
-defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
+defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
SchedWriteFAddSizes, 1>;
-defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
+defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
SchedWriteFMulSizes, 1>;
-defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
+defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
SchedWriteFAddSizes, 0>;
-defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
+defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
SchedWriteFCmpSizes, 0>;
@@ -5429,27 +5446,28 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
SchedWriteFCmp.Scl, "VMINCSS">, XS,
- EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
SchedWriteFCmp.Scl, "VMINCSD">, XD,
VEX_W, EVEX_4V, VEX_LIG,
- EVEX_CD8<64, CD8VT1>;
+ EVEX_CD8<64, CD8VT1>, SIMD_EXC;
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
SchedWriteFCmp.Scl, "VMAXCSS">, XS,
- EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
SchedWriteFCmp.Scl, "VMAXCSD">, XD,
VEX_W, EVEX_4V, VEX_LIG,
- EVEX_CD8<64, CD8VT1>;
+ EVEX_CD8<64, CD8VT1>, SIMD_EXC;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
bit IsCommutable,
bit IsKCommutable = IsCommutable> {
- let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
+ let ExeDomain = _.ExeDomain, hasSideEffects = 0,
+ Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
@@ -5476,7 +5494,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNodeRnd,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -5487,7 +5505,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNodeSAE,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
@@ -5526,6 +5544,7 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator Op
}
}
+let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
X86SchedWriteSizes sched> {
defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
@@ -5536,6 +5555,7 @@ multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeR
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
+let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
X86SchedWriteSizes sched> {
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
@@ -5546,16 +5566,16 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
-defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
+defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512,
SchedWriteFAddSizes, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
-defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512,
SchedWriteFMulSizes, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
-defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512,
SchedWriteFAddSizes>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
-defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512,
SchedWriteFDivSizes>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
@@ -5570,6 +5590,7 @@ let isCodeGenOnly = 1 in {
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
SchedWriteFCmpSizes, 1>;
}
+let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
@@ -5578,10 +5599,11 @@ defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
+}
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
@@ -5603,7 +5625,7 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
@@ -6399,7 +6421,8 @@ let Predicates = [HasAVX512] in {
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
- let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
+ Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
@@ -6425,7 +6448,8 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
- let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
+ Uses = [MXCSR] in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
@@ -6462,7 +6486,7 @@ multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
VEX_W;
}
-defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
+defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
@@ -6473,7 +6497,8 @@ defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubR
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
- let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
+ Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
@@ -6500,7 +6525,8 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
- let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
+ Uses = [MXCSR] in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
@@ -6538,7 +6564,7 @@ multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
VEX_W;
}
-defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
+defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
@@ -6548,7 +6574,8 @@ defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubR
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
- let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
+ Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
@@ -6578,7 +6605,8 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
- let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
+ Uses = [MXCSR] in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
@@ -6616,7 +6644,7 @@ multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
VEX_W;
}
-defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
+defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
@@ -6630,14 +6658,15 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;
+ AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
let mayLoad = 1 in
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
+ AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
+ let Uses = [MXCSR] in
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
@@ -6648,13 +6677,14 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
+ !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
+ [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
+ let Uses = [MXCSR] in
def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
!strconcat(OpcodeStr,
@@ -6711,7 +6741,7 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
}
}
-defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
+defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
@@ -6918,7 +6948,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
}
}
-defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
+defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SS",
X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
X86Movss, v4f32x_info, fp32imm0>;
@@ -6927,7 +6957,7 @@ defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
+defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SD",
X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
X86Movsd, v2f64x_info, fp64imm0>;
@@ -6997,7 +7027,10 @@ defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
RegisterClass SrcRC, X86VectorVTInfo DstVT,
X86MemOperand x86memop, PatFrag ld_frag, string asm,
- string mem> {
+ string mem, list<Register> _Uses = [MXCSR],
+ bit _mayRaiseFPException = 1> {
+let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
+ mayRaiseFPException = _mayRaiseFPException in {
let hasSideEffects = 0, isCodeGenOnly = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, SrcRC:$src),
@@ -7023,6 +7056,7 @@ multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSched
(OpNode (DstVT.VT DstVT.RC:$src1),
(ld_frag addr:$src2)))]>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+}
def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
@@ -7032,6 +7066,7 @@ multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
X86FoldableSchedWrite sched, RegisterClass SrcRC,
X86VectorVTInfo DstVT, string asm,
string mem> {
+ let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
!strconcat(asm,
@@ -7066,7 +7101,7 @@ defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
- v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
+ v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
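// VCVTSI2SDZ passes [] and 0 for the new _Uses/_mayRaiseFPException
// parameters (and likewise VCVTUSI2SDZ below): every i32 is exactly
// representable in an f64 significand, so the 32-bit cvtsi2sd can neither
// round nor raise a precision exception and needs no ordering against MXCSR.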
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
WriteCvtI2SD, GR64,
@@ -7078,22 +7113,22 @@ def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
-def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f32 (sint_to_fp GR32:$src)),
+def : Pat<(f32 (any_sint_to_fp GR32:$src)),
(VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
-def : Pat<(f32 (sint_to_fp GR64:$src)),
+def : Pat<(f32 (any_sint_to_fp GR64:$src)),
(VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
-def : Pat<(f64 (sint_to_fp GR32:$src)),
+def : Pat<(f64 (any_sint_to_fp GR32:$src)),
(VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
-def : Pat<(f64 (sint_to_fp GR64:$src)),
+def : Pat<(f64 (any_sint_to_fp GR64:$src)),
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
@@ -7105,7 +7140,7 @@ defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
- i32mem, loadi32, "cvtusi2sd", "l">,
+ i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
WriteCvtI2SD, GR64,
@@ -7117,22 +7152,22 @@ def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
-def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
+def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
+def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
(VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
+def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
+def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
(VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f32 (uint_to_fp GR32:$src)),
+def : Pat<(f32 (any_uint_to_fp GR32:$src)),
(VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
-def : Pat<(f32 (uint_to_fp GR64:$src)),
+def : Pat<(f32 (any_uint_to_fp GR64:$src)),
(VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
-def : Pat<(f64 (uint_to_fp GR32:$src)),
+def : Pat<(f64 (any_uint_to_fp GR32:$src)),
(VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
-def : Pat<(f64 (uint_to_fp GR64:$src)),
+def : Pat<(f64 (any_uint_to_fp GR64:$src)),
(VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
@@ -7145,11 +7180,12 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
string aliasStr> {
- let Predicates = [HasAVX512] in {
+ let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
- EVEX, VEX_LIG, Sched<[sched]>;
+ EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+ let Uses = [MXCSR] in
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
[(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
@@ -7159,7 +7195,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
(SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
- EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} // Predicates = [HasAVX512]
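// Setting ExeDomain = SrcVT.ExeDomain here gives the execution-domain fix
// pass the information it needs to keep these conversions in the proper
// packed-single/packed-double domain and avoid cross-domain bypass delays.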
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
@@ -7202,82 +7238,82 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2u
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
(VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
(VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
(VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
(VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
(VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
(VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
(VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
(VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
(VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
(VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
(VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
(VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
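// The X86Movss/X86Movsd patterns above match the "insert converted scalar
// into an existing vector" DAG that front ends typically produce for the
// _mm_cvtsi32_ss family of intrinsics, and steer it to the _Int forms so
// the upper destination elements are merged rather than left undefined.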
@@ -7286,22 +7322,23 @@ multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
SDNode OpNodeInt, SDNode OpNodeSAE,
X86FoldableSchedWrite sched, string aliasStr>{
-let Predicates = [HasAVX512] in {
+let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
- EVEX, VEX_LIG, Sched<[sched]>;
+ EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
- EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
- EVEX, VEX_LIG, Sched<[sched]>;
+ EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+ let Uses = [MXCSR] in
def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
[(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
@@ -7311,7 +7348,7 @@ let Predicates = [HasAVX512] in {
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst,
(OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
- EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} //HasAVX512
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
@@ -7324,35 +7361,36 @@ let Predicates = [HasAVX512] in {
}
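// Note the asymmetry above: the {sae} forms keep "let Uses = [MXCSR]" but
// are not SIMD_EXC. Suppress-all-exceptions means they can never trap, yet
// they still read MXCSR control state (e.g. DAZ), so the register
// dependence is preserved while mayRaiseFPException stays clear.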
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
- fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
+ any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
"{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
- fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
+ any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
"{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
- fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
+ any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
"{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
- fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
+ any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
"{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
- fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
+ any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
"{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
- fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
+ any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
"{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
- fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
+ any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
"{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
- fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
+ any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
"{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512  Convert from float to double and back
//===----------------------------------------------------------------------===//
+let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,
X86FoldableSchedWrite sched> {
@@ -7387,6 +7425,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
+ let Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
@@ -7399,6 +7438,7 @@ multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTIn
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd,
X86FoldableSchedWrite sched> {
+ let Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -7435,28 +7475,28 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f32x_info,
f64x_info>;
-def : Pat<(f64 (fpextend FR32X:$src)),
+def : Pat<(f64 (any_fpextend FR32X:$src)),
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
Requires<[HasAVX512]>;
-def : Pat<(f64 (fpextend (loadf32 addr:$src))),
+def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
-def : Pat<(f32 (fpround FR64X:$src)),
+def : Pat<(f32 (any_fpround FR64X:$src)),
(VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
- (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
+ (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector
- (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
+ (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
@@ -7472,7 +7512,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
string Alias = "", X86MemOperand MemOp = _Src.MemOp,
RegisterClass MaskRC = _.KRCWM,
dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
-
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src),
(ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
@@ -7512,11 +7552,13 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
_.RC:$src0),
vselect, "$src0 = $dst">,
EVEX, EVEX_B, Sched<[sched.Folded]>;
+ }
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
+ let Uses = [MXCSR] in
defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
@@ -7528,6 +7570,7 @@ multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd,
X86FoldableSchedWrite sched> {
+ let Uses = [MXCSR] in
defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
"$rc, $src", "$src, $rc",
@@ -7551,14 +7594,14 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
- fpextend, sched.ZMM>,
+ any_fpextend, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
X86vfpextSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
- X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
- defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
+ X86any_vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, any_fpextend,
sched.YMM>, EVEX_V256;
}
}
@@ -7566,7 +7609,7 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86any_vfpround, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
@@ -7574,7 +7617,7 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
EVEX_V128;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86any_vfpround,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
}
@@ -7624,70 +7667,10 @@ defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
PS, EVEX_CD8<32, CD8VH>;
-let Predicates = [HasAVX512] in {
- def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
- (VCVTPD2PSZrr VR512:$src)>;
- def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
- VR256X:$src0),
- (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
- def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
- v8f32x_info.ImmAllZerosV),
- (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
-
- def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
- (VCVTPD2PSZrm addr:$src)>;
- def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
- VR256X:$src0),
- (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
- def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
- v8f32x_info.ImmAllZerosV),
- (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
-
- def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))),
- (VCVTPD2PSZrmb addr:$src)>;
- def : Pat<(vselect VK8WM:$mask,
- (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
- (v8f32 VR256X:$src0)),
- (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
- def : Pat<(vselect VK8WM:$mask,
- (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
- v8f32x_info.ImmAllZerosV),
- (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
-}
-
let Predicates = [HasVLX] in {
- def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
- (VCVTPD2PSZ256rr VR256X:$src)>;
- def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
- VR128X:$src0),
- (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
- def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
- v4f32x_info.ImmAllZerosV),
- (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
-
- def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
- (VCVTPD2PSZ256rm addr:$src)>;
- def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
- VR128X:$src0),
- (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
- def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
- v4f32x_info.ImmAllZerosV),
- (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
-
- def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
- (VCVTPD2PSZ256rmb addr:$src)>;
- def : Pat<(vselect VK4WM:$mask,
- (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
- VR128X:$src0),
- (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
- def : Pat<(vselect VK4WM:$mask,
- (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
- v4f32x_info.ImmAllZerosV),
- (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
-
// Special patterns to allow use of X86vmfpround for masking. Instruction
// patterns have been disabled with null_frag.
- def : Pat<(X86vfpround (v2f64 VR128X:$src)),
+ def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
(VCVTPD2PSZ128rr VR128X:$src)>;
def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
VK2WM:$mask),
@@ -7696,7 +7679,7 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86vfpround (loadv2f64 addr:$src)),
+ def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
(VCVTPD2PSZ128rm addr:$src)>;
def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
VK2WM:$mask),
@@ -7705,7 +7688,7 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(X86vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
+ def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
(VCVTPD2PSZ128rmb addr:$src)>;
def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
(v4f32 VR128X:$src0), VK2WM:$mask),
@@ -7716,6 +7699,7 @@ let Predicates = [HasVLX] in {
}
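// For these 128-bit narrowing conversions the multiclass pattern is stubbed
// out with null_frag: a v2f64 source yields only two valid f32 results, so
// the generic vselect-based masking cannot describe the merge. The explicit
// X86vmfpround patterns carry the passthru and mask operands directly
// instead.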
// Convert Signed/Unsigned Doubleword to Double
+let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNode128, X86SchedWriteWidths sched> {
// No rounding in this op
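// i32 -> f64 widening is always exact (every i32 fits in an f64
// significand), so this multiclass clears Uses and mayRaiseFPException
// again: vcvtdq2pd/vcvtudq2pd can neither round nor trap.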
@@ -8075,34 +8059,34 @@ multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
VK4WM:$mask, i64mem:$src), 0, "att">;
}
-defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
+defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, X86any_VSintToFP,
SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
-defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
+defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
PS, EVEX_CD8<32, CD8VF>;
-defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
+defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
XS, EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
+defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
+defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
+defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
PS, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
- X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
+defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
+ X86any_VUintToFP, SchedWriteCvtDQ2PD>, XS,
EVEX_CD8<32, CD8VH>;
-defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
+defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
EVEX_CD8<32, CD8VF>;
@@ -8138,35 +8122,35 @@ defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
-defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
+defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
+defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
-defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
+defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
+defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
-defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
+defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
EVEX_CD8<64, CD8VF>;
-defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
+defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
EVEX_CD8<64, CD8VF>;
-defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
+defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
EVEX_CD8<64, CD8VF>;
-defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
+defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
EVEX_CD8<64, CD8VF>;
@@ -8202,7 +8186,7 @@ let Predicates = [HasVLX] in {
// Special patterns to allow use of X86mcvttp2si for masking. Instruction
// patterns have been disabled with null_frag.
- def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
+ def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
(VCVTTPD2DQZ128rr VR128X:$src)>;
def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
@@ -8211,7 +8195,7 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
- def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
+ def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
(VCVTTPD2DQZ128rm addr:$src)>;
def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
@@ -8220,7 +8204,7 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
+ def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
(VCVTTPD2DQZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
(v4i32 VR128X:$src0), VK2WM:$mask),
@@ -8260,7 +8244,7 @@ let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
// patterns have been disabled with null_frag.
- def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
+ def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
(VCVTTPD2UDQZ128rr VR128X:$src)>;
def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
@@ -8269,7 +8253,7 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
- def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
+ def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
(VCVTTPD2UDQZ128rm addr:$src)>;
def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
VK2WM:$mask),
@@ -8278,7 +8262,7 @@ let Predicates = [HasVLX] in {
VK2WM:$mask),
(VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
+ def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
(VCVTTPD2UDQZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
(v4i32 VR128X:$src0), VK2WM:$mask),
@@ -8311,7 +8295,7 @@ let Predicates = [HasDQI, HasVLX] in {
v2i64x_info.ImmAllZerosV)),
(VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
+ def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTTPS2QQZ128rm addr:$src)>;
def : Pat<(v2i64 (vselect VK2WM:$mask,
(X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
@@ -8322,7 +8306,7 @@ let Predicates = [HasDQI, HasVLX] in {
v2i64x_info.ImmAllZerosV)),
(VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
+ def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTTPS2UQQZ128rm addr:$src)>;
def : Pat<(v2i64 (vselect VK2WM:$mask,
(X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
@@ -8334,63 +8318,26 @@ let Predicates = [HasDQI, HasVLX] in {
(VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}
-let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))),
- (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))),
- (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
- (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_xmm)>;
-
-def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
- (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
- (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
- (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_ymm)>;
-
-def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
- (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-}
-
let Predicates = [HasVLX] in {
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
+ def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTDQ2PDZ128rm addr:$src)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
v2f64x_info.ImmAllZerosV)),
(VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
+ def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTUDQ2PDZ128rm addr:$src)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
v2f64x_info.ImmAllZerosV)),
(VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
@@ -8398,7 +8345,7 @@ let Predicates = [HasVLX] in {
let Predicates = [HasDQI, HasVLX] in {
// Special patterns to allow use of X86VMSintToFP for masking. Instruction
// patterns have been disabled with null_frag.
- def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
+ def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
(VCVTQQ2PSZ128rr VR128X:$src)>;
def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
VK2WM:$mask),
@@ -8407,7 +8354,7 @@ let Predicates = [HasDQI, HasVLX] in {
VK2WM:$mask),
(VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
- def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
+ def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
(VCVTQQ2PSZ128rm addr:$src)>;
def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
VK2WM:$mask),
@@ -8416,7 +8363,7 @@ let Predicates = [HasDQI, HasVLX] in {
VK2WM:$mask),
(VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
+ def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
(VCVTQQ2PSZ128rmb addr:$src)>;
def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
(v4f32 VR128X:$src0), VK2WM:$mask),
@@ -8427,7 +8374,7 @@ let Predicates = [HasDQI, HasVLX] in {
// Special patterns to allow use of X86VMUintToFP for masking. Instruction
// patterns have been disabled with null_frag.
- def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
+ def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
(VCVTUQQ2PSZ128rr VR128X:$src)>;
def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
VK2WM:$mask),
@@ -8436,7 +8383,7 @@ let Predicates = [HasDQI, HasVLX] in {
VK2WM:$mask),
(VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
- def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
+ def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
(VCVTUQQ2PSZ128rm addr:$src)>;
def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
VK2WM:$mask),
@@ -8445,7 +8392,7 @@ let Predicates = [HasDQI, HasVLX] in {
VK2WM:$mask),
(VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
+ def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
(VCVTUQQ2PSZ128rmb addr:$src)>;
def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
(v4f32 VR128X:$src0), VK2WM:$mask),
@@ -8455,72 +8402,11 @@ let Predicates = [HasDQI, HasVLX] in {
(VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
-let Predicates = [HasDQI, NoVLX] in {
-def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
- (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
- (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_ymm)>;
-
-def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
- (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
- (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
- (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_ymm)>;
-
-def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
- (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_xmm)>;
-
-def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_xmm)>;
-
-def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
- (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
-}
-
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
+let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, PatFrag ld_frag,
X86FoldableSchedWrite sched> {
@@ -8537,6 +8423,7 @@ multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86FoldableSchedWrite sched> {
+ let Uses = [MXCSR] in
defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
(ins _src.RC:$src), "vcvtph2ps",
"{sae}, $src", "$src, {sae}",
@@ -8568,7 +8455,7 @@ let Predicates = [HasVLX] in {
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
-let ExeDomain = GenericDomain in {
+let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -8605,7 +8492,7 @@ let ExeDomain = GenericDomain in {
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
SchedWrite Sched> {
- let hasSideEffects = 0 in
+ let hasSideEffects = 0, Uses = [MXCSR] in
defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
(outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
@@ -8664,52 +8551,51 @@ let Predicates = [HasVLX] in {
// Unordered/Ordered scalar fp compare with SAE and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
- string OpcodeStr, X86FoldableSchedWrite sched> {
- let hasSideEffects = 0 in
+ string OpcodeStr, Domain d,
+ X86FoldableSchedWrite sched = WriteFCom> {
+ let hasSideEffects = 0, Uses = [MXCSR] in
def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
- defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
+ defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
- defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
+ defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
- defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
+ defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
- defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
+ defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
- defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
- "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
+ defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
- defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
- "ucomisd", WriteFCom>, PD, EVEX,
+ defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, PD, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
+ "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
+ "comisd", SSEPackedDouble>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
- let Pattern = []<dag> in {
- defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
- "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
- EVEX_CD8<32, CD8VT1>;
- defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
- "comisd", WriteFCom>, PD, EVEX,
- VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
- }
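// The split above is deliberate: X86any_fcmp is the quiet comparison
// (IR fcmp / constrained.fcmp), matching VUCOMISS/VUCOMISD, which only
// signal on SNaN inputs, while X86strict_fcmps is the signaling form
// (constrained.fcmps) for VCOMISS/VCOMISD, which raise invalid on any NaN.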
let isCodeGenOnly = 1 in {
defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
+ sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
+ sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
+ sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
+ sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}
@@ -8717,7 +8603,7 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
+ let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -8767,6 +8653,7 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
+let Uses = [MXCSR] in
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched> {
defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
@@ -8798,12 +8685,12 @@ defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
- Sched<[sched]>;
+ Sched<[sched]>, SIMD_EXC;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
@@ -8815,7 +8702,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
@@ -8840,7 +8727,7 @@ defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode, X86FoldableSchedWrite sched> {
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT _.RC:$src))>,
@@ -8862,7 +8749,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
}
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode, X86FoldableSchedWrite sched> {
- let ExeDomain = _.ExeDomain in
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
@@ -8923,25 +8810,26 @@ multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
- (_.VT (fsqrt _.RC:$src))>, EVEX,
+ (_.VT (any_fsqrt _.RC:$src))>, EVEX,
Sched<[sched]>;
defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
- (fsqrt (_.VT
+ (any_fsqrt (_.VT
(bitconvert (_.LdFrag addr:$src))))>, EVEX,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
- (fsqrt (_.VT
+ (any_fsqrt (_.VT
(_.BroadcastLdFrag addr:$src)))>,
EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
+let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
@@ -8967,6 +8855,7 @@ multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
}
}
+let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
@@ -8985,13 +8874,14 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
"$src2, $src1", "$src1, $src2",
(X86fsqrts (_.VT _.RC:$src1),
(_.VT _.RC:$src2))>,
- Sched<[sched]>;
+ Sched<[sched]>, SIMD_EXC;
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(X86fsqrts (_.VT _.RC:$src1),
_.ScalarIntMemCPat:$src2)>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+ let Uses = [MXCSR] in
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -9004,23 +8894,23 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- Sched<[sched]>;
+ Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
let Predicates = [HasAVX512] in {
- def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
+ def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
(!cast<Instruction>(Name#Zr)
(_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
}
let Predicates = [HasAVX512, OptForSize] in {
- def : Pat<(_.EltVT (fsqrt (load addr:$src))),
+ def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
(!cast<Instruction>(Name#Zm)
(_.EltVT (IMPLICIT_DEF)), addr:$src)>;
}
@@ -9047,8 +8937,9 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 timm:$src3)))>,
- Sched<[sched]>;
+ Sched<[sched]>, SIMD_EXC;
+ let Uses = [MXCSR] in
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
@@ -9062,30 +8953,30 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1,
_.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[sched]>;
+ []>, Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
let Predicates = [HasAVX512] in {
- def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2),
+ def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src1, timm:$src2))>;
}
let Predicates = [HasAVX512, OptForSize] in {
- def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
+ def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
addr:$src1, timm:$src2))>;
}
@@ -9681,7 +9572,7 @@ defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
-// ext+trunc aggresively making it impossible to legalize the DAG to this
+// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
@@ -10101,7 +9992,7 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
// All instructions created with FROUND_CURRENT
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
@@ -10127,7 +10018,7 @@ multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNo
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
@@ -10160,7 +10051,7 @@ multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
// All instructions created with FROUND_CURRENT
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
@@ -10232,7 +10123,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
// op(reg_vec2,mem_scalar,imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
@@ -10254,7 +10145,7 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
@@ -10268,7 +10159,7 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
// Handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
@@ -10350,7 +10241,7 @@ defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56
X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
- X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
+ X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
@@ -10892,10 +10783,12 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//
+let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
SchedWriteFShuffleSizes>;
+}
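// vunpckh/vunpckl reuse avx512_fp_binop_p only for its encoding plumbing;
// they are pure shuffles, so the MXCSR/exception defaults the shared
// multiclass now applies are explicitly cleared here.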
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
SchedWriteShuffle, HasBWI>;
@@ -11587,7 +11480,8 @@ let Predicates = [HasVLX] in {
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
X86VectorVTInfo TblVT>{
- let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
+ Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
@@ -11619,7 +11513,7 @@ multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo TblVT>
: avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
-let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
+let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
@@ -11643,7 +11537,8 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
(X86VFixupimms (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
- (i32 timm:$src4))>, Sched<[sched]>;
+ (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
+ let Uses = [MXCSR] in
defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
@@ -11661,7 +11556,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
(_src3VT.VT (scalar_to_vector
(_src3VT.ScalarLdFrag addr:$src3))),
(i32 timm:$src4))>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
@@ -11978,6 +11873,7 @@ let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
bit IsCommutable> {
+ let ExeDomain = VTI.ExeDomain in {
defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
@@ -12000,6 +11896,7 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ }
}
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
@@ -12164,7 +12061,7 @@ defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
- Constraints = "$src1 = $dst" in {
+ Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
(outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
"v4fmaddps", "$src3, $src2", "$src2, $src3",
@@ -12210,9 +12107,9 @@ defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
}
let hasSideEffects = 0 in {
- let mayStore = 1 in
+ let mayStore = 1, SchedRW = [WriteFStoreX] in
def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
- let mayLoad = 1 in
+ let mayLoad = 1, SchedRW = [WriteFLoadX] in
def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}
@@ -12220,7 +12117,7 @@ let hasSideEffects = 0 in {
// VP2INTERSECT
//===----------------------------------------------------------------------===//
-multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
+multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
def rr : I<0x68, MRMSrcReg,
(outs _.KRPC:$dst),
(ins _.RC:$src1, _.RC:$src2),
@@ -12228,7 +12125,7 @@ multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRPC:$dst, (X86vp2intersect
_.RC:$src1, (_.VT _.RC:$src2)))]>,
- EVEX_4V, T8XD;
+ EVEX_4V, T8XD, Sched<[sched]>;
def rm : I<0x68, MRMSrcMem,
(outs _.KRPC:$dst),
@@ -12237,7 +12134,8 @@ multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRPC:$dst, (X86vp2intersect
_.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
- EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;
+ EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : I<0x68, MRMSrcMem,
(outs _.KRPC:$dst),
@@ -12246,21 +12144,22 @@ multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
[(set _.KRPC:$dst, (X86vp2intersect
_.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
- EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+ EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
-multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
+multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512, HasVP2INTERSECT] in
- defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;
+ defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
- defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
- defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
+ defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
+ defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
}
}
-defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
-defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
+defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
+defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
@@ -12293,17 +12192,19 @@ defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
- let Predicates = [HasBF16] in {
+ let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasBF16, HasVLX] in {
+ let Uses = []<Register>, mayRaiseFPException = 0 in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
VK4WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
X86cvtneps2bf16,
sched.YMM, "{1to8}", "{y}">, EVEX_V256;
+ }
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
@@ -12358,19 +12259,21 @@ let Predicates = [HasBF16, HasVLX] in {
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo src_v> {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
- EVEX_4V;
+ EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2,
(src_v.VT (bitconvert
- (src_v.LdFrag addr:$src3)))))>, EVEX_4V;
+ (src_v.LdFrag addr:$src3)))))>, EVEX_4V,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -12379,26 +12282,26 @@ multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat("$src2, ${src3}", _.BroadcastStr),
(_.VT (OpNode _.RC:$src1, _.RC:$src2,
(src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
- EVEX_B, EVEX_4V;
+ EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // Constraints = "$src1 = $dst"
multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
- AVX512VLVectorVTInfo _,
+ X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
AVX512VLVectorVTInfo src_v, Predicate prd> {
let Predicates = [prd] in {
- defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
+ defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
src_v.info512>, EVEX_V512;
}
let Predicates = [HasVLX, prd] in {
- defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
+ defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
src_v.info256>, EVEX_V256;
- defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
+ defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
src_v.info128>, EVEX_V128;
}
}
-defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
+defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
avx512vl_f32_info, avx512vl_i32_info,
HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td
index e1e6eea59884..32faeb1a86f2 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td
@@ -220,12 +220,12 @@ let isCall = 1 in
// registers are added manually.
let Uses = [ESP, SSP] in {
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i32imm_pcrel:$dst),
+ (outs), (ins i32imm_brtarget:$dst),
"call{l}\t$dst", []>, OpSize32,
Requires<[Not64BitMode]>, Sched<[WriteJump]>;
let hasSideEffects = 0 in
def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
- (outs), (ins i16imm_pcrel:$dst),
+ (outs), (ins i16imm_brtarget:$dst),
"call{w}\t$dst", []>, OpSize16,
Sched<[WriteJump]>;
def CALL16r : I<0xFF, MRM2r, (outs), (ins GR16:$dst),
@@ -285,7 +285,7 @@ let isCall = 1 in
// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, Uses = [ESP, SSP] in {
- def TCRETURNdi : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset),
+ def TCRETURNdi : PseudoI<(outs), (ins i32imm_brtarget:$dst, i32imm:$offset),
[]>, Sched<[WriteJump]>, NotMemoryFoldable;
def TCRETURNri : PseudoI<(outs), (ins ptr_rc_tailcall:$dst, i32imm:$offset),
[]>, Sched<[WriteJump]>, NotMemoryFoldable;
@@ -293,7 +293,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
def TCRETURNmi : PseudoI<(outs), (ins i32mem_TC:$dst, i32imm:$offset),
[]>, Sched<[WriteJumpLd]>;
- def TAILJMPd : PseudoI<(outs), (ins i32imm_pcrel:$dst),
+ def TAILJMPd : PseudoI<(outs), (ins i32imm_brtarget:$dst),
[]>, Sched<[WriteJump]>;
def TAILJMPr : PseudoI<(outs), (ins ptr_rc_tailcall:$dst),
@@ -309,10 +309,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
isCodeGenOnly = 1, SchedRW = [WriteJump] in
let Uses = [ESP, EFLAGS, SSP] in {
def TCRETURNdicc : PseudoI<(outs),
- (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
+ (ins i32imm_brtarget:$dst, i32imm:$offset, i32imm:$cond),
+ []>;
// This gets replaced by a conditional jump instruction in MC lowering.
- def TAILJMPd_CC : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$cond), []>;
+ def TAILJMPd_CC : PseudoI<(outs), (ins i32imm_brtarget:$dst, i32imm:$cond), []>;
}
@@ -328,7 +329,7 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst),
+ (outs), (ins i64i32imm_brtarget:$dst),
"call{q}\t$dst", []>, OpSize32,
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
@@ -357,7 +358,7 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, Uses = [RSP, SSP] in {
def TCRETURNdi64 : PseudoI<(outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$offset),
+ (ins i64i32imm_brtarget:$dst, i32imm:$offset),
[]>, Sched<[WriteJump]>;
def TCRETURNri64 : PseudoI<(outs),
(ins ptr_rc_tailcall:$dst, i32imm:$offset),
@@ -367,7 +368,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
(ins i64mem_TC:$dst, i32imm:$offset),
[]>, Sched<[WriteJumpLd]>, NotMemoryFoldable;
- def TAILJMPd64 : PseudoI<(outs), (ins i64i32imm_pcrel:$dst),
+ def TAILJMPd64 : PseudoI<(outs), (ins i64i32imm_brtarget:$dst),
[]>, Sched<[WriteJump]>;
def TAILJMPr64 : PseudoI<(outs), (ins ptr_rc_tailcall:$dst),
@@ -415,10 +416,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
isCodeGenOnly = 1, SchedRW = [WriteJump] in
let Uses = [RSP, EFLAGS, SSP] in {
def TCRETURNdi64cc : PseudoI<(outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$offset,
+ (ins i64i32imm_brtarget:$dst, i32imm:$offset,
i32imm:$cond), []>;
// This gets replaced by a conditional jump instruction in MC lowering.
def TAILJMPd64_CC : PseudoI<(outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$cond), []>;
+ (ins i64i32imm_brtarget:$dst, i32imm:$cond), []>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td
index 0cca71bdc431..9e43a532a3f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td
@@ -95,7 +95,8 @@ multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
-let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
+let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1,
+ Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy, string Suff,
PatFrag MemFrag128, PatFrag MemFrag256,
@@ -122,7 +123,7 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
- loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32,
+ loadv4f32, loadv8f32, X86any_Fmadd, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32,
@@ -137,7 +138,7 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
- loadv2f64, loadv4f64, X86Fmadd, v2f64,
+ loadv2f64, loadv4f64, X86any_Fmadd, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmsub, v2f64,
@@ -237,7 +238,7 @@ multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
}
let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
- hasSideEffects = 0 in
+ hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
SDNode OpNode, RegisterClass RC,
@@ -263,7 +264,8 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
// the lowest element of the FMA*_Int instruction. Even though such analysis
// may be not implemented yet we allow the routines doing the actual commute
// transformation to decide if one or another instruction is commutable or not.
-let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
+let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0,
+ Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
Operand memopr, RegisterClass RC,
X86FoldableSchedWrite sched> {
@@ -317,7 +319,7 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
VR128, sdmem, sched>, VEX_W;
}
-defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadd,
+defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86any_Fmadd,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsub,
SchedWriteFMA.Scl>, VEX_LIG;
@@ -370,12 +372,12 @@ multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
}
}
-defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
-defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
@@ -384,6 +386,7 @@ defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR6
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
+let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
PatFrag mem_frag, X86FoldableSchedWrite sched> {
@@ -425,7 +428,8 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
ValueType VT, X86FoldableSchedWrite sched> {
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0,
+ Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -458,6 +462,7 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
} // isCodeGenOnly = 1
}
+let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256,
@@ -533,7 +538,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
- defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32,
+ defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86any_Fmadd, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
@@ -550,7 +555,7 @@ let ExeDomain = SSEPackedSingle in {
fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
// Packed Instructions
- defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
+ defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86any_Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
@@ -566,7 +571,7 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
- defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64,
+ defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86any_Fmadd, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
@@ -583,7 +588,7 @@ let ExeDomain = SSEPackedDouble in {
fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
// Packed Instructions
- defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
+ defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86any_Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
@@ -624,12 +629,12 @@ multiclass scalar_fma4_patterns<SDNode Op, string Name,
}
}
-defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
index 2ec6d50f9702..1830262205c6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -29,7 +29,7 @@ def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
- [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+ [SDNPHasChain, SDNPOptInGlue, SDNPMayStore,
SDNPMemOperand]>;
def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -37,7 +37,7 @@ def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPMemOperand]>;
def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
- [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+ [SDNPHasChain, SDNPOptInGlue, SDNPMayStore,
SDNPMemOperand]>;
def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
@@ -282,32 +282,32 @@ def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
!strconcat("fi", asmstring, "{l}\t$src")>;
}
-let Defs = [FPSW], Uses = [FPCW] in {
+let Uses = [FPCW], mayRaiseFPException = 1 in {
// FPBinary_rr just defines pseudo-instructions, so there is no need to set
// scheduling resources.
let hasNoSchedulingInfo = 1 in {
-defm ADD : FPBinary_rr<fadd>;
-defm SUB : FPBinary_rr<fsub>;
-defm MUL : FPBinary_rr<fmul>;
-defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary_rr<any_fadd>;
+defm SUB : FPBinary_rr<any_fsub>;
+defm MUL : FPBinary_rr<any_fmul>;
+defm DIV : FPBinary_rr<any_fdiv>;
}
// Sets the scheduling resources for the actual NAME#_F<size>m definitions.
let SchedRW = [WriteFAddLd] in {
-defm ADD : FPBinary<fadd, MRM0m, "add">;
-defm SUB : FPBinary<fsub, MRM4m, "sub">;
-defm SUBR: FPBinary<fsub ,MRM5m, "subr", 0>;
+defm ADD : FPBinary<any_fadd, MRM0m, "add">;
+defm SUB : FPBinary<any_fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<any_fsub, MRM5m, "subr", 0>;
}
let SchedRW = [WriteFMulLd] in {
-defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm MUL : FPBinary<any_fmul, MRM1m, "mul">;
}
let SchedRW = [WriteFDivLd] in {
-defm DIV : FPBinary<fdiv, MRM6m, "div">;
-defm DIVR: FPBinary<fdiv, MRM7m, "divr", 0>;
+defm DIV : FPBinary<any_fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<any_fdiv, MRM7m, "divr", 0>;
}
-} // Defs = [FPSW]
+} // Uses = [FPCW], mayRaiseFPException = 1
class FPST0rInst<Format fp, string asm>
: FPI<0xD8, fp, (outs), (ins RSTi:$op), asm>;
@@ -319,7 +319,7 @@ class FPrST0PInst<Format fp, string asm>
// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
// we have to put some 'r's in and take them out of weird places.
-let SchedRW = [WriteFAdd], Defs = [FPSW], Uses = [FPCW] in {
+let SchedRW = [WriteFAdd], Uses = [FPCW], mayRaiseFPException = 1 in {
def ADD_FST0r : FPST0rInst <MRM0r, "fadd\t{$op, %st|st, $op}">;
def ADD_FrST0 : FPrST0Inst <MRM0r, "fadd\t{%st, $op|$op, st}">;
def ADD_FPrST0 : FPrST0PInst<MRM0r, "faddp\t{%st, $op|$op, st}">;
@@ -330,16 +330,16 @@ def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t{$op, %st|st, $op}">;
def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st, $op|$op, st}">;
def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t{%st, $op|$op, st}">;
} // SchedRW
-let SchedRW = [WriteFCom], Defs = [FPSW], Uses = [FPCW] in {
+let SchedRW = [WriteFCom], Uses = [FPCW], mayRaiseFPException = 1 in {
def COM_FST0r : FPST0rInst <MRM2r, "fcom\t$op">;
def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">;
} // SchedRW
-let SchedRW = [WriteFMul], Defs = [FPSW], Uses = [FPCW] in {
+let SchedRW = [WriteFMul], Uses = [FPCW], mayRaiseFPException = 1 in {
def MUL_FST0r : FPST0rInst <MRM1r, "fmul\t{$op, %st|st, $op}">;
def MUL_FrST0 : FPrST0Inst <MRM1r, "fmul\t{%st, $op|$op, st}">;
def MUL_FPrST0 : FPrST0PInst<MRM1r, "fmulp\t{%st, $op|$op, st}">;
} // SchedRW
-let SchedRW = [WriteFDiv], Defs = [FPSW], Uses = [FPCW] in {
+let SchedRW = [WriteFDiv], Uses = [FPCW], mayRaiseFPException = 1 in {
def DIVR_FST0r : FPST0rInst <MRM7r, "fdivr\t{$op, %st|st, $op}">;
def DIV_FrST0 : FPrST0Inst <MRM7r, "fdiv{r}\t{%st, $op|$op, st}">;
def DIV_FPrST0 : FPrST0PInst<MRM7r, "fdiv{r}p\t{%st, $op|$op, st}">;
@@ -359,20 +359,14 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
def _F : FPI<0xD9, fp, (outs), (ins), asmstring>;
}
-let Defs = [FPSW], Uses = [FPCW] in {
-
let SchedRW = [WriteFSign] in {
defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
defm ABS : FPUnary<fabs, MRM_E1, "fabs">;
}
+let Uses = [FPCW], mayRaiseFPException = 1 in {
let SchedRW = [WriteFSqrt80] in
-defm SQRT: FPUnary<fsqrt,MRM_FA, "fsqrt">;
-
-let SchedRW = [WriteMicrocoded] in {
-defm SIN : FPUnary<fsin, MRM_FE, "fsin">;
-defm COS : FPUnary<fcos, MRM_FF, "fcos">;
-}
+defm SQRT: FPUnary<any_fsqrt, MRM_FA, "fsqrt">;
let SchedRW = [WriteFCom] in {
let hasSideEffects = 0 in {
@@ -383,11 +377,11 @@ def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
} // SchedRW
-} // Defs = [FPSW]
+} // Uses = [FPCW], mayRaiseFPException = 1
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
-let SchedRW = [WriteFComLd], Defs = [FPSW], Uses = [FPCW] in {
+let SchedRW = [WriteFComLd], Uses = [FPCW], mayRaiseFPException = 1 in {
def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
@@ -402,14 +396,21 @@ def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
} // SchedRW
let SchedRW = [WriteMicrocoded] in {
+let Defs = [FPSW, FPCW] in {
def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
-def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">;
-
def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins f32mem:$dst), "frstor\t$dst">;
+}
+
+let Defs = [FPSW, FPCW], Uses = [FPSW, FPCW] in {
+def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">;
def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins f32mem:$dst), "fnsave\t$dst">;
+}
+
+let Uses = [FPSW] in
def FNSTSWm : FPI<0xDD, MRM7m, (outs), (ins i16mem:$dst), "fnstsw\t$dst">;
def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\t$src">;
+let Uses = [FPCW], mayRaiseFPException = 1 in
def FBSTPm : FPI<0xDF, MRM6m, (outs), (ins f80mem:$dst), "fbstp\t$dst">;
} // SchedRW
@@ -435,7 +436,6 @@ multiclass FPCMov<PatLeaf cc> {
Requires<[HasCMov]>;
}
-let Defs = [FPSW] in {
let SchedRW = [WriteFCMOV] in {
let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
defm CMOVB : FPCMov<X86_COND_B>;
@@ -469,6 +469,7 @@ def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RSTi:$op),
} // Predicates = [HasCMov]
} // SchedRW
+let mayRaiseFPException = 1 in {
// Floating point loads & stores.
let SchedRW = [WriteLoad], Uses = [FPCW] in {
let canFoldAsLoad = 1 in {
@@ -485,6 +486,7 @@ def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
+let mayRaiseFPException = 0 in {
def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild16 addr:$src))]>;
def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
@@ -503,6 +505,7 @@ def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild32 addr:$src))]>;
def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild64 addr:$src))]>;
+} // mayRaiseFPException = 0
} // SchedRW
let SchedRW = [WriteStore], Uses = [FPCW] in {
@@ -546,10 +549,12 @@ let mayLoad = 1, SchedRW = [WriteLoad], Uses = [FPCW] in {
def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
+let mayRaiseFPException = 0 in {
def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
}
+}
let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in {
def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
@@ -621,7 +626,7 @@ def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
let SchedRW = [WriteFLD1], Uses = [FPCW] in
def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
-let SchedRW = [WriteFLDC], Uses = [FPCW] in {
+let SchedRW = [WriteFLDC], Defs = [FPSW], Uses = [FPCW] in {
def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
@@ -632,29 +637,44 @@ def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
// Floating point compares.
let SchedRW = [WriteFCom], Uses = [FPCW] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
- [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
+ [(set FPSW, (trunc (X86any_fcmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
- [(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
+ [(set FPSW, (trunc (X86any_fcmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
- [(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
+ [(set FPSW, (trunc (X86any_fcmp RFP80:$lhs, RFP80:$rhs)))]>;
+def COM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(set FPSW, (trunc (X86strict_fcmps RFP32:$lhs, RFP32:$rhs)))]>;
+def COM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(set FPSW, (trunc (X86strict_fcmps RFP64:$lhs, RFP64:$rhs)))]>;
+def COM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ [(set FPSW, (trunc (X86strict_fcmps RFP80:$lhs, RFP80:$rhs)))]>;
} // SchedRW
-} // Defs = [FPSW]
+} // mayRaiseFPException = 1
-let SchedRW = [WriteFCom] in {
+let SchedRW = [WriteFCom], mayRaiseFPException = 1 in {
// CC = ST(0) cmp ST(i)
-let Defs = [EFLAGS, FPSW], Uses = [FPCW] in {
+let Defs = [EFLAGS, FPCW], Uses = [FPCW] in {
def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
- [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>,
+ [(set EFLAGS, (X86any_fcmp RFP32:$lhs, RFP32:$rhs))]>,
Requires<[FPStackf32, HasCMov]>;
def UCOM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
- [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>,
+ [(set EFLAGS, (X86any_fcmp RFP64:$lhs, RFP64:$rhs))]>,
Requires<[FPStackf64, HasCMov]>;
def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
- [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>,
+ [(set EFLAGS, (X86any_fcmp RFP80:$lhs, RFP80:$rhs))]>,
+ Requires<[HasCMov]>;
+def COM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(set EFLAGS, (X86strict_fcmps RFP32:$lhs, RFP32:$rhs))]>,
+ Requires<[FPStackf32, HasCMov]>;
+def COM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(set EFLAGS, (X86strict_fcmps RFP64:$lhs, RFP64:$rhs))]>,
+ Requires<[FPStackf64, HasCMov]>;
+def COM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ [(set EFLAGS, (X86strict_fcmps RFP80:$lhs, RFP80:$rhs))]>,
Requires<[HasCMov]>;
}
-let Defs = [FPSW], Uses = [ST0, FPCW] in {
+let Uses = [ST0, FPCW] in {
def UCOM_Fr : FPI<0xDD, MRM4r, // FPSW = cmp ST(0) with ST(i)
(outs), (ins RSTi:$reg), "fucom\t$reg">;
def UCOM_FPr : FPI<0xDD, MRM5r, // FPSW = cmp ST(0) with ST(i), pop
@@ -678,7 +698,7 @@ def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RSTi:$reg),
// Floating point flag ops.
let SchedRW = [WriteALU] in {
-let Defs = [AX], Uses = [FPSW] in
+let Defs = [AX, FPSW], Uses = [FPSW] in
def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags
(outs), (ins), "fnstsw\t{%ax|ax}",
[(set AX, (X86fp_stsw FPSW))]>;
@@ -694,51 +714,61 @@ def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
// FPU control instructions
let SchedRW = [WriteMicrocoded] in {
-let Defs = [FPSW] in {
-def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", []>;
def FFREE : FPI<0xDD, MRM0r, (outs), (ins RSTi:$reg), "ffree\t$reg">;
def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RSTi:$reg), "ffreep\t$reg">;
+let Defs = [FPSW, FPCW] in
+def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", []>;
// Clear exceptions
+let Defs = [FPSW] in
def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", []>;
-} // Defs = [FPSW]
} // SchedRW
// Operand-less floating-point instructions for the disassembler.
+let Defs = [FPSW] in
def FNOP : I<0xD9, MRM_D0, (outs), (ins), "fnop", []>, Sched<[WriteNop]>;
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
def FXAM : I<0xD9, MRM_E5, (outs), (ins), "fxam", []>;
+def FDECSTP : I<0xD9, MRM_F6, (outs), (ins), "fdecstp", []>;
+def FINCSTP : I<0xD9, MRM_F7, (outs), (ins), "fincstp", []>;
+let Uses = [FPCW], mayRaiseFPException = 1 in {
def F2XM1 : I<0xD9, MRM_F0, (outs), (ins), "f2xm1", []>;
def FYL2X : I<0xD9, MRM_F1, (outs), (ins), "fyl2x", []>;
def FPTAN : I<0xD9, MRM_F2, (outs), (ins), "fptan", []>;
def FPATAN : I<0xD9, MRM_F3, (outs), (ins), "fpatan", []>;
def FXTRACT : I<0xD9, MRM_F4, (outs), (ins), "fxtract", []>;
def FPREM1 : I<0xD9, MRM_F5, (outs), (ins), "fprem1", []>;
-def FDECSTP : I<0xD9, MRM_F6, (outs), (ins), "fdecstp", []>;
-def FINCSTP : I<0xD9, MRM_F7, (outs), (ins), "fincstp", []>;
def FPREM : I<0xD9, MRM_F8, (outs), (ins), "fprem", []>;
def FYL2XP1 : I<0xD9, MRM_F9, (outs), (ins), "fyl2xp1", []>;
+def FSIN : I<0xD9, MRM_FE, (outs), (ins), "fsin", []>;
+def FCOS : I<0xD9, MRM_FF, (outs), (ins), "fcos", []>;
def FSINCOS : I<0xD9, MRM_FB, (outs), (ins), "fsincos", []>;
def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", []>;
def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", []>;
def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", []>;
+} // Uses = [FPCW], mayRaiseFPException = 1
} // Defs = [FPSW]
+let Uses = [FPSW, FPCW] in {
def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, TB,
Requires<[HasFXSR]>;
def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)]>,
TB, Requires<[HasFXSR, In64BitMode]>;
+} // Uses = [FPSW, FPCW]
+
+let Defs = [FPSW, FPCW] in {
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor\t$src", [(int_x86_fxrstor addr:$src)]>,
TB, Requires<[HasFXSR]>;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)]>,
TB, Requires<[HasFXSR, In64BitMode]>;
+} // Defs = [FPSW, FPCW]
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -747,7 +777,10 @@ def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src),
// Required for RET of f32 / f64 / f80 values.
def : Pat<(X86fldf32 addr:$src), (LD_Fp32m addr:$src)>;
+def : Pat<(X86fldf32 addr:$src), (LD_Fp32m64 addr:$src)>;
def : Pat<(X86fldf64 addr:$src), (LD_Fp64m addr:$src)>;
+def : Pat<(X86fldf32 addr:$src), (LD_Fp32m80 addr:$src)>;
+def : Pat<(X86fldf64 addr:$src), (LD_Fp64m80 addr:$src)>;
def : Pat<(X86fldf80 addr:$src), (LD_Fp80m addr:$src)>;
// Required for CALL which return f32 / f64 / f80 values.
@@ -775,19 +808,19 @@ def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
-def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
+def : Pat<(f64 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
Requires<[FPStackf32]>;
-def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
+def : Pat<(f80 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
Requires<[FPStackf32]>;
-def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
+def : Pat<(f80 (any_fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
Requires<[FPStackf64]>;
// FP truncations map onto simple pseudo-value conversions if they are to/from
// the FP stack. We have validated that only value-preserving truncations make
// it through isel.
-def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
+def : Pat<(f32 (any_fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
Requires<[FPStackf32]>;
-def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
+def : Pat<(f32 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
Requires<[FPStackf32]>;
-def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
+def : Pat<(f64 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
Requires<[FPStackf64]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
index e8f0d937dff4..2f797fcfb8de 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
@@ -227,6 +227,7 @@ class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }
class NOTRACK { bit hasNoTrackPrefix = 1; }
+class SIMD_EXC { list<Register> Uses = [MXCSR]; bit mayRaiseFPException = 1; }
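SIMD_EXC is a mix-in: any instruction definition that lists it picks up an implicit MXCSR use and the may-raise-FP-exception bit in its generated MCInstrDesc. A minimal C++ sketch of how a consumer could test for both properties; the helper is hypothetical, but mayRaiseFPException() and getImplicitUses() are the real MCInstrDesc accessors in this tree, and MXCSRReg is passed in only to keep the sketch free of X86 headers.

#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

// Hypothetical helper: true if Desc was tagged with SIMD_EXC in the .td
// files, i.e. it implicitly reads MXCSR and may raise an FP exception.
static bool hasSIMDExcSemantics(const MCInstrDesc &Desc, unsigned MXCSRReg) {
  if (!Desc.mayRaiseFPException())
    return false;
  for (const MCPhysReg *U = Desc.getImplicitUses(); U && *U; ++U)
    if (*U == MXCSRReg)
      return true;
  return false;
}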
// Specify AVX512 8-bit compressed displacement encoding based on the vector
// element size in bits (8, 16, 32, 64) and the CDisp8 form.
@@ -441,12 +442,15 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
// FPStack Instruction Templates:
// FPI - Floating Point Instruction template.
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
- : I<o, F, outs, ins, asm, []> {}
+ : I<o, F, outs, ins, asm, []> {
+ let Defs = [FPSW];
+}
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
: PseudoI<outs, ins, pattern> {
let FPForm = fp;
+ let Defs = [FPSW];
}
// Templates for instructions that use a 16- or 32-bit segmented address as
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index de6f8a81dff6..3250123e5aa6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -127,11 +127,32 @@ def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
SDTCVecEltisVT<1, f32>,
SDTCisSameSizeAs<0, 1>]>>;
+
+def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT",
+ SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisSameSizeAs<0, 1>]>,
+ [SDNPHasChain]>;
+
+def X86any_vfpext : PatFrags<(ops node:$src),
+ [(X86strict_vfpext node:$src),
+ (X86vfpext node:$src)]>;
+
def X86vfpround: SDNode<"X86ISD::VFPROUND",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, f64>,
SDTCisOpSmallerThanOp<0, 1>]>>;
+def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND",
+ SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCisOpSmallerThanOp<0, 1>]>,
+ [SDNPHasChain]>;
+
+def X86any_vfpround : PatFrags<(ops node:$src),
+ [(X86strict_vfpround node:$src),
+ (X86vfpround node:$src)]>;
+
def X86frounds : SDNode<"X86ISD::VFPROUNDS",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
SDTCisSameAs<0, 1>,
@@ -169,10 +190,15 @@ def X86vshiftimm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def X86vshldq : SDNode<"X86ISD::VSHLDQ", X86vshiftimm>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", X86vshiftimm>;
-def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
+def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
+def X86strict_cmpp : SDNode<"X86ISD::STRICT_CMPP", SDTX86VFCMP, [SDNPHasChain]>;
+def X86any_cmpp : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+ [(X86strict_cmpp node:$src1, node:$src2, node:$src3),
+ (X86cmpp node:$src1, node:$src2, node:$src3)]>;
+
def X86CmpMaskCC :
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
@@ -182,6 +208,10 @@ def X86CmpMaskCCScalar :
SDTCisVT<3, i8>]>;
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
+def X86strict_cmpm : SDNode<"X86ISD::STRICT_CMPM", X86CmpMaskCC, [SDNPHasChain]>;
+def X86any_cmpm : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+ [(X86strict_cmpm node:$src1, node:$src2, node:$src3),
+ (X86cmpm node:$src1, node:$src2, node:$src3)]>;
def X86cmpmSAE : SDNode<"X86ISD::CMPM_SAE", X86CmpMaskCC>;
def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>;
def X86cmpmsSAE : SDNode<"X86ISD::FSETCCM_SAE", X86CmpMaskCCScalar>;
@@ -436,6 +466,12 @@ def X86VRangeSAE : SDNode<"X86ISD::VRANGE_SAE", SDTFPBinOpImm>;
def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>;
def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>;
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>;
+def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm,
+ [SDNPHasChain]>;
+def X86any_VRndScale : PatFrags<(ops node:$src1, node:$src2),
+ [(X86strict_VRndScale node:$src1, node:$src2),
+ (X86VRndScale node:$src1, node:$src2)]>;
+
def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>;
def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImm>;
def X86VGetMantSAE : SDNode<"X86ISD::VGETMANT_SAE", SDTFPUnaryOpImm>;
@@ -493,7 +529,11 @@ def X86fgetexpSAE : SDNode<"X86ISD::FGETEXP_SAE", SDTFPUnaryOp>;
def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>;
def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>;
-def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_Fmadd : SDNode<"ISD::STRICT_FMA", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86any_Fmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+ [(X86strict_Fmadd node:$src1, node:$src2, node:$src3),
+ (X86Fmadd node:$src1, node:$src2, node:$src3)]>;
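The X86strict_*/X86any_* definitions above all follow one recipe: declare a chained STRICT_ SDNode, then a PatFrags that matches either the strict or the relaxed node, so a single instruction pattern covers both. SDNPHasChain is what gives the strict variant its ordering guarantee. A hedged sketch of the observable difference, using only generic SDNode accessors; the helper itself is illustrative, not LLVM code:

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// A chained node's last result is a token chain (MVT::Other); that chain is
// what keeps STRICT_ FP operations ordered against FP-environment accesses.
static bool producesTokenChain(const SDNode *N) {
  unsigned NumVals = N->getNumValues();
  return NumVals > 0 && N->getValueType(NumVals - 1) == MVT::Other;
}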
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
@@ -621,9 +661,26 @@ def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>;
// cvtt fp-to-int stuff
def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt>;
def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt>;
+def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>;
+def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>;
+def X86any_cvttp2si : PatFrags<(ops node:$src),
+ [(X86strict_cvttp2si node:$src),
+ (X86cvttp2si node:$src)]>;
+def X86any_cvttp2ui : PatFrags<(ops node:$src),
+ [(X86strict_cvttp2ui node:$src),
+ (X86cvttp2ui node:$src)]>;
def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>;
def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>;
+def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>;
+def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, [SDNPHasChain]>;
+def X86any_VSintToFP : PatFrags<(ops node:$src),
+ [(X86strict_VSintToFP node:$src),
+ (X86VSintToFP node:$src)]>;
+def X86any_VUintToFP : PatFrags<(ops node:$src),
+ [(X86strict_VUintToFP node:$src),
+ (X86VUintToFP node:$src)]>;
+
// cvt int-to-fp stuff
def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
@@ -706,6 +763,10 @@ def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>;
def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>;
def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>;
+def SDTX86MaskedStore: SDTypeProfile<0, 3, [ // masked store
+ SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>
+]>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
@@ -1040,9 +1101,10 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
INSERT_get_vinsert256_imm>;
def masked_load : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_ld node:$src1, node:$src2, node:$src3), [{
+ (masked_ld node:$src1, undef, node:$src2, node:$src3), [{
return !cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
- cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+ cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
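Restated in plain C++ for readability (the predicate body above is the same code embedded in the PatFrag): the extra undef operand is the new index/offset operand of masked_ld, and the added isUnindexed() check rejects the pre-/post-indexed masked loads that operand makes expressible.

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Equivalent of the masked_load PatFrag predicate: a "plain" masked load is
// non-expanding, non-extending, and unindexed.
static bool isPlainMaskedLoad(const MaskedLoadSDNode *N) {
  return !N->isExpandingLoad() &&
         N->getExtensionType() == ISD::NON_EXTLOAD &&
         N->isUnindexed();
}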
def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -1055,17 +1117,19 @@ def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
}]>;
def X86mExpandingLoad : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_ld node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedLoadSDNode>(N)->isExpandingLoad();
+ (masked_ld node:$src1, undef, node:$src2, node:$src3), [{
+ return cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
// Masked store fragments.
// X86mstore can't be implemented in core DAG files because some targets
// do not support vector types (llvm-tblgen will fail).
def masked_store : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_st node:$src1, node:$src2, node:$src3), [{
- return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
- (!cast<MaskedStoreSDNode>(N)->isCompressingStore());
+ (masked_st node:$src1, node:$src2, undef, node:$src3), [{
+ return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+ !cast<MaskedStoreSDNode>(N)->isCompressingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -1078,16 +1142,18 @@ def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
}]>;
def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_st node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedStoreSDNode>(N)->isCompressingStore();
+ (masked_st node:$src1, node:$src2, undef, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->isCompressingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
// masked truncstore fragments
// X86mtruncstore can't be implemented in core DAG files because some targets
// don't support vector types (llvm-tblgen will fail)
def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_st node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+ (masked_st node:$src1, node:$src2, undef, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def masked_truncstorevi8 :
PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -1111,10 +1177,10 @@ def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES", SDTStore,
def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTMaskedStore,
+def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTX86MaskedStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTMaskedStore,
+def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTX86MaskedStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr),
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
index aa75e6a85a0c..245346d82731 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1761,10 +1761,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::VCMPPSZ128rrik:
case X86::VCMPPDZ256rrik:
case X86::VCMPPSZ256rrik: {
- unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x1f;
+ unsigned Imm =
+ MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 0x1f;
Imm = X86::getSwappedVCMPImm(Imm);
auto &WorkingMI = cloneIfNew(MI);
- WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
+ WorkingMI.getOperand(MI.getNumExplicitOperands() - 1).setImm(Imm);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
@@ -2963,8 +2964,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
// First deal with the normal symmetric copies.
bool HasAVX = Subtarget.hasAVX();
bool HasVLX = Subtarget.hasVLX();
@@ -3046,15 +3047,11 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
report_fatal_error("Cannot emit physreg copy instruction");
}
-bool X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
- if (MI.isMoveReg()) {
- Dest = &MI.getOperand(0);
- Src = &MI.getOperand(1);
- return true;
- }
- return false;
+Optional<DestSourcePair>
+X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
+ if (MI.isMoveReg())
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+ return None;
}
static unsigned getLoadStoreRegOpcode(unsigned Reg,
@@ -3221,8 +3218,9 @@ bool X86InstrInfo::getMemOperandWithOffset(
Offset = DispMO.getImm();
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+ if (!BaseOp->isReg())
+ return false;
+
return true;
}
@@ -3963,9 +3961,7 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
MachineFunction &MF = *MBB.getParent();
const X86FrameLowering *TFL = Subtarget.getFrameLowering();
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
- bool NeedsDwarfCFI =
- !IsWin64Prologue &&
- (MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry());
+ bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves();
bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
if (EmitCFI) {
TFL->BuildCFI(MBB, I, DL,
@@ -4708,6 +4704,10 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
updateOperandRegConstraints(MF, *NewMI, TII);
+ // Copy the NoFPExcept flag from the instruction we're fusing.
+ if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
+ NewMI->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
MachineBasicBlock *MBB = InsertPt->getParent();
MBB->insert(InsertPt, NewMI);
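The hunk above makes memory-operand folding preserve the NoFPExcept machine-IR flag. A hedged sketch of the general pattern; getFlag and setFlag are the real MachineInstr APIs, the helper name is made up:

#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Illustrative helper: carry NoFPExcept from a replaced instruction onto its
// replacement, so the new instruction stays movable for strict-FP queries
// even though its descriptor says mayRaiseFPException.
static void propagateNoFPExcept(const MachineInstr &Old, MachineInstr &New) {
  if (Old.getFlag(MachineInstr::MIFlag::NoFPExcept))
    New.setFlag(MachineInstr::MIFlag::NoFPExcept);
}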
@@ -7233,8 +7233,8 @@ bool X86InstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
const MachineBasicBlock *MBB) const {
- assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) &&
- "Reassociation needs binary operators");
+ assert(Inst.getNumExplicitOperands() == 3 && Inst.getNumExplicitDefs() == 1 &&
+ Inst.getNumDefs() <= 2 && "Reassociation needs binary operators");
// Integer binary math/logic instructions have a third source operand:
// the EFLAGS register. That operand must be both defined here and never
@@ -7242,13 +7242,11 @@ bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
// not change anything because rearranging the operands could affect other
// instructions that depend on the exact status flags (zero, sign, etc.)
// that are set by using these particular operands with this operation.
- if (Inst.getNumOperands() == 4) {
- assert(Inst.getOperand(3).isReg() &&
- Inst.getOperand(3).getReg() == X86::EFLAGS &&
- "Unexpected operand in reassociable instruction");
- if (!Inst.getOperand(3).isDead())
- return false;
- }
+ const MachineOperand *FlagDef = Inst.findRegisterDefOperand(X86::EFLAGS);
+ assert((Inst.getNumDefs() == 1 || FlagDef) &&
+ "Implicit def isn't flags?");
+ if (FlagDef && !FlagDef->isDead())
+ return false;
return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
}
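The rewrite above drops the fixed operand-index assumption and instead looks up the EFLAGS def explicitly, which also handles instructions whose flag def is implicit. A minimal sketch of the same idiom; findRegisterDefOperand is the real API, the wrapper is hypothetical and takes the register as a parameter rather than naming X86::EFLAGS directly:

#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Hypothetical wrapper: reassociation is only safe when the instruction
// either does not define EFLAGS or that def is dead.
static bool flagDefAllowsReassociation(const MachineInstr &MI,
                                       unsigned EFLAGSReg) {
  const MachineOperand *FlagDef = MI.findRegisterDefOperand(EFLAGSReg);
  return !FlagDef || FlagDef->isDead();
}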
@@ -7558,15 +7556,57 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
}
}
+/// If \p DescribedReg overlaps with the MOVrr instruction's destination
+/// register then, if possible, describe the value in terms of the source
+/// register.
+static Optional<ParamLoadedValue>
+describeMOVrrLoadedValue(const MachineInstr &MI, Register DescribedReg,
+ const TargetRegisterInfo *TRI) {
+ Register DestReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
+
+ // If the described register is the destination, just return the source.
+ if (DestReg == DescribedReg)
+ return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
+
+ // If the described register is a sub-register of the destination register,
+ // then pick out the source register's corresponding sub-register.
+ if (unsigned SubRegIdx = TRI->getSubRegIndex(DestReg, DescribedReg)) {
+ unsigned SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx);
+ return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
+ }
+
+ // The remaining case to consider is when the described register is a
+  // super-register of the destination register. MOV8rr and MOV16rr do not
+ // write to any of the other bytes in the register, meaning that we'd have to
+ // describe the value using a combination of the source register and the
+ // non-overlapping bits in the described register, which is not currently
+ // possible.
+ if (MI.getOpcode() == X86::MOV8rr || MI.getOpcode() == X86::MOV16rr ||
+ !TRI->isSuperRegister(DestReg, DescribedReg))
+ return None;
+
+ assert(MI.getOpcode() == X86::MOV32rr && "Unexpected super-register case");
+ return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
+}
+
Optional<ParamLoadedValue>
-X86InstrInfo::describeLoadedValue(const MachineInstr &MI) const {
+X86InstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const {
const MachineOperand *Op = nullptr;
DIExpression *Expr = nullptr;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
switch (MI.getOpcode()) {
case X86::LEA32r:
case X86::LEA64r:
case X86::LEA64_32r: {
+ // We may need to describe a 64-bit parameter with a 32-bit LEA.
+ if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
+ return None;
+
// Operand 4 could be a global address. For now we do not support
// such situations.
if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm())
@@ -7574,7 +7614,6 @@ X86InstrInfo::describeLoadedValue(const MachineInstr &MI) const {
const MachineOperand &Op1 = MI.getOperand(1);
const MachineOperand &Op2 = MI.getOperand(3);
- const TargetRegisterInfo *TRI = &getRegisterInfo();
assert(Op2.isReg() && (Op2.getReg() == X86::NoRegister ||
Register::isPhysicalRegister(Op2.getReg())));
@@ -7638,13 +7677,56 @@ X86InstrInfo::describeLoadedValue(const MachineInstr &MI) const {
return ParamLoadedValue(*Op, Expr);
}
+ case X86::MOV32ri:
+ case X86::MOV64ri:
+ case X86::MOV64ri32:
+ // MOV32ri may be used for producing zero-extended 32-bit immediates in
+ // 64-bit parameters, so we need to consider super-registers.
+ if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
+ return None;
+ return ParamLoadedValue(MI.getOperand(1), Expr);
+ case X86::MOV8rr:
+ case X86::MOV16rr:
+ case X86::MOV32rr:
+ case X86::MOV64rr:
+ return describeMOVrrLoadedValue(MI, Reg, TRI);
case X86::XOR32rr: {
+ // 64-bit parameters are zero-materialized using XOR32rr, so also consider
+ // super-registers.
+ if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
+ return None;
if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
return ParamLoadedValue(MachineOperand::CreateImm(0), Expr);
return None;
}
+ case X86::MOVSX64rr32: {
+ // We may need to describe the lower 32 bits of the MOVSX; for example, in
+ // cases like this:
+ //
+ // $ebx = [...]
+ // $rdi = MOVSX64rr32 $ebx
+ // $esi = MOV32rr $edi
+ if (!TRI->isSubRegisterEq(MI.getOperand(0).getReg(), Reg))
+ return None;
+
+ Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
+
+ // If the described register is the destination register we need to
+ // sign-extend the source register from 32 bits. The other case we handle
+ // is when the described register is the 32-bit sub-register of the
+  // destination register, in which case we just need to return the source
+ // register.
+ if (Reg == MI.getOperand(0).getReg())
+ Expr = DIExpression::appendExt(Expr, 32, 64, true);
+ else
+ assert(X86MCRegisterClasses[X86::GR32RegClassID].contains(Reg) &&
+ "Unhandled sub-register case for MOVSX64rr32");
+
+ return ParamLoadedValue(MI.getOperand(1), Expr);
+ }
default:
- return TargetInstrInfo::describeLoadedValue(MI);
+ assert(!MI.isMoveImmediate() && "Unexpected MoveImm instruction");
+ return TargetInstrInfo::describeLoadedValue(MI, Reg);
}
}
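For the MOVSX64rr32 case above, the 64-bit destination is described as a sign-extension of the 32-bit source operand. A short sketch of the expression that gets built, assuming an LLVMContext &Ctx is in scope; both DIExpression calls appear verbatim in the code above:

#include "llvm/IR/DebugInfoMetadata.h"

using namespace llvm;

// Builds the "sign-extend from 32 to 64 bits" DWARF expression used when the
// described register is the MOVSX destination itself.
static DIExpression *buildSExt32To64(LLVMContext &Ctx) {
  DIExpression *Empty = DIExpression::get(Ctx, {});
  return DIExpression::appendExt(Empty, /*FromSize=*/32, /*ToSize=*/64,
                                 /*Signed=*/true);
}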
@@ -7654,38 +7736,31 @@ void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
MachineInstr &OldMI2,
MachineInstr &NewMI1,
MachineInstr &NewMI2) const {
- // Integer instructions define an implicit EFLAGS source register operand as
- // the third source (fourth total) operand.
- if (OldMI1.getNumOperands() != 4 || OldMI2.getNumOperands() != 4)
- return;
+ // Integer instructions may define an implicit EFLAGS dest register operand.
+ MachineOperand *OldFlagDef1 = OldMI1.findRegisterDefOperand(X86::EFLAGS);
+ MachineOperand *OldFlagDef2 = OldMI2.findRegisterDefOperand(X86::EFLAGS);
- assert(NewMI1.getNumOperands() == 4 && NewMI2.getNumOperands() == 4 &&
+ assert(!OldFlagDef1 == !OldFlagDef2 &&
"Unexpected instruction type for reassociation");
- MachineOperand &OldOp1 = OldMI1.getOperand(3);
- MachineOperand &OldOp2 = OldMI2.getOperand(3);
- MachineOperand &NewOp1 = NewMI1.getOperand(3);
- MachineOperand &NewOp2 = NewMI2.getOperand(3);
+ if (!OldFlagDef1 || !OldFlagDef2)
+ return;
- assert(OldOp1.isReg() && OldOp1.getReg() == X86::EFLAGS && OldOp1.isDead() &&
- "Must have dead EFLAGS operand in reassociable instruction");
- assert(OldOp2.isReg() && OldOp2.getReg() == X86::EFLAGS && OldOp2.isDead() &&
+ assert(OldFlagDef1->isDead() && OldFlagDef2->isDead() &&
"Must have dead EFLAGS operand in reassociable instruction");
- (void)OldOp1;
- (void)OldOp2;
+ MachineOperand *NewFlagDef1 = NewMI1.findRegisterDefOperand(X86::EFLAGS);
+ MachineOperand *NewFlagDef2 = NewMI2.findRegisterDefOperand(X86::EFLAGS);
- assert(NewOp1.isReg() && NewOp1.getReg() == X86::EFLAGS &&
- "Unexpected operand in reassociable instruction");
- assert(NewOp2.isReg() && NewOp2.getReg() == X86::EFLAGS &&
+ assert(NewFlagDef1 && NewFlagDef2 &&
"Unexpected operand in reassociable instruction");
// Mark the new EFLAGS operands as dead to be helpful to subsequent iterations
// of this pass or other passes. The EFLAGS operands must be dead in these new
// instructions because the EFLAGS operands in the original instructions must
// be dead in order for reassociation to occur.
- NewOp1.setIsDead();
- NewOp2.setIsDead();
+ NewFlagDef1->setIsDead();
+ NewFlagDef2->setIsDead();
}
std::pair<unsigned, unsigned>
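
The setSpecialOperandAttr hunk above replaces positional operand indexing (EFLAGS assumed to be operand 3) with findRegisterDefOperand, so the logic holds for any operand layout. A condensed sketch of the resulting shape, using the standard MachineInstr APIs:

// Sketch: propagate a dead flags-def by name rather than by operand index.
#include <cassert>
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

static void markReassocFlagsDead(MachineInstr &Old, MachineInstr &New,
                                 Register FlagsReg /* e.g. X86::EFLAGS */) {
  MachineOperand *OldDef = Old.findRegisterDefOperand(FlagsReg);
  if (!OldDef)
    return; // instruction does not define the flags at all
  assert(OldDef->isDead() && "reassociation requires dead flags");
  if (MachineOperand *NewDef = New.findRegisterDefOperand(FlagsReg))
    NewDef->setIsDead(); // keep the invariant on the new instruction
}
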
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
index 22b7b1d4cb19..1d2da5305357 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
@@ -312,7 +312,7 @@ public:
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
unsigned FalseReg) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, unsigned SrcReg,
@@ -522,8 +522,8 @@ public:
return MI.getDesc().TSFlags & X86II::LOCK;
}
- Optional<ParamLoadedValue>
- describeLoadedValue(const MachineInstr &MI) const override;
+ Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const override;
protected:
/// Commutes the operands in the given instruction by changing the operands
@@ -542,10 +542,10 @@ protected:
unsigned CommuteOpIdx2) const override;
  /// If the specific machine instruction is an instruction that moves/copies
- /// value from one register to another register return true along with
- /// @Source machine operand and @Destination machine operand.
- bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
- const MachineOperand *&Destination) const override;
+  /// value from one register to another, return the destination and source
+  /// registers as machine operands.
+ Optional<DestSourcePair>
+ isCopyInstrImpl(const MachineInstr &MI) const override;
private:
/// This is a helper for convertToThreeAddress for 8 and 16-bit instructions.
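
For comparison, the new isCopyInstrImpl shape folds the two reference out-parameters of the old interface into a single Optional return value. A minimal sketch of an override in the new style (CopyOpc is a stand-in for whatever opcode the target treats as a plain move, e.g. X86::MOV32rr in this file):

#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

// DestSourcePair bundles the {Destination, Source} operand pointers that
// the old interface returned through reference parameters.
static Optional<DestSourcePair> isCopyInstrSketch(const MachineInstr &MI,
                                                  unsigned CopyOpc) {
  if (MI.getOpcode() != CopyOpc)
    return None; // the old API signalled this case by returning false
  // For a plain MOVrr, operand 0 is the destination, operand 1 the source.
  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
}
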
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
index e452145f3b65..ca5425e8b89f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
@@ -142,6 +142,8 @@ def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
+def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86CmpTest, [SDNPHasChain]>;
+def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86CmpTest, [SDNPHasChain]>;
def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
@@ -375,6 +377,9 @@ class X86VMemOperand<RegisterClass RC, string printMethod,
}
def anymem : X86MemOperand<"printanymem">;
+def X86any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
+ [(X86strict_fcmp node:$lhs, node:$rhs),
+ (X86cmp node:$lhs, node:$rhs)]>;
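
The X86any_fcmp fragment lets a single selection pattern serve both the relaxed compare node and its strict, chain-carrying twin. Conceptually it behaves like the following predicate, a sketch under the assumption that only the node opcode differs (the X86ISD values live in the target-internal X86ISelLowering.h):

// Conceptual sketch of what the PatFrags above accepts.
static bool matchesAnyFcmp(unsigned Opcode) {
  return Opcode == X86ISD::CMP || Opcode == X86ISD::STRICT_FCMP;
}
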
// FIXME: Right now we allow any size during parsing, but we might want to
// restrict to only unsized memory.
@@ -449,18 +454,6 @@ def i64mem_TC : Operand<i64> {
let OperandType = "OPERAND_MEMORY";
}
-let OperandType = "OPERAND_PCREL",
- ParserMatchClass = X86AbsMemAsmOperand,
- PrintMethod = "printPCRelImm" in {
-def i32imm_pcrel : Operand<i32>;
-def i16imm_pcrel : Operand<i16>;
-
-// Branch targets have OtherVT type and print as pc-relative values.
-def brtarget : Operand<OtherVT>;
-def brtarget8 : Operand<OtherVT>;
-
-}
-
// Special parser to detect 16-bit mode to select 16-bit displacement.
def X86AbsMem16AsmOperand : AsmOperandClass {
let Name = "AbsMem16";
@@ -468,15 +461,27 @@ def X86AbsMem16AsmOperand : AsmOperandClass {
let SuperClasses = [X86AbsMemAsmOperand];
}
-// Branch targets have OtherVT type and print as pc-relative values.
-let OperandType = "OPERAND_PCREL",
- PrintMethod = "printPCRelImm" in {
-let ParserMatchClass = X86AbsMem16AsmOperand in
- def brtarget16 : Operand<OtherVT>;
-let ParserMatchClass = X86AbsMemAsmOperand in
- def brtarget32 : Operand<OtherVT>;
+// Branch targets print as pc-relative values.
+class BranchTargetOperand<ValueType ty> : Operand<ty> {
+ let OperandType = "OPERAND_PCREL";
+ let PrintMethod = "printPCRelImm";
+ let ParserMatchClass = X86AbsMemAsmOperand;
}
+def i32imm_brtarget : BranchTargetOperand<i32>;
+def i16imm_brtarget : BranchTargetOperand<i16>;
+
+// 64 bits, but only 32 bits are significant, and those bits are treated as
+// being pc-relative.
+def i64i32imm_brtarget : BranchTargetOperand<i64>;
+
+def brtarget : BranchTargetOperand<OtherVT>;
+def brtarget8 : BranchTargetOperand<OtherVT>;
+def brtarget16 : BranchTargetOperand<OtherVT> {
+ let ParserMatchClass = X86AbsMem16AsmOperand;
+}
+def brtarget32 : BranchTargetOperand<OtherVT>;
+
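
Every operand derived from BranchTargetOperand prints and encodes as a displacement from the end of the branch instruction. As a self-contained illustration of that arithmetic (plain C++, not from the patch):

#include <cstdint>

// rel32 encoding behind i32imm_brtarget / i64i32imm_brtarget style
// operands: the stored value is the target minus the address of the next
// instruction, truncated to the 32 significant bits even when the operand
// is carried in a 64-bit field.
static int32_t pcRelDisp32(uint64_t InstAddr, uint64_t InstLen,
                           uint64_t Target) {
  return static_cast<int32_t>(Target - (InstAddr + InstLen));
}
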
let RenderMethod = "addSrcIdxOperands" in {
def X86SrcIdx8Operand : AsmOperandClass {
let Name = "SrcIdx8";
@@ -751,14 +756,6 @@ def i64u8imm : Operand<i64> {
let OperandType = "OPERAND_IMMEDIATE";
}
-// 64-bits but only 32 bits are significant, and those bits are treated as being
-// pc relative.
-def i64i32imm_pcrel : Operand<i64> {
- let PrintMethod = "printPCRelImm";
- let ParserMatchClass = X86AbsMemAsmOperand;
- let OperandType = "OPERAND_PCREL";
-}
-
def lea64_32mem : Operand<i32> {
let PrintMethod = "printanymem";
let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, SEGMENT_REG);
@@ -983,12 +980,12 @@ def IsNotPIC : Predicate<"!TM.isPositionIndependent()">;
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
- def OptForSize : Predicate<"MF->getFunction().hasOptSize()">;
+ def OptForSize : Predicate<"shouldOptForSize(MF)">;
def OptForMinSize : Predicate<"MF->getFunction().hasMinSize()">;
- def OptForSpeed : Predicate<"!MF->getFunction().hasOptSize()">;
+ def OptForSpeed : Predicate<"!shouldOptForSize(MF)">;
def UseIncDec : Predicate<"!Subtarget->slowIncDec() || "
- "MF->getFunction().hasOptSize()">;
- def NoSSE41_Or_OptForSize : Predicate<"MF->getFunction().hasOptSize() || "
+ "shouldOptForSize(MF)">;
+ def NoSSE41_Or_OptForSize : Predicate<"shouldOptForSize(MF) || "
"!Subtarget->hasSSE41()">;
}
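
The predicates above now route through a shouldOptForSize(MF) helper instead of reading hasOptSize() directly. The helper's body is outside this hunk; a plausible sketch, assuming it layers profile-guided size optimization on top of the attribute (the PSI/MBFI plumbing is elided):

// Assumed shape of shouldOptForSize(MF); the real helper may differ.
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

static bool shouldOptForSizeSketch(const MachineFunction *MF) {
  // The explicit optsize/minsize attributes always win.
  if (MF->getFunction().hasOptSize())
    return true;
  // A PGSO-aware helper would additionally consult ProfileSummaryInfo /
  // MachineBlockFrequencyInfo to treat provably cold code as optsize.
  return false;
}
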
@@ -2846,7 +2843,7 @@ let SchedRW = [WriteStore], Defs = [EFLAGS] in {
//===----------------------------------------------------------------------===//
// CLZERO Instruction
//
-let SchedRW = [WriteSystem] in {
+let SchedRW = [WriteLoad] in {
let Uses = [EAX] in
def CLZERO32r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
TB, Requires<[HasCLZERO, Not64BitMode]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
index cd9a866c91cb..0f4d4d764cc9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
@@ -508,16 +508,16 @@ def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
// -- Conversion Instructions
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
- WriteCvtPS2I, SSEPackedSingle>, PS;
+ WriteCvtPS2I, SSEPackedSingle>, PS, SIMD_EXC;
defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
- WriteCvtPD2I, SSEPackedDouble>, PD;
+ WriteCvtPD2I, SSEPackedDouble>, PD, SIMD_EXC;
defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
- WriteCvtPS2I, SSEPackedSingle>, PS;
+ WriteCvtPS2I, SSEPackedSingle>, PS, SIMD_EXC;
defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
- WriteCvtPD2I, SSEPackedDouble>, PD;
+ WriteCvtPD2I, SSEPackedDouble>, PD, SIMD_EXC;
defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
WriteCvtI2PD, SSEPackedDouble>, PD;
@@ -525,7 +525,7 @@ let Constraints = "$src1 = $dst" in {
defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
int_x86_sse_cvtpi2ps,
i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, PS;
+ SSEPackedSingle>, PS, SIMD_EXC;
}
// Extract / Insert
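
The SIMD_EXC tag added to the MMX conversions above is shorthand (an assumption here, but consistent with the explicit "Uses = [MXCSR], mayRaiseFPException = 1" blocks later in this diff) for making an instruction observable to strict-FP code motion. At the MachineInstr level the flag surfaces like this:

// Sketch: how a pass can observe the effect of SIMD_EXC on an MI.
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

static bool isStrictFPBarrier(const MachineInstr &MI) {
  // True for instructions tagged mayRaiseFPException = 1; such MIs must
  // not be hoisted past code that reads or writes the FP environment.
  return MI.mayRaiseFPException();
}
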
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
index 09a04c0338b4..c45f342ed75b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
@@ -823,7 +823,9 @@ let Constraints = "$src1 = $dst" in {
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
string asm, string mem, X86FoldableSchedWrite sched,
+ Domain d,
SchedRead Int2Fpu = ReadDefault> {
+ let ExeDomain = d in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (OpNode SrcRC:$src))]>,
@@ -832,18 +834,19 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
mem#"\t{$src, $dst|$dst, $src}",
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
Sched<[sched.Folded]>;
+ }
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
string asm, Domain d, X86FoldableSchedWrite sched> {
-let hasSideEffects = 0 in {
+let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
- [(set RC:$dst, (DstTy (sint_to_fp (SrcTy RC:$src))))], d>,
+ [(set RC:$dst, (DstTy (any_sint_to_fp (SrcTy RC:$src))))], d>,
Sched<[sched]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
- [(set RC:$dst, (DstTy (sint_to_fp
+ [(set RC:$dst, (DstTy (any_sint_to_fp
(SrcTy (ld_frag addr:$src)))))], d>,
Sched<[sched.Folded]>;
}
@@ -851,8 +854,8 @@ let hasSideEffects = 0 in {
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm, string mem,
- X86FoldableSchedWrite sched> {
-let hasSideEffects = 0, Predicates = [UseAVX] in {
+ X86FoldableSchedWrite sched, Domain d> {
+let hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
Sched<[sched, ReadDefault, ReadInt2Fpu]>;
@@ -864,22 +867,22 @@ let hasSideEffects = 0, Predicates = [UseAVX] in {
} // hasSideEffects = 0
}
-let isCodeGenOnly = 1, Predicates = [UseAVX] in {
-defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
- WriteCvtSS2I>,
+ WriteCvtSS2I, SSEPackedSingle>,
XS, VEX, VEX_LIG;
-defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
- WriteCvtSS2I>,
+ WriteCvtSS2I, SSEPackedSingle>,
XS, VEX, VEX_W, VEX_LIG;
-defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
- WriteCvtSD2I>,
+ WriteCvtSD2I, SSEPackedDouble>,
XD, VEX, VEX_LIG;
-defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
- WriteCvtSD2I>,
+ WriteCvtSD2I, SSEPackedDouble>,
XD, VEX, VEX_W, VEX_LIG;
}
@@ -889,60 +892,64 @@ defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
// where appropriate to do so.
let isCodeGenOnly = 1 in {
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
- WriteCvtI2SS>, XS, VEX_4V, VEX_LIG;
+ WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
+ VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
- WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG;
+ WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
+ VEX_W, VEX_LIG, SIMD_EXC;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
- WriteCvtI2SD>, XD, VEX_4V, VEX_LIG;
+ WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
+ VEX_LIG;
defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
- WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG;
+ WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
+ VEX_W, VEX_LIG, SIMD_EXC;
} // isCodeGenOnly = 1
let Predicates = [UseAVX] in {
- def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+ def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+ def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+ def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+ def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(f32 (sint_to_fp GR32:$src)),
+ def : Pat<(f32 (any_sint_to_fp GR32:$src)),
(VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
- def : Pat<(f32 (sint_to_fp GR64:$src)),
+ def : Pat<(f32 (any_sint_to_fp GR64:$src)),
(VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
- def : Pat<(f64 (sint_to_fp GR32:$src)),
+ def : Pat<(f64 (any_sint_to_fp GR32:$src)),
(VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
- def : Pat<(f64 (sint_to_fp GR64:$src)),
+ def : Pat<(f64 (any_sint_to_fp GR64:$src)),
(VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
let isCodeGenOnly = 1 in {
-defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
- WriteCvtSS2I>, XS;
-defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
+defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
- WriteCvtSS2I>, XS, REX_W;
-defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
+defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
- WriteCvtSD2I>, XD;
-defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
+defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
- WriteCvtSD2I>, XD, REX_W;
-defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+ WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
+defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
"cvtsi2ss", "cvtsi2ss{l}",
- WriteCvtI2SS, ReadInt2Fpu>, XS;
-defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
+ WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
+defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp, i64mem, loadi64,
"cvtsi2ss", "cvtsi2ss{q}",
- WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W;
-defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+ WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC;
+defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp, i32mem, loadi32,
"cvtsi2sd", "cvtsi2sd{l}",
- WriteCvtI2SD, ReadInt2Fpu>, XD;
-defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
+ WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD;
+defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
"cvtsi2sd", "cvtsi2sd{q}",
- WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W;
+ WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
@@ -951,7 +958,8 @@ defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
ValueType DstVT, ValueType SrcVT, SDNode OpNode,
Operand memop, ComplexPattern mem_cpat, string asm,
- X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched, Domain d> {
+let ExeDomain = d in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
@@ -961,12 +969,13 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
[(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>,
Sched<[sched.Folded]>;
}
+}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, X86MemOperand x86memop,
string asm, string mem, X86FoldableSchedWrite sched,
- bit Is2Addr = 1> {
-let hasSideEffects = 0 in {
+ Domain d, bit Is2Addr = 1> {
+let hasSideEffects = 0, ExeDomain = d in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
@@ -982,39 +991,50 @@ let hasSideEffects = 0 in {
}
}
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
let Predicates = [UseAVX] in {
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
- WriteCvtSD2I>, XD, VEX, VEX_LIG;
+ WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
- WriteCvtSD2I>, XD, VEX, VEX_W, VEX_LIG;
+ WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG;
}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
- sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD;
+ sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
+ SSEPackedDouble>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
- sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W;
-
+ sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
+ SSEPackedDouble>, XD, REX_W;
+}
let Predicates = [UseAVX] in {
defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG;
+ i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>,
+ XS, VEX_4V, VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W;
+ i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>,
+ XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG;
+ i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>,
+ XD, VEX_4V, VEX_LIG;
defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W;
+ i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>,
+ XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
}
let Constraints = "$src1 = $dst" in {
defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS;
+ i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>,
+ XS, SIMD_EXC;
defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W;
+ i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>,
+ XS, REX_W, SIMD_EXC;
defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD;
+ i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>,
+ XD;
defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W;
+ i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>,
+ XD, REX_W, SIMD_EXC;
}
def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1048,34 +1068,38 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
/// SSE 1 Only
// Aliases for intrinsics
-let Predicates = [UseAVX] in {
+let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
- WriteCvtSS2I>, XS, VEX, VEX_LIG;
+ WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
X86cvtts2Int, ssmem, sse_load_f32,
- "cvttss2si", WriteCvtSS2I>,
+ "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
XS, VEX, VEX_LIG, VEX_W;
defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
- WriteCvtSS2I>, XD, VEX, VEX_LIG;
+ WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
X86cvtts2Int, sdmem, sse_load_f64,
- "cvttsd2si", WriteCvtSS2I>,
+ "cvttsd2si", WriteCvtSS2I, SSEPackedDouble>,
XD, VEX, VEX_LIG, VEX_W;
}
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
- WriteCvtSS2I>, XS;
+ WriteCvtSS2I, SSEPackedSingle>, XS;
defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
X86cvtts2Int, ssmem, sse_load_f32,
- "cvttss2si", WriteCvtSS2I>, XS, REX_W;
+ "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
+ XS, REX_W;
defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
- WriteCvtSD2I>, XD;
+ WriteCvtSD2I, SSEPackedDouble>, XD;
defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
X86cvtts2Int, sdmem, sse_load_f64,
- "cvttsd2si", WriteCvtSD2I>, XD, REX_W;
+ "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>,
+ XD, REX_W;
+}
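
Threading a Domain parameter through sse12_cvt_sint (and setting ExeDomain = d) matters for the execution-domain fixup pass, which queries the domain per instruction and flips between equivalent single/double encodings to avoid domain-crossing penalties. A sketch of the consumer side, using the documented TargetInstrInfo hook:

// Sketch: the pass-side view of ExeDomain. getExecutionDomain returns
// {current domain, bitmask of domains the MI can be flipped to}.
#include <tuple>
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

static bool canFlipDomain(const MachineInstr &MI, const TargetInstrInfo &TII,
                          unsigned WantedDomain) {
  uint16_t Domain, Legal;
  std::tie(Domain, Legal) = TII.getExecutionDomain(MI);
  return Domain != WantedDomain && (Legal & (1u << WantedDomain));
}
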
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
(VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
@@ -1111,20 +1135,21 @@ def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
(CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
-let Predicates = [UseAVX] in {
+let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I>, XS, VEX, VEX_LIG;
+ WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG;
+ WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG;
}
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I>, XS;
+ WriteCvtSS2I, SSEPackedSingle>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I>, XS, REX_W;
+ WriteCvtSS2I, SSEPackedSingle>, XS, REX_W;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
@@ -1139,6 +1164,7 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
PS, Requires<[UseSSE2]>;
+}
// AVX aliases
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
@@ -1184,31 +1210,32 @@ def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSD2SS]>;
+ Sched<[WriteCvtSD2SS]>, SIMD_EXC;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XD, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
}
-def : Pat<(f32 (fpround FR64:$src)),
+def : Pat<(f32 (any_fpround FR64:$src)),
(VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
Requires<[UseAVX]>;
let isCodeGenOnly = 1 in {
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fpround FR64:$src))]>,
- Sched<[WriteCvtSD2SS]>;
+ [(set FR32:$dst, (any_fpround FR64:$src))]>,
+ Sched<[WriteCvtSD2SS]>, SIMD_EXC;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
+ [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>,
XD, Requires<[UseSSE2, OptForSize]>,
- Sched<[WriteCvtSD2SS.Folded]>;
+ Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC;
}
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1238,6 +1265,7 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
XD, Requires<[UseSSE2]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
+}
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
@@ -1246,34 +1274,34 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>;
+ Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG, VEX_WIG,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
- Requires<[UseAVX, OptForSize]>;
+ Requires<[UseAVX, OptForSize]>, SIMD_EXC;
} // isCodeGenOnly = 1, hasSideEffects = 0
-def : Pat<(f64 (fpextend FR32:$src)),
+def : Pat<(f64 (any_fpextend FR32:$src)),
(VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
-def : Pat<(fpextend (loadf32 addr:$src)),
+def : Pat<(any_fpextend (loadf32 addr:$src)),
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
let isCodeGenOnly = 1 in {
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (fpextend FR32:$src))]>,
- XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
+ [(set FR64:$dst, (any_fpextend FR32:$src))]>,
+ XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>,
+ [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>,
XS, Requires<[UseSSE2, OptForSize]>,
- Sched<[WriteCvtSS2SD.Folded]>;
+ Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC;
} // isCodeGenOnly = 1
-let hasSideEffects = 0 in {
+let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1307,53 +1335,53 @@ let Predicates = [UseAVX] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector
- (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
+ (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
(VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector
- (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
+ (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
(VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
(VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
(VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
(VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SDrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
(VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseAVX]
@@ -1361,55 +1389,55 @@ let Predicates = [UseSSE2] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector
- (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
+ (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
(CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector
- (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
+ (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
(CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
(CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
(CVTSI642SDrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
(CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
(CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE2]
let Predicates = [UseSSE1] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
(CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
(CVTSI642SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
(CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
(CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE1]
@@ -1418,36 +1446,36 @@ let Predicates = [HasAVX, NoVLX] in {
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
- VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
+ VEX, Sched<[WriteCvtPS2I]>, VEX_WIG, SIMD_EXC;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>,
- VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
+ VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG, SIMD_EXC;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG, SIMD_EXC;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG, SIMD_EXC;
}
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
- Sched<[WriteCvtPS2I]>;
+ Sched<[WriteCvtPS2I]>, SIMD_EXC;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>,
- Sched<[WriteCvtPS2ILd]>;
+ Sched<[WriteCvtPS2ILd]>, SIMD_EXC;
// Convert Packed Double FP to Packed DW Integers
-let Predicates = [HasAVX, NoVLX] in {
+let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
@@ -1486,35 +1514,36 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>,
- Sched<[WriteCvtPD2ILd]>;
+ Sched<[WriteCvtPD2ILd]>, SIMD_EXC;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
- Sched<[WriteCvtPD2I]>;
+ Sched<[WriteCvtPD2I]>, SIMD_EXC;
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
let Predicates = [HasAVX, NoVLX] in {
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
+ (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (X86cvttp2si (loadv4f32 addr:$src))))]>,
+ (v4i32 (X86any_cvttp2si (loadv4f32 addr:$src))))]>,
VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (v8i32 (X86cvttp2si (v8f32 VR256:$src))))]>,
+ (v8i32 (X86any_cvttp2si (v8f32 VR256:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (v8i32 (X86cvttp2si (loadv8f32 addr:$src))))]>,
+ (v8i32 (X86any_cvttp2si (loadv8f32 addr:$src))))]>,
VEX, VEX_L,
Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
}
@@ -1522,40 +1551,41 @@ def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src)
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
+ (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
Sched<[WriteCvtPS2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>,
+ (v4i32 (X86any_cvttp2si (memopv4f32 addr:$src))))]>,
Sched<[WriteCvtPS2ILd]>;
+}
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
-let Predicates = [HasAVX, NoVLX] in {
+let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// XMM only
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>,
+ (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (X86cvttp2si (loadv2f64 addr:$src))))]>,
+ (v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))))]>,
VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (X86cvttp2si (v4f64 VR256:$src))))]>,
+ (v4i32 (X86any_cvttp2si (v4f64 VR256:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (X86cvttp2si (loadv4f64 addr:$src))))]>,
+ (v4i32 (X86any_cvttp2si (loadv4f64 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]
@@ -1565,29 +1595,29 @@ def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
+ def : Pat<(v4i32 (any_fp_to_sint (v4f64 VR256:$src))),
(VCVTTPD2DQYrr VR256:$src)>;
- def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
+ def : Pat<(v4i32 (any_fp_to_sint (loadv4f64 addr:$src))),
(VCVTTPD2DQYrm addr:$src)>;
}
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>,
- Sched<[WriteCvtPD2I]>;
+ (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
+ Sched<[WriteCvtPD2I]>, SIMD_EXC;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>,
- Sched<[WriteCvtPD2ILd]>;
+ (v4i32 (X86any_cvttp2si (memopv2f64 addr:$src))))]>,
+ Sched<[WriteCvtPD2ILd]>, SIMD_EXC;
// Convert packed single to packed double
-let Predicates = [HasAVX, NoVLX] in {
+let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
+ [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
@@ -1595,7 +1625,7 @@ def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))]>,
+ [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>,
PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
@@ -1603,10 +1633,10 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG;
}
-let Predicates = [UseSSE2] in {
+let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
+ [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
PS, Sched<[WriteCvtPS2PD]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
@@ -1620,7 +1650,7 @@ let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP
+ (v2f64 (X86any_VSintToFP
(bc_v4i32
(v2i64 (scalar_to_vector
(loadi64 addr:$src)))))))]>,
@@ -1628,18 +1658,18 @@ def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>,
+ (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
VEX, Sched<[WriteCvtI2PD]>, VEX_WIG;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (v4f64 (sint_to_fp (loadv4i32 addr:$src))))]>,
+ (v4f64 (any_sint_to_fp (loadv4i32 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
VEX_WIG;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (v4f64 (sint_to_fp (v4i32 VR128:$src))))]>,
+ (v4f64 (any_sint_to_fp (v4i32 VR128:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG;
}
@@ -1647,7 +1677,7 @@ let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP
+ (v2f64 (X86any_VSintToFP
(bc_v4i32
(v2i64 (scalar_to_vector
(loadi64 addr:$src)))))))]>,
@@ -1655,18 +1685,18 @@ def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>,
+ (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
Sched<[WriteCvtI2PD]>;
// AVX register conversion intrinsics
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
+ def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTDQ2PDrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]
// SSE2 register conversion intrinsics
let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
+ def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(CVTDQ2PDrm addr:$src)>;
} // Predicates = [UseSSE2]
@@ -1674,24 +1704,24 @@ let Predicates = [UseSSE2] in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
-let Predicates = [HasAVX, NoVLX] in {
+let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// XMM only
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
+ [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>,
+ [(set VR128:$dst, (X86any_vfpround (loadv2f64 addr:$src)))]>,
VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86vfpround VR256:$src))]>,
+ [(set VR128:$dst, (X86any_vfpround VR256:$src))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86vfpround (loadv4f64 addr:$src)))]>,
+ [(set VR128:$dst, (X86any_vfpround (loadv4f64 addr:$src)))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]
@@ -1702,19 +1732,12 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
- Sched<[WriteCvtPD2PS]>;
+ [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
+ Sched<[WriteCvtPD2PS]>, SIMD_EXC;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
- Sched<[WriteCvtPD2PS.Folded]>;
-
-let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
- (VCVTPD2PSYrr VR256:$src)>;
- def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
- (VCVTPD2PSYrm addr:$src)>;
-}
+ [(set VR128:$dst, (X86any_vfpround (memopv2f64 addr:$src)))]>,
+ Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
@@ -1725,6 +1748,7 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
SDNode OpNode, ValueType VT,
PatFrag ld_frag, string asm,
X86FoldableSchedWrite sched> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
let isCommutable = 1 in
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
@@ -1736,6 +1760,7 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
(ld_frag addr:$src2), timm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
+}
let isCodeGenOnly = 1 in {
let ExeDomain = SSEPackedSingle in
@@ -1763,6 +1788,7 @@ let isCodeGenOnly = 1 in {
multiclass sse12_cmp_scalar_int<Operand memop,
Intrinsic Int, string asm, X86FoldableSchedWrite sched,
ComplexPattern mem_cpat> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, u8imm:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
@@ -1775,6 +1801,7 @@ let mayLoad = 1 in
mem_cpat:$src, timm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
+}
// Aliases to match intrinsics which expect XMM operand(s).
let ExeDomain = SSEPackedSingle in
@@ -1802,9 +1829,10 @@ let Constraints = "$src1 = $dst" in {
// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
ValueType vt, X86MemOperand x86memop,
- PatFrag ld_frag, string OpcodeStr,
- X86FoldableSchedWrite sched> {
-let hasSideEffects = 0 in {
+ PatFrag ld_frag, string OpcodeStr, Domain d,
+ X86FoldableSchedWrite sched = WriteFCom> {
+let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
+ ExeDomain = d in {
def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
@@ -1822,7 +1850,9 @@ let mayLoad = 1 in
multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
ValueType vt, Operand memop,
ComplexPattern mem_cpat, string OpcodeStr,
- X86FoldableSchedWrite sched> {
+ Domain d,
+ X86FoldableSchedWrite sched = WriteFCom> {
+let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = d in {
def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
@@ -1834,52 +1864,48 @@ let mayLoad = 1 in
mem_cpat:$src2))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
+}
let Defs = [EFLAGS] in {
- defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
- "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
- defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
- "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
- let Pattern = []<dag> in {
- defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
- "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
- defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
- "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
- }
+ defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
+ defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
+ defm VCOMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
+ "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
+ defm VCOMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
+ "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
let isCodeGenOnly = 1 in {
defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
+ sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
+ sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
+ sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
- }
- defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
- "ucomiss", WriteFCom>, PS;
- defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
- "ucomisd", WriteFCom>, PD;
-
- let Pattern = []<dag> in {
- defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
- "comiss", WriteFCom>, PS;
- defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
- "comisd", WriteFCom>, PD;
+ sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
}
+ defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, PS;
+ defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, PD;
+ defm COMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
+ "comiss", SSEPackedSingle>, PS;
+ defm COMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
+ "comisd", SSEPackedDouble>, PD;
let isCodeGenOnly = 1 in {
defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", WriteFCom>, PS;
+ sse_load_f32, "ucomiss", SSEPackedSingle>, PS;
defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", WriteFCom>, PD;
+ sse_load_f64, "ucomisd", SSEPackedDouble>, PD;
defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", WriteFCom>, PS;
+ sse_load_f32, "comiss", SSEPackedSingle>, PS;
defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", WriteFCom>, PD;
+ sse_load_f64, "comisd", SSEPackedDouble>, PD;
}
} // Defs = [EFLAGS]
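
The split between X86any_fcmp (VUCOMISS/UCOMISS) and X86strict_fcmps (VCOMISS/COMISS) mirrors IEEE 754's quiet versus signaling comparisons: ucomis* raises the invalid exception only for signaling NaNs, while comis* raises it for any NaN. The same distinction is visible from C++, shown here as a standalone illustration (subject to the usual FENV_ACCESS caveats; compilers may fold these compares unless FP-environment access is enabled):

#include <cfenv>
#include <cmath>
#include <cstdio>

// Quiet vs. signaling FP comparison, the distinction behind
// STRICT_FCMP (ucomiss-style) and STRICT_FCMPS (comiss-style).
int main() {
  double QNaN = std::nan("");
  std::feclearexcept(FE_INVALID);
  (void)std::isless(QNaN, 1.0);       // quiet compare: no exception on QNaN
  int QuietRaised = std::fetestexcept(FE_INVALID);
  std::feclearexcept(FE_INVALID);
  (void)(QNaN < 1.0);                 // '<' is signaling: raises invalid
  int SignalingRaised = std::fetestexcept(FE_INVALID);
  std::printf("quiet=%d signaling=%d\n", QuietRaised, SignalingRaised);
  return 0;
}
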
@@ -1888,17 +1914,19 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
ValueType VT, string asm,
X86FoldableSchedWrite sched,
Domain d, PatFrag ld_frag> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
let isCommutable = 1 in
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
- [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, timm:$cc)))], d>,
+ [(set RC:$dst, (VT (X86any_cmpp RC:$src1, RC:$src2, timm:$cc)))], d>,
Sched<[sched]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst,
- (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>,
+ (VT (X86any_cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
+}
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
@@ -1928,20 +1956,20 @@ def CommutableCMPCC : PatLeaf<(timm), [{
// Patterns to select compares with loads in first operand.
let Predicates = [HasAVX] in {
- def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1,
- CommutableCMPCC:$cc)),
+ def : Pat<(v4f64 (X86any_cmpp (loadv4f64 addr:$src2), VR256:$src1,
+ CommutableCMPCC:$cc)),
(VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>;
- def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1,
- CommutableCMPCC:$cc)),
+ def : Pat<(v8f32 (X86any_cmpp (loadv8f32 addr:$src2), VR256:$src1,
+ CommutableCMPCC:$cc)),
(VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>;
- def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1,
- CommutableCMPCC:$cc)),
+ def : Pat<(v2f64 (X86any_cmpp (loadv2f64 addr:$src2), VR128:$src1,
+ CommutableCMPCC:$cc)),
(VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;
- def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1,
- CommutableCMPCC:$cc)),
+ def : Pat<(v4f32 (X86any_cmpp (loadv4f32 addr:$src2), VR128:$src1,
+ CommutableCMPCC:$cc)),
(VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;
def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
@@ -1954,8 +1982,8 @@ let Predicates = [HasAVX] in {
}
let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1,
- CommutableCMPCC:$cc)),
+ def : Pat<(v2f64 (X86any_cmpp (memopv2f64 addr:$src2), VR128:$src1,
+ CommutableCMPCC:$cc)),
(CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;
def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
@@ -1964,8 +1992,8 @@ let Predicates = [UseSSE2] in {
}
let Predicates = [UseSSE1] in {
- def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1,
- CommutableCMPCC:$cc)),
+ def : Pat<(v4f32 (X86any_cmpp (memopv4f32 addr:$src2), VR128:$src1,
+ CommutableCMPCC:$cc)),
(CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;
def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
@@ -2555,6 +2583,7 @@ def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
/// classes below
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86SchedWriteSizes sched> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32,
@@ -2580,9 +2609,11 @@ multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
sched.PD.XMM>, PD;
}
}
+}
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteSizes sched> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
XS, VEX_4V, VEX_LIG, VEX_WIG;
@@ -2599,10 +2630,12 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
sched.PD.Scl>, XD;
}
}
+}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
X86SchedWriteSizes sched> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
@@ -2619,20 +2652,21 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
SSEPackedDouble, sched.PD.Scl>, XD;
}
}
+}
// Binary Arithmetic instructions
-defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>,
- basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAddSizes>,
+defm ADD : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>,
+ basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>,
basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
-defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMulSizes>,
- basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMulSizes>,
+defm MUL : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
+ basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
let isCommutable = 0 in {
- defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAddSizes>,
- basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAddSizes>,
+ defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
+ basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
- defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDivSizes>,
- basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDivSizes>,
+ defm DIV : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
+ basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
@@ -2727,15 +2761,15 @@ multiclass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode Move,
}
}
-defm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
-defm : scalar_math_patterns<fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
-defm : scalar_math_patterns<fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
-defm : scalar_math_patterns<fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<any_fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<any_fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<any_fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<any_fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
-defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
-defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
-defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
-defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<any_fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<any_fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<any_fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
@@ -2961,10 +2995,10 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
// Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
- sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>,
- sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>;
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>,
+ sse1_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
+ sse2_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64, UseAVX>,
+ sse2_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64>, SIMD_EXC;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
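A hedged illustration of the refinement the comment alludes to, using SSE intrinsics rather than anything from this patch: one Newton-Raphson step takes RSQRTSS's roughly 12-bit estimate to near single-precision accuracy.

#include <immintrin.h>

// y1 = y0 * (1.5 - 0.5 * x * y0 * y0), starting from the hardware estimate.
static float refined_rsqrt(float x) {
  __m128 vx = _mm_set_ss(x);
  __m128 y0 = _mm_rsqrt_ss(vx);                    // ~12-bit estimate
  __m128 half_x = _mm_mul_ss(_mm_set_ss(0.5f), vx);
  __m128 y0sq = _mm_mul_ss(y0, y0);
  __m128 t = _mm_sub_ss(_mm_set_ss(1.5f), _mm_mul_ss(half_x, y0sq));
  return _mm_cvtss_f32(_mm_mul_ss(y0, t));         // ~23-bit result
}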
@@ -2993,8 +3027,8 @@ multiclass scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix, SDNode Mo
}
}
-defm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
-defm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
+defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
+defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix,
SDNode Move, ValueType VT,
@@ -4436,6 +4470,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, X86FoldableSchedWrite sched,
PatFrag ld_frag, bit Is2Addr = 1> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr : I<0xD0, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
@@ -4451,6 +4486,7 @@ multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
[(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
+}
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
@@ -4488,6 +4524,7 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode,
X86FoldableSchedWrite sched, PatFrag ld_frag,
bit Is2Addr = 1> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -4502,10 +4539,12 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
+}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode,
X86FoldableSchedWrite sched, PatFrag ld_frag,
bit Is2Addr = 1> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -4520,6 +4559,7 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
+}
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
@@ -5348,6 +5388,7 @@ multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
def r : SS4AIi8<opc, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
@@ -5364,6 +5405,7 @@ multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
(VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>,
Sched<[sched.Folded]>;
}
+}
multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
string OpcodeStr, X86FoldableSchedWrite sched> {
@@ -5400,6 +5442,7 @@ let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
string OpcodeStr, X86FoldableSchedWrite sched> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
@@ -5430,11 +5473,13 @@ let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
[]>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
+}
multiclass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
string OpcodeStr, X86FoldableSchedWrite sched,
ValueType VT32, ValueType VT64,
SDNode OpNode, bit Is2Addr = 1> {
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
let ExeDomain = SSEPackedSingle in {
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
@@ -5481,56 +5526,57 @@ let ExeDomain = SSEPackedDouble in {
Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
}
+}
// FP round - roundss, roundps, roundsd, roundpd
let Predicates = [HasAVX, NoVLX] in {
- let ExeDomain = SSEPackedSingle in {
+ let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {
// Intrinsic form
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
- loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,
+ loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
- loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>,
+ loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>,
VEX, VEX_L, VEX_WIG;
}
- let ExeDomain = SSEPackedDouble in {
+ let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
- loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
+ loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
- loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>,
+ loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>,
VEX, VEX_L, VEX_WIG;
}
}
let Predicates = [UseAVX] in {
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales, 0>,
- VEX_4V, VEX_LIG, VEX_WIG;
+ VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
- VEX_4V, VEX_LIG, VEX_WIG;
+ VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
}
let Predicates = [UseAVX] in {
- def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
+ def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
- def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
+ def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
}
let Predicates = [UseAVX, OptForSize] in {
- def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
+ def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
(VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
- def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
+ def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
(VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
}
let ExeDomain = SSEPackedSingle in
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
- memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>;
+ memopv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
- memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>;
+ memopv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>;
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;
@@ -5539,16 +5585,16 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales>;
let Predicates = [UseSSE41] in {
- def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
+ def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
(ROUNDSSr FR32:$src1, timm:$src2)>;
- def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
+ def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
(ROUNDSDr FR64:$src1, timm:$src2)>;
}
let Predicates = [UseSSE41, OptForSize] in {
- def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
+ def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
(ROUNDSSm addr:$src1, timm:$src2)>;
- def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
+ def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
(ROUNDSDm addr:$src1, timm:$src2)>;
}
@@ -5959,6 +6005,7 @@ let Predicates = [HasAVX] in {
SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
}
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR128, load, f128mem, 0,
@@ -5972,6 +6019,7 @@ let Predicates = [HasAVX] in {
VR256, load, i256mem, 0,
SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
+}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
@@ -5991,11 +6039,11 @@ let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memop, f128mem, 1,
- SchedWriteDPPS.XMM>;
+ SchedWriteDPPS.XMM>, SIMD_EXC;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
VR128, memop, f128mem, 1,
- SchedWriteDPPD.XMM>;
+ SchedWriteDPPD.XMM>, SIMD_EXC;
}
/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate
@@ -7266,12 +7314,12 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
}
let Predicates = [HasF16C, NoVLX] in {
- defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>;
- defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L;
+ defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>, SIMD_EXC;
+ defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L, SIMD_EXC;
defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
- WriteCvtPS2PHSt>;
+ WriteCvtPS2PHSt>, SIMD_EXC;
defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
- WriteCvtPS2PHYSt>, VEX_L;
+ WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;
// Pattern match vcvtph2ps of a scalar i64 load.
def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td
index 3a1212342a13..41b839425ccd 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td
@@ -31,7 +31,7 @@ def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst),
"xbegin\t$dst", []>, OpSize32;
}
-// Psuedo instruction to fake the definition of EAX on the fallback code path.
+// Pseudo instruction to fake the definition of EAX on the fallback code path.
let isPseudo = 1, Defs = [EAX] in {
def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 01620b7b64c9..3f9d626ff912 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
@@ -111,8 +112,6 @@ private:
bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectShift(MachineInstr &I, MachineRegisterInfo &MRI,
- MachineFunction &MF) const;
bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
@@ -342,7 +341,7 @@ bool X86InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_STORE:
case TargetOpcode::G_LOAD:
return selectLoadStoreOp(I, MRI, MF);
- case TargetOpcode::G_GEP:
+ case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_FRAME_INDEX:
return selectFrameIndexOrGep(I, MRI, MF);
case TargetOpcode::G_GLOBAL_VALUE:
@@ -380,10 +379,6 @@ bool X86InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_IMPLICIT_DEF:
case TargetOpcode::G_PHI:
return selectImplicitDefOrPHI(I, MRI);
- case TargetOpcode::G_SHL:
- case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR:
- return selectShift(I, MRI, MF);
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SREM:
@@ -482,7 +477,7 @@ static void X86SelectAddress(const MachineInstr &I,
assert(MRI.getType(I.getOperand(0).getReg()).isPointer() &&
"unsupported type.");
- if (I.getOpcode() == TargetOpcode::G_GEP) {
+ if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
if (auto COff = getConstantVRegVal(I.getOperand(2).getReg(), MRI)) {
int64_t Imm = *COff;
if (isInt<32>(Imm)) { // Check for displacement overflow.
@@ -566,7 +561,7 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
MachineFunction &MF) const {
unsigned Opc = I.getOpcode();
- assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_GEP) &&
+ assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_PTR_ADD) &&
"unexpected instruction");
const Register DefReg = I.getOperand(0).getReg();
@@ -1225,7 +1220,7 @@ bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg,
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+ LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
return false;
}
@@ -1519,78 +1514,6 @@ bool X86InstructionSelector::selectImplicitDefOrPHI(
return true;
}
-// Currently GlobalIsel TableGen generates patterns for shift imm and shift 1,
-// but with shiftCount i8. In G_LSHR/G_ASHR/G_SHL like LLVM-IR both arguments
-// has the same type, so for now only shift i8 can use auto generated
-// TableGen patterns.
-bool X86InstructionSelector::selectShift(MachineInstr &I,
- MachineRegisterInfo &MRI,
- MachineFunction &MF) const {
-
- assert((I.getOpcode() == TargetOpcode::G_SHL ||
- I.getOpcode() == TargetOpcode::G_ASHR ||
- I.getOpcode() == TargetOpcode::G_LSHR) &&
- "unexpected instruction");
-
- Register DstReg = I.getOperand(0).getReg();
- const LLT DstTy = MRI.getType(DstReg);
- const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
-
- const static struct ShiftEntry {
- unsigned SizeInBits;
- unsigned OpLSHR;
- unsigned OpASHR;
- unsigned OpSHL;
- } OpTable[] = {
- {8, X86::SHR8rCL, X86::SAR8rCL, X86::SHL8rCL}, // i8
- {16, X86::SHR16rCL, X86::SAR16rCL, X86::SHL16rCL}, // i16
- {32, X86::SHR32rCL, X86::SAR32rCL, X86::SHL32rCL}, // i32
- {64, X86::SHR64rCL, X86::SAR64rCL, X86::SHL64rCL} // i64
- };
-
- if (DstRB.getID() != X86::GPRRegBankID)
- return false;
-
- auto ShiftEntryIt = std::find_if(
- std::begin(OpTable), std::end(OpTable), [DstTy](const ShiftEntry &El) {
- return El.SizeInBits == DstTy.getSizeInBits();
- });
- if (ShiftEntryIt == std::end(OpTable))
- return false;
-
- unsigned Opcode = 0;
- switch (I.getOpcode()) {
- case TargetOpcode::G_SHL:
- Opcode = ShiftEntryIt->OpSHL;
- break;
- case TargetOpcode::G_ASHR:
- Opcode = ShiftEntryIt->OpASHR;
- break;
- case TargetOpcode::G_LSHR:
- Opcode = ShiftEntryIt->OpLSHR;
- break;
- default:
- return false;
- }
-
- Register Op0Reg = I.getOperand(1).getReg();
- Register Op1Reg = I.getOperand(2).getReg();
-
- assert(MRI.getType(Op1Reg).getSizeInBits() == 8);
-
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
- X86::CL)
- .addReg(Op1Reg);
-
- MachineInstr &ShiftInst =
- *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
- .addReg(Op0Reg);
-
- constrainSelectedInstRegOperands(ShiftInst, TII, TRI, RBI);
- I.eraseFromParent();
- return true;
-}
-
bool X86InstructionSelector::selectDivRem(MachineInstr &I,
MachineRegisterInfo &MRI,
MachineFunction &MF) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 1d7adbaa9e99..40bf28df3b90 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -15,6 +15,7 @@
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
+#include "llvm/IR/IntrinsicsX86.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index 04121f863c89..da53d6420021 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -77,7 +77,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
setLegalizeScalarToDifferentSizeStrategy(MemOp, 0,
narrowToSmallerAndWidenToSmallest);
setLegalizeScalarToDifferentSizeStrategy(
- G_GEP, 1, widenToLargerTypesUnsupportedOtherwise);
+ G_PTR_ADD, 1, widenToLargerTypesUnsupportedOtherwise);
setLegalizeScalarToDifferentSizeStrategy(
G_CONSTANT, 0, widenToLargerTypesAndNarrowToLargest);
@@ -140,8 +140,8 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
setAction({G_FRAME_INDEX, p0}, Legal);
setAction({G_GLOBAL_VALUE, p0}, Legal);
- setAction({G_GEP, p0}, Legal);
- setAction({G_GEP, 1, s32}, Legal);
+ setAction({G_PTR_ADD, p0}, Legal);
+ setAction({G_PTR_ADD, 1, s32}, Legal);
if (!Subtarget.is64Bit()) {
getActionDefinitionsBuilder(G_PTRTOINT)
@@ -223,7 +223,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
setAction({MemOp, s64}, Legal);
// Pointer-handling
- setAction({G_GEP, 1, s64}, Legal);
+ setAction({G_PTR_ADD, 1, s64}, Legal);
getActionDefinitionsBuilder(G_PTRTOINT)
.legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
.maxScalar(0, s64)
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
index 78098fd6262f..2fc9a2af01d7 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -569,6 +569,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
unsigned NewOpc;
switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPEQBZ128rm; break;
case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPEQBZ128rmk; break;
case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPEQBZ128rr; break;
@@ -640,6 +641,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
unsigned NewOpc;
switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPGTBZ128rm; break;
case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPGTBZ128rmk; break;
case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPGTBZ128rr; break;
@@ -876,6 +878,52 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case X86::MOVSX64rr32:
SimplifyMOVSX(OutMI);
break;
+
+ case X86::VCMPPDrri:
+ case X86::VCMPPDYrri:
+ case X86::VCMPPSrri:
+ case X86::VCMPPSYrri:
+ case X86::VCMPSDrr:
+ case X86::VCMPSSrr: {
+    // Swap the operands if it will enable a 2-byte VEX encoding.
+ // FIXME: Change the immediate to improve opportunities?
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
+ unsigned Imm = MI->getOperand(3).getImm() & 0x7;
+ switch (Imm) {
+ default: break;
+ case 0x00: // EQUAL
+ case 0x03: // UNORDERED
+ case 0x04: // NOT EQUAL
+ case 0x07: // ORDERED
+ std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
+ break;
+ }
+ }
+ break;
+ }
+
+ case X86::VMOVHLPSrr:
+ case X86::VUNPCKHPDrr:
+ // These are not truly commutable so hide them from the default case.
+ break;
+
+ default: {
+ // If the instruction is a commutable arithmetic instruction we might be
+    // able to commute the operands to get a 2-byte VEX prefix.
+ uint64_t TSFlags = MI->getDesc().TSFlags;
+ if (MI->getDesc().isCommutable() &&
+ (TSFlags & X86II::EncodingMask) == X86II::VEX &&
+ (TSFlags & X86II::OpMapMask) == X86II::TB &&
+ (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
+ !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
+ OutMI.getNumOperands() == 3) {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
+ std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
+ }
+ break;
+ }
}
}
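A hedged sketch of why the swap pays off (not part of the patch; assumes llvm/MC/MCInst.h and the X86II helpers from MCTargetDesc/X86BaseInfo.h): the 2-byte VEX prefix (C5) carries only VEX.R, so an extended register (XMM8-XMM15) in the r/m slot, operand 2 here, forces the 3-byte form, while the vvvv slot, operand 1, can name any register in either form.

#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/MC/MCInst.h"

static bool swapSavesVEXByte(const llvm::MCInst &OutMI) {
  // Mirrors the test used above: profitable only when operand 2 is the
  // sole extended register of the pair.
  return !llvm::X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
         llvm::X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg());
}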
@@ -983,13 +1031,32 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
}
}
+/// Return the longest nop which can be efficiently decoded for the given
+/// target CPU. 15 bytes is the longest single NOP instruction, but some
+/// platforms can't decode the longest forms efficiently.
+static unsigned MaxLongNopLength(const MCSubtargetInfo &STI) {
+ uint64_t MaxNopLength = 10;
+ if (STI.getFeatureBits()[X86::ProcIntelSLM])
+ MaxNopLength = 7;
+ else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
+ MaxNopLength = 15;
+ else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
+ MaxNopLength = 11;
+ return MaxNopLength;
+}
+
/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of nop emitted.
static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
const MCSubtargetInfo &STI) {
- // This works only for 64bit. For 32bit we have to do additional checking if
- // the CPU supports multi-byte nops.
- assert(Is64Bit && "EmitNops only supports X86-64");
+ if (!Is64Bit) {
+    // TODO: Do additional checking if the CPU supports multi-byte nops.
+ OS.EmitInstruction(MCInstBuilder(X86::NOOP), STI);
+ return 1;
+ }
+
+  // Cap a single nop emission at the profitable value for the target.
+ NumBytes = std::min(NumBytes, MaxLongNopLength(STI));
unsigned NopSize;
unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
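The plural EmitNops (not shown in this hunk) presumably loops over this helper; a hedged sketch of that decomposition, given the per-nop cap above:

static void emitNopPadding(llvm::MCStreamer &OS, unsigned NumBytes,
                           bool Is64Bit, const llvm::MCSubtargetInfo &STI) {
  while (NumBytes) {
    // Each call emits at most MaxLongNopLength(STI) bytes and reports back.
    unsigned Emitted = EmitNop(OS, NumBytes, Is64Bit, STI);
    NumBytes -= Emitted;
  }
}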
@@ -1094,10 +1161,35 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
}
}
+/// A RAII helper which defines a region of instructions which can't have
+/// padding added between them for correctness.
+struct NoAutoPaddingScope {
+ MCStreamer &OS;
+ const bool OldAllowAutoPadding;
+ NoAutoPaddingScope(MCStreamer &OS)
+ : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
+ changeAndComment(false);
+ }
+ ~NoAutoPaddingScope() {
+ changeAndComment(OldAllowAutoPadding);
+ }
+ void changeAndComment(bool b) {
+ if (b == OS.getAllowAutoPadding())
+ return;
+ OS.setAllowAutoPadding(b);
+ if (b)
+ OS.emitRawComment("autopadding");
+ else
+ OS.emitRawComment("noautopadding");
+ }
+};
+
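A hedged usage sketch of the scope above: it brackets a byte-exact sequence, suppressing padding for its lifetime and restoring the prior setting on exit, with the raw comments making the region visible in asm output.

void emitFixedLayoutInst(llvm::MCStreamer &OS, const llvm::MCInst &Inst,
                         const llvm::MCSubtargetInfo &STI) {
  NoAutoPaddingScope NoPad(OS);  // "noautopadding" comment, padding off
  OS.EmitInstruction(Inst, STI); // byte layout must not be perturbed
} // destructor restores the old setting ("autopadding" if it was on)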
void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
X86MCInstLower &MCIL) {
assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
StatepointOpers SOpers(&MI);
if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
@@ -1148,7 +1240,10 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
// Record our statepoint node in the same section used by STACKMAP
// and PATCHPOINT
- SM.recordStatepoint(MI);
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
+ SM.recordStatepoint(*MILabel, MI);
}
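The label-then-record idiom introduced here recurs for stackmaps and patchpoints below; a hedged generic sketch of the shared step:

static llvm::MCSymbol *emitRecordAnchor(llvm::MCStreamer &OS) {
  llvm::MCSymbol *Label = OS.getContext().createTempSymbol();
  OS.EmitLabel(Label); // pin the record to this exact offset in the stream
  return Label;        // caller hands it to SM.recordStatepoint() et al.
}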
void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
@@ -1156,6 +1251,8 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
// FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
// <opcode>, <operands>
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
Register DefRegister = FaultingMI.getOperand(0).getReg();
FaultMaps::FaultKind FK =
static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
@@ -1163,8 +1260,12 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
unsigned Opcode = FaultingMI.getOperand(3).getImm();
unsigned OperandsBeginIdx = 4;
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *FaultingLabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(FaultingLabel);
+
assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
- FM.recordFaultingOp(FK, HandlerLabel);
+ FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
MCInst MI;
MI.setOpcode(Opcode);
@@ -1199,6 +1300,8 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
X86MCInstLower &MCIL) {
// PATCHABLE_OP minsize, opcode, operands
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
unsigned MinSize = MI.getOperand(0).getImm();
unsigned Opcode = MI.getOperand(1).getImm();
@@ -1236,7 +1339,12 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
// <id>, <shadowBytes>, ...
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
- SM.recordStackMap(MI);
+
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
+
+ SM.recordStackMap(*MILabel, MI);
unsigned NumShadowBytes = MI.getOperand(1).getImm();
SMShadowTracker.reset(NumShadowBytes);
}
@@ -1249,7 +1357,12 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
- SM.recordPatchPoint(MI);
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
+ SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers opers(&MI);
unsigned ScratchIdx = opers.getNextScratchIdx();
@@ -1305,6 +1418,8 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
X86MCInstLower &MCIL) {
assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// We want to emit the following pattern, which follows the x86 calling
// convention to prepare for the trampoline call to be patched in.
//
@@ -1337,10 +1452,10 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
// The default C calling convention will place two arguments into %rcx and
// %rdx -- so we only work with those.
- unsigned DestRegs[] = {X86::RDI, X86::RSI};
+ const Register DestRegs[] = {X86::RDI, X86::RSI};
bool UsedMask[] = {false, false};
// Filled out in loop.
- unsigned SrcRegs[] = {0, 0};
+ Register SrcRegs[] = {0, 0};
// Then we put the operands in the %rdi and %rsi registers. We spill the
// values in the register before we clobber them, and mark them as used in
@@ -1350,7 +1465,7 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
for (unsigned I = 0; I < MI.getNumOperands(); ++I)
if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
assert(Op->isReg() && "Only support arguments in registers");
- SrcRegs[I] = Op->getReg();
+ SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
if (SrcRegs[I] != DestRegs[I]) {
UsedMask[I] = true;
EmitAndCountInstruction(
@@ -1361,6 +1476,9 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
}
// Now that the register values are stashed, mov arguments into place.
+ // FIXME: This doesn't work if one of the later SrcRegs is equal to an
+  // earlier DestReg. We will already have overwritten the register before
+  // we can copy from it.
for (unsigned I = 0; I < MI.getNumOperands(); ++I)
if (SrcRegs[I] != DestRegs[I])
EmitAndCountInstruction(
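A hypothetical, register-free illustration of the FIXME above: when a later source equals an earlier destination, in-order copies read an already-overwritten value.

#include <cassert>

int main() {
  int rdi = 1, rsi = 2; // stand-ins for the two destination registers
  // Intended: rdi takes old rsi (2), rsi takes old rdi (1). In-order copies:
  rdi = rsi;            // rdi == 2
  rsi = rdi;            // rsi == 2; old rdi (1) is gone
  assert(rdi == 2 && rsi == 2); // the clobber the FIXME warns about
  return 0;
}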
@@ -1396,6 +1514,8 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
X86MCInstLower &MCIL) {
assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// We want to emit the following pattern, which follows the x86 calling
// convention to prepare for the trampoline call to be patched in.
//
@@ -1429,11 +1549,11 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
// An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
// so we'll work with those. Or we may be called via SystemV, in which case
// we don't have to do any translation.
- unsigned DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
+ const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
bool UsedMask[] = {false, false, false};
// Will fill out src regs in the loop.
- unsigned SrcRegs[] = {0, 0, 0};
+ Register SrcRegs[] = {0, 0, 0};
// Then we put the operands in the SystemV registers. We spill the values in
// the registers before we clobber them, and mark them as used in UsedMask.
@@ -1443,7 +1563,7 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
// TODO: Is register-only support adequate?
assert(Op->isReg() && "Only supports arguments in registers");
- SrcRegs[I] = Op->getReg();
+ SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
if (SrcRegs[I] != DestRegs[I]) {
UsedMask[I] = true;
EmitAndCountInstruction(
@@ -1459,6 +1579,9 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
// is clobbers. We've already added nops to account for the size of mov and
// push if the register is in the right place, so we only have to worry about
// emitting movs.
+ // FIXME: This doesn't work if one of the later SrcRegs is equal to an
+  // earlier DestReg. We will already have overwritten the register before
+  // we can copy from it.
for (unsigned I = 0; I < MI.getNumOperands(); ++I)
if (UsedMask[I])
EmitAndCountInstruction(
@@ -1490,6 +1613,19 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
X86MCInstLower &MCIL) {
+
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
+ const Function &F = MF->getFunction();
+ if (F.hasFnAttribute("patchable-function-entry")) {
+ unsigned Num;
+ if (F.getFnAttribute("patchable-function-entry")
+ .getValueAsString()
+ .getAsInteger(10, Num))
+ return;
+ EmitNops(*OutStreamer, Num, Subtarget->is64Bit(), getSubtargetInfo());
+ return;
+ }
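A minimal sketch of the parse above, assuming only llvm/ADT/StringRef.h: StringRef::getAsInteger returns true on failure, which is why the lowering bails out silently when the attribute value is malformed.

#include "llvm/ADT/StringRef.h"

static bool parseEntryNopCount(llvm::StringRef Val, unsigned &Num) {
  return !Val.getAsInteger(10, Num); // true here means "parsed OK"
}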
// We want to emit the following pattern:
//
// .p2align 1, ...
@@ -1517,6 +1653,8 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
X86MCInstLower &MCIL) {
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// Since PATCHABLE_RET takes the opcode of the return statement as an
// argument, we use that to emit the correct form of the RET that we want.
// i.e. when we see this:
@@ -1547,6 +1685,8 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
X86MCInstLower &MCIL) {
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// Like PATCHABLE_RET, we have the actual instruction in the operands to this
// instruction so we lower that particular instruction and its operands.
// Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
index c6da4b09dd60..b19d1263e0c9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86MacroFusion.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MacroFusion.h"
@@ -18,160 +19,13 @@
using namespace llvm;
-namespace {
-
-// The classification for the first instruction.
-enum class FirstInstrKind { Test, Cmp, And, ALU, IncDec, Invalid };
-
-// The classification for the second instruction (jump).
-enum class JumpKind {
- // JE, JL, JG and variants.
- ELG,
- // JA, JB and variants.
- AB,
- // JS, JP, JO and variants.
- SPO,
- // Not a fusable jump.
- Invalid,
-};
-
-} // namespace
-
-static FirstInstrKind classifyFirst(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- return FirstInstrKind::Invalid;
- case X86::TEST8rr:
- case X86::TEST16rr:
- case X86::TEST32rr:
- case X86::TEST64rr:
- case X86::TEST8ri:
- case X86::TEST16ri:
- case X86::TEST32ri:
- case X86::TEST64ri32:
- case X86::TEST8mr:
- case X86::TEST16mr:
- case X86::TEST32mr:
- case X86::TEST64mr:
- return FirstInstrKind::Test;
- case X86::AND16ri:
- case X86::AND16ri8:
- case X86::AND16rm:
- case X86::AND16rr:
- case X86::AND32ri:
- case X86::AND32ri8:
- case X86::AND32rm:
- case X86::AND32rr:
- case X86::AND64ri32:
- case X86::AND64ri8:
- case X86::AND64rm:
- case X86::AND64rr:
- case X86::AND8ri:
- case X86::AND8rm:
- case X86::AND8rr:
- return FirstInstrKind::And;
- case X86::CMP16ri:
- case X86::CMP16ri8:
- case X86::CMP16rm:
- case X86::CMP16rr:
- case X86::CMP16mr:
- case X86::CMP32ri:
- case X86::CMP32ri8:
- case X86::CMP32rm:
- case X86::CMP32rr:
- case X86::CMP32mr:
- case X86::CMP64ri32:
- case X86::CMP64ri8:
- case X86::CMP64rm:
- case X86::CMP64rr:
- case X86::CMP64mr:
- case X86::CMP8ri:
- case X86::CMP8rm:
- case X86::CMP8rr:
- case X86::CMP8mr:
- return FirstInstrKind::Cmp;
- case X86::ADD16ri:
- case X86::ADD16ri8:
- case X86::ADD16ri8_DB:
- case X86::ADD16ri_DB:
- case X86::ADD16rm:
- case X86::ADD16rr:
- case X86::ADD16rr_DB:
- case X86::ADD32ri:
- case X86::ADD32ri8:
- case X86::ADD32ri8_DB:
- case X86::ADD32ri_DB:
- case X86::ADD32rm:
- case X86::ADD32rr:
- case X86::ADD32rr_DB:
- case X86::ADD64ri32:
- case X86::ADD64ri32_DB:
- case X86::ADD64ri8:
- case X86::ADD64ri8_DB:
- case X86::ADD64rm:
- case X86::ADD64rr:
- case X86::ADD64rr_DB:
- case X86::ADD8ri:
- case X86::ADD8ri_DB:
- case X86::ADD8rm:
- case X86::ADD8rr:
- case X86::ADD8rr_DB:
- case X86::SUB16ri:
- case X86::SUB16ri8:
- case X86::SUB16rm:
- case X86::SUB16rr:
- case X86::SUB32ri:
- case X86::SUB32ri8:
- case X86::SUB32rm:
- case X86::SUB32rr:
- case X86::SUB64ri32:
- case X86::SUB64ri8:
- case X86::SUB64rm:
- case X86::SUB64rr:
- case X86::SUB8ri:
- case X86::SUB8rm:
- case X86::SUB8rr:
- return FirstInstrKind::ALU;
- case X86::INC16r:
- case X86::INC32r:
- case X86::INC64r:
- case X86::INC8r:
- case X86::DEC16r:
- case X86::DEC32r:
- case X86::DEC64r:
- case X86::DEC8r:
- return FirstInstrKind::IncDec;
- }
+static X86::FirstMacroFusionInstKind classifyFirst(const MachineInstr &MI) {
+ return X86::classifyFirstOpcodeInMacroFusion(MI.getOpcode());
}
-static JumpKind classifySecond(const MachineInstr &MI) {
+static X86::SecondMacroFusionInstKind classifySecond(const MachineInstr &MI) {
X86::CondCode CC = X86::getCondFromBranch(MI);
- if (CC == X86::COND_INVALID)
- return JumpKind::Invalid;
-
- switch (CC) {
- default:
- return JumpKind::Invalid;
- case X86::COND_E:
- case X86::COND_NE:
- case X86::COND_L:
- case X86::COND_LE:
- case X86::COND_G:
- case X86::COND_GE:
- return JumpKind::ELG;
- case X86::COND_B:
- case X86::COND_BE:
- case X86::COND_A:
- case X86::COND_AE:
- return JumpKind::AB;
- case X86::COND_S:
- case X86::COND_NS:
- case X86::COND_P:
- case X86::COND_NP:
- case X86::COND_O:
- case X86::COND_NO:
- return JumpKind::SPO;
- }
+ return X86::classifySecondCondCodeInMacroFusion(CC);
}
/// Check if the instr pair, FirstMI and SecondMI, should be fused
@@ -187,40 +41,27 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
if (!(ST.hasBranchFusion() || ST.hasMacroFusion()))
return false;
- const JumpKind BranchKind = classifySecond(SecondMI);
+ const X86::SecondMacroFusionInstKind BranchKind = classifySecond(SecondMI);
- if (BranchKind == JumpKind::Invalid)
+ if (BranchKind == X86::SecondMacroFusionInstKind::Invalid)
return false; // Second cannot be fused with anything.
if (FirstMI == nullptr)
return true; // We're only checking whether Second can be fused at all.
- const FirstInstrKind TestKind = classifyFirst(*FirstMI);
+ const X86::FirstMacroFusionInstKind TestKind = classifyFirst(*FirstMI);
if (ST.hasBranchFusion()) {
// Branch fusion can merge CMP and TEST with all conditional jumps.
- return (TestKind == FirstInstrKind::Cmp ||
- TestKind == FirstInstrKind::Test);
+ return (TestKind == X86::FirstMacroFusionInstKind::Cmp ||
+ TestKind == X86::FirstMacroFusionInstKind::Test);
}
if (ST.hasMacroFusion()) {
- // Macro Fusion rules are a bit more complex. See Agner Fog's
- // Microarchitecture table 9.2 "Instruction Fusion".
- switch (TestKind) {
- case FirstInstrKind::Test:
- case FirstInstrKind::And:
- return true;
- case FirstInstrKind::Cmp:
- case FirstInstrKind::ALU:
- return BranchKind == JumpKind::ELG || BranchKind == JumpKind::AB;
- case FirstInstrKind::IncDec:
- return BranchKind == JumpKind::ELG;
- case FirstInstrKind::Invalid:
- return false;
- }
+ return X86::isMacroFused(TestKind, BranchKind);
}
- llvm_unreachable("unknown branch fusion type");
+ llvm_unreachable("unknown fusion type");
}
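A hedged sketch of the shared helpers now used above, declared in MCTargetDesc/X86BaseInfo.h per the new #include; the explicit Invalid check mirrors the early-out kept in shouldScheduleAdjacent:

static bool wouldMacroFuse(unsigned FirstOpcode, llvm::X86::CondCode CC) {
  auto FK = llvm::X86::classifyFirstOpcodeInMacroFusion(FirstOpcode);
  auto SK = llvm::X86::classifySecondCondCodeInMacroFusion(CC);
  return SK != llvm::X86::SecondMacroFusionInstKind::Invalid &&
         llvm::X86::isMacroFused(FK, SK);
}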
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index 1aee01563c4b..0c791b6674dc 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -25,6 +25,8 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -32,6 +34,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -247,6 +250,12 @@ public:
static char ID;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
private:
using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>;
@@ -294,9 +303,9 @@ private:
DenseMap<const MachineInstr *, unsigned> InstrPos;
- MachineRegisterInfo *MRI;
- const X86InstrInfo *TII;
- const X86RegisterInfo *TRI;
+ MachineRegisterInfo *MRI = nullptr;
+ const X86InstrInfo *TII = nullptr;
+ const X86RegisterInfo *TRI = nullptr;
};
} // end anonymous namespace
@@ -681,6 +690,11 @@ bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
+ auto *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
// Process all basic blocks.
for (auto &MBB : MF) {
@@ -699,7 +713,9 @@ bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
// Remove redundant address calculations. Do it only for -Os/-Oz since only
// a code size gain is expected from this part of the pass.
- if (MF.getFunction().hasOptSize())
+ bool OptForSize = MF.getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
+ if (OptForSize)
Changed |= removeRedundantAddrCalc(LEAs);
}
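A hedged sketch of the gating pattern added here (assuming shouldOptimizeForSize from llvm/CodeGen/MachineSizeOpts.h): whole-function -Os/-Oz still wins, and otherwise the profile may mark individual blocks cold.

static bool sizeOptsEnabledFor(const llvm::MachineBasicBlock &MBB,
                               llvm::ProfileSummaryInfo *PSI,
                               const llvm::MachineBlockFrequencyInfo *MBFI) {
  return MBB.getParent()->getFunction().hasOptSize() ||
         llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
}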
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PadShortFunction.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86PadShortFunction.cpp
index af974c805c36..4c6bd0ccc2cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -17,8 +17,11 @@
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Function.h"
@@ -52,6 +55,12 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@@ -105,6 +114,12 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
TSM.init(&MF.getSubtarget());
+ auto *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
+
// Search through basic blocks and mark the ones that have early returns
ReturnBBs.clear();
VisitedBBs.clear();
@@ -118,6 +133,11 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB = I->first;
unsigned Cycles = I->second;
+ // Function::hasOptSize is already checked above.
+ bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
+ if (OptForSize)
+ continue;
+
if (Cycles < Threshold) {
// BB ends in a return. Skip over any DBG_VALUE instructions
// trailing the terminator.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td b/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
index 5610f4bc8873..93238983afa2 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
@@ -81,14 +81,14 @@ def HaswellPfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = UopsIssuedPfmCounter;
let IssueCounters = [
- PfmIssueCounter<"HWPort0", "uops_dispatched_port:port_0">,
- PfmIssueCounter<"HWPort1", "uops_dispatched_port:port_1">,
- PfmIssueCounter<"HWPort2", "uops_dispatched_port:port_2">,
- PfmIssueCounter<"HWPort3", "uops_dispatched_port:port_3">,
- PfmIssueCounter<"HWPort4", "uops_dispatched_port:port_4">,
- PfmIssueCounter<"HWPort5", "uops_dispatched_port:port_5">,
- PfmIssueCounter<"HWPort6", "uops_dispatched_port:port_6">,
- PfmIssueCounter<"HWPort7", "uops_dispatched_port:port_7">
+ PfmIssueCounter<"HWPort0", "uops_executed_port:port_0">,
+ PfmIssueCounter<"HWPort1", "uops_executed_port:port_1">,
+ PfmIssueCounter<"HWPort2", "uops_executed_port:port_2">,
+ PfmIssueCounter<"HWPort3", "uops_executed_port:port_3">,
+ PfmIssueCounter<"HWPort4", "uops_executed_port:port_4">,
+ PfmIssueCounter<"HWPort5", "uops_executed_port:port_5">,
+ PfmIssueCounter<"HWPort6", "uops_executed_port:port_6">,
+ PfmIssueCounter<"HWPort7", "uops_executed_port:port_7">
];
}
def : PfmCountersBinding<"haswell", HaswellPfmCounters>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
index daddf4231897..9c076d2d6769 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -40,8 +40,9 @@ X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI)
assert(RBGPR.getSize() == 64 && "GPRs should hold up to 64-bit");
}
-const RegisterBank &X86RegisterBankInfo::getRegBankFromRegClass(
- const TargetRegisterClass &RC) const {
+const RegisterBank &
+X86RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const {
if (X86::GR8RegClass.hasSubClassEq(&RC) ||
X86::GR16RegClass.hasSubClassEq(&RC) ||
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.h
index c1f3001c6180..d5afd2cae761 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBankInfo.h
@@ -64,8 +64,8 @@ private:
public:
X86RegisterBankInfo(const TargetRegisterInfo &TRI);
- const RegisterBank &
- getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+ const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const override;
InstructionMappings
getInstrAlternativeMappings(const MachineInstr &MI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
index ff625325b4c9..f69626b2622e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -341,6 +341,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return (HasSSE ? CSR_32_RegCall_SaveList :
CSR_32_RegCall_NoSSE_SaveList);
}
+ case CallingConv::CFGuard_Check:
+ assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
+ return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
+ : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
case CallingConv::Cold:
if (Is64Bit)
return CSR_64_MostRegs_SaveList;
@@ -455,6 +459,10 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return (HasSSE ? CSR_32_RegCall_RegMask :
CSR_32_RegCall_NoSSE_RegMask);
}
+ case CallingConv::CFGuard_Check:
+ assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
+ return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
+ : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
case CallingConv::Cold:
if (Is64Bit)
return CSR_64_MostRegs_RegMask;
@@ -515,24 +523,27 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Set the floating point control register as reserved.
Reserved.set(X86::FPCW);
+ // Set the floating point status register as reserved.
+ Reserved.set(X86::FPSW);
+
+ // Set the SIMD floating point control register as reserved.
+ Reserved.set(X86::MXCSR);
+
// Set the stack-pointer register and its aliases as reserved.
- for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
- ++I)
- Reserved.set(*I);
+ for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
+ Reserved.set(SubReg);
// Set the Shadow Stack Pointer as reserved.
Reserved.set(X86::SSP);
// Set the instruction pointer register and its aliases as reserved.
- for (MCSubRegIterator I(X86::RIP, this, /*IncludeSelf=*/true); I.isValid();
- ++I)
- Reserved.set(*I);
+ for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
+ Reserved.set(SubReg);
// Set the frame-pointer register and its aliases as reserved if needed.
if (TFI->hasFP(MF)) {
- for (MCSubRegIterator I(X86::RBP, this, /*IncludeSelf=*/true); I.isValid();
- ++I)
- Reserved.set(*I);
+ for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
+ Reserved.set(SubReg);
}
// Set the base-pointer register and its aliases as reserved if needed.
@@ -545,9 +556,8 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
"this calling convention.");
Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
- for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true);
- I.isValid(); ++I)
- Reserved.set(*I);
+ for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
+ Reserved.set(SubReg);
}
// Mark the segment registers as reserved.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
index 0528b90c1fd5..3cfaf714e93e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -294,6 +294,11 @@ def FPSW : X86Reg<"fpsr", 0>;
// Floating-point control word
def FPCW : X86Reg<"fpcr", 0>;
+// SIMD Floating-point control register.
+// Note: We only model the "Uses" of the control bits: current rounding modes,
+// DAZ, FTZ and exception masks. We don't model the "Defs" of flag bits.
+def MXCSR : X86Reg<"mxcsr", 0>;
+
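A hedged C++-side sketch of what this modeling buys (assuming the generated X86 register enum and the MachineInstr API of this vintage): passes can now see the SIMD FP environment dependency directly on the instruction.

#include "llvm/CodeGen/MachineInstr.h"

static bool dependsOnFPEnvironment(const llvm::MachineInstr &MI) {
  // Implicit MXCSR uses come from the new Uses = [MXCSR] lines; the trap
  // bit comes from mayRaiseFPException = 1.
  return MI.readsRegister(llvm::X86::MXCSR) || MI.mayRaiseFPException();
}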
// Status flags register.
//
// Note that some flags that are commonly thought of as part of the status
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RetpolineThunks.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86RetpolineThunks.cpp
index f8464c7e8298..9085d7f068ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RetpolineThunks.cpp
@@ -63,13 +63,13 @@ public:
}
private:
- MachineModuleInfo *MMI;
- const TargetMachine *TM;
- bool Is64Bit;
- const X86Subtarget *STI;
- const X86InstrInfo *TII;
+ MachineModuleInfo *MMI = nullptr;
+ const TargetMachine *TM = nullptr;
+ bool Is64Bit = false;
+ const X86Subtarget *STI = nullptr;
+ const X86InstrInfo *TII = nullptr;
- bool InsertedThunks;
+ bool InsertedThunks = false;
void createThunkFunction(Module &M, StringRef Name);
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td
index 78acb1065ec8..b0153ca9da36 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -888,8 +888,7 @@ def AtomWrite01_174 : SchedWriteRes<[AtomPort01]> {
let Latency = 174;
let ResourceCycles = [174];
}
-def : InstRW<[AtomWrite01_174], (instrs FSINCOS)>;
-def : InstRW<[AtomWrite01_174], (instregex "(COS|SIN)_F")>;
+def : InstRW<[AtomWrite01_174], (instrs FSINCOS, FSIN, FCOS)>;
def AtomWrite01_183 : SchedWriteRes<[AtomPort01]> {
let Latency = 183;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td
index 8e3ce721f1a1..dcd155ea0e0e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -202,8 +202,8 @@ defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 4, [2]>;
+defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 4, [2]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>;
@@ -219,8 +219,8 @@ defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
-defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
-defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
+defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 7, [1,4]>;
+defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 7, [1,4]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
@@ -380,8 +380,8 @@ def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
-defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
+defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 6, [6], 4>;
+defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 6, [6], 4>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
@@ -486,7 +486,7 @@ defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
-defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 4, [4], 3>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
@@ -511,4 +511,20 @@ defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
+// Remaining SLM instrs.
+
+def SLMWriteResGroup1rr : SchedWriteRes<[SLM_FPC_RSV01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [4];
+}
+def: InstRW<[SLMWriteResGroup1rr], (instrs PADDQrr, PSUBQrr, PCMPEQQrr)>;
+
+def SLMWriteResGroup1rm : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV01]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,4];
+}
+def: InstRW<[SLMWriteResGroup1rm], (instrs PADDQrm, PSUBQrm, PCMPEQQrm)>;
+
} // SchedModel
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td
new file mode 100644
index 000000000000..4537d9cc7956
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -0,0 +1,1548 @@
+//=- X86ScheduleZnver2.td - X86 Znver2 Scheduling -------------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Znver2 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def Znver2Model : SchedMachineModel {
+ // Zen can decode 4 instructions per cycle.
+ let IssueWidth = 4;
+  // MicroOpBufferSize is based on the reorder buffer size.
+ let MicroOpBufferSize = 224;
+ let LoadLatency = 4;
+ let MispredictPenalty = 17;
+ let HighLatency = 25;
+ let PostRAScheduler = 1;
+
+  // FIXME: This is required because the model is incomplete:
+  // we have not catered for all instructions yet, so mark the
+  // model as incomplete.
+ let CompleteModel = 0;
+}
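+
+// Illustrative sketch (not part of this patch): a SchedMachineModel only
+// takes effect once a CPU definition references it. In X86.td the binding
+// looks roughly like this (exact class name and feature list may differ):
+//   def : ProcessorModel<"znver2", Znver2Model, [...]>;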
+
+let SchedModel = Znver2Model in {
+
+// Zen 2 can issue micro-ops to 11 different units in one cycle.
+// These are:
+// * Four integer ALU units (Zn2ALU0, Zn2ALU1, Zn2ALU2, Zn2ALU3)
+// * Three AGU units (Zn2AGU0, Zn2AGU1, Zn2AGU2)
+// * Four FPU units (Zn2FPU0, Zn2FPU1, Zn2FPU2, Zn2FPU3)
+// The AGUs feed the load/store queues at two loads and one store per cycle.
+
+// Four ALU units are defined below
+def Zn2ALU0 : ProcResource<1>;
+def Zn2ALU1 : ProcResource<1>;
+def Zn2ALU2 : ProcResource<1>;
+def Zn2ALU3 : ProcResource<1>;
+
+// Three AGU units are defined below
+def Zn2AGU0 : ProcResource<1>;
+def Zn2AGU1 : ProcResource<1>;
+def Zn2AGU2 : ProcResource<1>;
+
+// Four FPU units are defined below
+def Zn2FPU0 : ProcResource<1>;
+def Zn2FPU1 : ProcResource<1>;
+def Zn2FPU2 : ProcResource<1>;
+def Zn2FPU3 : ProcResource<1>;
+
+// FPU grouping
+def Zn2FPU013 : ProcResGroup<[Zn2FPU0, Zn2FPU1, Zn2FPU3]>;
+def Zn2FPU01 : ProcResGroup<[Zn2FPU0, Zn2FPU1]>;
+def Zn2FPU12 : ProcResGroup<[Zn2FPU1, Zn2FPU2]>;
+def Zn2FPU13 : ProcResGroup<[Zn2FPU1, Zn2FPU3]>;
+def Zn2FPU23 : ProcResGroup<[Zn2FPU2, Zn2FPU3]>;
+def Zn2FPU02 : ProcResGroup<[Zn2FPU0, Zn2FPU2]>;
+def Zn2FPU03 : ProcResGroup<[Zn2FPU0, Zn2FPU3]>;
+
+// Below are groupings of the units.
+// Micro-ops that can be issued to multiple units are handled this way.
+
+// ALU grouping
+// Zn2ALU03 - 0,3 grouping
+def Zn2ALU03: ProcResGroup<[Zn2ALU0, Zn2ALU3]>;
+
+// 64 Entry (16x4 entries) Int Scheduler
+def Zn2ALU : ProcResGroup<[Zn2ALU0, Zn2ALU1, Zn2ALU2, Zn2ALU3]> {
+ let BufferSize=64;
+}
+
+// 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations
+// but are relevant for some instructions
+def Zn2AGU : ProcResGroup<[Zn2AGU0, Zn2AGU1, Zn2AGU2]> {
+ let BufferSize=28;
+}
+
+// Integer Multiplication issued on ALU1.
+def Zn2Multiplier : ProcResource<1>;
+
+// Integer division issued on ALU2.
+def Zn2Divider : ProcResource<1>;
+
+// The 4-cycle load-to-use latency is captured below.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// The 7-cycle vector load-to-use latency is captured below.
+def : ReadAdvance<ReadAfterVecLd, 7>;
+def : ReadAdvance<ReadAfterVecXLd, 7>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
+def : ReadAdvance<ReadInt2Fpu, 0>;
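+
+// Illustrative note on ReadAdvance semantics: the register operand of a
+// folded load-op is not read until the load part completes, so
+// ReadAdvance<ReadAfterLd, 4> lets the producer of that operand hide up
+// to 4 cycles of its latency behind the load.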
+
+// The Integer PRF for Zen is 168 entries, and it holds the architectural and
+// speculative version of the 64-bit integer registers.
+// Reference: "Software Optimization Guide for AMD Family 17h Processors"
+def Zn2IntegerPRF : RegisterFile<168, [GR64, CCR]>;
+
+// 36 Entry (9x4 entries) floating-point Scheduler
+def Zn2FPU : ProcResGroup<[Zn2FPU0, Zn2FPU1, Zn2FPU2, Zn2FPU3]> {
+ let BufferSize=36;
+}
+
+// The Zen FP Retire Queue renames SIMD and FP uOps onto a pool of 160 128-bit
+// registers. Operations on 256-bit data types are cracked into two COPs.
+// Reference: "Software Optimization Guide for AMD Family 17h Processors"
+def Zn2FpuPRF: RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
+
+// The retire control unit can track up to 192 macro ops in-flight.
+// The retire unit handles in-order commit of up to 8 macro ops per cycle.
+// Reference: "Software Optimization Guide for AMD Family 17h Processors"
+// Note that the retire unit is shared between integer and FP ops.
+// In SMT mode it is 96 entries per thread, but we do not use that
+// conservative value here because there is currently no way to fully
+// model SMT mode, so there is no point in trying.
+def Zn2RCU : RetireControlUnit<192, 8>;
+
+// A folded load is an instruction that both loads and performs an operation.
+// Ex: ADDPD xmm, [mem] - this instruction carries two micro-ops:
+//   a. the load, and
+//   b. the addpd.
+// Instructions with folded loads are usually micro-fused, so they only
+// appear as those two micro-ops.
+// This multiclass handles folded loads for the integer units.
+multiclass Zn2WriteResPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts,
+ int Lat, list<int> Res = [], int UOps = 1,
+ int LoadLat = 4, int LoadUOps = 1> {
+  // The register variant uses a single cycle on the execution port.
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+
+  // The memory variant also uses a cycle on Zn2AGU and
+  // adds LoadLat cycles to the latency (default = 4).
+ def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
+ let Latency = !add(Lat, LoadLat);
+ let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
+ let NumMicroOps = !add(UOps, LoadUOps);
+ }
+}
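+
+// Expansion sketch (illustrative, using the defaults above): the pair
+//   defm : Zn2WriteResPair<WriteALU, [Zn2ALU], 1>;
+// defined further below yields, roughly:
+//   def : WriteRes<WriteALU,   [Zn2ALU]>          { let Latency = 1; }
+//   def : WriteRes<WriteALULd, [Zn2AGU, Zn2ALU]>  { let Latency = 5; }  // 1+4, 2 uops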
+
+// This multiclass is for folded loads for floating point units.
+multiclass Zn2WriteResFpuPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts,
+ int Lat, list<int> Res = [], int UOps = 1,
+ int LoadLat = 7, int LoadUOps = 0> {
+  // The register variant uses a single cycle on the execution port.
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+
+  // The memory variant also uses a cycle on Zn2AGU and
+  // adds LoadLat cycles to the latency (default = 7).
+ def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
+ let Latency = !add(Lat, LoadLat);
+ let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
+ let NumMicroOps = !add(UOps, LoadUOps);
+ }
+}
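+
+// Likewise for the FP pairs (illustrative): with the 7-cycle LoadLat default,
+//   defm : Zn2WriteResFpuPair<WriteFAdd, [Zn2FPU0], 3>;
+// below yields a 3-cycle register form on Zn2FPU0 and a 10-cycle folded-load
+// form on [Zn2AGU, Zn2FPU0] with no extra micro-op (LoadUOps defaults to 0).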
+
+// WriteRMW is set for instructions with a memory-write
+// operation in codegen.
+def : WriteRes<WriteRMW, [Zn2AGU]>;
+
+def : WriteRes<WriteStore, [Zn2AGU]>;
+def : WriteRes<WriteStoreNT, [Zn2AGU]>;
+def : WriteRes<WriteMove, [Zn2ALU]>;
+def : WriteRes<WriteLoad, [Zn2AGU]> { let Latency = 8; }
+
+def : WriteRes<WriteZero, []>;
+def : WriteRes<WriteLEA, [Zn2ALU]>;
+defm : Zn2WriteResPair<WriteALU, [Zn2ALU], 1>;
+defm : Zn2WriteResPair<WriteADC, [Zn2ALU], 1>;
+
+defm : Zn2WriteResPair<WriteIMul8, [Zn2ALU1, Zn2Multiplier], 4>;
+
+defm : X86WriteRes<WriteBSWAP32, [Zn2ALU], 1, [4], 1>;
+defm : X86WriteRes<WriteBSWAP64, [Zn2ALU], 1, [4], 1>;
+defm : X86WriteRes<WriteCMPXCHG, [Zn2ALU], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[Zn2ALU,Zn2AGU], 8, [1,1], 5>;
+defm : X86WriteRes<WriteXCHG, [Zn2ALU], 1, [2], 2>;
+
+defm : Zn2WriteResPair<WriteShift, [Zn2ALU], 1>;
+defm : Zn2WriteResPair<WriteShiftCL, [Zn2ALU], 1>;
+defm : Zn2WriteResPair<WriteRotate, [Zn2ALU], 1>;
+defm : Zn2WriteResPair<WriteRotateCL, [Zn2ALU], 1>;
+
+defm : X86WriteRes<WriteSHDrri, [Zn2ALU], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteSHDrrcl>;
+defm : X86WriteResUnsupported<WriteSHDmri>;
+defm : X86WriteResUnsupported<WriteSHDmrcl>;
+
+defm : Zn2WriteResPair<WriteJump, [Zn2ALU], 1>;
+defm : Zn2WriteResFpuPair<WriteCRC32, [Zn2FPU0], 3>;
+
+defm : Zn2WriteResPair<WriteCMOV, [Zn2ALU], 1>;
+def : WriteRes<WriteSETCC, [Zn2ALU]>;
+def : WriteRes<WriteSETCCStore, [Zn2ALU, Zn2AGU]>;
+defm : X86WriteRes<WriteLAHFSAHF, [Zn2ALU], 2, [1], 2>;
+
+defm : X86WriteRes<WriteBitTest, [Zn2ALU], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [Zn2ALU,Zn2AGU], 5, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [Zn2ALU,Zn2AGU], 5, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [Zn2ALU], 2, [1], 2>;
+
+// Bit counts.
+defm : Zn2WriteResPair<WriteBSF, [Zn2ALU], 3>;
+defm : Zn2WriteResPair<WriteBSR, [Zn2ALU], 3>;
+defm : Zn2WriteResPair<WriteLZCNT, [Zn2ALU], 1>;
+defm : Zn2WriteResPair<WriteTZCNT, [Zn2ALU], 2>;
+defm : Zn2WriteResPair<WritePOPCNT, [Zn2ALU], 1>;
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+// BMI1 BEXTR, BMI2 BZHI
+defm : Zn2WriteResPair<WriteBEXTR, [Zn2ALU], 1>;
+defm : Zn2WriteResPair<WriteBZHI, [Zn2ALU], 1>;
+
+// IDIV
+defm : Zn2WriteResPair<WriteDiv8, [Zn2ALU2, Zn2Divider], 15, [1,15], 1>;
+defm : Zn2WriteResPair<WriteDiv16, [Zn2ALU2, Zn2Divider], 17, [1,17], 2>;
+defm : Zn2WriteResPair<WriteDiv32, [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
+defm : Zn2WriteResPair<WriteDiv64, [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;
+defm : Zn2WriteResPair<WriteIDiv8, [Zn2ALU2, Zn2Divider], 15, [1,15], 1>;
+defm : Zn2WriteResPair<WriteIDiv16, [Zn2ALU2, Zn2Divider], 17, [1,17], 2>;
+defm : Zn2WriteResPair<WriteIDiv32, [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
+defm : Zn2WriteResPair<WriteIDiv64, [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;
+
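+// Illustrative: the [1,41] ResourceCycles on WriteIDiv64 above keep
+// Zn2Divider occupied for the full 41 cycles, so back-to-back 64-bit
+// divides serialize on the non-pipelined divider.
+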
+// IMULH
+def : WriteRes<WriteIMulH, [Zn2ALU1, Zn2Multiplier]>{
+ let Latency = 4;
+}
+
+// Floating point operations
+defm : X86WriteRes<WriteFLoad, [Zn2AGU], 8, [1], 1>;
+defm : X86WriteRes<WriteFLoadX, [Zn2AGU], 8, [1], 1>;
+defm : X86WriteRes<WriteFLoadY, [Zn2AGU], 8, [1], 1>;
+defm : X86WriteRes<WriteFMaskedLoad, [Zn2AGU,Zn2FPU01], 8, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedLoadY, [Zn2AGU,Zn2FPU01], 8, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
+
+defm : X86WriteRes<WriteFStore, [Zn2AGU], 1, [1], 1>;
+defm : X86WriteRes<WriteFStoreX, [Zn2AGU], 1, [1], 1>;
+defm : X86WriteRes<WriteFStoreY, [Zn2AGU], 1, [1], 1>;
+defm : X86WriteRes<WriteFStoreNT, [Zn2AGU,Zn2FPU2], 8, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTX, [Zn2AGU], 1, [1], 1>;
+defm : X86WriteRes<WriteFStoreNTY, [Zn2AGU], 1, [1], 1>;
+defm : X86WriteRes<WriteFMove, [Zn2FPU], 1, [1], 1>;
+defm : X86WriteRes<WriteFMoveX, [Zn2FPU], 1, [1], 1>;
+defm : X86WriteRes<WriteFMoveY, [Zn2FPU], 1, [1], 1>;
+
+defm : Zn2WriteResFpuPair<WriteFAdd, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFAddX, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFAddY, [Zn2FPU0], 3>;
+defm : X86WriteResPairUnsupported<WriteFAddZ>;
+defm : Zn2WriteResFpuPair<WriteFAdd64, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFAdd64X, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFAdd64Y, [Zn2FPU0], 3>;
+defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
+defm : Zn2WriteResFpuPair<WriteFCmp, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFCmpX, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFCmpY, [Zn2FPU0], 3>;
+defm : X86WriteResPairUnsupported<WriteFCmpZ>;
+defm : Zn2WriteResFpuPair<WriteFCmp64, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFCmp64X, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFCmp64Y, [Zn2FPU0], 3>;
+defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
+defm : Zn2WriteResFpuPair<WriteFCom, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFBlend, [Zn2FPU01], 1>;
+defm : Zn2WriteResFpuPair<WriteFBlendY, [Zn2FPU01], 1>;
+defm : X86WriteResPairUnsupported<WriteFBlendZ>;
+defm : Zn2WriteResFpuPair<WriteFVarBlend, [Zn2FPU01], 1>;
+defm : Zn2WriteResFpuPair<WriteFVarBlendY,[Zn2FPU01], 1>;
+defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
+defm : Zn2WriteResFpuPair<WriteVarBlend, [Zn2FPU0], 1>;
+defm : Zn2WriteResFpuPair<WriteVarBlendY, [Zn2FPU0], 1>;
+defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
+defm : Zn2WriteResFpuPair<WriteCvtSS2I, [Zn2FPU3], 5>;
+defm : Zn2WriteResFpuPair<WriteCvtPS2I, [Zn2FPU3], 5>;
+defm : Zn2WriteResFpuPair<WriteCvtPS2IY, [Zn2FPU3], 5>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
+defm : Zn2WriteResFpuPair<WriteCvtSD2I, [Zn2FPU3], 5>;
+defm : Zn2WriteResFpuPair<WriteCvtPD2I, [Zn2FPU3], 5>;
+defm : Zn2WriteResFpuPair<WriteCvtPD2IY, [Zn2FPU3], 5>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
+defm : Zn2WriteResFpuPair<WriteCvtI2SS, [Zn2FPU3], 5>;
+defm : Zn2WriteResFpuPair<WriteCvtI2PS, [Zn2FPU3], 5>;
+defm : Zn2WriteResFpuPair<WriteCvtI2PSY, [Zn2FPU3], 5>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
+defm : Zn2WriteResFpuPair<WriteCvtI2SD, [Zn2FPU3], 5>;
+defm : Zn2WriteResFpuPair<WriteCvtI2PD, [Zn2FPU3], 5>;
+defm : Zn2WriteResFpuPair<WriteCvtI2PDY, [Zn2FPU3], 5>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
+defm : Zn2WriteResFpuPair<WriteFDiv, [Zn2FPU3], 15>;
+defm : Zn2WriteResFpuPair<WriteFDivX, [Zn2FPU3], 15>;
+defm : X86WriteResPairUnsupported<WriteFDivZ>;
+defm : Zn2WriteResFpuPair<WriteFDiv64, [Zn2FPU3], 15>;
+defm : Zn2WriteResFpuPair<WriteFDiv64X, [Zn2FPU3], 15>;
+defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
+defm : Zn2WriteResFpuPair<WriteFSign, [Zn2FPU3], 2>;
+defm : Zn2WriteResFpuPair<WriteFRnd, [Zn2FPU3], 4, [1], 1, 7, 0>;
+defm : Zn2WriteResFpuPair<WriteFRndY, [Zn2FPU3], 4, [1], 1, 7, 0>;
+defm : X86WriteResPairUnsupported<WriteFRndZ>;
+defm : Zn2WriteResFpuPair<WriteFLogic, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteFLogicY, [Zn2FPU], 1>;
+defm : X86WriteResPairUnsupported<WriteFLogicZ>;
+defm : Zn2WriteResFpuPair<WriteFTest, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteFTestY, [Zn2FPU], 1>;
+defm : X86WriteResPairUnsupported<WriteFTestZ>;
+defm : Zn2WriteResFpuPair<WriteFShuffle, [Zn2FPU12], 1>;
+defm : Zn2WriteResFpuPair<WriteFShuffleY, [Zn2FPU12], 1>;
+defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
+defm : Zn2WriteResFpuPair<WriteFVarShuffle, [Zn2FPU12], 1>;
+defm : Zn2WriteResFpuPair<WriteFVarShuffleY,[Zn2FPU12], 1>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
+defm : Zn2WriteResFpuPair<WriteFMul, [Zn2FPU01], 3, [1], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFMulX, [Zn2FPU01], 3, [1], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFMulY, [Zn2FPU01], 4, [1], 1, 7, 1>;
+defm : X86WriteResPairUnsupported<WriteFMulZ>;
+defm : Zn2WriteResFpuPair<WriteFMul64, [Zn2FPU01], 3, [1], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFMul64X, [Zn2FPU01], 3, [1], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFMul64Y, [Zn2FPU01], 4, [1], 1, 7, 1>;
+defm : X86WriteResPairUnsupported<WriteFMul64Z>;
+defm : Zn2WriteResFpuPair<WriteFMA, [Zn2FPU03], 5>;
+defm : Zn2WriteResFpuPair<WriteFMAX, [Zn2FPU03], 5>;
+defm : Zn2WriteResFpuPair<WriteFMAY, [Zn2FPU03], 5>;
+defm : X86WriteResPairUnsupported<WriteFMAZ>;
+defm : Zn2WriteResFpuPair<WriteFRcp, [Zn2FPU01], 5>;
+defm : Zn2WriteResFpuPair<WriteFRcpX, [Zn2FPU01], 5>;
+defm : Zn2WriteResFpuPair<WriteFRcpY, [Zn2FPU01], 5, [1], 1, 7, 2>;
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
+defm : Zn2WriteResFpuPair<WriteFRsqrtX, [Zn2FPU01], 5, [1], 1, 7, 1>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
+defm : Zn2WriteResFpuPair<WriteFSqrt, [Zn2FPU3], 20, [20]>;
+defm : Zn2WriteResFpuPair<WriteFSqrtX, [Zn2FPU3], 20, [20]>;
+defm : Zn2WriteResFpuPair<WriteFSqrtY, [Zn2FPU3], 28, [28], 1, 7, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
+defm : Zn2WriteResFpuPair<WriteFSqrt64, [Zn2FPU3], 20, [20]>;
+defm : Zn2WriteResFpuPair<WriteFSqrt64X, [Zn2FPU3], 20, [20]>;
+defm : Zn2WriteResFpuPair<WriteFSqrt64Y, [Zn2FPU3], 20, [20], 1, 7, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
+defm : Zn2WriteResFpuPair<WriteFSqrt80, [Zn2FPU3], 20, [20]>;
+
+// Vector integer operations, which use the FPU units.
+defm : X86WriteRes<WriteVecLoad, [Zn2AGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecLoadX, [Zn2AGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecLoadY, [Zn2AGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [Zn2AGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [Zn2AGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecMaskedLoad, [Zn2AGU,Zn2FPU01], 8, [1,2], 2>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [Zn2AGU,Zn2FPU01], 8, [1,2], 2>;
+defm : X86WriteRes<WriteVecStore, [Zn2AGU], 1, [1], 1>;
+defm : X86WriteRes<WriteVecStoreX, [Zn2AGU], 1, [1], 1>;
+defm : X86WriteRes<WriteVecStoreY, [Zn2AGU], 1, [1], 1>;
+defm : X86WriteRes<WriteVecStoreNT, [Zn2AGU], 1, [1], 1>;
+defm : X86WriteRes<WriteVecStoreNTY, [Zn2AGU], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStoreY, [Zn2AGU,Zn2FPU01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteVecMove, [Zn2FPU], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveX, [Zn2FPU], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveY, [Zn2FPU], 2, [1], 2>;
+defm : X86WriteRes<WriteVecMoveToGpr, [Zn2FPU2], 2, [1], 1>;
+defm : X86WriteRes<WriteVecMoveFromGpr, [Zn2FPU2], 3, [1], 1>;
+defm : X86WriteRes<WriteEMMS, [Zn2FPU], 2, [1], 1>;
+
+defm : Zn2WriteResFpuPair<WriteVecShift, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteVecShiftX, [Zn2FPU2], 1>;
+defm : Zn2WriteResFpuPair<WriteVecShiftY, [Zn2FPU2], 2>;
+defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
+defm : Zn2WriteResFpuPair<WriteVecShiftImm, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteVecShiftImmX, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteVecShiftImmY, [Zn2FPU], 1>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
+defm : Zn2WriteResFpuPair<WriteVecLogic, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteVecLogicX, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteVecLogicY, [Zn2FPU], 1>;
+defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
+defm : Zn2WriteResFpuPair<WriteVecTest, [Zn2FPU12], 1, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteVecTestY, [Zn2FPU12], 1, [2], 1, 7, 1>;
+defm : X86WriteResPairUnsupported<WriteVecTestZ>;
+defm : Zn2WriteResFpuPair<WriteVecALU, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteVecALUX, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteVecALUY, [Zn2FPU], 1>;
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
+defm : Zn2WriteResFpuPair<WriteVecIMul, [Zn2FPU0], 4>;
+defm : Zn2WriteResFpuPair<WriteVecIMulX, [Zn2FPU0], 4>;
+defm : Zn2WriteResFpuPair<WriteVecIMulY, [Zn2FPU0], 4>;
+defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
+defm : Zn2WriteResFpuPair<WritePMULLD, [Zn2FPU0], 4, [1], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WritePMULLDY, [Zn2FPU0], 3, [1], 1, 7, 1>;
+defm : X86WriteResPairUnsupported<WritePMULLDZ>;
+defm : Zn2WriteResFpuPair<WriteShuffle, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteShuffleX, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteShuffleY, [Zn2FPU], 1>;
+defm : X86WriteResPairUnsupported<WriteShuffleZ>;
+defm : Zn2WriteResFpuPair<WriteVarShuffle, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteVarShuffleX,[Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteVarShuffleY,[Zn2FPU], 1>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
+defm : Zn2WriteResFpuPair<WriteBlend, [Zn2FPU01], 1>;
+defm : Zn2WriteResFpuPair<WriteBlendY, [Zn2FPU01], 1>;
+defm : X86WriteResPairUnsupported<WriteBlendZ>;
+defm : Zn2WriteResFpuPair<WriteShuffle256, [Zn2FPU], 2>;
+defm : Zn2WriteResFpuPair<WriteVarShuffle256, [Zn2FPU], 2>;
+defm : Zn2WriteResFpuPair<WritePSADBW, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WritePSADBWX, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WritePSADBWY, [Zn2FPU0], 3>;
+defm : X86WriteResPairUnsupported<WritePSADBWZ>;
+defm : Zn2WriteResFpuPair<WritePHMINPOS, [Zn2FPU0], 4>;
+
+// Vector Shift Operations
+defm : Zn2WriteResFpuPair<WriteVarVecShift, [Zn2FPU12], 1>;
+defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU12], 1>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
+
+// Vector insert/extract operations.
+defm : Zn2WriteResFpuPair<WriteVecInsert, [Zn2FPU], 1>;
+
+def : WriteRes<WriteVecExtract, [Zn2FPU12, Zn2FPU2]> {
+ let Latency = 2;
+ let ResourceCycles = [1, 2];
+}
+def : WriteRes<WriteVecExtractSt, [Zn2AGU, Zn2FPU12, Zn2FPU2]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 2, 3];
+}
+
+// MOVMSK Instructions.
+def : WriteRes<WriteFMOVMSK, [Zn2FPU2]>;
+def : WriteRes<WriteMMXMOVMSK, [Zn2FPU2]>;
+def : WriteRes<WriteVecMOVMSK, [Zn2FPU2]>;
+
+def : WriteRes<WriteVecMOVMSKY, [Zn2FPU2]> {
+ let NumMicroOps = 2;
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+
+// AES Instructions.
+defm : Zn2WriteResFpuPair<WriteAESDecEnc, [Zn2FPU01], 4>;
+defm : Zn2WriteResFpuPair<WriteAESIMC, [Zn2FPU01], 4>;
+defm : Zn2WriteResFpuPair<WriteAESKeyGen, [Zn2FPU01], 4>;
+
+def : WriteRes<WriteFence, [Zn2AGU]>;
+def : WriteRes<WriteNop, []>;
+
+// The following instructions are microcoded. We set a long latency
+// (100 cycles) so as to block the entire pipeline.
+defm : Zn2WriteResFpuPair<WriteFShuffle256, [Zn2FPU], 100>;
+defm : Zn2WriteResFpuPair<WriteFVarShuffle256, [Zn2FPU], 100>;
+
+// Microcoded Instructions
+def Zn2WriteMicrocoded : SchedWriteRes<[]> {
+ let Latency = 100;
+}
+
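+// Each SchedAlias below redirects a generic SchedWrite to this microcoded
+// class; e.g. WriteFCMOV (x87 conditional move) is modeled as a 100-cycle op.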
+def : SchedAlias<WriteMicrocoded, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteFCMOV, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteSystem, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteMPSAD, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteMPSADY, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteMPSADLd, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteMPSADYLd, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteCLMul, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteCLMulLd, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePCmpIStrM, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePCmpIStrMLd, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePCmpEStrI, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePCmpEStrILd, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePCmpEStrM, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePCmpEStrMLd, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePCmpIStrI, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePCmpIStrILd, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteLDMXCSR, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteSTMXCSR, Zn2WriteMicrocoded>;
+
+//=== Regex based InstRW ===//
+// Notation:
+// - r: register.
+// - m: memory.
+// - i: immediate.
+// - mm: 64 bit mmx register.
+// - x: 128 bit xmm register.
+// - (x)mm: mmx or xmm register.
+// - y: 256 bit ymm register.
+// - v: any vector register.
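+//
+// Reading the notation, e.g. "XCHG(8|16|32|64)rr" below matches the
+// register-register forms XCHG8rr, XCHG16rr, XCHG32rr and XCHG64rr.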
+
+//=== Integer Instructions ===//
+//-- Move instructions --//
+// MOV.
+// r16,m.
+def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>;
+
+// MOVSX, MOVZX.
+// r,m.
+def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
+
+// XCHG.
+// r,r.
+def Zn2WriteXCHG : SchedWriteRes<[Zn2ALU]> {
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[Zn2WriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
+
+// r,m.
+def Zn2WriteXCHGrm : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn2WriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>;
+
+def : InstRW<[WriteMicrocoded], (instrs XLAT)>;
+
+// POP16.
+// m.
+def Zn2WritePop16r : SchedWriteRes<[Zn2AGU]>{
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn2WritePop16r], (instregex "POP16rmm")>;
+def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
+def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;
+
+// PUSH.
+// r. Has default values.
+// m.
+def Zn2WritePUSH : SchedWriteRes<[Zn2AGU]>{
+ let Latency = 4;
+}
+def : InstRW<[Zn2WritePUSH], (instregex "PUSH(16|32)rmm")>;
+
+// PUSHF.
+def : InstRW<[WriteMicrocoded], (instregex "PUSHF(16|32)")>;
+
+// PUSHA.
+def Zn2WritePushA : SchedWriteRes<[Zn2AGU]> {
+ let Latency = 8;
+}
+def : InstRW<[Zn2WritePushA], (instregex "PUSHA(16|32)")>;
+
+// LAHF.
+def : InstRW<[WriteMicrocoded], (instrs LAHF)>;
+
+// MOVBE.
+// r,m.
+def Zn2WriteMOVBE : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
+ let Latency = 5;
+}
+def : InstRW<[Zn2WriteMOVBE, ReadAfterLd], (instregex "MOVBE(16|32|64)rm")>;
+
+// m16,r16.
+def : InstRW<[Zn2WriteMOVBE], (instregex "MOVBE(16|32|64)mr")>;
+
+//-- Arithmetic instructions --//
+
+// ADD SUB.
+// m,r/i.
+def : InstRW<[WriteALULd], (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
+ "(ADD|SUB)(8|16|32|64)mi8",
+ "(ADD|SUB)64mi32")>;
+
+// ADC SBB.
+// m,r/i.
+def : InstRW<[WriteALULd],
+ (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
+ "(ADC|SBB)(16|32|64)mi8",
+ "(ADC|SBB)64mi32")>;
+
+// INC DEC NOT NEG.
+// m.
+def : InstRW<[WriteALULd],
+ (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>;
+
+// MUL IMUL.
+// r16.
+def Zn2WriteMul16 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
+ let Latency = 3;
+}
+def : SchedAlias<WriteIMul16, Zn2WriteMul16>;
+def : SchedAlias<WriteIMul16Imm, Zn2WriteMul16>;
+def : SchedAlias<WriteIMul16Reg, Zn2WriteMul16>;
+
+// m16.
+def Zn2WriteMul16Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
+ let Latency = 7;
+}
+def : SchedAlias<WriteIMul16Ld, Zn2WriteMul16Ld>;
+def : SchedAlias<WriteIMul16ImmLd, Zn2WriteMul16Ld>;
+def : SchedAlias<WriteIMul16RegLd, Zn2WriteMul16Ld>;
+
+// r32.
+def Zn2WriteMul32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
+ let Latency = 3;
+}
+def : SchedAlias<WriteIMul32, Zn2WriteMul32>;
+def : SchedAlias<WriteIMul32Imm, Zn2WriteMul32>;
+def : SchedAlias<WriteIMul32Reg, Zn2WriteMul32>;
+
+// m32.
+def Zn2WriteMul32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
+ let Latency = 7;
+}
+def : SchedAlias<WriteIMul32Ld, Zn2WriteMul32Ld>;
+def : SchedAlias<WriteIMul32ImmLd, Zn2WriteMul32Ld>;
+def : SchedAlias<WriteIMul32RegLd, Zn2WriteMul32Ld>;
+
+// r64.
+def Zn2WriteMul64 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : SchedAlias<WriteIMul64, Zn2WriteMul64>;
+def : SchedAlias<WriteIMul64Imm, Zn2WriteMul64>;
+def : SchedAlias<WriteIMul64Reg, Zn2WriteMul64>;
+
+// m64.
+def Zn2WriteMul64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : SchedAlias<WriteIMul64Ld, Zn2WriteMul64Ld>;
+def : SchedAlias<WriteIMul64ImmLd, Zn2WriteMul64Ld>;
+def : SchedAlias<WriteIMul64RegLd, Zn2WriteMul64Ld>;
+
+// MULX.
+// r32,r32,r32.
+def Zn2WriteMulX32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
+ let Latency = 3;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[Zn2WriteMulX32], (instrs MULX32rr)>;
+
+// r32,r32,m32.
+def Zn2WriteMulX32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 2, 2];
+}
+def : InstRW<[Zn2WriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;
+
+// r64,r64,r64.
+def Zn2WriteMulX64 : SchedWriteRes<[Zn2ALU1]> {
+ let Latency = 3;
+}
+def : InstRW<[Zn2WriteMulX64], (instrs MULX64rr)>;
+
+// r64,r64,m64.
+def Zn2WriteMulX64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
+ let Latency = 7;
+}
+def : InstRW<[Zn2WriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
+
+//-- Control transfer instructions --//
+
+// J(E|R)CXZ.
+def Zn2WriteJCXZ : SchedWriteRes<[Zn2ALU03]>;
+def : InstRW<[Zn2WriteJCXZ], (instrs JCXZ, JECXZ, JRCXZ)>;
+
+// INTO
+def : InstRW<[WriteMicrocoded], (instrs INTO)>;
+
+// LOOP.
+def Zn2WriteLOOP : SchedWriteRes<[Zn2ALU03]>;
+def : InstRW<[Zn2WriteLOOP], (instrs LOOP)>;
+
+// LOOP(N)E, LOOP(N)Z
+def Zn2WriteLOOPE : SchedWriteRes<[Zn2ALU03]>;
+def : InstRW<[Zn2WriteLOOPE], (instrs LOOPE, LOOPNE)>;
+
+// CALL.
+// r.
+def Zn2WriteCALLr : SchedWriteRes<[Zn2AGU, Zn2ALU03]>;
+def : InstRW<[Zn2WriteCALLr], (instregex "CALL(16|32)r")>;
+
+def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;
+
+// RET.
+def Zn2WriteRET : SchedWriteRes<[Zn2ALU03]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn2WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)",
+ "IRET(16|32|64)")>;
+
+//-- Logic instructions --//
+
+// AND OR XOR.
+// m,r/i.
+def : InstRW<[WriteALULd],
+ (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
+ "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
+
+// Define ALU latency variants
+def Zn2WriteALULat2 : SchedWriteRes<[Zn2ALU]> {
+ let Latency = 2;
+}
+def Zn2WriteALULat2Ld : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
+ let Latency = 6;
+}
+
+// BT.
+// m,i.
+def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
+
+// BTR BTS BTC.
+// r,r,i.
+def Zn2WriteBTRSC : SchedWriteRes<[Zn2ALU]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn2WriteBTRSC], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
+
+// m,r,i.
+def Zn2WriteBTRSCm : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : SchedAlias<WriteBitTestSetImmRMW, Zn2WriteBTRSCm>;
+def : SchedAlias<WriteBitTestSetRegRMW, Zn2WriteBTRSCm>;
+
+// BLSI BLSMSK BLSR.
+// r,r.
+def : SchedAlias<WriteBLS, Zn2WriteALULat2>;
+// r,m.
+def : SchedAlias<WriteBLSLd, Zn2WriteALULat2Ld>;
+
+// CLD STD.
+def : InstRW<[WriteALU], (instrs STD, CLD)>;
+
+// PDEP PEXT.
+// r,r,r.
+def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
+// r,r,m.
+def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
+
+// RCR RCL.
+// m,i.
+def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(1|i|CL)")>;
+
+// SHR SHL SAR.
+// m,i.
+def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
+
+// SHRD SHLD.
+// m,r
+def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>;
+
+// r,r,cl.
+def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>;
+
+// m,r,cl.
+def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
+
+//-- Misc instructions --//
+// CMPXCHG8B.
+def Zn2WriteCMPXCHG8B : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
+ let NumMicroOps = 18;
+}
+def : InstRW<[Zn2WriteCMPXCHG8B], (instrs CMPXCHG8B)>;
+
+def : InstRW<[WriteMicrocoded], (instrs CMPXCHG16B)>;
+
+// LEAVE
+def Zn2WriteLEAVE : SchedWriteRes<[Zn2ALU, Zn2AGU]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn2WriteLEAVE], (instregex "LEAVE")>;
+
+// PAUSE.
+def : InstRW<[WriteMicrocoded], (instrs PAUSE)>;
+
+// RDTSC.
+def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;
+
+// RDPMC.
+def : InstRW<[WriteMicrocoded], (instrs RDPMC)>;
+
+// RDRAND.
+def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>;
+
+// XGETBV.
+def : InstRW<[WriteMicrocoded], (instregex "XGETBV")>;
+
+//-- String instructions --//
+// CMPS.
+def : InstRW<[WriteMicrocoded], (instregex "CMPS(B|L|Q|W)")>;
+
+// LODSB/W.
+def : InstRW<[WriteMicrocoded], (instregex "LODS(B|W)")>;
+
+// LODSD/Q.
+def : InstRW<[WriteMicrocoded], (instregex "LODS(L|Q)")>;
+
+// MOVS.
+def : InstRW<[WriteMicrocoded], (instregex "MOVS(B|L|Q|W)")>;
+
+// SCAS.
+def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;
+
+// STOS
+def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;
+
+// XADD.
+def Zn2XADD : SchedWriteRes<[Zn2ALU]>;
+def : InstRW<[Zn2XADD], (instregex "XADD(8|16|32|64)rr")>;
+def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;
+
+//=== Floating Point x87 Instructions ===//
+//-- Move instructions --//
+
+def Zn2WriteFLDr : SchedWriteRes<[Zn2FPU13]> ;
+
+def Zn2WriteSTr: SchedWriteRes<[Zn2FPU23]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+// LD_F.
+// r.
+def : InstRW<[Zn2WriteFLDr], (instregex "LD_Frr")>;
+
+// m.
+def Zn2WriteLD_F80m : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn2WriteLD_F80m], (instregex "LD_F80m")>;
+
+// FBLD.
+def : InstRW<[WriteMicrocoded], (instregex "FBLDm")>;
+
+// FST(P).
+// r.
+def : InstRW<[Zn2WriteSTr], (instregex "ST_(F|FP)rr")>;
+
+// m80.
+def Zn2WriteST_FP80m : SchedWriteRes<[Zn2AGU, Zn2FPU23]> {
+ let Latency = 5;
+}
+def : InstRW<[Zn2WriteST_FP80m], (instregex "ST_FP80m")>;
+
+// FBSTP.
+// m80.
+def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>;
+
+def Zn2WriteFXCH : SchedWriteRes<[Zn2FPU]>;
+
+// FXCH.
+def : InstRW<[Zn2WriteFXCH], (instrs XCH_F)>;
+
+// FILD.
+def Zn2WriteFILD : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn2WriteFILD], (instregex "ILD_F(16|32|64)m")>;
+
+// FIST(P) FISTTP.
+def Zn2WriteFIST : SchedWriteRes<[Zn2AGU, Zn2FPU23]> {
+ let Latency = 12;
+}
+def : InstRW<[Zn2WriteFIST], (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>;
+
+def Zn2WriteFPU13 : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
+ let Latency = 8;
+}
+
+def Zn2WriteFPU3 : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
+ let Latency = 11;
+}
+
+// FLDZ.
+def : SchedAlias<WriteFLD0, Zn2WriteFPU13>;
+
+// FLD1.
+def : SchedAlias<WriteFLD1, Zn2WriteFPU3>;
+
+// FLDPI FLDL2E etc.
+def : SchedAlias<WriteFLDC, Zn2WriteFPU3>;
+
+// FNSTSW.
+// AX.
+def : InstRW<[WriteMicrocoded], (instrs FNSTSW16r)>;
+
+// m16.
+def : InstRW<[WriteMicrocoded], (instrs FNSTSWm)>;
+
+// FLDCW.
+def : InstRW<[WriteMicrocoded], (instrs FLDCW16m)>;
+
+// FNSTCW.
+def : InstRW<[WriteMicrocoded], (instrs FNSTCW16m)>;
+
+// FINCSTP FDECSTP.
+def : InstRW<[Zn2WriteFPU3], (instrs FINCSTP, FDECSTP)>;
+
+// FFREE.
+def : InstRW<[Zn2WriteFPU3], (instregex "FFREE")>;
+
+// FNSAVE.
+def : InstRW<[WriteMicrocoded], (instregex "FSAVEm")>;
+
+// FRSTOR.
+def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>;
+
+//-- Arithmetic instructions --//
+
+def Zn2WriteFPU3Lat1 : SchedWriteRes<[Zn2FPU3]> ;
+
+def Zn2WriteFPU0Lat1 : SchedWriteRes<[Zn2FPU0]> ;
+
+def Zn2WriteFPU0Lat1Ld : SchedWriteRes<[Zn2AGU, Zn2FPU0]> {
+ let Latency = 8;
+}
+
+// FCHS.
+def : InstRW<[Zn2WriteFPU3Lat1], (instregex "CHS_F")>;
+
+// FCOM(P) FUCOM(P).
+// r.
+def : InstRW<[Zn2WriteFPU0Lat1], (instregex "COM(P?)_FST0r", "UCOM_F(P?)r")>;
+// m.
+def : InstRW<[Zn2WriteFPU0Lat1Ld], (instregex "FCOM(P?)(32|64)m")>;
+
+// FCOMPP FUCOMPP.
+// r.
+def : InstRW<[Zn2WriteFPU0Lat1], (instrs FCOMPP, UCOM_FPPr)>;
+
+def Zn2WriteFPU02 : SchedWriteRes<[Zn2AGU, Zn2FPU02]> {
+ let Latency = 9;
+}
+
+// FCOMI(P) FUCOMI(P).
+// r.
+def : InstRW<[Zn2WriteFPU02], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
+
+def Zn2WriteFPU03 : SchedWriteRes<[Zn2AGU, Zn2FPU03]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,3];
+}
+
+// FICOM(P).
+def : InstRW<[Zn2WriteFPU03], (instregex "FICOM(P?)(16|32)m")>;
+
+// FTST.
+def : InstRW<[Zn2WriteFPU0Lat1], (instregex "TST_F")>;
+
+// FXAM.
+def : InstRW<[Zn2WriteFPU3Lat1], (instrs FXAM)>;
+
+// FPREM.
+def : InstRW<[WriteMicrocoded], (instrs FPREM)>;
+
+// FPREM1.
+def : InstRW<[WriteMicrocoded], (instrs FPREM1)>;
+
+// FRNDINT.
+def : InstRW<[WriteMicrocoded], (instrs FRNDINT)>;
+
+// FSCALE.
+def : InstRW<[WriteMicrocoded], (instrs FSCALE)>;
+
+// FXTRACT.
+def : InstRW<[WriteMicrocoded], (instrs FXTRACT)>;
+
+// FNOP.
+def : InstRW<[Zn2WriteFPU0Lat1], (instrs FNOP)>;
+
+// WAIT.
+def : InstRW<[Zn2WriteFPU0Lat1], (instrs WAIT)>;
+
+// FNCLEX.
+def : InstRW<[WriteMicrocoded], (instrs FNCLEX)>;
+
+// FNINIT.
+def : InstRW<[WriteMicrocoded], (instrs FNINIT)>;
+
+//=== Integer MMX and XMM Instructions ===//
+
+// PACKSSWB/DW.
+// mm <- mm.
+def Zn2WriteFPU12 : SchedWriteRes<[Zn2FPU12]> ;
+def Zn2WriteFPU12Y : SchedWriteRes<[Zn2FPU12]> {
+ let NumMicroOps = 2;
+}
+def Zn2WriteFPU12m : SchedWriteRes<[Zn2AGU, Zn2FPU12]> ;
+def Zn2WriteFPU12Ym : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[Zn2WriteFPU12], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
+def : InstRW<[Zn2WriteFPU12m], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
+
+// VPMOVSX/ZX BW BD BQ WD WQ DQ.
+// y <- x.
+def : InstRW<[Zn2WriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>;
+def : InstRW<[Zn2WriteFPU12Ym], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>;
+
+def Zn2WriteFPU013 : SchedWriteRes<[Zn2FPU013]> ;
+def Zn2WriteFPU013Y : SchedWriteRes<[Zn2FPU013]> ;
+def Zn2WriteFPU013m : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def Zn2WriteFPU013Ld : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def Zn2WriteFPU013LdY : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+// PBLENDW.
+// x,x,i / v,v,v,i
+def : InstRW<[Zn2WriteFPU013], (instregex "(V?)PBLENDWrri")>;
+// ymm
+def : InstRW<[Zn2WriteFPU013Y], (instrs VPBLENDWYrri)>;
+
+// x,m,i / v,v,m,i
+def : InstRW<[Zn2WriteFPU013Ld], (instregex "(V?)PBLENDWrmi")>;
+// y,m,i
+def : InstRW<[Zn2WriteFPU013LdY], (instrs VPBLENDWYrmi)>;
+
+def Zn2WriteFPU01 : SchedWriteRes<[Zn2FPU01]> ;
+def Zn2WriteFPU01Y : SchedWriteRes<[Zn2FPU01]> {
+ let NumMicroOps = 2;
+}
+
+// VPBLENDD.
+// v,v,v,i.
+def : InstRW<[Zn2WriteFPU01], (instrs VPBLENDDrri)>;
+// ymm
+def : InstRW<[Zn2WriteFPU01Y], (instrs VPBLENDDYrri)>;
+
+// v,v,m,i
+def Zn2WriteFPU01Op2 : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
+ let NumMicroOps = 2;
+ let Latency = 8;
+ let ResourceCycles = [1, 2];
+}
+def Zn2WriteFPU01Op2Y : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
+ let NumMicroOps = 2;
+ let Latency = 9;
+ let ResourceCycles = [1, 3];
+}
+def : InstRW<[Zn2WriteFPU01Op2], (instrs VPBLENDDrmi)>;
+def : InstRW<[Zn2WriteFPU01Op2Y], (instrs VPBLENDDYrmi)>;
+
+// MASKMOVQ.
+def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>;
+
+// MASKMOVDQU.
+def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>;
+
+// VPMASKMOVD.
+// ymm
+def : InstRW<[WriteMicrocoded],
+ (instregex "VPMASKMOVD(Y?)rm")>;
+// m, v,v.
+def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
+
+// VPBROADCAST B/W.
+// x, m8/16.
+def Zn2WriteVPBROADCAST128Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[Zn2WriteVPBROADCAST128Ld],
+ (instregex "VPBROADCAST(B|W)rm")>;
+
+// y, m8/16
+def Zn2WriteVPBROADCAST256Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[Zn2WriteVPBROADCAST256Ld],
+ (instregex "VPBROADCAST(B|W)Yrm")>;
+
+// VPGATHER.
+def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
+
+//-- Arithmetic instructions --//
+
+// HADD, HSUB PS/PD
+// PHADD|PHSUB (S) W/D.
+def : SchedAlias<WritePHAdd, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePHAddLd, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePHAddX, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePHAddXLd, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePHAddY, Zn2WriteMicrocoded>;
+def : SchedAlias<WritePHAddYLd, Zn2WriteMicrocoded>;
+
+// PCMPGTQ.
+def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>;
+def : InstRW<[Zn2WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
+
+// x <- x,m.
+def Zn2WritePCMPGTQm : SchedWriteRes<[Zn2AGU, Zn2FPU03]> {
+ let Latency = 8;
+}
+// ymm.
+def Zn2WritePCMPGTQYm : SchedWriteRes<[Zn2AGU, Zn2FPU03]> {
+ let Latency = 8;
+}
+def : InstRW<[Zn2WritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
+def : InstRW<[Zn2WritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
+
+//-- Logic instructions --//
+
+// PSLL,PSRL,PSRA W/D/Q.
+// x,x / v,v,x.
+def Zn2WritePShift : SchedWriteRes<[Zn2FPU2]> ;
+def Zn2WritePShiftY : SchedWriteRes<[Zn2FPU2]> ;
+
+// PSLL,PSRL DQ.
+def : InstRW<[Zn2WritePShift], (instregex "(V?)PS(R|L)LDQri")>;
+def : InstRW<[Zn2WritePShiftY], (instregex "(V?)PS(R|L)LDQYri")>;
+
+//=== Floating Point XMM and YMM Instructions ===//
+//-- Move instructions --//
+
+// VPERM2F128.
+def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr)>;
+def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm)>;
+
+def Zn2WriteBROADCAST : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
+ let NumMicroOps = 2;
+ let Latency = 8;
+}
+// VBROADCASTF128.
+def : InstRW<[Zn2WriteBROADCAST], (instrs VBROADCASTF128)>;
+
+// EXTRACTPS.
+// r32,x,i.
+def Zn2WriteEXTRACTPSr : SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
+ let Latency = 2;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[Zn2WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
+
+def Zn2WriteEXTRACTPSm : SchedWriteRes<[Zn2AGU,Zn2FPU12, Zn2FPU2]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [5, 1, 2];
+}
+// m32,x,i.
+def : InstRW<[Zn2WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
+
+// VEXTRACTF128.
+// x,y,i.
+def : InstRW<[Zn2WriteFPU013], (instrs VEXTRACTF128rr)>;
+
+// m128,y,i.
+def : InstRW<[Zn2WriteFPU013m], (instrs VEXTRACTF128mr)>;
+
+def Zn2WriteVINSERT128r: SchedWriteRes<[Zn2FPU013]> {
+ let Latency = 2;
+// let ResourceCycles = [2];
+}
+def Zn2WriteVINSERT128Ld: SchedWriteRes<[Zn2AGU,Zn2FPU013]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+// VINSERTF128.
+// y,y,x,i.
+def : InstRW<[Zn2WriteVINSERT128r], (instrs VINSERTF128rr)>;
+def : InstRW<[Zn2WriteVINSERT128Ld], (instrs VINSERTF128rm)>;
+
+// VGATHER.
+def : InstRW<[WriteMicrocoded], (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>;
+
+//-- Conversion instructions --//
+def Zn2WriteCVTPD2PSr: SchedWriteRes<[Zn2FPU3]> {
+ let Latency = 3;
+}
+def Zn2WriteCVTPD2PSYr: SchedWriteRes<[Zn2FPU3]> {
+ let Latency = 3;
+}
+
+// CVTPD2PS.
+// x,x.
+def : SchedAlias<WriteCvtPD2PS, Zn2WriteCVTPD2PSr>;
+// y,y.
+def : SchedAlias<WriteCvtPD2PSY, Zn2WriteCVTPD2PSYr>;
+// z,z.
+defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;
+
+def Zn2WriteCVTPD2PSLd: SchedWriteRes<[Zn2AGU,Zn2FPU03]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+// x,m128.
+def : SchedAlias<WriteCvtPD2PSLd, Zn2WriteCVTPD2PSLd>;
+
+// x,m256.
+def Zn2WriteCVTPD2PSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
+ let Latency = 10;
+}
+def : SchedAlias<WriteCvtPD2PSYLd, Zn2WriteCVTPD2PSYLd>;
+// z,m512
+defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>;
+
+// CVTSD2SS.
+// x,x.
+// Same as WriteCVTPD2PSr
+def : SchedAlias<WriteCvtSD2SS, Zn2WriteCVTPD2PSr>;
+
+// x,m64.
+def : SchedAlias<WriteCvtSD2SSLd, Zn2WriteCVTPD2PSLd>;
+
+// CVTPS2PD.
+// x,x.
+def Zn2WriteCVTPS2PDr : SchedWriteRes<[Zn2FPU3]> {
+ let Latency = 3;
+}
+def : SchedAlias<WriteCvtPS2PD, Zn2WriteCVTPS2PDr>;
+
+// x,m64.
+// y,m128.
+def Zn2WriteCVTPS2PDLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def : SchedAlias<WriteCvtPS2PDLd, Zn2WriteCVTPS2PDLd>;
+def : SchedAlias<WriteCvtPS2PDYLd, Zn2WriteCVTPS2PDLd>;
+defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>;
+
+// y,x.
+def Zn2WriteVCVTPS2PDY : SchedWriteRes<[Zn2FPU3]> {
+ let Latency = 3;
+}
+def : SchedAlias<WriteCvtPS2PDY, Zn2WriteVCVTPS2PDY>;
+defm : X86WriteResUnsupported<WriteCvtPS2PDZ>;
+
+// CVTSS2SD.
+// x,x.
+def Zn2WriteCVTSS2SDr : SchedWriteRes<[Zn2FPU3]> {
+ let Latency = 3;
+}
+def : SchedAlias<WriteCvtSS2SD, Zn2WriteCVTSS2SDr>;
+
+// x,m32.
+def Zn2WriteCVTSS2SDLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 2];
+}
+def : SchedAlias<WriteCvtSS2SDLd, Zn2WriteCVTSS2SDLd>;
+
+def Zn2WriteCVTDQ2PDr: SchedWriteRes<[Zn2FPU12,Zn2FPU3]> {
+ let Latency = 3;
+}
+// CVTDQ2PD.
+// x,x.
+def : InstRW<[Zn2WriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>;
+
+// Same as xmm
+// y,x.
+def : InstRW<[Zn2WriteCVTDQ2PDr], (instrs VCVTDQ2PDYrr)>;
+def : InstRW<[Zn2WriteCVTDQ2PDr], (instrs VCVTDQ2PSYrr)>;
+
+def Zn2WriteCVTPD2DQr: SchedWriteRes<[Zn2FPU12, Zn2FPU3]> {
+ let Latency = 3;
+}
+// CVT(T)PD2DQ.
+// x,x.
+def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)PD2DQrr")>;
+
+def Zn2WriteCVTPD2DQLd: SchedWriteRes<[Zn2AGU,Zn2FPU12,Zn2FPU3]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+// x,m128.
+def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "(V?)CVT(T?)PD2DQrm")>;
+// Same as the xmm handling.
+// x,y.
+def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "VCVT(T?)PD2DQYrr")>;
+// x,m256.
+def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQYrm")>;
+
+def Zn2WriteCVTPS2PIr: SchedWriteRes<[Zn2FPU3]> {
+ let Latency = 4;
+}
+// CVT(T)PS2PI.
+// mm,x.
+def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIirr")>;
+
+// CVTPI2PD.
+// x,mm.
+def : InstRW<[Zn2WriteCVTPS2PDr], (instrs MMX_CVTPI2PDirr)>;
+
+// CVT(T)PD2PI.
+// mm,x.
+def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>;
+
+def Zn2WriteCVSTSI2SSr: SchedWriteRes<[Zn2FPU3]> {
+ let Latency = 4;
+}
+
+// Same as CVTPD2DQr.
+// CVT(T)SS2SI.
+// r32,x.
+def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
+// Same as CVTPD2DQm.
+// r32,m32.
+def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;
+
+def Zn2WriteCVSTSI2SDr: SchedWriteRes<[Zn2FPU013, Zn2FPU3]> {
+ let Latency = 4;
+}
+// CVTSI2SD.
+// x,r32/64.
+def : InstRW<[Zn2WriteCVSTSI2SDr], (instregex "(V?)CVTSI(64)?2SDrr")>;
+
+def Zn2WriteCVSTSI2SIr: SchedWriteRes<[Zn2FPU3, Zn2FPU2]> {
+ let Latency = 4;
+}
+def Zn2WriteCVSTSI2SILd: SchedWriteRes<[Zn2AGU, Zn2FPU3, Zn2FPU2]> {
+ let Latency = 11;
+}
+// CVTSD2SI.
+// r32/64
+def : InstRW<[Zn2WriteCVSTSI2SIr], (instregex "(V?)CVT(T?)SD2SI(64)?rr")>;
+// r32,m32.
+def : InstRW<[Zn2WriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
+
+// VCVTPS2PH.
+// x,v,i.
+def : SchedAlias<WriteCvtPS2PH, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteCvtPS2PHY, Zn2WriteMicrocoded>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
+// m,v,i.
+def : SchedAlias<WriteCvtPS2PHSt, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteCvtPS2PHYSt, Zn2WriteMicrocoded>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
+
+// VCVTPH2PS.
+// v,x.
+def : SchedAlias<WriteCvtPH2PS, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteCvtPH2PSY, Zn2WriteMicrocoded>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
+// v,m.
+def : SchedAlias<WriteCvtPH2PSLd, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteCvtPH2PSYLd, Zn2WriteMicrocoded>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
+
+//-- SSE4A instructions --//
+// EXTRQ
+def Zn2WriteEXTRQ: SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
+ let Latency = 2;
+}
+def : InstRW<[Zn2WriteEXTRQ], (instregex "EXTRQ")>;
+
+// INSERTQ
+def Zn2WriteINSERTQ: SchedWriteRes<[Zn2FPU03,Zn2FPU1]> {
+ let Latency = 4;
+}
+def : InstRW<[Zn2WriteINSERTQ], (instregex "INSERTQ")>;
+
+//-- SHA instructions --//
+// SHA256MSG2
+def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;
+
+// SHA1MSG1, SHA256MSG1
+// x,x.
+def Zn2WriteSHA1MSG1r : SchedWriteRes<[Zn2FPU12]> {
+ let Latency = 2;
+}
+def : InstRW<[Zn2WriteSHA1MSG1r], (instregex "SHA(1|256)MSG1rr")>;
+// x,m.
+def Zn2WriteSHA1MSG1Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
+ let Latency = 9;
+}
+def : InstRW<[Zn2WriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
+
+// SHA1MSG2
+// x,x.
+def Zn2WriteSHA1MSG2r : SchedWriteRes<[Zn2FPU12]> ;
+def : InstRW<[Zn2WriteSHA1MSG2r], (instregex "SHA1MSG2rr")>;
+// x,m.
+def Zn2WriteSHA1MSG2Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
+ let Latency = 8;
+}
+def : InstRW<[Zn2WriteSHA1MSG2Ld], (instregex "SHA1MSG2rm")>;
+
+// SHA1NEXTE
+// x,x.
+def Zn2WriteSHA1NEXTEr : SchedWriteRes<[Zn2FPU1]> ;
+def : InstRW<[Zn2WriteSHA1NEXTEr], (instregex "SHA1NEXTErr")>;
+// x,m.
+def Zn2WriteSHA1NEXTELd : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
+ let Latency = 8;
+}
+def : InstRW<[Zn2WriteSHA1NEXTELd], (instregex "SHA1NEXTErm")>;
+
+// SHA1RNDS4
+// x,x.
+def Zn2WriteSHA1RNDS4r : SchedWriteRes<[Zn2FPU1]> {
+ let Latency = 6;
+}
+def : InstRW<[Zn2WriteSHA1RNDS4r], (instregex "SHA1RNDS4rr")>;
+// x,m.
+def Zn2WriteSHA1RNDS4Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
+ let Latency = 13;
+}
+def : InstRW<[Zn2WriteSHA1RNDS4Ld], (instregex "SHA1RNDS4rm")>;
+
+// SHA256RNDS2
+// x,x.
+def Zn2WriteSHA256RNDS2r : SchedWriteRes<[Zn2FPU1]> {
+ let Latency = 4;
+}
+def : InstRW<[Zn2WriteSHA256RNDS2r], (instregex "SHA256RNDS2rr")>;
+// x,m.
+def Zn2WriteSHA256RNDS2Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
+ let Latency = 11;
+}
+def : InstRW<[Zn2WriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;
+
+//-- Arithmetic instructions --//
+
+// HADD, HSUB PS/PD
+def : SchedAlias<WriteFHAdd, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteFHAddLd, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteFHAddY, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteFHAddYLd, Zn2WriteMicrocoded>;
+
+// VDIVPS.
+// TODO - convert to Zn2WriteResFpuPair
+// y,y,y.
+def Zn2WriteVDIVPSYr : SchedWriteRes<[Zn2FPU3]> {
+ let Latency = 10;
+ let ResourceCycles = [10];
+}
+def : SchedAlias<WriteFDivY, Zn2WriteVDIVPSYr>;
+
+// y,y,m256.
+def Zn2WriteVDIVPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
+ let Latency = 17;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 17];
+}
+def : SchedAlias<WriteFDivYLd, Zn2WriteVDIVPSYLd>;
+
+// VDIVPD.
+// TODO - convert to Zn2WriteResFpuPair
+// y,y,y.
+def Zn2WriteVDIVPDY : SchedWriteRes<[Zn2FPU3]> {
+ let Latency = 13;
+ let ResourceCycles = [13];
+}
+def : SchedAlias<WriteFDiv64Y, Zn2WriteVDIVPDY>;
+
+// y,y,m256.
+def Zn2WriteVDIVPDYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
+ let Latency = 20;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,20];
+}
+def : SchedAlias<WriteFDiv64YLd, Zn2WriteVDIVPDYLd>;
+
+// DPPS.
+// x,x,i / v,v,v,i.
+def : SchedAlias<WriteDPPS, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteDPPSY, Zn2WriteMicrocoded>;
+
+// x,m,i / v,v,m,i.
+def : SchedAlias<WriteDPPSLd, Zn2WriteMicrocoded>;
+def : SchedAlias<WriteDPPSYLd,Zn2WriteMicrocoded>;
+
+// DPPD.
+// x,x,i.
+def : SchedAlias<WriteDPPD, Zn2WriteMicrocoded>;
+
+// x,m,i.
+def : SchedAlias<WriteDPPDLd, Zn2WriteMicrocoded>;
+
+// RSQRTSS
+// TODO - convert to Zn2WriteResFpuPair
+// x,x.
+def Zn2WriteRSQRTSSr : SchedWriteRes<[Zn2FPU02]> {
+ let Latency = 5;
+}
+def : SchedAlias<WriteFRsqrt, Zn2WriteRSQRTSSr>;
+
+// x,m128.
+def Zn2WriteRSQRTSSLd: SchedWriteRes<[Zn2AGU, Zn2FPU02]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,2];
+}
+def : SchedAlias<WriteFRsqrtLd, Zn2WriteRSQRTSSLd>;
+
+// RSQRTPS
+// TODO - convert to Zn2WriteResFpuPair
+// y,y.
+def Zn2WriteRSQRTPSYr : SchedWriteRes<[Zn2FPU01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : SchedAlias<WriteFRsqrtY, Zn2WriteRSQRTPSYr>;
+
+// y,m256.
+def Zn2WriteRSQRTPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+}
+def : SchedAlias<WriteFRsqrtYLd, Zn2WriteRSQRTPSYLd>;
+
+//-- Other instructions --//
+
+// VZEROUPPER.
+def : InstRW<[WriteALU], (instrs VZEROUPPER)>;
+
+// VZEROALL.
+def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>;
+
+} // SchedModel
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index b8980789258e..9aa47c532e82 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -148,8 +148,8 @@ private:
/// Manages the predicate state traced through the program.
struct PredState {
- unsigned InitialReg;
- unsigned PoisonReg;
+ unsigned InitialReg = 0;
+ unsigned PoisonReg = 0;
const TargetRegisterClass *RC;
MachineSSAUpdater SSA;
@@ -158,10 +158,10 @@ private:
: RC(RC), SSA(MF) {}
};
- const X86Subtarget *Subtarget;
- MachineRegisterInfo *MRI;
- const X86InstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ const X86Subtarget *Subtarget = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const X86InstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
Optional<PredState> PS;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
index f8f78da52cc2..75c3a70b430a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -324,8 +324,8 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
MaybeAlign StackAlignOverride,
unsigned PreferVectorWidthOverride,
unsigned RequiredVectorWidth)
- : X86GenSubtargetInfo(TT, CPU, FS), PICStyle(PICStyles::None), TM(TM),
- TargetTriple(TT), StackAlignOverride(StackAlignOverride),
+ : X86GenSubtargetInfo(TT, CPU, FS), PICStyle(PICStyles::Style::None),
+ TM(TM), TargetTriple(TT), StackAlignOverride(StackAlignOverride),
PreferVectorWidthOverride(PreferVectorWidthOverride),
RequiredVectorWidth(RequiredVectorWidth),
In64BitMode(TargetTriple.getArch() == Triple::x86_64),
@@ -337,15 +337,15 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
if (!isPositionIndependent())
- setPICStyle(PICStyles::None);
+ setPICStyle(PICStyles::Style::None);
else if (is64Bit())
- setPICStyle(PICStyles::RIPRel);
+ setPICStyle(PICStyles::Style::RIPRel);
else if (isTargetCOFF())
- setPICStyle(PICStyles::None);
+ setPICStyle(PICStyles::Style::None);
else if (isTargetDarwin())
- setPICStyle(PICStyles::StubPIC);
+ setPICStyle(PICStyles::Style::StubPIC);
else if (isTargetELF())
- setPICStyle(PICStyles::GOT);
+ setPICStyle(PICStyles::Style::GOT);
CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering()));
Legalizer.reset(new X86LegalizerInfo(*this, TM));
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
index e8efe8f2afe5..f4e8d30328ca 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
@@ -40,7 +40,7 @@ class GlobalValue;
///
namespace PICStyles {
-enum Style {
+enum class Style {
StubPIC, // Used on i386-darwin in pic mode.
GOT, // Used on 32 bit elf on when in pic mode.
RIPRel, // Used on X86-64 when in pic mode.
@@ -56,10 +56,7 @@ public:
enum X86ProcFamilyEnum {
Others,
IntelAtom,
- IntelSLM,
- IntelGLM,
- IntelGLP,
- IntelTRM
+ IntelSLM
};
protected:
@@ -256,9 +253,9 @@ protected:
/// mask over multiple fixed shuffles.
bool HasFastVariableShuffle = false;
- /// True if there is no performance penalty to writing only the lower parts
- /// of a YMM or ZMM register without clearing the upper part.
- bool HasFastPartialYMMorZMMWrite = false;
+ /// True if vzeroupper instructions should be inserted after code that uses
+ /// ymm or zmm registers.
+ bool InsertVZEROUPPER = false;
/// True if there is no performance penalty for writing NOPs with up to
/// 11 bytes.
@@ -445,9 +442,15 @@ protected:
/// Indicates target prefers 256 bit instructions.
bool Prefer256Bit = false;
+ /// Indicates target prefers AVX512 mask registers.
+ bool PreferMaskRegisters = false;
+
/// Threeway branch is profitable in this subtarget.
bool ThreewayBranchProfitable = false;
+ /// Use Goldmont specific floating point div/sqrt costs.
+ bool UseGLMDivSqrtCosts = false;
+
/// What processor and OS we're targeting.
Triple TargetTriple;
@@ -655,9 +658,7 @@ public:
bool hasFastVariableShuffle() const {
return HasFastVariableShuffle;
}
- bool hasFastPartialYMMorZMMWrite() const {
- return HasFastPartialYMMorZMMWrite;
- }
+ bool insertVZEROUPPER() const { return InsertVZEROUPPER; }
bool hasFastGather() const { return HasFastGather; }
bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
@@ -706,6 +707,8 @@ public:
return UseRetpolineIndirectBranches;
}
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
+ bool preferMaskRegisters() const { return PreferMaskRegisters; }
+ bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@@ -738,11 +741,6 @@ public:
/// TODO: to be removed later and replaced with suitable properties
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
- bool isGLM() const {
- return X86ProcFamily == IntelGLM ||
- X86ProcFamily == IntelGLP ||
- X86ProcFamily == IntelTRM;
- }
bool useSoftFloat() const { return UseSoftFloat; }
bool useAA() const override { return UseAA; }
@@ -801,11 +799,11 @@ public:
bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }
- bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
- bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
+ bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
+ bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }
bool isPICStyleStubPIC() const {
- return PICStyle == PICStyles::StubPIC;
+ return PICStyle == PICStyles::Style::StubPIC;
}
bool isPositionIndependent() const { return TM.isPositionIndependent(); }
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
index c15297134e4d..8c696e9adbed 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -46,6 +46,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/CFGuard.h"
#include <memory>
#include <string>
@@ -60,7 +61,7 @@ static cl::opt<bool> EnableCondBrFoldingPass("x86-condbr-folding",
"folding pass"),
cl::init(false), cl::Hidden);
-extern "C" void LLVMInitializeX86Target() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86TargetMachine> X(getTheX86_32Target());
RegisterTargetMachine<X86TargetMachine> Y(getTheX86_64Target());
@@ -229,9 +230,7 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
this->Options.NoTrapAfterNoreturn = TT.isOSBinFormatMachO();
}
- // Outlining is available for x86-64.
- if (TT.getArch() == Triple::x86_64)
- setMachineOutliner(true);
+ setMachineOutliner(true);
initAsmInfo();
}
@@ -414,6 +413,16 @@ void X86PassConfig::addIRPasses() {
// thunk. These will be a no-op unless a function subtarget has the retpoline
// feature enabled.
addPass(createIndirectBrExpandPass());
+
+ // Add Control Flow Guard checks.
+ const Triple &TT = TM->getTargetTriple();
+ if (TT.isOSWindows()) {
+ if (TT.getArch() == Triple::x86_64) {
+ addPass(createCFGuardDispatchPass());
+ } else {
+ addPass(createCFGuardCheckPass());
+ }
+ }
}
bool X86PassConfig::addInstSelector() {
@@ -530,6 +539,9 @@ void X86PassConfig::addPreEmitPass2() {
(!TT.isOSWindows() ||
MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI))
addPass(createCFIInstrInserter());
+ // Identify valid longjmp targets for Windows Control Flow Guard.
+ if (TT.isOSWindows())
+ addPass(createCFGuardLongjmpPass());
}
std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
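
The two new passes are wired into the X86 IR pipeline above: Windows x86_64
gets the dispatch flavor, other Windows targets the check flavor. As a hedged
illustration only (not the in-tree wiring), the same passes can also be run
from a standalone legacy pipeline; runCFGuardChecks is a hypothetical helper:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/CFGuard.h"
    using namespace llvm;

    // Sketch: apply CFGuard check instrumentation to a whole module,
    // assuming a 32-bit Windows triple; x86_64 would use
    // createCFGuardDispatchPass() instead, mirroring addIRPasses() above.
    void runCFGuardChecks(Module &M) {
      legacy::PassManager PM;
      PM.add(createCFGuardCheckPass());
      PM.run(M);
    }
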
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 70fd857fcf01..b754836ea517 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -169,12 +169,13 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}
-int X86TTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
- TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo,
- ArrayRef<const Value *> Args) {
+int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
@@ -188,7 +189,7 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::FDIV, MVT::v2f64, 65 }, // divpd
};
- if (ST->isGLM())
+ if (ST->useGLMDivSqrtCosts())
if (const auto *Entry = CostTableLookup(GLMCostTable, ISD,
LT.second))
return LT.first * Entry->Cost;
@@ -280,7 +281,7 @@ int X86TTIImpl::getArithmeticInstrCost(
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
- if (ISD == ISD::UREM)
+ else // UREM
return getArithmeticInstrCost(Instruction::And, Ty, Op1Info, Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
@@ -1389,6 +1390,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 },
@@ -1397,6 +1399,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 },
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 },
{ ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 },
@@ -1550,6 +1553,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
};
@@ -1576,9 +1580,14 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
+
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 6 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
+
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 },
@@ -2199,7 +2208,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
MVT MTy = LT.second;
// Attempt to lookup cost.
- if (ST->isGLM())
+ if (ST->useGLMDivSqrtCosts())
if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
return LT.first * Entry->Cost;
@@ -2374,6 +2383,13 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
}
int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+ static const CostTblEntry SLMCostTbl[] = {
+ { ISD::EXTRACT_VECTOR_ELT, MVT::i8, 4 },
+ { ISD::EXTRACT_VECTOR_ELT, MVT::i16, 4 },
+ { ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4 },
+ { ISD::EXTRACT_VECTOR_ELT, MVT::i64, 7 }
+ };
+
assert(Val->isVectorTy() && "This must be a vector type");
Type *ScalarType = Val->getScalarType();
@@ -2390,9 +2406,22 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
unsigned Width = LT.second.getVectorNumElements();
Index = Index % Width;
- // Floating point scalars are already located in index #0.
- if (ScalarType->isFloatingPointTy() && Index == 0)
- return 0;
+ if (Index == 0) {
+ // Floating point scalars are already located in index #0.
+ if (ScalarType->isFloatingPointTy())
+ return 0;
+
+ // Assume movd/movq XMM <-> GPR is relatively cheap on all targets.
+ if (ScalarType->isIntegerTy())
+ return 1;
+ }
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Unexpected vector opcode");
+ MVT MScalarTy = LT.second.getScalarType();
+ if (ST->isSLM())
+ if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy))
+ return LT.first * Entry->Cost;
}
// Add to the base cost if we know that the extracted element of a vector is
@@ -2404,8 +2433,9 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
}
-int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace, const Instruction *I) {
+int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ MaybeAlign Alignment, unsigned AddressSpace,
+ const Instruction *I) {
// Handle non-power-of-two vectors such as <3 x float>
if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
unsigned NumElem = VTy->getVectorNumElements();
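
The repeated MaybeAlign(Alignment) wrapping in this and the following hunks
comes from the alignment-type migration: MaybeAlign models a possibly-unknown
alignment, where a wrapped 0 means "unknown". A small sketch of the
semantics (standalone, assuming the llvm/Support/Alignment.h API of this
release):

    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    // MaybeAlign(0) is empty ("alignment unknown"), so legacy unsigned
    // alignment arguments can be wrapped without losing information.
    unsigned describe(MaybeAlign A) {
      if (!A)
        return 0;          // unknown alignment (wrapped 0)
      return A->value();   // known power-of-two alignment
    }
    // describe(MaybeAlign(0))  == 0
    // describe(MaybeAlign(16)) == 16
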
@@ -2456,7 +2486,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
if (!SrcVTy)
// To calculate scalar take the regular cost, without mask
- return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace);
+ return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace);
unsigned NumElem = SrcVTy->getVectorNumElements();
VectorType *MaskTy =
@@ -2474,7 +2504,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
int MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
- Alignment, AddressSpace);
+ MaybeAlign(Alignment), AddressSpace);
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
}
@@ -2533,6 +2563,11 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
// We use the Intel Architecture Code Analyzer (IACA) to measure the throughput
// and use that as the cost.
+ static const CostTblEntry SLMCostTblPairWise[] = {
+ { ISD::FADD, MVT::v2f64, 3 },
+ { ISD::ADD, MVT::v2i64, 5 },
+ };
+
static const CostTblEntry SSE2CostTblPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
@@ -2558,6 +2593,11 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v32i8, 4 },
};
+ static const CostTblEntry SLMCostTblNoPairWise[] = {
+ { ISD::FADD, MVT::v2f64, 3 },
+ { ISD::ADD, MVT::v2i64, 5 },
+ };
+
static const CostTblEntry SSE2CostTblNoPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
@@ -2594,6 +2634,10 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
if (VT.isSimple()) {
MVT MTy = VT.getSimpleVT();
if (IsPairwise) {
+ if (ST->isSLM())
+ if (const auto *Entry = CostTableLookup(SLMCostTblPairWise, ISD, MTy))
+ return Entry->Cost;
+
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
return Entry->Cost;
@@ -2602,6 +2646,10 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
return Entry->Cost;
} else {
+ if (ST->isSLM())
+ if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
+ return Entry->Cost;
+
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
return Entry->Cost;
@@ -2617,6 +2665,10 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
MVT MTy = LT.second;
if (IsPairwise) {
+ if (ST->isSLM())
+ if (const auto *Entry = CostTableLookup(SLMCostTblPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
+
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
return LT.first * Entry->Cost;
@@ -2625,6 +2677,10 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
return LT.first * Entry->Cost;
} else {
+ if (ST->isSLM())
+ if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
+
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
return LT.first * Entry->Cost;
@@ -2634,6 +2690,24 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
return LT.first * Entry->Cost;
}
+ // FIXME: These assume a naive kshift+binop lowering, which is probably
+ // conservative in most cases.
+ // FIXME: This doesn't cost large types like v128i1 correctly.
+ static const CostTblEntry AVX512BoolReduction[] = {
+ { ISD::AND, MVT::v2i1, 3 },
+ { ISD::AND, MVT::v4i1, 5 },
+ { ISD::AND, MVT::v8i1, 7 },
+ { ISD::AND, MVT::v16i1, 9 },
+ { ISD::AND, MVT::v32i1, 11 },
+ { ISD::AND, MVT::v64i1, 13 },
+ { ISD::OR, MVT::v2i1, 3 },
+ { ISD::OR, MVT::v4i1, 5 },
+ { ISD::OR, MVT::v8i1, 7 },
+ { ISD::OR, MVT::v16i1, 9 },
+ { ISD::OR, MVT::v32i1, 11 },
+ { ISD::OR, MVT::v64i1, 13 },
+ };
+
static const CostTblEntry AVX2BoolReduction[] = {
{ ISD::AND, MVT::v16i16, 2 }, // vpmovmskb + cmp
{ ISD::AND, MVT::v32i8, 2 }, // vpmovmskb + cmp
@@ -2664,7 +2738,10 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
};
// Handle bool allof/anyof patterns.
- if (ValTy->getVectorElementType()->isIntegerTy(1)) {
+ if (!IsPairwise && ValTy->getVectorElementType()->isIntegerTy(1)) {
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512BoolReduction, ISD, MTy))
+ return LT.first * Entry->Cost;
if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2BoolReduction, ISD, MTy))
return LT.first * Entry->Cost;
@@ -2956,7 +3033,7 @@ int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return std::max(1, Cost);
}
-int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty) {
assert(Ty->isIntegerTy());
@@ -3053,8 +3130,8 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return X86TTIImpl::getIntImmCost(Imm, Ty);
}
-int X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -3164,7 +3241,7 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
? ST->getGatherOverhead()
: ST->getScatterOverhead();
return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
- Alignment, AddressSpace);
+ MaybeAlign(Alignment), AddressSpace);
}
/// Return the cost of full scalarization of gather / scatter operation.
@@ -3194,7 +3271,7 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
// The cost of the scalar loads/stores.
int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
- Alignment, AddressSpace);
+ MaybeAlign(Alignment), AddressSpace);
int InsertExtractCost = 0;
if (Opcode == Instruction::Load)
@@ -3224,8 +3301,10 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
unsigned AddressSpace = PtrTy->getAddressSpace();
bool Scalarize = false;
- if ((Opcode == Instruction::Load && !isLegalMaskedGather(SrcVTy)) ||
- (Opcode == Instruction::Store && !isLegalMaskedScatter(SrcVTy)))
+ if ((Opcode == Instruction::Load &&
+ !isLegalMaskedGather(SrcVTy, MaybeAlign(Alignment))) ||
+ (Opcode == Instruction::Store &&
+ !isLegalMaskedScatter(SrcVTy, MaybeAlign(Alignment))))
Scalarize = true;
// Gather / Scatter for vector 2 is not profitable on KNL / SKX
// Vector-4 of gather/scatter instruction does not exist on KNL.
@@ -3348,7 +3427,7 @@ bool X86TTIImpl::isLegalMaskedCompressStore(Type *DataTy) {
return isLegalMaskedExpandLoad(DataTy);
}
-bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
+bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, MaybeAlign Alignment) {
// Some CPUs have better gather performance than others.
// TODO: Remove the explicit ST->hasAVX512()? That would mean we would only
// enable gather with a -march.
@@ -3386,11 +3465,11 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
return IntWidth == 32 || IntWidth == 64;
}
-bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
+bool X86TTIImpl::isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
// AVX2 doesn't support scatter
if (!ST->hasAVX512())
return false;
- return isLegalMaskedGather(DataType);
+ return isLegalMaskedGather(DataType, Alignment);
}
bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
@@ -3443,10 +3522,9 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
// version is not as fast for three way compare (see #33329).
const unsigned PreferredWidth = ST->getPreferVectorWidth();
if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64);
- if (PreferredWidth >= 256 && ST->hasAVX2()) Options.LoadSizes.push_back(32);
+ if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32);
if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
- // All GPR and vector loads can be unaligned. SIMD compare requires integer
- // vectors (SSE2/AVX2).
+ // All GPR and vector loads can be unaligned.
Options.AllowOverlappingLoads = true;
}
if (ST->is64Bit()) {
@@ -3520,8 +3598,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
// Get the cost of one memory operation.
Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
LegalVT.getVectorNumElements());
- unsigned MemOpCost =
- getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
+ unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
+ MaybeAlign(Alignment), AddressSpace);
VectorType *VT = VectorType::get(ScalarTy, VF);
EVT ETy = TLI->getValueType(DL, VT);
@@ -3620,8 +3698,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
// Get the cost of one memory operation.
Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
LegalVT.getVectorNumElements());
- unsigned MemOpCost =
- getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
+ unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
+ MaybeAlign(Alignment), AddressSpace);
unsigned VF = VecTy->getVectorNumElements() / Factor;
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 7581257f41f8..b9c2dbd78058 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -51,7 +51,6 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
X86::FeatureFastBEXTR,
X86::FeatureFastHorizontalOps,
X86::FeatureFastLZCNT,
- X86::FeatureFastPartialYMMorZMMWrite,
X86::FeatureFastScalarFSQRT,
X86::FeatureFastSHLDRotate,
X86::FeatureFastScalarShiftMasks,
@@ -77,6 +76,9 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
X86::FeatureSlowSHLD,
X86::FeatureSlowTwoMemOps,
X86::FeatureSlowUAMem16,
+ X86::FeaturePreferMaskRegisters,
+ X86::FeatureInsertVZEROUPPER,
+ X86::FeatureUseGLMDivSqrtCosts,
// Perf-tuning flags.
X86::FeatureHasFastGather,
@@ -88,10 +90,7 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
// CPU name enums. These just follow CPU string.
X86::ProcIntelAtom,
- X86::ProcIntelGLM,
- X86::ProcIntelGLP,
X86::ProcIntelSLM,
- X86::ProcIntelTRM,
};
public:
@@ -126,14 +125,15 @@ public:
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
@@ -179,9 +179,9 @@ public:
unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
- int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
- int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);
bool canMacroFuseCmp();
@@ -189,8 +189,8 @@ public:
bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment);
bool isLegalNTLoad(Type *DataType, Align Alignment);
bool isLegalNTStore(Type *DataType, Align Alignment);
- bool isLegalMaskedGather(Type *DataType);
- bool isLegalMaskedScatter(Type *DataType);
+ bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment);
+ bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment);
bool isLegalMaskedExpandLoad(Type *DataType);
bool isLegalMaskedCompressStore(Type *DataType);
bool hasDivRemOp(Type *DataType, bool IsSigned);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp
index 9280d030b5d5..7a8308ef1ba9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -279,7 +279,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
/// function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- if (!ST.hasAVX() || ST.hasFastPartialYMMorZMMWrite())
+ if (!ST.hasAVX() || !ST.insertVZEROUPPER())
return false;
TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86WinAllocaExpander.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
index ae72c6427588..42e8fba2201e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
@@ -54,14 +54,14 @@ private:
/// Lower a WinAlloca instruction.
void lower(MachineInstr* MI, Lowering L);
- MachineRegisterInfo *MRI;
- const X86Subtarget *STI;
- const TargetInstrInfo *TII;
- const X86RegisterInfo *TRI;
- unsigned StackPtr;
- unsigned SlotSize;
- int64_t StackProbeSize;
- bool NoStackArgProbe;
+ MachineRegisterInfo *MRI = nullptr;
+ const X86Subtarget *STI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const X86RegisterInfo *TRI = nullptr;
+ unsigned StackPtr = 0;
+ unsigned SlotSize = 0;
+ int64_t StackProbeSize = 0;
+ bool NoStackArgProbe = false;
StringRef getPassName() const override { return "X86 WinAlloca Expander"; }
static char ID;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
index d65e1f3ab414..78d3f6460189 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -23,7 +23,8 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -91,7 +92,7 @@ private:
EHPersonality Personality = EHPersonality::Unknown;
Function *PersonalityFn = nullptr;
bool UseStackGuard = false;
- int ParentBaseState;
+ int ParentBaseState = 0;
FunctionCallee SehLongjmpUnwind = nullptr;
Constant *Cookie = nullptr;
@@ -178,11 +179,6 @@ bool WinEHStatePass::runOnFunction(Function &F) {
{Int8PtrType, Type::getInt32Ty(TheModule->getContext())},
/*isVarArg=*/true));
- // Disable frame pointer elimination in this function.
- // FIXME: Do the nested handlers need to keep the parent ebp in ebp, or can we
- // use an arbitrary register?
- F.addFnAttr("no-frame-pointer-elim", "true");
-
emitExceptionRegistrationRecord(&F);
// The state numbers calculated here in IR must agree with what we calculate
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index ff3d41fd5274..0505686e645b 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -37,7 +37,6 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
};
}
@@ -735,9 +734,10 @@ DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(
- MCInst &instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &vStream, raw_ostream &cStream) const {
+MCDisassembler::DecodeStatus
+XCoreDisassembler::getInstruction(MCInst &instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &cStream) const {
uint16_t insn16;
if (!readInstruction16(Bytes, Address, Size, insn16)) {
@@ -774,7 +774,7 @@ static MCDisassembler *createXCoreDisassembler(const Target &T,
return new XCoreDisassembler(STI, Ctx);
}
-extern "C" void LLVMInitializeXCoreDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeXCoreDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheXCoreTarget(),
createXCoreDisassembler);
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp
index d231e0981324..a6296cbdaf90 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp
@@ -30,9 +30,10 @@ void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << StringRef(getRegisterName(RegNo)).lower();
}
-void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
- printInstruction(MI, O);
+void XCoreInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
index 4f0940323505..c7868bf4cf8e 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
@@ -27,12 +27,12 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
private:
void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 877f38e22f9b..46ebccee521e 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -55,7 +55,8 @@ createXCoreMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
}
static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TT) {
+ const Triple &TT,
+ const MCTargetOptions &Options) {
MCAsmInfo *MAI = new XCoreMCAsmInfo(TT);
// Initial state of the frame pointer is SP.
@@ -121,7 +122,7 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
}
// Force static initialization.
-extern "C" void LLVMInitializeXCoreTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeXCoreTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(getTheXCoreTarget(), createXCoreMCAsmInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index 5604f29db3e9..d5f66c2bd824 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -15,7 +15,7 @@ Target &llvm::getTheXCoreTarget() {
return TheXCoreTarget;
}
-extern "C" void LLVMInitializeXCoreTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeXCoreTargetInfo() {
RegisterTarget<Triple::xcore> X(getTheXCoreTarget(), "xcore", "XCore",
"XCore");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 6b3dc27cb886..35dc56e90419 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -292,6 +292,6 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// Force static initialization.
-extern "C" void LLVMInitializeXCoreAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeXCoreAsmPrinter() {
RegisterAsmPrinter<XCoreAsmPrinter> X(getTheXCoreTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 5fd9e23258b0..b1f9717fbddc 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsXCore.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index bf006fd673f1..70770f4c8e7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsXCore.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index bbad8e354586..db44a56be538 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -330,8 +330,8 @@ XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const {
void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
bool GRDest = XCore::GRRegsRegClass.contains(DestReg);
bool GRSrc = XCore::GRRegsRegClass.contains(SrcReg);
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.h
index b9621f136589..057fb763efbf 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -63,7 +63,7 @@ public:
int *BytesRemoved = nullptr) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index a18fb28f2fe9..83fc16ed98fc 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsXCore.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/ValueHandle.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
index 86ec7f82d4d1..56fed26ebd7b 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -203,7 +203,7 @@ static void InsertSPConstInst(MachineBasicBlock::iterator II,
}
bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
- return MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry();
+ return MF.needsFrameMoves();
}
const MCPhysReg *
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index b5b7445265b7..736bc4148a19 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -105,7 +105,7 @@ void XCorePassConfig::addPreEmitPass() {
}
// Force static initialization.
-extern "C" void LLVMInitializeXCoreTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeXCoreTarget() {
RegisterTargetMachine<XCoreTargetMachine> X(getTheXCoreTarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index a24de3ca213f..59b94567a9c2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/CFGuard/CFGuard.cpp b/contrib/llvm-project/llvm/lib/Transforms/CFGuard/CFGuard.cpp
new file mode 100644
index 000000000000..7c5e90cb53cd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/CFGuard/CFGuard.cpp
@@ -0,0 +1,305 @@
+//===-- CFGuard.cpp - Control Flow Guard checks -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the IR transform to add Microsoft's Control Flow Guard
+/// checks on Windows targets.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/CFGuard.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+using OperandBundleDef = OperandBundleDefT<Value *>;
+
+#define DEBUG_TYPE "cfguard"
+
+STATISTIC(CFGuardCounter, "Number of Control Flow Guard checks added");
+
+namespace {
+
+/// Adds Control Flow Guard (CFG) checks on indirect function calls/invokes.
+/// These checks ensure that the target address corresponds to the start of an
+/// address-taken function. X86_64 targets use the CF_Dispatch mechanism. X86,
+/// ARM, and AArch64 targets use the CF_Check mechanism.
+class CFGuard : public FunctionPass {
+public:
+ static char ID;
+
+ enum Mechanism { CF_Check, CF_Dispatch };
+
+ // Default constructor required for the INITIALIZE_PASS macro.
+ CFGuard() : FunctionPass(ID) {
+ initializeCFGuardPass(*PassRegistry::getPassRegistry());
+ // By default, use the guard check mechanism.
+ GuardMechanism = CF_Check;
+ }
+
+ // Recommended constructor used to specify the type of guard mechanism.
+ CFGuard(Mechanism Var) : FunctionPass(ID) {
+ initializeCFGuardPass(*PassRegistry::getPassRegistry());
+ GuardMechanism = Var;
+ }
+
+ /// Inserts a Control Flow Guard (CFG) check on an indirect call using the CFG
+ /// check mechanism. When the image is loaded, the loader puts the appropriate
+ /// guard check function pointer in the __guard_check_icall_fptr global
+ /// symbol. This checks that the target address is a valid address-taken
+ /// function. The address of the target function is passed to the guard check
+ /// function in an architecture-specific register (e.g. ECX on 32-bit X86,
+/// X15 on AArch64, and R0 on ARM). The guard check function has no return
+/// value (if the target is invalid, the guard check function will raise an
+ /// error).
+ ///
+ /// For example, the following LLVM IR:
+ /// \code
+ /// %func_ptr = alloca i32 ()*, align 8
+ /// store i32 ()* @target_func, i32 ()** %func_ptr, align 8
+ /// %0 = load i32 ()*, i32 ()** %func_ptr, align 8
+ /// %1 = call i32 %0()
+ /// \endcode
+ ///
+ /// is transformed to:
+ /// \code
+ /// %func_ptr = alloca i32 ()*, align 8
+ /// store i32 ()* @target_func, i32 ()** %func_ptr, align 8
+ /// %0 = load i32 ()*, i32 ()** %func_ptr, align 8
+ /// %1 = load void (i8*)*, void (i8*)** @__guard_check_icall_fptr
+ /// %2 = bitcast i32 ()* %0 to i8*
+ /// call cfguard_checkcc void %1(i8* %2)
+ /// %3 = call i32 %0()
+ /// \endcode
+ ///
+ /// For example, the following X86 assembly code:
+ /// \code
+ /// movl $_target_func, %eax
+ /// calll *%eax
+ /// \endcode
+ ///
+ /// is transformed to:
+ /// \code
+ /// movl $_target_func, %ecx
+ /// calll *___guard_check_icall_fptr
+ /// calll *%ecx
+ /// \endcode
+ ///
+ /// \param CB indirect call to instrument.
+ void insertCFGuardCheck(CallBase *CB);
+
+ /// Inserts a Control Flow Guard (CFG) check on an indirect call using the CFG
+ /// dispatch mechanism. When the image is loaded, the loader puts the
+ /// appropriate guard check function pointer in the
+ /// __guard_dispatch_icall_fptr global symbol. This checks that the target
+ /// address is a valid address-taken function and, if so, tail calls the
+ /// target. The target address is passed in an architecture-specific register
+ /// (e.g. RAX on X86_64), with all other arguments for the target function
+ /// passed as usual.
+ ///
+ /// For example, the following LLVM IR:
+ /// \code
+ /// %func_ptr = alloca i32 ()*, align 8
+ /// store i32 ()* @target_func, i32 ()** %func_ptr, align 8
+ /// %0 = load i32 ()*, i32 ()** %func_ptr, align 8
+ /// %1 = call i32 %0()
+ /// \endcode
+ ///
+ /// is transformed to:
+ /// \code
+ /// %func_ptr = alloca i32 ()*, align 8
+ /// store i32 ()* @target_func, i32 ()** %func_ptr, align 8
+ /// %0 = load i32 ()*, i32 ()** %func_ptr, align 8
+ /// %1 = load i32 ()*, i32 ()** @__guard_dispatch_icall_fptr
+ /// %2 = call i32 %1() [ "cfguardtarget"(i32 ()* %0) ]
+ /// \endcode
+ ///
+ /// For example, the following X86_64 assembly code:
+ /// \code
+ /// leaq target_func(%rip), %rax
+ /// callq *%rax
+ /// \endcode
+ ///
+ /// is transformed to:
+ /// \code
+ /// leaq target_func(%rip), %rax
+ /// callq *__guard_dispatch_icall_fptr(%rip)
+ /// \endcode
+ ///
+ /// \param CB indirect call to instrument.
+ void insertCFGuardDispatch(CallBase *CB);
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+private:
+ // Only add checks if the module has the cfguard=2 flag.
+ int cfguard_module_flag = 0;
+ Mechanism GuardMechanism = CF_Check;
+ FunctionType *GuardFnType = nullptr;
+ PointerType *GuardFnPtrType = nullptr;
+ Constant *GuardFnGlobal = nullptr;
+};
+
+} // end anonymous namespace
+
+void CFGuard::insertCFGuardCheck(CallBase *CB) {
+
+ assert(Triple(CB->getModule()->getTargetTriple()).isOSWindows() &&
+ "Only applicable for Windows targets");
+ assert(CB->isIndirectCall() &&
+ "Control Flow Guard checks can only be added to indirect calls");
+
+ IRBuilder<> B(CB);
+ Value *CalledOperand = CB->getCalledOperand();
+
+ // Load the global symbol as a pointer to the check function.
+ LoadInst *GuardCheckLoad = B.CreateLoad(GuardFnPtrType, GuardFnGlobal);
+
+ // Create new call instruction. The CFGuard check should always be a call,
+ // even if the original CallBase is an Invoke or CallBr instruction.
+ CallInst *GuardCheck =
+ B.CreateCall(GuardFnType, GuardCheckLoad,
+ {B.CreateBitCast(CalledOperand, B.getInt8PtrTy())});
+
+ // Ensure that the first argument is passed in the correct register
+ // (e.g. ECX on 32-bit X86 targets).
+ GuardCheck->setCallingConv(CallingConv::CFGuard_Check);
+}
+
+void CFGuard::insertCFGuardDispatch(CallBase *CB) {
+
+ assert(Triple(CB->getModule()->getTargetTriple()).isOSWindows() &&
+ "Only applicable for Windows targets");
+ assert(CB->isIndirectCall() &&
+ "Control Flow Guard checks can only be added to indirect calls");
+
+ IRBuilder<> B(CB);
+ Value *CalledOperand = CB->getCalledOperand();
+ Type *CalledOperandType = CalledOperand->getType();
+
+ // Cast the guard dispatch global to the type of the called operand.
+ PointerType *PTy = PointerType::get(CalledOperandType, 0);
+ if (GuardFnGlobal->getType() != PTy)
+ GuardFnGlobal = ConstantExpr::getBitCast(GuardFnGlobal, PTy);
+
+ // Load the global as a pointer to a function of the same type.
+ LoadInst *GuardDispatchLoad = B.CreateLoad(CalledOperandType, GuardFnGlobal);
+
+ // Add the original call target as a cfguardtarget operand bundle.
+ SmallVector<llvm::OperandBundleDef, 1> Bundles;
+ CB->getOperandBundlesAsDefs(Bundles);
+ Bundles.emplace_back("cfguardtarget", CalledOperand);
+
+ // Create a copy of the call/invoke instruction and add the new bundle.
+ CallBase *NewCB;
+ if (CallInst *CI = dyn_cast<CallInst>(CB)) {
+ NewCB = CallInst::Create(CI, Bundles, CB);
+ } else {
+ assert(isa<InvokeInst>(CB) && "Unknown indirect call type");
+ InvokeInst *II = cast<InvokeInst>(CB);
+ NewCB = llvm::InvokeInst::Create(II, Bundles, CB);
+ }
+
+ // Change the target of the call to be the guard dispatch function.
+ NewCB->setCalledOperand(GuardDispatchLoad);
+
+ // Replace the original call/invoke with the new instruction.
+ CB->replaceAllUsesWith(NewCB);
+
+ // Delete the original call/invoke.
+ CB->eraseFromParent();
+}
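
The original callee survives in the "cfguardtarget" operand bundle added
above, so later passes can still see the real target. A hedged sketch of
reading it back (getGuardedTarget is a hypothetical helper; getOperandBundle
is the real CallBase API):

    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    // Sketch: recover the real target of a guarded dispatch call from
    // the bundle attached by insertCFGuardDispatch().
    Value *getGuardedTarget(CallBase *CB) {
      if (auto Bundle = CB->getOperandBundle("cfguardtarget"))
        return Bundle->Inputs[0];
      return nullptr; // not a guarded dispatch call
    }
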
+
+bool CFGuard::doInitialization(Module &M) {
+
+ // Check if this module has the cfguard flag and read its value.
+ if (auto *MD =
+ mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("cfguard")))
+ cfguard_module_flag = MD->getZExtValue();
+
+ // Skip modules for which CFGuard checks have been disabled.
+ if (cfguard_module_flag != 2)
+ return false;
+
+ // Set up prototypes for the guard check and dispatch functions.
+ GuardFnType = FunctionType::get(Type::getVoidTy(M.getContext()),
+ {Type::getInt8PtrTy(M.getContext())}, false);
+ GuardFnPtrType = PointerType::get(GuardFnType, 0);
+
+ // Get or insert the guard check or dispatch global symbols.
+ if (GuardMechanism == CF_Check) {
+ GuardFnGlobal =
+ M.getOrInsertGlobal("__guard_check_icall_fptr", GuardFnPtrType);
+ } else {
+ assert(GuardMechanism == CF_Dispatch && "Invalid CFGuard mechanism");
+ GuardFnGlobal =
+ M.getOrInsertGlobal("__guard_dispatch_icall_fptr", GuardFnPtrType);
+ }
+
+ return true;
+}
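
The flag read above is the hook front ends use to opt a module in. As an
illustration (a sketch, not part of this patch), a producer enables the
checks by setting the "cfguard" module flag to 2, which doInitialization()
then reads back:

    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Sketch: mark a module so CFGuard::doInitialization() sees
    // cfguard_module_flag == 2 and instruments its indirect calls.
    void enableCFGuard(Module &M) {
      M.addModuleFlag(Module::Warning, "cfguard", 2);
    }
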
+
+bool CFGuard::runOnFunction(Function &F) {
+
+ // Skip modules for which CFGuard checks have been disabled.
+ if (cfguard_module_flag != 2)
+ return false;
+
+ SmallVector<CallBase *, 8> IndirectCalls;
+
+ // Iterate over the instructions to find all indirect call/invoke/callbr
+ // instructions. Make a separate list of pointers to indirect
+ // call/invoke/callbr instructions because the original instructions will be
+ // deleted as the checks are added.
+ for (BasicBlock &BB : F.getBasicBlockList()) {
+ for (Instruction &I : BB.getInstList()) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (CB && CB->isIndirectCall() && !CB->hasFnAttr("guard_nocf")) {
+ IndirectCalls.push_back(CB);
+ CFGuardCounter++;
+ }
+ }
+ }
+
+ // If no checks are needed, return early.
+ if (IndirectCalls.empty()) {
+ return false;
+ }
+
+ // For each indirect call/invoke, add the appropriate dispatch or check.
+ if (GuardMechanism == CF_Dispatch) {
+ for (CallBase *CB : IndirectCalls) {
+ insertCFGuardDispatch(CB);
+ }
+ } else {
+ for (CallBase *CB : IndirectCalls) {
+ insertCFGuardCheck(CB);
+ }
+ }
+
+ return true;
+}
+
+char CFGuard::ID = 0;
+INITIALIZE_PASS(CFGuard, "CFGuard", "CFGuard", false, false)
+
+FunctionPass *llvm::createCFGuardCheckPass() {
+ return new CFGuard(CFGuard::CF_Check);
+}
+
+FunctionPass *llvm::createCFGuardDispatchPass() {
+ return new CFGuard(CFGuard::CF_Dispatch);
+} \ No newline at end of file
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index c3e05577f044..c2dbd6f41642 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -99,11 +99,11 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
namespace {
-struct CoroCleanup : FunctionPass {
+struct CoroCleanupLegacy : FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CoroCleanup() : FunctionPass(ID) {
- initializeCoroCleanupPass(*PassRegistry::getPassRegistry());
+ CoroCleanupLegacy() : FunctionPass(ID) {
+ initializeCoroCleanupLegacyPass(*PassRegistry::getPassRegistry());
}
std::unique_ptr<Lowerer> L;
@@ -132,8 +132,8 @@ struct CoroCleanup : FunctionPass {
};
}
-char CoroCleanup::ID = 0;
-INITIALIZE_PASS(CoroCleanup, "coro-cleanup",
+char CoroCleanupLegacy::ID = 0;
+INITIALIZE_PASS(CoroCleanupLegacy, "coro-cleanup",
"Lower all coroutine related intrinsics", false, false)
-Pass *llvm::createCoroCleanupPass() { return new CoroCleanup(); }
+Pass *llvm::createCoroCleanupLegacyPass() { return new CoroCleanupLegacy(); }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index 55993d33ee4e..e73fb9eeb1e9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
#define DEBUG_TYPE "coro-early"
namespace {
-// Created on demand if CoroEarly pass has work to do.
+// Created on demand if the coro-early pass has work to do.
class Lowerer : public coro::LowererBase {
IRBuilder<> Builder;
PointerType *const AnyResumeFnPtrTy;
@@ -225,10 +225,10 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
namespace {
-struct CoroEarly : public FunctionPass {
+struct CoroEarlyLegacy : public FunctionPass {
static char ID; // Pass identification, replacement for typeid.
- CoroEarly() : FunctionPass(ID) {
- initializeCoroEarlyPass(*PassRegistry::getPassRegistry());
+ CoroEarlyLegacy() : FunctionPass(ID) {
+ initializeCoroEarlyLegacyPass(*PassRegistry::getPassRegistry());
}
std::unique_ptr<Lowerer> L;
@@ -267,8 +267,8 @@ struct CoroEarly : public FunctionPass {
};
}
-char CoroEarly::ID = 0;
-INITIALIZE_PASS(CoroEarly, "coro-early", "Lower early coroutine intrinsics",
- false, false)
+char CoroEarlyLegacy::ID = 0;
+INITIALIZE_PASS(CoroEarlyLegacy, "coro-early",
+ "Lower early coroutine intrinsics", false, false)
-Pass *llvm::createCoroEarlyPass() { return new CoroEarly(); }
+Pass *llvm::createCoroEarlyLegacyPass() { return new CoroEarlyLegacy(); }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index aca77119023b..23d22e23861a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
@@ -23,7 +24,7 @@ using namespace llvm;
#define DEBUG_TYPE "coro-elide"
namespace {
-// Created on demand if CoroElide pass has work to do.
+// Created on demand if the coro-elide pass has work to do.
struct Lowerer : coro::LowererBase {
SmallVector<CoroIdInst *, 4> CoroIds;
SmallVector<CoroBeginInst *, 1> CoroBegins;
@@ -276,10 +277,10 @@ static bool replaceDevirtTrigger(Function &F) {
//===----------------------------------------------------------------------===//
namespace {
-struct CoroElide : FunctionPass {
+struct CoroElideLegacy : FunctionPass {
static char ID;
- CoroElide() : FunctionPass(ID) {
- initializeCoroElidePass(*PassRegistry::getPassRegistry());
+ CoroElideLegacy() : FunctionPass(ID) {
+ initializeCoroElideLegacyPass(*PassRegistry::getPassRegistry());
}
std::unique_ptr<Lowerer> L;
@@ -329,15 +330,15 @@ struct CoroElide : FunctionPass {
};
}
-char CoroElide::ID = 0;
+char CoroElideLegacy::ID = 0;
INITIALIZE_PASS_BEGIN(
- CoroElide, "coro-elide",
+ CoroElideLegacy, "coro-elide",
"Coroutine frame allocation elision and indirect calls replacement", false,
false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(
- CoroElide, "coro-elide",
+ CoroElideLegacy, "coro-elide",
"Coroutine frame allocation elision and indirect calls replacement", false,
false)
-Pass *llvm::createCoroElidePass() { return new CoroElide(); }
+Pass *llvm::createCoroElideLegacyPass() { return new CoroElideLegacy(); }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
index c151474316f9..7eb35400c0d5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -21,10 +21,10 @@ class CallGraph;
class CallGraphSCC;
class PassRegistry;
-void initializeCoroEarlyPass(PassRegistry &);
-void initializeCoroSplitPass(PassRegistry &);
-void initializeCoroElidePass(PassRegistry &);
-void initializeCoroCleanupPass(PassRegistry &);
+void initializeCoroEarlyLegacyPass(PassRegistry &);
+void initializeCoroSplitLegacyPass(PassRegistry &);
+void initializeCoroElideLegacyPass(PassRegistry &);
+void initializeCoroCleanupLegacyPass(PassRegistry &);
// CoroEarly pass marks every function that has coro.begin with a string
// attribute "coroutine.presplit"="0". CoroSplit pass processes the coroutine
@@ -43,7 +43,8 @@ void initializeCoroCleanupPass(PassRegistry &);
namespace coro {
-bool declaresIntrinsics(Module &M, std::initializer_list<StringRef>);
+bool declaresIntrinsics(const Module &M,
+ const std::initializer_list<StringRef>);
void replaceAllCoroAllocs(CoroBeginInst *CB, bool Replacement);
void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement);
void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 04723cbde417..66cb3e74e53e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -27,7 +27,6 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -52,6 +51,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
@@ -60,6 +60,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <cstddef>
@@ -157,8 +158,9 @@ private:
} // end anonymous namespace
-static void maybeFreeRetconStorage(IRBuilder<> &Builder, coro::Shape &Shape,
- Value *FramePtr, CallGraph *CG) {
+static void maybeFreeRetconStorage(IRBuilder<> &Builder,
+ const coro::Shape &Shape, Value *FramePtr,
+ CallGraph *CG) {
assert(Shape.ABI == coro::ABI::Retcon ||
Shape.ABI == coro::ABI::RetconOnce);
if (Shape.RetconLowering.IsFrameInlineInStorage)
@@ -168,9 +170,9 @@ static void maybeFreeRetconStorage(IRBuilder<> &Builder, coro::Shape &Shape,
}
/// Replace a non-unwind call to llvm.coro.end.
-static void replaceFallthroughCoroEnd(CoroEndInst *End, coro::Shape &Shape,
- Value *FramePtr, bool InResume,
- CallGraph *CG) {
+static void replaceFallthroughCoroEnd(CoroEndInst *End,
+ const coro::Shape &Shape, Value *FramePtr,
+ bool InResume, CallGraph *CG) {
// Start inserting right before the coro.end.
IRBuilder<> Builder(End);
@@ -218,7 +220,7 @@ static void replaceFallthroughCoroEnd(CoroEndInst *End, coro::Shape &Shape,
}
/// Replace an unwind call to llvm.coro.end.
-static void replaceUnwindCoroEnd(CoroEndInst *End, coro::Shape &Shape,
+static void replaceUnwindCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
Value *FramePtr, bool InResume, CallGraph *CG){
IRBuilder<> Builder(End);
@@ -245,7 +247,7 @@ static void replaceUnwindCoroEnd(CoroEndInst *End, coro::Shape &Shape,
}
}
-static void replaceCoroEnd(CoroEndInst *End, coro::Shape &Shape,
+static void replaceCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
Value *FramePtr, bool InResume, CallGraph *CG) {
if (End->isUnwind())
replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
@@ -781,7 +783,7 @@ static Function *createClone(Function &F, const Twine &Suffix,
}
/// Remove calls to llvm.coro.end in the original function.
-static void removeCoroEnds(coro::Shape &Shape, CallGraph *CG) {
+static void removeCoroEnds(const coro::Shape &Shape, CallGraph *CG) {
for (auto End : Shape.CoroEnds) {
replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG);
}
@@ -906,17 +908,29 @@ scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
// values and select the correct case successor when possible.
static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
DenseMap<Value *, Value *> ResolvedValues;
+ BasicBlock *UnconditionalSucc = nullptr;
Instruction *I = InitialInst;
while (I->isTerminator()) {
if (isa<ReturnInst>(I)) {
- if (I != InitialInst)
+ if (I != InitialInst) {
+ // If InitialInst is an unconditional branch,
+ // remove PHI values that come from basic block of InitialInst
+ if (UnconditionalSucc)
+ for (PHINode &PN : UnconditionalSucc->phis()) {
+ int idx = PN.getBasicBlockIndex(InitialInst->getParent());
+ if (idx != -1)
+ PN.removeIncomingValue(idx);
+ }
ReplaceInstWithInst(InitialInst, I->clone());
+ }
return true;
}
if (auto *BR = dyn_cast<BranchInst>(I)) {
if (BR->isUnconditional()) {
BasicBlock *BB = BR->getSuccessor(0);
+ if (I == InitialInst)
+ UnconditionalSucc = BB;
scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
I = BB->getFirstNonPHIOrDbgOrLifetime();
continue;
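
The PHI cleanup added in this hunk matters because folding an unconditional
branch into a direct return leaves the old successor with a stale incoming
edge. A sketch of the same cleanup in isolation (assuming BB ends in the
branch being folded and Succ is its target, mirroring the loop above):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Drop Succ's PHI entries that came from BB, which will now return
    // directly instead of branching into Succ.
    void dropIncomingFrom(BasicBlock *Succ, BasicBlock *BB) {
      for (PHINode &PN : Succ->phis()) {
        int Idx = PN.getBasicBlockIndex(BB);
        if (Idx != -1)
          PN.removeIncomingValue(Idx);
      }
    }
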
@@ -1407,9 +1421,10 @@ static void prepareForSplit(Function &F, CallGraph &CG) {
CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
}
-// Make sure that there is a devirtualization trigger function that CoroSplit
-// pass uses the force restart CGSCC pipeline. If devirt trigger function is not
-// found, we will create one and add it to the current SCC.
+// Make sure that there is a devirtualization trigger function that the
+// coro-split pass uses to force a restart of the CGSCC pipeline. If the devirt
+// trigger function is not found, we will create one and add it to the current
+// SCC.
static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
Module &M = CG.getModule();
if (M.getFunction(CORO_DEVIRT_TRIGGER_FN))
@@ -1512,11 +1527,11 @@ static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
namespace {
-struct CoroSplit : public CallGraphSCCPass {
+struct CoroSplitLegacy : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- CoroSplit() : CallGraphSCCPass(ID) {
- initializeCoroSplitPass(*PassRegistry::getPassRegistry());
+ CoroSplitLegacy() : CallGraphSCCPass(ID) {
+ initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry());
}
bool Run = false;
@@ -1586,16 +1601,16 @@ struct CoroSplit : public CallGraphSCCPass {
} // end anonymous namespace
-char CoroSplit::ID = 0;
+char CoroSplitLegacy::ID = 0;
INITIALIZE_PASS_BEGIN(
- CoroSplit, "coro-split",
+ CoroSplitLegacy, "coro-split",
"Split coroutine into a set of functions driving its state machine", false,
false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(
- CoroSplit, "coro-split",
+ CoroSplitLegacy, "coro-split",
"Split coroutine into a set of functions driving its state machine", false,
false)
-Pass *llvm::createCoroSplitPass() { return new CoroSplit(); }
+Pass *llvm::createCoroSplitLegacyPass() { return new CoroSplitLegacy(); }
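Out-of-tree users of the legacy pass manager pick up the rename directly; a minimal sketch (assuming llvm/Transforms/Coroutines.h and llvm/IR/LegacyPassManager.h are included):

    void runCoroSplitOn(llvm::Module &M) {
      llvm::legacy::PassManager PM;
      PM.add(llvm::createCoroSplitLegacyPass()); // was createCoroSplitPass()
      PM.run(M);
    }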
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index f39483b27518..02d11af3303f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -11,14 +11,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Coroutines.h"
-#include "llvm-c/Transforms/Coroutines.h"
#include "CoroInstr.h"
#include "CoroInternal.h"
+#include "llvm-c/Transforms/Coroutines.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -31,10 +30,12 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstddef>
#include <utility>
@@ -42,39 +43,39 @@
using namespace llvm;
void llvm::initializeCoroutines(PassRegistry &Registry) {
- initializeCoroEarlyPass(Registry);
- initializeCoroSplitPass(Registry);
- initializeCoroElidePass(Registry);
- initializeCoroCleanupPass(Registry);
+ initializeCoroEarlyLegacyPass(Registry);
+ initializeCoroSplitLegacyPass(Registry);
+ initializeCoroElideLegacyPass(Registry);
+ initializeCoroCleanupLegacyPass(Registry);
}
static void addCoroutineOpt0Passes(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
- PM.add(createCoroSplitPass());
- PM.add(createCoroElidePass());
+ PM.add(createCoroSplitLegacyPass());
+ PM.add(createCoroElideLegacyPass());
PM.add(createBarrierNoopPass());
- PM.add(createCoroCleanupPass());
+ PM.add(createCoroCleanupLegacyPass());
}
static void addCoroutineEarlyPasses(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
- PM.add(createCoroEarlyPass());
+ PM.add(createCoroEarlyLegacyPass());
}
static void addCoroutineScalarOptimizerPasses(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
- PM.add(createCoroElidePass());
+ PM.add(createCoroElideLegacyPass());
}
static void addCoroutineSCCPasses(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
- PM.add(createCoroSplitPass());
+ PM.add(createCoroSplitLegacyPass());
}
static void addCoroutineOptimizerLastPasses(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
- PM.add(createCoroCleanupPass());
+ PM.add(createCoroCleanupLegacyPass());
}
void llvm::addCoroutinePassesToExtensionPoints(PassManagerBuilder &Builder) {
@@ -150,8 +151,8 @@ static bool isCoroutineIntrinsicName(StringRef Name) {
// Checks whether the module declares any of the named values listed. Also, in
// debug mode, verifies that the names are coroutine intrinsic names.
-bool coro::declaresIntrinsics(Module &M,
- std::initializer_list<StringRef> List) {
+bool coro::declaresIntrinsics(const Module &M,
+ const std::initializer_list<StringRef> List) {
for (StringRef Name : List) {
assert(isCoroutineIntrinsicName(Name) && "not a coroutine intrinsic");
if (M.getNamedValue(Name))
@@ -634,17 +635,17 @@ void AnyCoroIdRetconInst::checkWellFormed() const {
}
void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCoroEarlyPass());
+ unwrap(PM)->add(createCoroEarlyLegacyPass());
}
void LLVMAddCoroSplitPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCoroSplitPass());
+ unwrap(PM)->add(createCoroSplitLegacyPass());
}
void LLVMAddCoroElidePass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCoroElidePass());
+ unwrap(PM)->add(createCoroElideLegacyPass());
}
void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCoroCleanupPass());
+ unwrap(PM)->add(createCoroCleanupLegacyPass());
}
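The C API entry points keep their historical names and merely forward to the renamed legacy passes, so C clients are unaffected; a minimal sketch using only functions from llvm-c/Core.h and llvm-c/Transforms/Coroutines.h:

    LLVMPassManagerRef PM = LLVMCreatePassManager();
    LLVMAddCoroEarlyPass(PM);
    LLVMAddCoroSplitPass(PM);
    LLVMAddCoroElidePass(PM);
    LLVMAddCoroCleanupPass(PM);
    /* ... LLVMRunPassManager(PM, Mod); ... */
    LLVMDisposePassManager(PM);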
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index c50805692b98..06d1763353f4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/Utils/Cloning.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index dd9f74a881ee..cdf8a2eb598e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -70,6 +70,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
@@ -386,8 +387,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
- I->getParamAlignment(), "", InsertPt);
+ Value *TheAlloca =
+ new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
+ MaybeAlign(I->getParamAlignment()), "", InsertPt);
StructType *STy = cast<StructType>(AgTy);
Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
nullptr};
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
index 95f47345d8fd..f2995817eaf8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -23,14 +23,18 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -53,6 +57,8 @@ STATISTIC(NumAttributesValidFixpoint,
"Number of abstract attributes in a valid fixpoint state");
STATISTIC(NumAttributesManifested,
"Number of abstract attributes manifested in IR");
+STATISTIC(NumAttributesFixedDueToRequiredDependences,
+ "Number of abstract attributes fixed due to required dependences");
// Some helper macros to deal with statistics tracking.
//
@@ -98,6 +104,36 @@ STATISTIC(NumAttributesManifested,
STATS_DECLTRACK(NAME, Floating, \
("Number of floating values known to be '" #NAME "'"))
+// Specialization of the operator<< for abstract attributes subclasses. This
+// disambiguates situations where multiple operators are applicable.
+namespace llvm {
+#define PIPE_OPERATOR(CLASS) \
+ raw_ostream &operator<<(raw_ostream &OS, const CLASS &AA) { \
+ return OS << static_cast<const AbstractAttribute &>(AA); \
+ }
+
+PIPE_OPERATOR(AAIsDead)
+PIPE_OPERATOR(AANoUnwind)
+PIPE_OPERATOR(AANoSync)
+PIPE_OPERATOR(AANoRecurse)
+PIPE_OPERATOR(AAWillReturn)
+PIPE_OPERATOR(AANoReturn)
+PIPE_OPERATOR(AAReturnedValues)
+PIPE_OPERATOR(AANonNull)
+PIPE_OPERATOR(AANoAlias)
+PIPE_OPERATOR(AADereferenceable)
+PIPE_OPERATOR(AAAlign)
+PIPE_OPERATOR(AANoCapture)
+PIPE_OPERATOR(AAValueSimplify)
+PIPE_OPERATOR(AANoFree)
+PIPE_OPERATOR(AAHeapToStack)
+PIPE_OPERATOR(AAReachability)
+PIPE_OPERATOR(AAMemoryBehavior)
+PIPE_OPERATOR(AAValueConstantRange)
+
+#undef PIPE_OPERATOR
+} // namespace llvm
+
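These generated overloads let abstract attributes be streamed directly, which is what the simplified debug statements later in this patch rely on; for example:

    // Before: an explicit upcast selected the printable base type.
    LLVM_DEBUG(dbgs() << static_cast<const AbstractAttribute &>(QueryingAA));
    // After: the PIPE_OPERATOR-generated overload forwards to the same printer.
    LLVM_DEBUG(dbgs() << QueryingAA);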
// TODO: Determine a good default value.
//
// In the LLVM-TS and SPEC2006, 32 seems to not induce compile time overheads
@@ -120,6 +156,10 @@ static cl::opt<bool> DisableAttributor(
cl::desc("Disable the attributor inter-procedural deduction pass."),
cl::init(true));
+static cl::opt<bool> AnnotateDeclarationCallSites(
+ "attributor-annotate-decl-cs", cl::Hidden,
+ cl::desc("Annotate call sites of function declarations."), cl::init(false));
+
static cl::opt<bool> ManifestInternal(
"attributor-manifest-internal", cl::Hidden,
cl::desc("Manifest Attributor internal string attributes."),
@@ -147,6 +187,74 @@ ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) {
}
///}
+Argument *IRPosition::getAssociatedArgument() const {
+ if (getPositionKind() == IRP_ARGUMENT)
+ return cast<Argument>(&getAnchorValue());
+
+ // Not an Argument and no argument number means this is not a call site
+ // argument, thus we cannot find a callback argument to return.
+ int ArgNo = getArgNo();
+ if (ArgNo < 0)
+ return nullptr;
+
+ // Use abstract call sites to make the connection between the call site
+ // values and the ones in callbacks. If a callback was found that makes use
+ // of the underlying call site operand, we want the corresponding callback
+ // callee argument and not the direct callee argument.
+ Optional<Argument *> CBCandidateArg;
+ SmallVector<const Use *, 4> CBUses;
+ ImmutableCallSite ICS(&getAnchorValue());
+ AbstractCallSite::getCallbackUses(ICS, CBUses);
+ for (const Use *U : CBUses) {
+ AbstractCallSite ACS(U);
+ assert(ACS && ACS.isCallbackCall());
+ if (!ACS.getCalledFunction())
+ continue;
+
+ for (unsigned u = 0, e = ACS.getNumArgOperands(); u < e; u++) {
+
+ // Test if the underlying call site operand is argument number u of the
+ // callback callee.
+ if (ACS.getCallArgOperandNo(u) != ArgNo)
+ continue;
+
+ assert(ACS.getCalledFunction()->arg_size() > u &&
+ "ACS mapped into var-args arguments!");
+ if (CBCandidateArg.hasValue()) {
+ CBCandidateArg = nullptr;
+ break;
+ }
+ CBCandidateArg = ACS.getCalledFunction()->getArg(u);
+ }
+ }
+
+ // If we found a unique callback candidate argument, return it.
+ if (CBCandidateArg.hasValue() && CBCandidateArg.getValue())
+ return CBCandidateArg.getValue();
+
+ // If no callbacks were found, or none used the underlying call site operand
+ // exclusively, use the direct callee argument if available.
+ const Function *Callee = ICS.getCalledFunction();
+ if (Callee && Callee->arg_size() > unsigned(ArgNo))
+ return Callee->getArg(ArgNo);
+
+ return nullptr;
+}
+
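A hypothetical broker/callback pair illustrates the mapping above (names and the !callback annotation are illustrative, not part of this patch):

    // broker carries !callback metadata stating that it invokes cb(data).
    void broker(void *data, void (*cb)(void *));
    void worker(void *payload);

    void user(void *payload) {
      // For the call-site operand 'payload', getAssociatedArgument() returns
      // worker's parameter (the callback callee argument) rather than
      // broker's 'data' parameter (the direct callee argument).
      broker(payload, worker);
    }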
+/// For calls (and invokes) we will only replace instruction uses to not disturb
+/// the old style call graph.
+/// TODO: Remove this once we get rid of the old PM.
+static void replaceAllInstructionUsesWith(Value &Old, Value &New) {
+ if (!isa<CallBase>(Old))
+ return Old.replaceAllUsesWith(&New);
+ SmallVector<Use *, 8> Uses;
+ for (Use &U : Old.uses())
+ if (isa<Instruction>(U.getUser()))
+ Uses.push_back(&U);
+ for (Use *U : Uses)
+ U->set(&New);
+}
+
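A minimal sketch of the intended use, with types as in this file (the helper name below is hypothetical):

    // Fold a call known to return null into the constant while leaving
    // non-instruction users, which the legacy call graph may still
    // reference, untouched.
    static void foldKnownNullCall(CallBase &CB) {
      Constant *Null = Constant::getNullValue(CB.getType());
      replaceAllInstructionUsesWith(CB, *Null);
    }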
/// Recursively visit all values that might become \p IRP at some point. This
/// will be done by looking through cast instructions, selects, phis, and calls
/// with the "returned" attribute. Once we cannot look through the value any
@@ -234,7 +342,7 @@ static bool genericValueTraversal(
// If we actually used liveness information, we have to record a dependence.
if (AnyDead)
- A.recordDependence(*LivenessAA, QueryingAA);
+ A.recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
// All values have been visited.
return true;
@@ -282,34 +390,18 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
llvm_unreachable("Expected enum or string attribute!");
}
-static const Value *getPointerOperand(const Instruction *I) {
- if (auto *LI = dyn_cast<LoadInst>(I))
- if (!LI->isVolatile())
- return LI->getPointerOperand();
- if (auto *SI = dyn_cast<StoreInst>(I))
- if (!SI->isVolatile())
- return SI->getPointerOperand();
-
- if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(I))
- if (!CXI->isVolatile())
- return CXI->getPointerOperand();
-
- if (auto *RMWI = dyn_cast<AtomicRMWInst>(I))
- if (!RMWI->isVolatile())
- return RMWI->getPointerOperand();
-
- return nullptr;
-}
-static const Value *getBasePointerOfAccessPointerOperand(const Instruction *I,
- int64_t &BytesOffset,
- const DataLayout &DL) {
- const Value *Ptr = getPointerOperand(I);
+static const Value *
+getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset,
+ const DataLayout &DL,
+ bool AllowNonInbounds = false) {
+ const Value *Ptr =
+ Attributor::getPointerOperand(I, /* AllowVolatile */ false);
if (!Ptr)
return nullptr;
return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL,
- /*AllowNonInbounds*/ false);
+ AllowNonInbounds);
}
ChangeStatus AbstractAttribute::update(Attributor &A) {
@@ -328,7 +420,7 @@ ChangeStatus AbstractAttribute::update(Attributor &A) {
}
ChangeStatus
-IRAttributeManifest::manifestAttrs(Attributor &A, IRPosition &IRP,
+IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
const ArrayRef<Attribute> &DeducedAttrs) {
Function *ScopeFn = IRP.getAssociatedFunction();
IRPosition::Kind PK = IRP.getPositionKind();
@@ -457,13 +549,20 @@ bool IRPosition::hasAttr(ArrayRef<Attribute::AttrKind> AKs,
}
void IRPosition::getAttrs(ArrayRef<Attribute::AttrKind> AKs,
- SmallVectorImpl<Attribute> &Attrs) const {
- for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this))
+ SmallVectorImpl<Attribute> &Attrs,
+ bool IgnoreSubsumingPositions) const {
+ for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) {
for (Attribute::AttrKind AK : AKs) {
const Attribute &Attr = EquivIRP.getAttr(AK);
if (Attr.getKindAsEnum() == AK)
Attrs.push_back(Attr);
}
+ // The first position returned by the SubsumingPositionIterator is
+ // always the position itself. If we ignore subsuming positions we
+ // are done after the first iteration.
+ if (IgnoreSubsumingPositions)
+ break;
+ }
}
void IRPosition::verify() {
@@ -517,38 +616,24 @@ void IRPosition::verify() {
}
namespace {
-/// Helper functions to clamp a state \p S of type \p StateType with the
+/// Helper function to clamp a state \p S of type \p StateType with the
/// information in \p R and indicate/return if \p S did change (as-in update is
/// required to be run again).
-///
-///{
template <typename StateType>
-ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R);
-
-template <>
-ChangeStatus clampStateAndIndicateChange<IntegerState>(IntegerState &S,
- const IntegerState &R) {
+ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
auto Assumed = S.getAssumed();
S ^= R;
return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
-template <>
-ChangeStatus clampStateAndIndicateChange<BooleanState>(BooleanState &S,
- const BooleanState &R) {
- return clampStateAndIndicateChange<IntegerState>(S, R);
-}
-///}
-
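With the specializations gone, every state type goes through the single template; the delegation pattern used by the call-site attributes added later in this patch looks like:

    // Clamp this position's state with the callee argument's state and
    // report whether the assumed information changed.
    return clampStateAndIndicateChange(
        getState(), static_cast<const AANoFree::StateType &>(ArgAA.getState()));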
/// Clamp the information known for all returned values of a function
/// (identified by \p QueryingAA) into \p S.
template <typename AAType, typename StateType = typename AAType::StateType>
static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA,
StateType &S) {
LLVM_DEBUG(dbgs() << "[Attributor] Clamp return value states for "
- << static_cast<const AbstractAttribute &>(QueryingAA)
- << " into " << S << "\n");
+ << QueryingAA << " into " << S << "\n");
assert((QueryingAA.getIRPosition().getPositionKind() ==
IRPosition::IRP_RETURNED ||
@@ -593,7 +678,8 @@ struct AAComposeTwoGenericDeduction
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- ChangeStatus ChangedF = F<AAType, G<AAType, Base, StateType>, StateType>::updateImpl(A);
+ ChangeStatus ChangedF =
+ F<AAType, G<AAType, Base, StateType>, StateType>::updateImpl(A);
ChangeStatus ChangedG = G<AAType, Base, StateType>::updateImpl(A);
return ChangedF | ChangedG;
}
@@ -621,8 +707,7 @@ template <typename AAType, typename StateType = typename AAType::StateType>
static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
StateType &S) {
LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for "
- << static_cast<const AbstractAttribute &>(QueryingAA)
- << " into " << S << "\n");
+ << QueryingAA << " into " << S << "\n");
assert(QueryingAA.getIRPosition().getPositionKind() ==
IRPosition::IRP_ARGUMENT &&
@@ -718,7 +803,7 @@ struct AAFromMustBeExecutedContext : public Base {
void initialize(Attributor &A) override {
Base::initialize(A);
- IRPosition &IRP = this->getIRPosition();
+ const IRPosition &IRP = this->getIRPosition();
Instruction *CtxI = IRP.getCtxI();
if (!CtxI)
@@ -739,21 +824,16 @@ struct AAFromMustBeExecutedContext : public Base {
MustBeExecutedContextExplorer &Explorer =
A.getInfoCache().getMustBeExecutedContextExplorer();
- SetVector<const Use *> NextUses;
-
- for (const Use *U : Uses) {
+ auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI);
+ for (unsigned u = 0; u < Uses.size(); ++u) {
+ const Use *U = Uses[u];
if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) {
- auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI);
- bool Found = EIt.count(UserI);
- while (!Found && ++EIt != EEnd)
- Found = EIt.getCurrentInst() == UserI;
+ bool Found = Explorer.findInContextOf(UserI, EIt, EEnd);
if (Found && Base::followUse(A, U, UserI))
for (const Use &Us : UserI->uses())
- NextUses.insert(&Us);
+ Uses.insert(&Us);
}
}
- for (const Use *U : NextUses)
- Uses.insert(U);
return BeforeState == S ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
}
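The rewrite replaces the two-phase NextUses set with in-place growth of Uses: SetVector indexing stays stable when elements are appended, so uses discovered mid-loop are visited in the same pass. A sketch of the idiom (worklist seeding assumed):

    SmallSetVector<const Use *, 8> Worklist; // seeded with the initial uses
    for (unsigned u = 0; u < Worklist.size(); ++u) {
      const Use *U = Worklist[u];
      for (const Use &Next : U->getUser()->uses())
        Worklist.insert(&Next); // deduplicated; appended only if new
    }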
@@ -994,13 +1074,15 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
auto ReplaceCallSiteUsersWith = [](CallBase &CB, Constant &C) {
if (CB.getNumUses() == 0 || CB.isMustTailCall())
return ChangeStatus::UNCHANGED;
- CB.replaceAllUsesWith(&C);
+ replaceAllInstructionUsesWith(CB, C);
return ChangeStatus::CHANGED;
};
// If the assumed unique return value is an argument, annotate it.
if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
- getIRPosition() = IRPosition::argument(*UniqueRVArg);
+ // TODO: This should be handled differently!
+ this->AnchorVal = UniqueRVArg;
+ this->KindOrArgNo = UniqueRVArg->getArgNo();
Changed = IRAttribute::manifest(A);
} else if (auto *RVC = dyn_cast<Constant>(UniqueRV.getValue())) {
// We can replace the returned value with the unique returned constant.
@@ -1010,14 +1092,18 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
if (CallBase *CB = dyn_cast<CallBase>(U.getUser()))
if (CB->isCallee(&U)) {
Constant *RVCCast =
- ConstantExpr::getTruncOrBitCast(RVC, CB->getType());
+ CB->getType() == RVC->getType()
+ ? RVC
+ : ConstantExpr::getTruncOrBitCast(RVC, CB->getType());
Changed = ReplaceCallSiteUsersWith(*CB, *RVCCast) | Changed;
}
} else {
assert(isa<CallBase>(AnchorValue) &&
"Expcected a function or call base anchor!");
Constant *RVCCast =
- ConstantExpr::getTruncOrBitCast(RVC, AnchorValue.getType());
+ AnchorValue.getType() == RVC->getType()
+ ? RVC
+ : ConstantExpr::getTruncOrBitCast(RVC, AnchorValue.getType());
Changed = ReplaceCallSiteUsersWith(cast<CallBase>(AnchorValue), *RVCCast);
}
if (Changed == ChangeStatus::CHANGED)
@@ -1157,8 +1243,7 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
const auto &RetValAA = A.getAAFor<AAReturnedValues>(
*this, IRPosition::function(*CB->getCalledFunction()));
LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: "
- << static_cast<const AbstractAttribute &>(RetValAA)
- << "\n");
+ << RetValAA << "\n");
// Skip dead ends, thus if we do not know anything about the returned
// call we mark it as unresolved and it will stay that way.
@@ -1393,7 +1478,7 @@ ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
auto CheckRWInstForNoSync = [&](Instruction &I) {
/// We are looking for volatile instructions or Non-Relaxed atomics.
- /// FIXME: We should ipmrove the handling of intrinsics.
+ /// FIXME: We should improve the handling of intrinsics.
if (isa<IntrinsicInst>(&I) && isNoSyncIntrinsic(&I))
return true;
@@ -1532,6 +1617,115 @@ struct AANoFreeCallSite final : AANoFreeImpl {
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); }
};
+/// NoFree attribute for floating values.
+struct AANoFreeFloating : AANoFreeImpl {
+ AANoFreeFloating(const IRPosition &IRP) : AANoFreeImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(nofree) }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ const IRPosition &IRP = getIRPosition();
+
+ const auto &NoFreeAA =
+ A.getAAFor<AANoFree>(*this, IRPosition::function_scope(IRP));
+ if (NoFreeAA.isAssumedNoFree())
+ return ChangeStatus::UNCHANGED;
+
+ Value &AssociatedValue = getIRPosition().getAssociatedValue();
+ auto Pred = [&](const Use &U, bool &Follow) -> bool {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ if (auto *CB = dyn_cast<CallBase>(UserI)) {
+ if (CB->isBundleOperand(&U))
+ return false;
+ if (!CB->isArgOperand(&U))
+ return true;
+ unsigned ArgNo = CB->getArgOperandNo(&U);
+
+ const auto &NoFreeArg = A.getAAFor<AANoFree>(
+ *this, IRPosition::callsite_argument(*CB, ArgNo));
+ return NoFreeArg.isAssumedNoFree();
+ }
+
+ if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
+ isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
+ Follow = true;
+ return true;
+ }
+
+ // Unknown user.
+ return false;
+ };
+ if (!A.checkForAllUses(Pred, *this, AssociatedValue))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+};
+
+/// NoFree attribute for a call site argument.
+struct AANoFreeArgument final : AANoFreeFloating {
+ AANoFreeArgument(const IRPosition &IRP) : AANoFreeFloating(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nofree) }
+};
+
+/// NoFree attribute for call site arguments.
+struct AANoFreeCallSiteArgument final : AANoFreeFloating {
+ AANoFreeCallSiteArgument(const IRPosition &IRP) : AANoFreeFloating(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg)
+ return indicatePessimisticFixpoint();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AANoFree::StateType &>(ArgAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nofree) }
+};
+
+/// NoFree attribute for function return value.
+struct AANoFreeReturned final : AANoFreeFloating {
+ AANoFreeReturned(const IRPosition &IRP) : AANoFreeFloating(IRP) {
+ llvm_unreachable("NoFree is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ llvm_unreachable("NoFree is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("NoFree is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// NoFree attribute deduction for a call site return value.
+struct AANoFreeCallSiteReturned final : AANoFreeFloating {
+ AANoFreeCallSiteReturned(const IRPosition &IRP) : AANoFreeFloating(IRP) {}
+
+ ChangeStatus manifest(Attributor &A) override {
+ return ChangeStatus::UNCHANGED;
+ }
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nofree) }
+};
+
/// ------------------------ NonNull Argument Attribute ------------------------
static int64_t getKnownNonNullAndDerefBytesForUse(
Attributor &A, AbstractAttribute &QueryingAA, Value &AssociatedValue,
@@ -1558,30 +1752,49 @@ static int64_t getKnownNonNullAndDerefBytesForUse(
unsigned ArgNo = ICS.getArgumentNo(U);
IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo);
- auto &DerefAA = A.getAAFor<AADereferenceable>(QueryingAA, IRP);
+ // As long as we only use known information there is no need to track
+ // dependences here.
+ auto &DerefAA = A.getAAFor<AADereferenceable>(QueryingAA, IRP,
+ /* TrackDependence */ false);
IsNonNull |= DerefAA.isKnownNonNull();
return DerefAA.getKnownDereferenceableBytes();
}
+ // We need to follow common pointer manipulation uses to the accesses they
+ // feed into. For now we avoid looking through things we do not like,
+ // e.g., non-inbounds GEPs.
+ if (isa<CastInst>(I)) {
+ TrackUse = true;
+ return 0;
+ }
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GEP->hasAllConstantIndices()) {
+ TrackUse = true;
+ return 0;
+ }
+
int64_t Offset;
if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) {
- if (Base == &AssociatedValue && getPointerOperand(I) == UseV) {
+ if (Base == &AssociatedValue &&
+ Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
int64_t DerefBytes =
- Offset + (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType());
+ (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()) + Offset;
IsNonNull |= !NullPointerIsDefined;
- return DerefBytes;
+ return std::max(int64_t(0), DerefBytes);
}
}
- if (const Value *Base =
- GetPointerBaseWithConstantOffset(UseV, Offset, DL,
- /*AllowNonInbounds*/ false)) {
- auto &DerefAA =
- A.getAAFor<AADereferenceable>(QueryingAA, IRPosition::value(*Base));
- IsNonNull |= (!NullPointerIsDefined && DerefAA.isKnownNonNull());
- IsNonNull |= (!NullPointerIsDefined && (Offset != 0));
- int64_t DerefBytes = DerefAA.getKnownDereferenceableBytes();
- return std::max(int64_t(0), DerefBytes - Offset);
+
+ // Corner case when the offset is 0.
+ if (const Value *Base = getBasePointerOfAccessPointerOperand(
+ I, Offset, DL, /*AllowNonInbounds*/ true)) {
+ if (Offset == 0 && Base == &AssociatedValue &&
+ Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
+ int64_t DerefBytes =
+ (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType());
+ IsNonNull |= !NullPointerIsDefined;
+ return std::max(int64_t(0), DerefBytes);
+ }
}
return 0;
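A worked instance of the byte accounting above (the IR in the comments is illustrative):

    //   %q = getelementptr inbounds i8, i8* %p, i64 4
    //   %v = load i8, i8* %q
    // Base == %p and Offset == 4 with a 1-byte store size, so %p is known
    // dereferenceable(5); negative offsets are clamped to 0 by the std::max.
    int64_t Offset = 4;    // constant GEP offset
    int64_t StoreSize = 1; // DL.getTypeStoreSize(i8)
    int64_t DerefBytes = std::max(int64_t(0), StoreSize + Offset); // == 5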
@@ -1599,6 +1812,8 @@ struct AANonNullImpl : AANonNull {
if (!NullIsDefined &&
hasAttr({Attribute::NonNull, Attribute::Dereferenceable}))
indicateOptimisticFixpoint();
+ else if (isa<ConstantPointerNull>(getAssociatedValue()))
+ indicatePessimisticFixpoint();
else
AANonNull::initialize(A);
}
@@ -1609,7 +1824,7 @@ struct AANonNullImpl : AANonNull {
bool TrackUse = false;
getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I,
IsNonNull, TrackUse);
- takeKnownMaximum(IsNonNull);
+ setKnown(IsNonNull);
return TrackUse;
}
@@ -1629,24 +1844,6 @@ struct AANonNullFloating
using Base = AAFromMustBeExecutedContext<AANonNull, AANonNullImpl>;
AANonNullFloating(const IRPosition &IRP) : Base(IRP) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- Base::initialize(A);
-
- if (isAtFixpoint())
- return;
-
- const IRPosition &IRP = getIRPosition();
- const Value &V = IRP.getAssociatedValue();
- const DataLayout &DL = A.getDataLayout();
-
- // TODO: This context sensitive query should be removed once we can do
- // context sensitive queries in the genericValueTraversal below.
- if (isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, IRP.getCtxI(),
- /* TODO: DT */ nullptr))
- indicateOptimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Change = Base::updateImpl(A);
@@ -1654,20 +1851,24 @@ struct AANonNullFloating
return Change;
if (!NullIsDefined) {
- const auto &DerefAA = A.getAAFor<AADereferenceable>(*this, getIRPosition());
+ const auto &DerefAA =
+ A.getAAFor<AADereferenceable>(*this, getIRPosition());
if (DerefAA.getAssumedDereferenceableBytes())
return Change;
}
const DataLayout &DL = A.getDataLayout();
- auto VisitValueCB = [&](Value &V, AAAlign::StateType &T,
+ DominatorTree *DT = nullptr;
+ InformationCache &InfoCache = A.getInfoCache();
+ if (const Function *Fn = getAnchorScope())
+ DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Fn);
+
+ auto VisitValueCB = [&](Value &V, AANonNull::StateType &T,
bool Stripped) -> bool {
const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V));
if (!Stripped && this == &AA) {
- if (!isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr,
- /* CtxI */ getCtxI(),
- /* TODO: DT */ nullptr))
+ if (!isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, getCtxI(), DT))
T.indicatePessimisticFixpoint();
} else {
// Use abstract attribute information.
@@ -1814,6 +2015,208 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); }
};
+/// -------------------- Undefined-Behavior Attributes ------------------------
+
+struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
+ AAUndefinedBehaviorImpl(const IRPosition &IRP) : AAUndefinedBehavior(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...). Inspects memory accesses made
+ /// through a pointer as well as conditional branches.
+ ChangeStatus updateImpl(Attributor &A) override {
+ const size_t UBPrevSize = KnownUBInsts.size();
+ const size_t NoUBPrevSize = AssumedNoUBInsts.size();
+
+ auto InspectMemAccessInstForUB = [&](Instruction &I) {
+ // Skip instructions that are already saved.
+ if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
+ return true;
+
+ // If we reach here, we know we have an instruction that accesses memory
+ // through a pointer operand, which getPointerOperand() should give us.
+ const Value *PtrOp =
+ Attributor::getPointerOperand(&I, /* AllowVolatile */ true);
+ assert(PtrOp &&
+ "Expected pointer operand of memory accessing instruction");
+
+ // A memory access through a pointer is considered UB only if the
+ // pointer has a constant null value.
+ // TODO: Expand this to values other than constants.
+ if (!isa<ConstantPointerNull>(PtrOp)) {
+ AssumedNoUBInsts.insert(&I);
+ return true;
+ }
+ const Type *PtrTy = PtrOp->getType();
+
+ // Because we only consider instructions inside functions,
+ // assume that a parent function exists.
+ const Function *F = I.getFunction();
+
+ // A memory access using a constant null pointer is only considered UB
+ // if the null pointer is _not_ defined for the target platform.
+ if (llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()))
+ AssumedNoUBInsts.insert(&I);
+ else
+ KnownUBInsts.insert(&I);
+ return true;
+ };
+
+ auto InspectBrInstForUB = [&](Instruction &I) {
+ // A conditional branch instruction is considered UB if it has `undef`
+ // condition.
+
+ // Skip instructions that are already saved.
+ if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
+ return true;
+
+ // We know we have a branch instruction.
+ auto BrInst = cast<BranchInst>(&I);
+
+ // Unconditional branches are never considered UB.
+ if (BrInst->isUnconditional())
+ return true;
+
+ // Either we stopped and the appropriate action was taken,
+ // or we got back a simplified value to continue.
+ Optional<Value *> SimplifiedCond =
+ stopOnUndefOrAssumed(A, BrInst->getCondition(), BrInst);
+ if (!SimplifiedCond.hasValue())
+ return true;
+ AssumedNoUBInsts.insert(&I);
+ return true;
+ };
+
+ A.checkForAllInstructions(InspectMemAccessInstForUB, *this,
+ {Instruction::Load, Instruction::Store,
+ Instruction::AtomicCmpXchg,
+ Instruction::AtomicRMW});
+ A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br});
+ if (NoUBPrevSize != AssumedNoUBInsts.size() ||
+ UBPrevSize != KnownUBInsts.size())
+ return ChangeStatus::CHANGED;
+ return ChangeStatus::UNCHANGED;
+ }
+
+ bool isKnownToCauseUB(Instruction *I) const override {
+ return KnownUBInsts.count(I);
+ }
+
+ bool isAssumedToCauseUB(Instruction *I) const override {
+ // In simple words, if an instruction is not in the set of instructions
+ // assumed to _not_ cause UB, then it is assumed to cause UB (that
+ // includes those in the KnownUBInsts set). The rest is boilerplate to
+ // ensure that it is one of the instructions we test for UB.
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::AtomicRMW:
+ return !AssumedNoUBInsts.count(I);
+ case Instruction::Br: {
+ auto BrInst = cast<BranchInst>(I);
+ if (BrInst->isUnconditional())
+ return false;
+ return !AssumedNoUBInsts.count(I);
+ }
+ default:
+ return false;
+ }
+ return false;
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ if (KnownUBInsts.empty())
+ return ChangeStatus::UNCHANGED;
+ for (Instruction *I : KnownUBInsts)
+ A.changeToUnreachableAfterManifest(I);
+ return ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ return getAssumed() ? "undefined-behavior" : "no-ub";
+ }
+
+ /// Note: The correctness of this analysis depends on the fact that the
+ /// following 2 sets will stop changing after some point.
+ /// "Change" here means that their size changes.
+ /// The size of each set is monotonically increasing
+ /// (we only add items to them) and it is upper bounded by the number of
+ /// instructions in the processed function (we can never save more
+ /// elements in either set than this number). Hence, at some point,
+ /// they will stop increasing.
+ /// Consequently, at some point, both sets will have stopped
+ /// changing, effectively making the analysis reach a fixpoint.
+
+ /// Note: These 2 sets are disjoint and an instruction can be considered
+ /// one of 3 things:
+ /// 1) Known to cause UB (AAUndefinedBehavior could prove it) and put it in
+ /// the KnownUBInsts set.
+ /// 2) Assumed to cause UB (in every updateImpl, AAUndefinedBehavior
+ /// has a reason to assume it).
+ /// 3) Assumed to not cause UB. Every other instruction - AAUndefinedBehavior
+ /// could not find a reason to assume or prove that it can cause UB,
+ /// hence it assumes it doesn't. We have a set for these instructions
+ /// so that we don't reprocess them in every update.
+ /// Note however that instructions in this set may cause UB.
+
+protected:
+ /// A set of all live instructions _known_ to cause UB.
+ SmallPtrSet<Instruction *, 8> KnownUBInsts;
+
+private:
+ /// A set of all the (live) instructions that are assumed to _not_ cause UB.
+ SmallPtrSet<Instruction *, 8> AssumedNoUBInsts;
+
+ // Should be called during updates: if we're processing an instruction
+ // \p I that depends on a value \p V, one of the following has to happen:
+ // - If the value is assumed, then stop.
+ // - If the value is known but undef, then consider it UB.
+ // - Otherwise, do specific processing with the simplified value.
+ // We return None in the first 2 cases to signify that an appropriate
+ // action was taken and the caller should stop.
+ // Otherwise, we return the simplified value that the caller should
+ // use for specific processing.
+ Optional<Value *> stopOnUndefOrAssumed(Attributor &A, const Value *V,
+ Instruction *I) {
+ const auto &ValueSimplifyAA =
+ A.getAAFor<AAValueSimplify>(*this, IRPosition::value(*V));
+ Optional<Value *> SimplifiedV =
+ ValueSimplifyAA.getAssumedSimplifiedValue(A);
+ if (!ValueSimplifyAA.isKnown()) {
+ // Don't depend on assumed values.
+ return llvm::None;
+ }
+ if (!SimplifiedV.hasValue()) {
+ // If it is known (which we tested above) but it doesn't have a value,
+ // then we can assume `undef` and hence the instruction is UB.
+ KnownUBInsts.insert(I);
+ return llvm::None;
+ }
+ Value *Val = SimplifiedV.getValue();
+ if (isa<UndefValue>(Val)) {
+ KnownUBInsts.insert(I);
+ return llvm::None;
+ }
+ return Val;
+ }
+};
+
+struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl {
+ AAUndefinedBehaviorFunction(const IRPosition &IRP)
+ : AAUndefinedBehaviorImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECL(UndefinedBehaviorInstruction, Instruction,
+ "Number of instructions known to have UB");
+ BUILD_STAT_NAME(UndefinedBehaviorInstruction, Instruction) +=
+ KnownUBInsts.size();
+ }
+};
+
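A minimal sketch of how a client queries the deduced information (Attributor A, querying attribute QueryingAA, function F, and instruction I assumed in scope):

    const auto &UBAA =
        A.getAAFor<AAUndefinedBehavior>(QueryingAA, IRPosition::function(F));
    if (UBAA.isKnownToCauseUB(&I))
      ; // I will be replaced by 'unreachable' when the attribute manifests.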
/// ------------------------ Will-Return Attributes ----------------------------
// Helper function that checks whether a function has any cycle.
@@ -1914,6 +2317,32 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); }
};
+/// -------------------AAReachability Attribute--------------------------
+
+struct AAReachabilityImpl : AAReachability {
+ AAReachabilityImpl(const IRPosition &IRP) : AAReachability(IRP) {}
+
+ const std::string getAsStr() const override {
+ // TODO: Return the number of reachable queries.
+ return "reachable";
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override { indicatePessimisticFixpoint(); }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ return indicatePessimisticFixpoint();
+ }
+};
+
+struct AAReachabilityFunction final : public AAReachabilityImpl {
+ AAReachabilityFunction(const IRPosition &IRP) : AAReachabilityImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(reachable); }
+};
+
/// ------------------------ NoAlias Argument Attribute ------------------------
struct AANoAliasImpl : AANoAlias {
@@ -1954,8 +2383,43 @@ struct AANoAliasFloating final : AANoAliasImpl {
/// NoAlias attribute for an argument.
struct AANoAliasArgument final
: AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> {
- AANoAliasArgument(const IRPosition &IRP)
- : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>(IRP) {}
+ using Base = AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>;
+ AANoAliasArgument(const IRPosition &IRP) : Base(IRP) {}
+
+ /// See AbstractAttribute::update(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // We have to make sure no-alias on the argument does not break
+ // synchronization when this is a callback argument, see also [1] below.
+ // If synchronization cannot be affected, we delegate to the base updateImpl
+ // function, otherwise we give up for now.
+
+ // If the function is no-sync, no-alias cannot break synchronization.
+ const auto &NoSyncAA = A.getAAFor<AANoSync>(
+ *this, IRPosition::function_scope(getIRPosition()));
+ if (NoSyncAA.isAssumedNoSync())
+ return Base::updateImpl(A);
+
+ // If the argument is read-only, no-alias cannot break synchronization.
+ const auto &MemBehaviorAA =
+ A.getAAFor<AAMemoryBehavior>(*this, getIRPosition());
+ if (MemBehaviorAA.isAssumedReadOnly())
+ return Base::updateImpl(A);
+
+ // If the argument is never passed through callbacks, no-alias cannot break
+ // synchronization.
+ if (A.checkForAllCallSites(
+ [](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this,
+ true))
+ return Base::updateImpl(A);
+
+ // TODO: add no-alias but make sure it doesn't break synchronization by
+ // introducing fake uses. See:
+ // [1] Compiler Optimizations for OpenMP, J. Doerfert and H. Finkel,
+ // International Workshop on OpenMP 2018,
+ // http://compilers.cs.uni-saarland.de/people/doerfert/par_opt18.pdf
+
+ return indicatePessimisticFixpoint();
+ }
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) }
@@ -1987,6 +2451,8 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
// (i) Check whether noalias holds in the definition.
auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP);
+ LLVM_DEBUG(dbgs() << "[Attributor][AANoAliasCSArg] check definition: " << V
+ << " :: " << NoAliasAA << "\n");
if (!NoAliasAA.isAssumedNoAlias())
return indicatePessimisticFixpoint();
@@ -2008,6 +2474,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
// (iii) Check there is no other pointer argument which could alias with the
// value.
+ // TODO: AbstractCallSite
ImmutableCallSite ICS(&getAnchorValue());
for (unsigned i = 0; i < ICS.getNumArgOperands(); i++) {
if (getArgNo() == (int)i)
@@ -2018,7 +2485,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
if (const Function *F = getAnchorScope()) {
if (AAResults *AAR = A.getInfoCache().getAAResultsForFunction(*F)) {
- bool IsAliasing = AAR->isNoAlias(&getAssociatedValue(), ArgOp);
+ bool IsAliasing = !AAR->isNoAlias(&getAssociatedValue(), ArgOp);
LLVM_DEBUG(dbgs()
<< "[Attributor][NoAliasCSArg] Check alias between "
"callsite arguments "
@@ -2026,7 +2493,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
<< getAssociatedValue() << " " << *ArgOp << " => "
<< (IsAliasing ? "" : "no-") << "alias \n");
- if (IsAliasing)
+ if (!IsAliasing)
continue;
}
}
@@ -2108,42 +2575,229 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
/// -------------------AAIsDead Function Attribute-----------------------
-struct AAIsDeadImpl : public AAIsDead {
- AAIsDeadImpl(const IRPosition &IRP) : AAIsDead(IRP) {}
+struct AAIsDeadValueImpl : public AAIsDead {
+ AAIsDeadValueImpl(const IRPosition &IRP) : AAIsDead(IRP) {}
+
+ /// See AAIsDead::isAssumedDead().
+ bool isAssumedDead() const override { return getAssumed(); }
+
+ /// See AAIsDead::isAssumedDead(BasicBlock *).
+ bool isAssumedDead(const BasicBlock *BB) const override { return false; }
+
+ /// See AAIsDead::isKnownDead(BasicBlock *).
+ bool isKnownDead(const BasicBlock *BB) const override { return false; }
+
+ /// See AAIsDead::isAssumedDead(Instruction *I).
+ bool isAssumedDead(const Instruction *I) const override {
+ return I == getCtxI() && isAssumedDead();
+ }
+
+ /// See AAIsDead::isKnownDead(Instruction *I).
+ bool isKnownDead(const Instruction *I) const override {
+ return I == getCtxI() && getKnown();
+ }
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return isAssumedDead() ? "assumed-dead" : "assumed-live";
+ }
+};
+
+struct AAIsDeadFloating : public AAIsDeadValueImpl {
+ AAIsDeadFloating(const IRPosition &IRP) : AAIsDeadValueImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- const Function *F = getAssociatedFunction();
- if (F && !F->isDeclaration())
- exploreFromEntry(A, F);
+ if (Instruction *I = dyn_cast<Instruction>(&getAssociatedValue()))
+ if (!wouldInstructionBeTriviallyDead(I))
+ indicatePessimisticFixpoint();
+ if (isa<UndefValue>(getAssociatedValue()))
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto UsePred = [&](const Use &U, bool &Follow) {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ if (CallSite CS = CallSite(UserI)) {
+ if (!CS.isArgOperand(&U))
+ return false;
+ const IRPosition &CSArgPos =
+ IRPosition::callsite_argument(CS, CS.getArgumentNo(&U));
+ const auto &CSArgIsDead = A.getAAFor<AAIsDead>(*this, CSArgPos);
+ return CSArgIsDead.isAssumedDead();
+ }
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(UserI)) {
+ const IRPosition &RetPos = IRPosition::returned(*RI->getFunction());
+ const auto &RetIsDeadAA = A.getAAFor<AAIsDead>(*this, RetPos);
+ return RetIsDeadAA.isAssumedDead();
+ }
+ Follow = true;
+ return wouldInstructionBeTriviallyDead(UserI);
+ };
+
+ if (!A.checkForAllUses(UsePred, *this, getAssociatedValue()))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ Value &V = getAssociatedValue();
+ if (auto *I = dyn_cast<Instruction>(&V))
+ if (wouldInstructionBeTriviallyDead(I)) {
+ A.deleteAfterManifest(*I);
+ return ChangeStatus::CHANGED;
+ }
+
+ if (V.use_empty())
+ return ChangeStatus::UNCHANGED;
+
+ UndefValue &UV = *UndefValue::get(V.getType());
+ bool AnyChange = A.changeValueAfterManifest(V, UV);
+ return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
- void exploreFromEntry(Attributor &A, const Function *F) {
- ToBeExploredPaths.insert(&(F->getEntryBlock().front()));
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(IsDead)
+ }
+};
- for (size_t i = 0; i < ToBeExploredPaths.size(); ++i)
- if (const Instruction *NextNoReturnI =
- findNextNoReturn(A, ToBeExploredPaths[i]))
- NoReturnCalls.insert(NextNoReturnI);
+struct AAIsDeadArgument : public AAIsDeadFloating {
+ AAIsDeadArgument(const IRPosition &IRP) : AAIsDeadFloating(IRP) {}
- // Mark the block live after we looked for no-return instructions.
- assumeLive(A, F->getEntryBlock());
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (!getAssociatedFunction()->hasExactDefinition())
+ indicatePessimisticFixpoint();
}
- /// Find the next assumed noreturn instruction in the block of \p I starting
- /// from, thus including, \p I.
- ///
- /// The caller is responsible to monitor the ToBeExploredPaths set as new
- /// instructions discovered in other basic block will be placed in there.
- ///
- /// \returns The next assumed noreturn instructions in the block of \p I
- /// starting from, thus including, \p I.
- const Instruction *findNextNoReturn(Attributor &A, const Instruction *I);
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = AAIsDeadFloating::manifest(A);
+ Argument &Arg = *getAssociatedArgument();
+ if (Arg.getParent()->hasLocalLinkage())
+ if (A.registerFunctionSignatureRewrite(
+ Arg, /* ReplacementTypes */ {},
+ Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{},
+ Attributor::ArgumentReplacementInfo::ACSRepairCBTy{}))
+ return ChangeStatus::CHANGED;
+ return Changed;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(IsDead) }
+};
+
+struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
+ AAIsDeadCallSiteArgument(const IRPosition &IRP) : AAIsDeadValueImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (isa<UndefValue>(getAssociatedValue()))
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg)
+ return indicatePessimisticFixpoint();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AAIsDead::StateType &>(ArgAA.getState()));
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ CallBase &CB = cast<CallBase>(getAnchorValue());
+ Use &U = CB.getArgOperandUse(getArgNo());
+ assert(!isa<UndefValue>(U.get()) &&
+ "Expected undef values to be filtered out!");
+ UndefValue &UV = *UndefValue::get(U->getType());
+ if (A.changeUseAfterManifest(U, UV))
+ return ChangeStatus::CHANGED;
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(IsDead) }
+};
+
+struct AAIsDeadReturned : public AAIsDeadValueImpl {
+ AAIsDeadReturned(const IRPosition &IRP) : AAIsDeadValueImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+
+ auto PredForCallSite = [&](AbstractCallSite ACS) {
+ if (ACS.isCallbackCall())
+ return false;
+ const IRPosition &CSRetPos =
+ IRPosition::callsite_returned(ACS.getCallSite());
+ const auto &RetIsDeadAA = A.getAAFor<AAIsDead>(*this, CSRetPos);
+ return RetIsDeadAA.isAssumedDead();
+ };
+
+ if (!A.checkForAllCallSites(PredForCallSite, *this, true))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // TODO: Rewrite the signature to return void?
+ bool AnyChange = false;
+ UndefValue &UV = *UndefValue::get(getAssociatedFunction()->getReturnType());
+ auto RetInstPred = [&](Instruction &I) {
+ ReturnInst &RI = cast<ReturnInst>(I);
+ if (!isa<UndefValue>(RI.getReturnValue()))
+ AnyChange |= A.changeUseAfterManifest(RI.getOperandUse(0), UV);
+ return true;
+ };
+ A.checkForAllInstructions(RetInstPred, *this, {Instruction::Ret});
+ return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(IsDead) }
+};
+
+struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
+ AAIsDeadCallSiteReturned(const IRPosition &IRP) : AAIsDeadFloating(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(IsDead) }
+};
+
+struct AAIsDeadFunction : public AAIsDead {
+ AAIsDeadFunction(const IRPosition &IRP) : AAIsDead(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ const Function *F = getAssociatedFunction();
+ if (F && !F->isDeclaration()) {
+ ToBeExploredFrom.insert(&F->getEntryBlock().front());
+ assumeLive(A, F->getEntryBlock());
+ }
+ }
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" +
- std::to_string(getAssociatedFunction()->size()) + "][#NRI " +
- std::to_string(NoReturnCalls.size()) + "]";
+ std::to_string(getAssociatedFunction()->size()) + "][#TBEP " +
+ std::to_string(ToBeExploredFrom.size()) + "][#KDE " +
+ std::to_string(KnownDeadEnds.size()) + "]";
}
/// See AbstractAttribute::manifest(...).
@@ -2164,73 +2818,22 @@ struct AAIsDeadImpl : public AAIsDead {
// function allows catching asynchronous exceptions.
bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F);
- for (const Instruction *NRC : NoReturnCalls) {
- Instruction *I = const_cast<Instruction *>(NRC);
- BasicBlock *BB = I->getParent();
- Instruction *SplitPos = I->getNextNode();
- // TODO: mark stuff before unreachable instructions as dead.
-
- if (auto *II = dyn_cast<InvokeInst>(I)) {
- // If we keep the invoke the split position is at the beginning of the
- // normal desitination block (it invokes a noreturn function after all).
- BasicBlock *NormalDestBB = II->getNormalDest();
- SplitPos = &NormalDestBB->front();
-
- /// Invoke is replaced with a call and unreachable is placed after it if
- /// the callee is nounwind and noreturn. Otherwise, we keep the invoke
- /// and only place an unreachable in the normal successor.
- if (Invoke2CallAllowed) {
- if (II->getCalledFunction()) {
- const IRPosition &IPos = IRPosition::callsite_function(*II);
- const auto &AANoUnw = A.getAAFor<AANoUnwind>(*this, IPos);
- if (AANoUnw.isAssumedNoUnwind()) {
- LLVM_DEBUG(dbgs()
- << "[AAIsDead] Replace invoke with call inst\n");
- // We do not need an invoke (II) but instead want a call followed
- // by an unreachable. However, we do not remove II as other
- // abstract attributes might have it cached as part of their
- // results. Given that we modify the CFG anyway, we simply keep II
- // around but in a new dead block. To avoid II being live through
- // a different edge we have to ensure the block we place it in is
- // only reached from the current block of II and then not reached
- // at all when we insert the unreachable.
- SplitBlockPredecessors(NormalDestBB, {BB}, ".i2c");
- CallInst *CI = createCallMatchingInvoke(II);
- CI->insertBefore(II);
- CI->takeName(II);
- II->replaceAllUsesWith(CI);
- SplitPos = CI->getNextNode();
- }
- }
- }
-
- if (SplitPos == &NormalDestBB->front()) {
- // If this is an invoke of a noreturn function the edge to the normal
- // destination block is dead but not necessarily the block itself.
- // TODO: We need to move to an edge based system during deduction and
- // also manifest.
- assert(!NormalDestBB->isLandingPad() &&
- "Expected the normal destination not to be a landingpad!");
- if (NormalDestBB->getUniquePredecessor() == BB) {
- assumeLive(A, *NormalDestBB);
- } else {
- BasicBlock *SplitBB =
- SplitBlockPredecessors(NormalDestBB, {BB}, ".dead");
- // The split block is live even if it contains only an unreachable
- // instruction at the end.
- assumeLive(A, *SplitBB);
- SplitPos = SplitBB->getTerminator();
- HasChanged = ChangeStatus::CHANGED;
- }
- }
- }
-
- if (isa_and_nonnull<UnreachableInst>(SplitPos))
+ KnownDeadEnds.set_union(ToBeExploredFrom);
+ for (const Instruction *DeadEndI : KnownDeadEnds) {
+ auto *CB = dyn_cast<CallBase>(DeadEndI);
+ if (!CB)
+ continue;
+ const auto &NoReturnAA =
+ A.getAAFor<AANoReturn>(*this, IRPosition::callsite_function(*CB));
+ bool MayReturn = !NoReturnAA.isAssumedNoReturn();
+ if (MayReturn && (!Invoke2CallAllowed || !isa<InvokeInst>(CB)))
continue;
- BB = SplitPos->getParent();
- SplitBlock(BB, SplitPos);
- changeToUnreachable(BB->getTerminator(), /* UseLLVMTrap */ false);
+ if (auto *II = dyn_cast<InvokeInst>(DeadEndI))
+ A.registerInvokeWithDeadSuccessor(const_cast<InvokeInst &>(*II));
+ else
+ A.changeToUnreachableAfterManifest(
+ const_cast<Instruction *>(DeadEndI->getNextNode()));
HasChanged = ChangeStatus::CHANGED;
}
@@ -2244,6 +2847,12 @@ struct AAIsDeadImpl : public AAIsDead {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+
+ /// Returns true if the function is assumed dead.
+ bool isAssumedDead() const override { return false; }
+
/// See AAIsDead::isAssumedDead(BasicBlock *).
bool isAssumedDead(const BasicBlock *BB) const override {
assert(BB->getParent() == getAssociatedFunction() &&
@@ -2272,8 +2881,14 @@ struct AAIsDeadImpl : public AAIsDead {
if (!AssumedLiveBlocks.count(I->getParent()))
return true;
- // If it is not after a noreturn call, than it is live.
- return isAfterNoReturn(I);
+ // If it is not after a liveness barrier, it is live.
+ const Instruction *PrevI = I->getPrevNode();
+ while (PrevI) {
+ if (KnownDeadEnds.count(PrevI) || ToBeExploredFrom.count(PrevI))
+ return true;
+ PrevI = PrevI->getPrevNode();
+ }
+ return false;
}
/// See AAIsDead::isKnownDead(Instruction *I).
@@ -2281,9 +2896,6 @@ struct AAIsDeadImpl : public AAIsDead {
return getKnown() && isAssumedDead(I);
}
- /// Check if instruction is after noreturn call, in other words, assumed dead.
- bool isAfterNoReturn(const Instruction *I) const;
-
/// Determine if \p F might catch asynchronous exceptions.
static bool mayCatchAsynchronousExceptions(const Function &F) {
return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F);
@@ -2291,9 +2903,9 @@ struct AAIsDeadImpl : public AAIsDead {
/// Assume \p BB is (partially) live now and indicate to the Attributor \p A
/// that internal functions called from \p BB should now be looked at.
- void assumeLive(Attributor &A, const BasicBlock &BB) {
+ bool assumeLive(Attributor &A, const BasicBlock &BB) {
if (!AssumedLiveBlocks.insert(&BB).second)
- return;
+ return false;
// We assume that all of BB is (probably) live now and if there are calls to
// internal functions we will assume that those are now live as well. This
@@ -2304,140 +2916,219 @@ struct AAIsDeadImpl : public AAIsDead {
if (const Function *F = ICS.getCalledFunction())
if (F->hasLocalLinkage())
A.markLiveInternalFunction(*F);
+ return true;
}
- /// Collection of to be explored paths.
- SmallSetVector<const Instruction *, 8> ToBeExploredPaths;
+ /// Collection of instructions that need to be explored again, e.g., because
+ /// we assumed they do not transfer control to (one of their) successors.
+ SmallSetVector<const Instruction *, 8> ToBeExploredFrom;
+
+ /// Collection of instructions that are known to not transfer control.
+ SmallSetVector<const Instruction *, 8> KnownDeadEnds;
/// Collection of all assumed live BasicBlocks.
DenseSet<const BasicBlock *> AssumedLiveBlocks;
-
- /// Collection of calls with noreturn attribute, assumed or knwon.
- SmallSetVector<const Instruction *, 4> NoReturnCalls;
};
-struct AAIsDeadFunction final : public AAIsDeadImpl {
- AAIsDeadFunction(const IRPosition &IRP) : AAIsDeadImpl(IRP) {}
+static bool
+identifyAliveSuccessors(Attributor &A, const CallBase &CB,
+ AbstractAttribute &AA,
+ SmallVectorImpl<const Instruction *> &AliveSuccessors) {
+ const IRPosition &IPos = IRPosition::callsite_function(CB);
+
+ const auto &NoReturnAA = A.getAAFor<AANoReturn>(AA, IPos);
+ if (NoReturnAA.isAssumedNoReturn())
+ return !NoReturnAA.isKnownNoReturn();
+ if (CB.isTerminator())
+ AliveSuccessors.push_back(&CB.getSuccessor(0)->front());
+ else
+ AliveSuccessors.push_back(CB.getNextNode());
+ return false;
+}
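The boolean these helpers return means "used assumed information", so a caller has to revisit the instruction once the assumption is resolved. A minimal sketch of the intended caller pattern, mirroring the worklist loop in updateImpl further down (names taken from that loop):

    llvm::SmallVector<const llvm::Instruction *, 4> AliveSuccessors;
    bool UsedAssumed =
        identifyAliveSuccessors(A, cast<CallBase>(*I), *this, AliveSuccessors);
    if (UsedAssumed)
      NewToBeExploredFrom.insert(I); // re-explore once assumptions settle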
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECL(PartiallyDeadBlocks, Function,
- "Number of basic blocks classified as partially dead");
- BUILD_STAT_NAME(PartiallyDeadBlocks, Function) += NoReturnCalls.size();
+static bool
+identifyAliveSuccessors(Attributor &A, const InvokeInst &II,
+ AbstractAttribute &AA,
+ SmallVectorImpl<const Instruction *> &AliveSuccessors) {
+ bool UsedAssumedInformation =
+ identifyAliveSuccessors(A, cast<CallBase>(II), AA, AliveSuccessors);
+
+ // First, determine if we can change an invoke to a call assuming the
+ // callee is nounwind. This is not possible if the personality of the
+ // function allows catching asynchronous exceptions.
+ if (AAIsDeadFunction::mayCatchAsynchronousExceptions(*II.getFunction())) {
+ AliveSuccessors.push_back(&II.getUnwindDest()->front());
+ } else {
+ const IRPosition &IPos = IRPosition::callsite_function(II);
+ const auto &AANoUnw = A.getAAFor<AANoUnwind>(AA, IPos);
+ if (AANoUnw.isAssumedNoUnwind()) {
+ UsedAssumedInformation |= !AANoUnw.isKnownNoUnwind();
+ } else {
+ AliveSuccessors.push_back(&II.getUnwindDest()->front());
+ }
}
-};
+ return UsedAssumedInformation;
+}
-bool AAIsDeadImpl::isAfterNoReturn(const Instruction *I) const {
- const Instruction *PrevI = I->getPrevNode();
- while (PrevI) {
- if (NoReturnCalls.count(PrevI))
- return true;
- PrevI = PrevI->getPrevNode();
+static Optional<ConstantInt *>
+getAssumedConstant(Attributor &A, const Value &V, AbstractAttribute &AA,
+ bool &UsedAssumedInformation) {
+ const auto &ValueSimplifyAA =
+ A.getAAFor<AAValueSimplify>(AA, IRPosition::value(V));
+ Optional<Value *> SimplifiedV = ValueSimplifyAA.getAssumedSimplifiedValue(A);
+ UsedAssumedInformation |= !ValueSimplifyAA.isKnown();
+ if (!SimplifiedV.hasValue())
+ return llvm::None;
+ if (isa_and_nonnull<UndefValue>(SimplifiedV.getValue()))
+ return llvm::None;
+ return dyn_cast_or_null<ConstantInt>(SimplifiedV.getValue());
+}
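The Optional/pointer pair encodes three outcomes; a summary of how the branch and switch handlers below interpret it (my wording, derived from the code):

    // getAssumedConstant(A, V, AA, Used) yields:
    //   llvm::None      -> simplification pending or undef; controlled
    //                      edges may optimistically be treated as dead
    //   ConstantInt *C  -> V folds to C; keep only the matching edge
    //   nullptr         -> known non-constant; keep all edges alive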
+
+static bool
+identifyAliveSuccessors(Attributor &A, const BranchInst &BI,
+ AbstractAttribute &AA,
+ SmallVectorImpl<const Instruction *> &AliveSuccessors) {
+ bool UsedAssumedInformation = false;
+ if (BI.getNumSuccessors() == 1) {
+ AliveSuccessors.push_back(&BI.getSuccessor(0)->front());
+ } else {
+ Optional<ConstantInt *> CI =
+ getAssumedConstant(A, *BI.getCondition(), AA, UsedAssumedInformation);
+ if (!CI.hasValue()) {
+ // No value yet, assume both edges are dead.
+ } else if (CI.getValue()) {
+ const BasicBlock *SuccBB =
+ BI.getSuccessor(1 - CI.getValue()->getZExtValue());
+ AliveSuccessors.push_back(&SuccBB->front());
+ } else {
+ AliveSuccessors.push_back(&BI.getSuccessor(0)->front());
+ AliveSuccessors.push_back(&BI.getSuccessor(1)->front());
+ UsedAssumedInformation = false;
+ }
}
- return false;
+ return UsedAssumedInformation;
}
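A worked instance for a two-way branch (hypothetical IR in the comments):

    //   br i1 %c, label %then, label %else
    //
    // getAssumedConstant(%c) == true    -> AliveSuccessors = { %then }
    // getAssumedConstant(%c) == None    -> AliveSuccessors = { } (both
    //                                      edges optimistically dead)
    // getAssumedConstant(%c) == nullptr -> AliveSuccessors = { %then, %else }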
-const Instruction *AAIsDeadImpl::findNextNoReturn(Attributor &A,
- const Instruction *I) {
- const BasicBlock *BB = I->getParent();
- const Function &F = *BB->getParent();
-
- // Flag to determine if we can change an invoke to a call assuming the callee
- // is nounwind. This is not possible if the personality of the function allows
- // to catch asynchronous exceptions.
- bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F);
-
- // TODO: We should have a function that determines if an "edge" is dead.
- // Edges could be from an instruction to the next or from a terminator
- // to the successor. For now, we need to special case the unwind block
- // of InvokeInst below.
-
- while (I) {
- ImmutableCallSite ICS(I);
-
- if (ICS) {
- const IRPosition &IPos = IRPosition::callsite_function(ICS);
- // Regarless of the no-return property of an invoke instruction we only
- // learn that the regular successor is not reachable through this
- // instruction but the unwind block might still be.
- if (auto *Invoke = dyn_cast<InvokeInst>(I)) {
- // Use nounwind to justify the unwind block is dead as well.
- const auto &AANoUnw = A.getAAFor<AANoUnwind>(*this, IPos);
- if (!Invoke2CallAllowed || !AANoUnw.isAssumedNoUnwind()) {
- assumeLive(A, *Invoke->getUnwindDest());
- ToBeExploredPaths.insert(&Invoke->getUnwindDest()->front());
- }
+static bool
+identifyAliveSuccessors(Attributor &A, const SwitchInst &SI,
+ AbstractAttribute &AA,
+ SmallVectorImpl<const Instruction *> &AliveSuccessors) {
+ bool UsedAssumedInformation = false;
+ Optional<ConstantInt *> CI =
+ getAssumedConstant(A, *SI.getCondition(), AA, UsedAssumedInformation);
+ if (!CI.hasValue()) {
+ // No value yet, assume all edges are dead.
+ } else if (CI.getValue()) {
+ for (auto &CaseIt : SI.cases()) {
+ if (CaseIt.getCaseValue() == CI.getValue()) {
+ AliveSuccessors.push_back(&CaseIt.getCaseSuccessor()->front());
+ return UsedAssumedInformation;
}
-
- const auto &NoReturnAA = A.getAAFor<AANoReturn>(*this, IPos);
- if (NoReturnAA.isAssumedNoReturn())
- return I;
}
-
- I = I->getNextNode();
+ AliveSuccessors.push_back(&SI.getDefaultDest()->front());
+ return UsedAssumedInformation;
+ } else {
+ for (const BasicBlock *SuccBB : successors(SI.getParent()))
+ AliveSuccessors.push_back(&SuccBB->front());
}
+ return UsedAssumedInformation;
+}
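The switch handler behaves analogously; a worked instance with hypothetical IR:

    //   switch i32 %x, label %dflt [ i32 0, label %a
    //                                i32 1, label %b ]
    //
    // %x assumed constant 1           -> only %b alive
    // %x assumed constant 7 (no case) -> only %dflt alive
    // %x known non-constant           -> %a, %b, and %dflt all alive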
- // get new paths (reachable blocks).
- for (const BasicBlock *SuccBB : successors(BB)) {
- assumeLive(A, *SuccBB);
- ToBeExploredPaths.insert(&SuccBB->front());
- }
+ChangeStatus AAIsDeadFunction::updateImpl(Attributor &A) {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
- // No noreturn instruction found.
- return nullptr;
-}
+ LLVM_DEBUG(dbgs() << "[AAIsDead] Live [" << AssumedLiveBlocks.size() << "/"
+ << getAssociatedFunction()->size() << "] BBs and "
+ << ToBeExploredFrom.size() << " exploration points and "
+ << KnownDeadEnds.size() << " known dead ends\n");
-ChangeStatus AAIsDeadImpl::updateImpl(Attributor &A) {
- ChangeStatus Status = ChangeStatus::UNCHANGED;
+ // Copy and clear the list of instructions we need to explore from. It is
+ // refilled with instructions the next update has to look at.
+ SmallVector<const Instruction *, 8> Worklist(ToBeExploredFrom.begin(),
+ ToBeExploredFrom.end());
+ decltype(ToBeExploredFrom) NewToBeExploredFrom;
- // Temporary collection to iterate over existing noreturn instructions. This
- // will alow easier modification of NoReturnCalls collection
- SmallVector<const Instruction *, 8> NoReturnChanged;
+ SmallVector<const Instruction *, 8> AliveSuccessors;
+ while (!Worklist.empty()) {
+ const Instruction *I = Worklist.pop_back_val();
+ LLVM_DEBUG(dbgs() << "[AAIsDead] Exploration inst: " << *I << "\n");
- for (const Instruction *I : NoReturnCalls)
- NoReturnChanged.push_back(I);
+ AliveSuccessors.clear();
- for (const Instruction *I : NoReturnChanged) {
- size_t Size = ToBeExploredPaths.size();
+ bool UsedAssumedInformation = false;
+ switch (I->getOpcode()) {
+ // TODO: look for (assumed) UB to propagate "deadness" backwards.
+ default:
+ if (I->isTerminator()) {
+ for (const BasicBlock *SuccBB : successors(I->getParent()))
+ AliveSuccessors.push_back(&SuccBB->front());
+ } else {
+ AliveSuccessors.push_back(I->getNextNode());
+ }
+ break;
+ case Instruction::Call:
+ UsedAssumedInformation = identifyAliveSuccessors(A, cast<CallInst>(*I),
+ *this, AliveSuccessors);
+ break;
+ case Instruction::Invoke:
+ UsedAssumedInformation = identifyAliveSuccessors(A, cast<InvokeInst>(*I),
+ *this, AliveSuccessors);
+ break;
+ case Instruction::Br:
+ UsedAssumedInformation = identifyAliveSuccessors(A, cast<BranchInst>(*I),
+ *this, AliveSuccessors);
+ break;
+ case Instruction::Switch:
+ UsedAssumedInformation = identifyAliveSuccessors(A, cast<SwitchInst>(*I),
+ *this, AliveSuccessors);
+ break;
+ }
- const Instruction *NextNoReturnI = findNextNoReturn(A, I);
- if (NextNoReturnI != I) {
- Status = ChangeStatus::CHANGED;
- NoReturnCalls.remove(I);
- if (NextNoReturnI)
- NoReturnCalls.insert(NextNoReturnI);
+ if (UsedAssumedInformation) {
+ NewToBeExploredFrom.insert(I);
+ } else {
+ Change = ChangeStatus::CHANGED;
+ if (AliveSuccessors.empty() ||
+ (I->isTerminator() && AliveSuccessors.size() < I->getNumSuccessors()))
+ KnownDeadEnds.insert(I);
}
- // Explore new paths.
- while (Size != ToBeExploredPaths.size()) {
- Status = ChangeStatus::CHANGED;
- if (const Instruction *NextNoReturnI =
- findNextNoReturn(A, ToBeExploredPaths[Size++]))
- NoReturnCalls.insert(NextNoReturnI);
+ LLVM_DEBUG(dbgs() << "[AAIsDead] #AliveSuccessors: "
+ << AliveSuccessors.size() << " UsedAssumedInformation: "
+ << UsedAssumedInformation << "\n");
+
+ for (const Instruction *AliveSuccessor : AliveSuccessors) {
+ if (!I->isTerminator()) {
+ assert(AliveSuccessors.size() == 1 &&
+ "Non-terminator expected to have a single successor!");
+ Worklist.push_back(AliveSuccessor);
+ } else {
+ if (assumeLive(A, *AliveSuccessor->getParent()))
+ Worklist.push_back(AliveSuccessor);
+ }
}
}
- LLVM_DEBUG(dbgs() << "[AAIsDead] AssumedLiveBlocks: "
- << AssumedLiveBlocks.size() << " Total number of blocks: "
- << getAssociatedFunction()->size() << "\n");
+ ToBeExploredFrom = std::move(NewToBeExploredFrom);
// If we know everything is live there is no need to query for liveness.
- if (NoReturnCalls.empty() &&
- getAssociatedFunction()->size() == AssumedLiveBlocks.size()) {
- // Indicating a pessimistic fixpoint will cause the state to be "invalid"
- // which will cause the Attributor to not return the AAIsDead on request,
- // which will prevent us from querying isAssumedDead().
- indicatePessimisticFixpoint();
- assert(!isValidState() && "Expected an invalid state!");
- Status = ChangeStatus::CHANGED;
- }
-
- return Status;
+ // Instead, indicating a pessimistic fixpoint will cause the state to be
+ // "invalid" and all queries to be answered conservatively without lookups.
+ // To be in this state we have to (1) finish the exploration, (2) not rule
+ // unreachable code dead, and (3) not discover any non-trivial dead end.
+ if (ToBeExploredFrom.empty() &&
+ getAssociatedFunction()->size() == AssumedLiveBlocks.size() &&
+ llvm::all_of(KnownDeadEnds, [](const Instruction *DeadEndI) {
+ return DeadEndI->isTerminator() && DeadEndI->getNumSuccessors() == 0;
+ }))
+ return indicatePessimisticFixpoint();
+ return Change;
}
/// Liveness information for a call site.
-struct AAIsDeadCallSite final : AAIsDeadImpl {
- AAIsDeadCallSite(const IRPosition &IRP) : AAIsDeadImpl(IRP) {}
+struct AAIsDeadCallSite final : AAIsDeadFunction {
+ AAIsDeadCallSite(const IRPosition &IRP) : AAIsDeadFunction(IRP) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
@@ -2463,10 +3154,9 @@ struct AAIsDeadCallSite final : AAIsDeadImpl {
template <>
ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
const DerefState &R) {
- ChangeStatus CS0 = clampStateAndIndicateChange<IntegerState>(
- S.DerefBytesState, R.DerefBytesState);
- ChangeStatus CS1 =
- clampStateAndIndicateChange<IntegerState>(S.GlobalState, R.GlobalState);
+ ChangeStatus CS0 =
+ clampStateAndIndicateChange(S.DerefBytesState, R.DerefBytesState);
+ ChangeStatus CS1 = clampStateAndIndicateChange(S.GlobalState, R.GlobalState);
return CS0 | CS1;
}
@@ -2496,16 +3186,49 @@ struct AADereferenceableImpl : AADereferenceable {
const StateType &getState() const override { return *this; }
/// }
+ /// Helper function for collecting accessed bytes in must-be-executed-context
+ void addAccessedBytesForUse(Attributor &A, const Use *U,
+ const Instruction *I) {
+ const Value *UseV = U->get();
+ if (!UseV->getType()->isPointerTy())
+ return;
+
+ Type *PtrTy = UseV->getType();
+ const DataLayout &DL = A.getDataLayout();
+ int64_t Offset;
+ if (const Value *Base = getBasePointerOfAccessPointerOperand(
+ I, Offset, DL, /*AllowNonInbounds*/ true)) {
+ if (Base == &getAssociatedValue() &&
+ Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
+ uint64_t Size = DL.getTypeStoreSize(PtrTy->getPointerElementType());
+ addAccessedBytes(Offset, Size);
+ }
+ }
+ return;
+ }
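A worked instance of the byte accounting above, with made-up numbers: for a store of an i64 through a pointer that is the associated value plus a constant offset of 8, the access covers bytes [8, 16):

    // Offset = 8 (from getBasePointerOfAccessPointerOperand)
    // Size   = DL.getTypeStoreSize(i64) = 8
    // addAccessedBytes(/* Offset */ 8, /* Size */ 8); // bytes [8, 16)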
+
/// See AAFromMustBeExecutedContext
bool followUse(Attributor &A, const Use *U, const Instruction *I) {
bool IsNonNull = false;
bool TrackUse = false;
int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse(
A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse);
+
+ addAccessedBytesForUse(A, U, I);
takeKnownDerefBytesMaximum(DerefBytes);
return TrackUse;
}
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Change = AADereferenceable::manifest(A);
+ if (isAssumedNonNull() && hasAttr(Attribute::DereferenceableOrNull)) {
+ removeAttrs({Attribute::DereferenceableOrNull});
+ return ChangeStatus::CHANGED;
+ }
+ return Change;
+ }
+
void getDeducedAttributes(LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
// TODO: Add *_globally support
@@ -2564,6 +3287,8 @@ struct AADereferenceableFloating
T.GlobalState &= DS.GlobalState;
}
+ // TODO: Use `AAConstantRange` to infer dereferenceable bytes.
+
// For now we do not try to "increase" dereferenceability due to negative
// indices as we first have to come up with code to deal with loops and
// for overflows of the dereferenceable bytes.
@@ -2654,30 +3379,6 @@ struct AADereferenceableCallSiteReturned final
AADereferenceable, AADereferenceableImpl>;
AADereferenceableCallSiteReturned(const IRPosition &IRP) : Base(IRP) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- Base::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F)
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
-
- ChangeStatus Change = Base::updateImpl(A);
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::returned(*F);
- auto &FnAA = A.getAAFor<AADereferenceable>(*this, FnPos);
- return Change |
- clampStateAndIndicateChange(
- getState(), static_cast<const DerefState &>(FnAA.getState()));
- }
-
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CS_ATTR(dereferenceable);
@@ -2686,16 +3387,69 @@ struct AADereferenceableCallSiteReturned final
// ------------------------ Align Argument Attribute ------------------------
+static unsigned int getKnownAlignForUse(Attributor &A,
+ AbstractAttribute &QueryingAA,
+ Value &AssociatedValue, const Use *U,
+ const Instruction *I, bool &TrackUse) {
+ // We need to follow common pointer manipulation uses to the accesses they
+ // feed into.
+ if (isa<CastInst>(I)) {
+ // Follow all but ptr2int casts.
+ TrackUse = !isa<PtrToIntInst>(I);
+ return 0;
+ }
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEP->hasAllConstantIndices()) {
+ TrackUse = true;
+ return 0;
+ }
+ }
+
+ unsigned Alignment = 0;
+ if (ImmutableCallSite ICS = ImmutableCallSite(I)) {
+ if (ICS.isBundleOperand(U) || ICS.isCallee(U))
+ return 0;
+
+ unsigned ArgNo = ICS.getArgumentNo(U);
+ IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo);
+ // As long as we only use known information there is no need to track
+ // dependences here.
+ auto &AlignAA = A.getAAFor<AAAlign>(QueryingAA, IRP,
+ /* TrackDependence */ false);
+ Alignment = AlignAA.getKnownAlign();
+ }
+
+ const Value *UseV = U->get();
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ Alignment = SI->getAlignment();
+ else if (auto *LI = dyn_cast<LoadInst>(I))
+ Alignment = LI->getAlignment();
+
+ if (Alignment <= 1)
+ return 0;
+
+ auto &DL = A.getDataLayout();
+ int64_t Offset;
+
+ if (const Value *Base = GetPointerBaseWithConstantOffset(UseV, Offset, DL)) {
+ if (Base == &AssociatedValue) {
+ // BasePointerAddr + Offset = Alignment * Q for some integer Q.
+ // So we can say that the maximum power of two which is a divisor of
+ // gcd(Offset, Alignment) is an alignment.
+
+ uint32_t gcd =
+ greatestCommonDivisor(uint32_t(abs((int32_t)Offset)), Alignment);
+ Alignment = llvm::PowerOf2Floor(gcd);
+ }
+ }
+
+ return Alignment;
+}
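A worked instance of the gcd reasoning above, using the same MathExtras helpers the function calls (the numbers are illustrative):

    #include "llvm/Support/MathExtras.h"

    // An access at "Base + 20" performed with alignment 8 means
    // Base = 8*Q - 20 for some Q, so Base is a multiple of gcd(20, 8) = 4;
    // the largest power of two dividing that gcd is a safe alignment.
    uint32_t gcd = llvm::greatestCommonDivisor<uint32_t>(20, 8); // 4
    uint32_t KnownAlign = llvm::PowerOf2Floor(gcd);              // 4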
struct AAAlignImpl : AAAlign {
AAAlignImpl(const IRPosition &IRP) : AAAlign(IRP) {}
- // Max alignemnt value allowed in IR
- static const unsigned MAX_ALIGN = 1U << 29;
-
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- takeAssumedMinimum(MAX_ALIGN);
-
SmallVector<Attribute, 4> Attrs;
getAttrs({Attribute::Alignment}, Attrs);
for (const Attribute &Attr : Attrs)
@@ -2718,7 +3472,7 @@ struct AAAlignImpl : AAAlign {
if (SI->getPointerOperand() == &AnchorVal)
if (SI->getAlignment() < getAssumedAlign()) {
STATS_DECLTRACK(AAAlign, Store,
- "Number of times alignemnt added to a store");
+ "Number of times alignment added to a store");
SI->setAlignment(Align(getAssumedAlign()));
Changed = ChangeStatus::CHANGED;
}
@@ -2727,7 +3481,7 @@ struct AAAlignImpl : AAAlign {
if (LI->getAlignment() < getAssumedAlign()) {
LI->setAlignment(Align(getAssumedAlign()));
STATS_DECLTRACK(AAAlign, Load,
- "Number of times alignemnt added to a load");
+ "Number of times alignment added to a load");
Changed = ChangeStatus::CHANGED;
}
}
@@ -2748,6 +3502,16 @@ struct AAAlignImpl : AAAlign {
Attrs.emplace_back(
Attribute::getWithAlignment(Ctx, Align(getAssumedAlign())));
}
+ /// See AAFromMustBeExecutedContext
+ bool followUse(Attributor &A, const Use *U, const Instruction *I) {
+ bool TrackUse = false;
+
+ unsigned int KnownAlign =
+ getKnownAlignForUse(A, *this, getAssociatedValue(), U, I, TrackUse);
+ takeKnownMaximum(KnownAlign);
+
+ return TrackUse;
+ }
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
@@ -2758,11 +3522,14 @@ struct AAAlignImpl : AAAlign {
};
/// Align attribute for a floating value.
-struct AAAlignFloating : AAAlignImpl {
- AAAlignFloating(const IRPosition &IRP) : AAAlignImpl(IRP) {}
+struct AAAlignFloating : AAFromMustBeExecutedContext<AAAlign, AAAlignImpl> {
+ using Base = AAFromMustBeExecutedContext<AAAlign, AAAlignImpl>;
+ AAAlignFloating(const IRPosition &IRP) : Base(IRP) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
+ Base::updateImpl(A);
+
const DataLayout &DL = A.getDataLayout();
auto VisitValueCB = [&](Value &V, AAAlign::StateType &T,
@@ -2808,9 +3575,12 @@ struct AAAlignReturned final
/// Align attribute for function argument.
struct AAAlignArgument final
- : AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl> {
+ : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AAAlign,
+ AAAlignImpl> {
AAAlignArgument(const IRPosition &IRP)
- : AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl>(IRP) {}
+ : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AAAlign,
+ AAAlignImpl>(
+ IRP) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(aligned) }
@@ -2824,35 +3594,39 @@ struct AAAlignCallSiteArgument final : AAAlignFloating {
return AAAlignImpl::manifest(A);
}
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Changed = AAAlignFloating::updateImpl(A);
+ if (Argument *Arg = getAssociatedArgument()) {
+ const auto &ArgAlignAA = A.getAAFor<AAAlign>(
+ *this, IRPosition::argument(*Arg), /* TrackDependence */ false,
+ DepClassTy::OPTIONAL);
+ takeKnownMaximum(ArgAlignAA.getKnownAlign());
+ }
+ return Changed;
+ }
+
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(aligned) }
};
/// Align attribute deduction for a call site return value.
-struct AAAlignCallSiteReturned final : AAAlignImpl {
- AAAlignCallSiteReturned(const IRPosition &IRP) : AAAlignImpl(IRP) {}
+struct AAAlignCallSiteReturned final
+ : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AAAlign,
+ AAAlignImpl> {
+ using Base =
+ AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AAAlign,
+ AAAlignImpl>;
+ AAAlignCallSiteReturned(const IRPosition &IRP) : Base(IRP) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- AAAlignImpl::initialize(A);
+ Base::initialize(A);
Function *F = getAssociatedFunction();
if (!F)
indicatePessimisticFixpoint();
}
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::returned(*F);
- auto &FnAA = A.getAAFor<AAAlign>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(), static_cast<const AAAlign::StateType &>(FnAA.getState()));
- }
-
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
};
@@ -2865,7 +3639,7 @@ struct AANoReturnImpl : public AANoReturn {
void initialize(Attributor &A) override {
AANoReturn::initialize(A);
Function *F = getAssociatedFunction();
- if (!F || F->hasFnAttribute(Attribute::WillReturn))
+ if (!F)
indicatePessimisticFixpoint();
}
@@ -2876,9 +3650,6 @@ struct AANoReturnImpl : public AANoReturn {
/// See AbstractAttribute::updateImpl(Attributor &A).
virtual ChangeStatus updateImpl(Attributor &A) override {
- const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, getIRPosition());
- if (WillReturnAA.isKnownWillReturn())
- return indicatePessimisticFixpoint();
auto CheckForNoReturn = [](Instruction &) { return false; };
if (!A.checkForAllInstructions(CheckForNoReturn, *this,
{(unsigned)Instruction::Ret}))
@@ -2924,7 +3695,16 @@ struct AANoCaptureImpl : public AANoCapture {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- AANoCapture::initialize(A);
+ if (hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ true)) {
+ indicateOptimisticFixpoint();
+ return;
+ }
+ Function *AnchorScope = getAnchorScope();
+ if (isFnInterfaceKind() &&
+ (!AnchorScope || !AnchorScope->hasExactDefinition())) {
+ indicatePessimisticFixpoint();
+ return;
+ }
// You cannot "capture" null in the default address space.
if (isa<ConstantPointerNull>(getAssociatedValue()) &&
@@ -2933,13 +3713,11 @@ struct AANoCaptureImpl : public AANoCapture {
return;
}
- const IRPosition &IRP = getIRPosition();
- const Function *F =
- getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
+ const Function *F = getArgNo() >= 0 ? getAssociatedFunction() : AnchorScope;
// Check what state the associated function can actually capture.
if (F)
- determineFunctionCaptureCapabilities(IRP, *F, *this);
+ determineFunctionCaptureCapabilities(getIRPosition(), *F, *this);
else
indicatePessimisticFixpoint();
}
@@ -2967,7 +3745,7 @@ struct AANoCaptureImpl : public AANoCapture {
/// state in memory and through "returning/throwing", respectively.
static void determineFunctionCaptureCapabilities(const IRPosition &IRP,
const Function &F,
- IntegerState &State) {
+ BitIntegerState &State) {
// TODO: Once we have memory behavior attributes we should use them here.
// If we know we cannot communicate or write to memory, we do not care about
@@ -2992,7 +3770,7 @@ struct AANoCaptureImpl : public AANoCapture {
// Check existing "returned" attributes.
int ArgNo = IRP.getArgNo();
if (F.doesNotThrow() && ArgNo >= 0) {
- for (unsigned u = 0, e = F.arg_size(); u< e; ++u)
+ for (unsigned u = 0, e = F.arg_size(); u < e; ++u)
if (F.hasParamAttribute(u, Attribute::Returned)) {
if (u == unsigned(ArgNo))
State.removeAssumedBits(NOT_CAPTURED_IN_RET);
@@ -3036,7 +3814,7 @@ struct AACaptureUseTracker final : public CaptureTracker {
/// the search is stopped with \p CapturedInMemory and \p CapturedInInteger
/// conservatively set to true.
AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA,
- const AAIsDead &IsDeadAA, IntegerState &State,
+ const AAIsDead &IsDeadAA, AANoCapture::StateType &State,
SmallVectorImpl<const Value *> &PotentialCopies,
unsigned &RemainingUsesToExplore)
: A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State),
@@ -3155,7 +3933,7 @@ private:
const AAIsDead &IsDeadAA;
/// The state currently updated.
- IntegerState &State;
+ AANoCapture::StateType &State;
/// Set of potential copies of the tracked value.
SmallVectorImpl<const Value *> &PotentialCopies;
@@ -3238,9 +4016,11 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size())
Tracker.valueMayBeCaptured(PotentialCopies[Idx++]);
- AAAlign::StateType &S = getState();
+ AANoCapture::StateType &S = getState();
auto Assumed = S.getAssumed();
S.intersectAssumedBits(T.getAssumed());
+ if (!isAssumedNoCaptureMaybeReturned())
+ return indicatePessimisticFixpoint();
return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
@@ -3257,6 +4037,14 @@ struct AANoCaptureArgument final : AANoCaptureImpl {
struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
AANoCaptureCallSiteArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (Argument *Arg = getAssociatedArgument())
+ if (Arg->hasByValAttr())
+ indicateOptimisticFixpoint();
+ AANoCaptureImpl::initialize(A);
+ }
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -3375,6 +4163,28 @@ struct AAValueSimplifyImpl : AAValueSimplify {
return true;
}
+ bool askSimplifiedValueForAAValueConstantRange(Attributor &A) {
+ if (!getAssociatedValue().getType()->isIntegerTy())
+ return false;
+
+ const auto &ValueConstantRangeAA =
+ A.getAAFor<AAValueConstantRange>(*this, getIRPosition());
+
+ Optional<ConstantInt *> COpt =
+ ValueConstantRangeAA.getAssumedConstantInt(A);
+ if (COpt.hasValue()) {
+ if (auto *C = COpt.getValue())
+ SimplifiedAssociatedValue = C;
+ else
+ return false;
+ } else {
+ // FIXME: It should be llvm::None, but if you set llvm::None,
+ // values are mistakenly inferred as `undef` now.
+ SimplifiedAssociatedValue = &getAssociatedValue();
+ }
+ return true;
+ }
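In effect (my summary) the range AA can rescue an integer value the generic simplification gave up on:

    // range collapses to one constant C -> SimplifiedAssociatedValue = C
    // range known to be non-constant    -> return false (give up)
    // range still unsettled             -> keep the value itself for now
    //                                      (see the FIXME above)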
+
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
@@ -3389,7 +4199,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
if (!V.user_empty() && &V != C && V.getType() == C->getType()) {
LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << V << " -> " << *C
<< "\n");
- V.replaceAllUsesWith(C);
+ A.changeValueAfterManifest(V, *C);
Changed = ChangeStatus::CHANGED;
}
}
@@ -3397,6 +4207,15 @@ struct AAValueSimplifyImpl : AAValueSimplify {
return Changed | AAValueSimplify::manifest(A);
}
+ /// See AbstractState::indicatePessimisticFixpoint(...).
+ ChangeStatus indicatePessimisticFixpoint() override {
+ // NOTE: Associated value will be returned in a pessimistic fixpoint and is
+ // regarded as known. That's why `indicateOptimisticFixpoint` is called.
+ SimplifiedAssociatedValue = &getAssociatedValue();
+ indicateOptimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
protected:
// An assumed simplified value. Initially, it is set to Optional::None, which
// means that the value is not clear under current assumption. If in the
@@ -3408,20 +4227,49 @@ protected:
struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
AAValueSimplifyArgument(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+ void initialize(Attributor &A) override {
+ AAValueSimplifyImpl::initialize(A);
+ if (!getAssociatedFunction() || getAssociatedFunction()->isDeclaration())
+ indicatePessimisticFixpoint();
+ if (hasAttr({Attribute::InAlloca, Attribute::StructRet, Attribute::Nest},
+ /* IgnoreSubsumingPositions */ true))
+ indicatePessimisticFixpoint();
+ }
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
+ // Byval is only replaceable if it is readonly; otherwise we would write into
+ // the replaced value and not the copy that byval creates implicitly.
+ Argument *Arg = getAssociatedArgument();
+ if (Arg->hasByValAttr()) {
+ const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition());
+ if (!MemAA.isAssumedReadOnly())
+ return indicatePessimisticFixpoint();
+ }
+
bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
auto PredForCallSite = [&](AbstractCallSite ACS) {
// Check if we have an associated argument or not (which can happen for
// callback calls).
- if (Value *ArgOp = ACS.getCallArgOperand(getArgNo()))
- return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue);
- return false;
+ Value *ArgOp = ACS.getCallArgOperand(getArgNo());
+ if (!ArgOp)
+ return false;
+ // We can only propagate thread independent values through callbacks.
+ // This is different to direct/indirect call sites because for them we
+ // know the thread executing the caller and callee is the same. For
+ // callbacks this is not guaranteed, thus a thread dependent value could
+ // be different for the caller and callee, making it invalid to propagate.
+ if (ACS.isCallbackCall())
+ if (auto *C = dyn_cast<Constant>(ArgOp))
+ if (C->isThreadDependent())
+ return false;
+ return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue);
};
if (!A.checkForAllCallSites(PredForCallSite, *this, true))
- return indicatePessimisticFixpoint();
+ if (!askSimplifiedValueForAAValueConstantRange(A))
+ return indicatePessimisticFixpoint();
// If a candidate was found in this update, return CHANGED.
return HasValueBefore == SimplifiedAssociatedValue.hasValue()
@@ -3447,7 +4295,8 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl {
};
if (!A.checkForAllReturnedValues(PredForReturned, *this))
- return indicatePessimisticFixpoint();
+ if (!askSimplifiedValueForAAValueConstantRange(A))
+ return indicatePessimisticFixpoint();
// If a candidate was found in this update, return CHANGED.
return HasValueBefore == SimplifiedAssociatedValue.hasValue()
@@ -3468,7 +4317,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
Value &V = getAnchorValue();
// TODO: add other stuffs
- if (isa<Constant>(V) || isa<UndefValue>(V))
+ if (isa<Constant>(V))
indicatePessimisticFixpoint();
}
@@ -3480,10 +4329,10 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V));
if (!Stripped && this == &AA) {
// TODO: Look the instruction and check recursively.
+
LLVM_DEBUG(
dbgs() << "[Attributor][ValueSimplify] Can't be stripped more : "
<< V << "\n");
- indicatePessimisticFixpoint();
return false;
}
return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue);
@@ -3492,7 +4341,8 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
if (!genericValueTraversal<AAValueSimplify, BooleanState>(
A, getIRPosition(), *this, static_cast<BooleanState &>(*this),
VisitValueCB))
- return indicatePessimisticFixpoint();
+ if (!askSimplifiedValueForAAValueConstantRange(A))
+ return indicatePessimisticFixpoint();
// If a candidate was found in this update, return CHANGED.
@@ -3601,7 +4451,7 @@ struct AAHeapToStackImpl : public AAHeapToStack {
AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc",
AI->getNextNode());
- MallocCall->replaceAllUsesWith(AI);
+ replaceAllInstructionUsesWith(*MallocCall, *AI);
if (auto *II = dyn_cast<InvokeInst>(MallocCall)) {
auto *NBB = II->getNormalDest();
@@ -3645,76 +4495,80 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
const Function *F = getAssociatedFunction();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
- auto UsesCheck = [&](Instruction &I) {
- SmallPtrSet<const Use *, 8> Visited;
- SmallVector<const Use *, 8> Worklist;
-
- for (Use &U : I.uses())
- Worklist.push_back(&U);
+ MustBeExecutedContextExplorer &Explorer =
+ A.getInfoCache().getMustBeExecutedContextExplorer();
- while (!Worklist.empty()) {
- const Use *U = Worklist.pop_back_val();
- if (!Visited.insert(U).second)
- continue;
-
- auto *UserI = U->getUser();
+ auto FreeCheck = [&](Instruction &I) {
+ const auto &Frees = FreesForMalloc.lookup(&I);
+ if (Frees.size() != 1)
+ return false;
+ Instruction *UniqueFree = *Frees.begin();
+ return Explorer.findInContextOf(UniqueFree, I.getNextNode());
+ };
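The shape FreeCheck accepts, as hypothetical IR: exactly one free, and it must execute whenever the malloc does:

    //   %p = call i8* @malloc(i64 64)
    //   ; ... no branch between here and the free may skip it ...
    //   call void @free(i8* %p)
    //
    // A free guarded by a conditional branch is not found in the
    // must-be-executed context of the malloc, so FreeCheck returns false.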
+ auto UsesCheck = [&](Instruction &I) {
+ bool ValidUsesOnly = true;
+ bool MustUse = true;
+ auto Pred = [&](const Use &U, bool &Follow) -> bool {
+ Instruction *UserI = cast<Instruction>(U.getUser());
if (isa<LoadInst>(UserI))
- continue;
+ return true;
if (auto *SI = dyn_cast<StoreInst>(UserI)) {
- if (SI->getValueOperand() == U->get()) {
- LLVM_DEBUG(dbgs() << "[H2S] escaping store to memory: " << *UserI << "\n");
- return false;
+ if (SI->getValueOperand() == U.get()) {
+ LLVM_DEBUG(dbgs()
+ << "[H2S] escaping store to memory: " << *UserI << "\n");
+ ValidUsesOnly = false;
+ } else {
+ // A store into the malloc'ed memory is fine.
}
- // A store into the malloc'ed memory is fine.
- continue;
+ return true;
}
-
- // NOTE: Right now, if a function that has malloc pointer as an argument
- // frees memory, we assume that the malloc pointer is freed.
-
- // TODO: Add nofree callsite argument attribute to indicate that pointer
- // argument is not freed.
if (auto *CB = dyn_cast<CallBase>(UserI)) {
- if (!CB->isArgOperand(U))
- continue;
-
- if (CB->isLifetimeStartOrEnd())
- continue;
-
+ if (!CB->isArgOperand(&U) || CB->isLifetimeStartOrEnd())
+ return true;
// Record malloc.
if (isFreeCall(UserI, TLI)) {
- FreesForMalloc[&I].insert(
- cast<Instruction>(const_cast<User *>(UserI)));
- continue;
+ if (MustUse) {
+ FreesForMalloc[&I].insert(UserI);
+ } else {
+ LLVM_DEBUG(dbgs() << "[H2S] free potentially on different mallocs: "
+ << *UserI << "\n");
+ ValidUsesOnly = false;
+ }
+ return true;
}
- // If a function does not free memory we are fine
- const auto &NoFreeAA =
- A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(*CB));
+ unsigned ArgNo = CB->getArgOperandNo(&U);
- unsigned ArgNo = U - CB->arg_begin();
const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
*this, IRPosition::callsite_argument(*CB, ArgNo));
- if (!NoCaptureAA.isAssumedNoCapture() || !NoFreeAA.isAssumedNoFree()) {
+ // If a callsite argument use is nofree, we are fine.
+ const auto &ArgNoFreeAA = A.getAAFor<AANoFree>(
+ *this, IRPosition::callsite_argument(*CB, ArgNo));
+
+ if (!NoCaptureAA.isAssumedNoCapture() ||
+ !ArgNoFreeAA.isAssumedNoFree()) {
LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
- return false;
+ ValidUsesOnly = false;
}
- continue;
+ return true;
}
- if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI)) {
- for (Use &U : UserI->uses())
- Worklist.push_back(&U);
- continue;
+ if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
+ isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
+ MustUse &= !(isa<PHINode>(UserI) || isa<SelectInst>(UserI));
+ Follow = true;
+ return true;
}
-
- // Unknown user.
+ // Unknown user for which we cannot track uses further (in a way that
+ // makes sense).
LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n");
- return false;
- }
- return true;
+ ValidUsesOnly = false;
+ return true;
+ };
+ A.checkForAllUses(Pred, *this, I);
+ return ValidUsesOnly;
};
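Why the MustUse flag matters, as hypothetical IR: once the pointer flows through a phi (or select), a later free can no longer be attributed to one specific allocation:

    //   %a = call i8* @malloc(i64 8)
    //   %b = call i8* @malloc(i64 8)
    //   %p = phi i8* [ %a, %t ], [ %b, %f ]
    //   call void @free(i8* %p) ; "free potentially on different mallocs"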
auto MallocCallocCheck = [&](Instruction &I) {
@@ -3730,8 +4584,8 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
if (IsMalloc) {
if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0)))
- if (Size->getValue().sle(MaxHeapToStackSize))
- if (UsesCheck(I)) {
+ if (Size->getValue().ule(MaxHeapToStackSize))
+ if (UsesCheck(I) || FreeCheck(I)) {
MallocCalls.insert(&I);
return true;
}
@@ -3740,8 +4594,8 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0)))
if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
if ((Size->getValue().umul_ov(Num->getValue(), Overflow))
- .sle(MaxHeapToStackSize))
- if (!Overflow && UsesCheck(I)) {
+ .ule(MaxHeapToStackSize))
+ if (!Overflow && (UsesCheck(I) || FreeCheck(I))) {
MallocCalls.insert(&I);
return true;
}
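The calloc path multiplies with overflow detection before the (now unsigned) size comparison; a minimal sketch with made-up operands:

    #include "llvm/ADT/APInt.h"

    llvm::APInt Num(64, 1ULL << 33), Size(64, 4);
    bool Overflow = false;
    llvm::APInt Total = Num.umul_ov(Size, Overflow); // 2^35, Overflow false
    // With Num = Size = 2^33 the 64-bit product wraps and Overflow is set,
    // so the call is rejected regardless of MaxHeapToStackSize.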
@@ -3767,8 +4621,10 @@ struct AAHeapToStackFunction final : public AAHeapToStackImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECL(MallocCalls, Function,
- "Number of MallocCalls converted to allocas");
- BUILD_STAT_NAME(MallocCalls, Function) += MallocCalls.size();
+ "Number of malloc calls converted to allocas");
+ for (auto *C : MallocCalls)
+ if (!BadMallocCalls.count(C))
+ ++BUILD_STAT_NAME(MallocCalls, Function);
}
};
@@ -3787,9 +4643,10 @@ struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
/// Return the memory behavior information encoded in the IR for \p IRP.
static void getKnownStateFromValue(const IRPosition &IRP,
- IntegerState &State) {
+ BitIntegerState &State,
+ bool IgnoreSubsumingPositions = false) {
SmallVector<Attribute, 2> Attrs;
- IRP.getAttrs(AttrKinds, Attrs);
+ IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions);
for (const Attribute &Attr : Attrs) {
switch (Attr.getKindAsEnum()) {
case Attribute::ReadNone:
@@ -3829,7 +4686,7 @@ struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
- IRPosition &IRP = getIRPosition();
+ const IRPosition &IRP = getIRPosition();
// Check if we would improve the existing attributes first.
SmallVector<Attribute, 4> DeducedAttrs;
@@ -3911,12 +4768,25 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- AAMemoryBehaviorFloating::initialize(A);
+ intersectAssumedBits(BEST_STATE);
+ const IRPosition &IRP = getIRPosition();
+ // TODO: Make IgnoreSubsumingPositions a property of an IRAttribute so we
+ // can query it when we use has/getAttr. That would allow us to reuse the
+ // initialize of the base class here.
+ bool HasByVal =
+ IRP.hasAttr({Attribute::ByVal}, /* IgnoreSubsumingPositions */ true);
+ getKnownStateFromValue(IRP, getState(),
+ /* IgnoreSubsumingPositions */ HasByVal);
// Initialize the use vector with all direct uses of the associated value.
Argument *Arg = getAssociatedArgument();
- if (!Arg || !Arg->getParent()->hasExactDefinition())
+ if (!Arg || !Arg->getParent()->hasExactDefinition()) {
indicatePessimisticFixpoint();
+ } else {
+ // Initialize the use vector with all direct uses of the associated value.
+ for (const Use &U : Arg->uses())
+ Uses.insert(&U);
+ }
}
ChangeStatus manifest(Attributor &A) override {
@@ -3929,7 +4799,6 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
return AAMemoryBehaviorFloating::manifest(A);
}
-
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
if (isAssumedReadNone())
@@ -3945,6 +4814,19 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP)
: AAMemoryBehaviorArgument(IRP) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (Argument *Arg = getAssociatedArgument()) {
+ if (Arg->hasByValAttr()) {
+ addKnownBits(NO_WRITES);
+ removeKnownBits(NO_READS);
+ removeAssumedBits(NO_READS);
+ }
+ } else {
+ }
+ AAMemoryBehaviorArgument::initialize(A);
+ }
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -3956,7 +4838,7 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
return clampStateAndIndicateChange(
getState(),
- static_cast<const AANoCapture::StateType &>(ArgAA.getState()));
+ static_cast<const AAMemoryBehavior::StateType &>(ArgAA.getState()));
}
/// See AbstractAttribute::trackStatistics()
@@ -4036,7 +4918,8 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
return clampStateAndIndicateChange(
- getState(), static_cast<const AAAlign::StateType &>(FnAA.getState()));
+ getState(),
+ static_cast<const AAMemoryBehavior::StateType &>(FnAA.getState()));
}
/// See AbstractAttribute::trackStatistics()
@@ -4090,19 +4973,26 @@ ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
// First, check the function scope. We take the known information and we avoid
// work if the assumed information implies the current assumed information for
- // this attribute.
- const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
- S.addKnownBits(FnMemAA.getKnown());
- if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed())
- return ChangeStatus::UNCHANGED;
+ // this attribute. This is valid for all but byval arguments.
+ Argument *Arg = IRP.getAssociatedArgument();
+ AAMemoryBehavior::base_t FnMemAssumedState =
+ AAMemoryBehavior::StateType::getWorstState();
+ if (!Arg || !Arg->hasByValAttr()) {
+ const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+ FnMemAssumedState = FnMemAA.getAssumed();
+ S.addKnownBits(FnMemAA.getKnown());
+ if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed())
+ return ChangeStatus::UNCHANGED;
+ }
// Make sure the value is not captured (except through "return"), if
// it is, any information derived would be irrelevant anyway as we cannot
// check the potential aliases introduced by the capture. However, no need
// to fall back to anything less optimistic than the function state.
- const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
+ const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(
+ *this, IRP, /* TrackDependence */ true, DepClassTy::OPTIONAL);
if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
- S.intersectAssumedBits(FnMemAA.getAssumed());
+ S.intersectAssumedBits(FnMemAssumedState);
return ChangeStatus::CHANGED;
}
@@ -4223,7 +5113,451 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U,
if (UserI->mayWriteToMemory())
removeAssumedBits(NO_WRITES);
}
+/// ------------------ Value Constant Range Attribute -------------------------
+
+struct AAValueConstantRangeImpl : AAValueConstantRange {
+ using StateType = IntegerRangeState;
+ AAValueConstantRangeImpl(const IRPosition &IRP) : AAValueConstantRange(IRP) {}
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ std::string Str;
+ llvm::raw_string_ostream OS(Str);
+ OS << "range(" << getBitWidth() << ")<";
+ getKnown().print(OS);
+ OS << " / ";
+ getAssumed().print(OS);
+ OS << ">";
+ return OS.str();
+ }
+
+ /// Helper function to get a SCEV expr for the associated value at program
+ /// point \p I.
+ const SCEV *getSCEV(Attributor &A, const Instruction *I = nullptr) const {
+ if (!getAnchorScope())
+ return nullptr;
+
+ ScalarEvolution *SE =
+ A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>(
+ *getAnchorScope());
+
+ LoopInfo *LI = A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(
+ *getAnchorScope());
+
+ if (!SE || !LI)
+ return nullptr;
+
+ const SCEV *S = SE->getSCEV(&getAssociatedValue());
+ if (!I)
+ return S;
+
+ return SE->getSCEVAtScope(S, LI->getLoopFor(I->getParent()));
+ }
+
+ /// Helper function to get a range from SCEV for the associated value at
+ /// program point \p I.
+ ConstantRange getConstantRangeFromSCEV(Attributor &A,
+ const Instruction *I = nullptr) const {
+ if (!getAnchorScope())
+ return getWorstState(getBitWidth());
+
+ ScalarEvolution *SE =
+ A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>(
+ *getAnchorScope());
+
+ const SCEV *S = getSCEV(A, I);
+ if (!SE || !S)
+ return getWorstState(getBitWidth());
+
+ return SE->getUnsignedRange(S);
+ }
+
+ /// Helper function to get a range from LVI for the associated value at
+ /// program point \p I.
+ ConstantRange
+ getConstantRangeFromLVI(Attributor &A,
+ const Instruction *CtxI = nullptr) const {
+ if (!getAnchorScope())
+ return getWorstState(getBitWidth());
+
+ LazyValueInfo *LVI =
+ A.getInfoCache().getAnalysisResultForFunction<LazyValueAnalysis>(
+ *getAnchorScope());
+
+ if (!LVI || !CtxI)
+ return getWorstState(getBitWidth());
+ return LVI->getConstantRange(&getAssociatedValue(),
+ const_cast<BasicBlock *>(CtxI->getParent()),
+ const_cast<Instruction *>(CtxI));
+ }
+
+ /// See AAValueConstantRange::getKnownConstantRange(..).
+ ConstantRange
+ getKnownConstantRange(Attributor &A,
+ const Instruction *CtxI = nullptr) const override {
+ if (!CtxI || CtxI == getCtxI())
+ return getKnown();
+
+ ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI);
+ ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI);
+ return getKnown().intersectWith(SCEVR).intersectWith(LVIR);
+ }
+
+ /// See AAValueConstantRange::getAssumedConstantRange(..).
+ ConstantRange
+ getAssumedConstantRange(Attributor &A,
+ const Instruction *CtxI = nullptr) const override {
+ // TODO: Make SCEV use Attributor assumption.
+ // We may be able to bound a variable range via assumptions in
+ // Attributor. ex.) If x is assumed to be in [1, 3] and y is known to
+ // evolve to x^2 + x, then we can say that y is in [2, 12].
+
+ if (!CtxI || CtxI == getCtxI())
+ return getAssumed();
+
+ ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI);
+ ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI);
+ return getAssumed().intersectWith(SCEVR).intersectWith(LVIR);
+ }
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ // Intersect a range given by SCEV.
+ intersectKnown(getConstantRangeFromSCEV(A, getCtxI()));
+
+ // Intersect a range given by LVI.
+ intersectKnown(getConstantRangeFromLVI(A, getCtxI()));
+ }
+
+ /// Helper function to create MDNode for range metadata.
+ static MDNode *
+ getMDNodeForConstantRange(Type *Ty, LLVMContext &Ctx,
+ const ConstantRange &AssumedConstantRange) {
+ Metadata *LowAndHigh[] = {ConstantAsMetadata::get(ConstantInt::get(
+ Ty, AssumedConstantRange.getLower())),
+ ConstantAsMetadata::get(ConstantInt::get(
+ Ty, AssumedConstantRange.getUpper()))};
+ return MDNode::get(Ctx, LowAndHigh);
+ }
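The emitted metadata for an assumed i32 range [0, 10) looks as follows (hypothetical IR): the two operands are the inclusive lower and exclusive upper bound:

    //   %v = call i32 @f(), !range !0
    //   !0 = !{i32 0, i32 10}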
+
+ /// Return true if \p Assumed is included in \p KnownRanges.
+ static bool isBetterRange(const ConstantRange &Assumed, MDNode *KnownRanges) {
+
+ if (Assumed.isFullSet())
+ return false;
+
+ if (!KnownRanges)
+ return true;
+
+ // If multiple ranges are annotated in IR, we give up annotating the
+ // assumed range for now.
+
+ // TODO: If there exists a known range which contains the assumed range,
+ // we can say the assumed range is better.
+ if (KnownRanges->getNumOperands() > 2)
+ return false;
+
+ ConstantInt *Lower =
+ mdconst::extract<ConstantInt>(KnownRanges->getOperand(0));
+ ConstantInt *Upper =
+ mdconst::extract<ConstantInt>(KnownRanges->getOperand(1));
+
+ ConstantRange Known(Lower->getValue(), Upper->getValue());
+ return Known.contains(Assumed) && Known != Assumed;
+ }
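A worked instance of the containment test, assuming the ConstantRange API used above:

    #include "llvm/IR/ConstantRange.h"

    llvm::ConstantRange Known(llvm::APInt(32, 0), llvm::APInt(32, 10));  // [0,10)
    llvm::ConstantRange Assumed(llvm::APInt(32, 2), llvm::APInt(32, 5)); // [2,5)
    // Strictly contained -> the assumed range is worth annotating:
    bool Better = Known.contains(Assumed) && Known != Assumed; // true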
+
+ /// Helper function to set range metadata.
+ static bool
+ setRangeMetadataIfisBetterRange(Instruction *I,
+ const ConstantRange &AssumedConstantRange) {
+ auto *OldRangeMD = I->getMetadata(LLVMContext::MD_range);
+ if (isBetterRange(AssumedConstantRange, OldRangeMD)) {
+ if (!AssumedConstantRange.isEmptySet()) {
+ I->setMetadata(LLVMContext::MD_range,
+ getMDNodeForConstantRange(I->getType(), I->getContext(),
+ AssumedConstantRange));
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /// See AbstractAttribute::manifest()
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ ConstantRange AssumedConstantRange = getAssumedConstantRange(A);
+ assert(!AssumedConstantRange.isFullSet() && "Invalid state");
+
+ auto &V = getAssociatedValue();
+ if (!AssumedConstantRange.isEmptySet() &&
+ !AssumedConstantRange.isSingleElement()) {
+ if (Instruction *I = dyn_cast<Instruction>(&V))
+ if (isa<CallInst>(I) || isa<LoadInst>(I))
+ if (setRangeMetadataIfisBetterRange(I, AssumedConstantRange))
+ Changed = ChangeStatus::CHANGED;
+ }
+
+ return Changed;
+ }
+};
+
+struct AAValueConstantRangeArgument final : public AAValueConstantRangeImpl {
+
+ AAValueConstantRangeArgument(const IRPosition &IRP)
+ : AAValueConstantRangeImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Use AAArgumentFromCallSiteArguments
+
+ IntegerRangeState S(getBitWidth());
+ clampCallSiteArgumentStates<AAValueConstantRange, IntegerRangeState>(
+ A, *this, S);
+
+ // TODO: If we know we visited all incoming values, thus none are assumed
+ // dead, we can take the known information from the state T.
+ return clampStateAndIndicateChange<IntegerRangeState>(this->getState(), S);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(value_range)
+ }
+};
+
+struct AAValueConstantRangeReturned : AAValueConstantRangeImpl {
+ AAValueConstantRangeReturned(const IRPosition &IRP)
+ : AAValueConstantRangeImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Use AAReturnedFromReturnedValues
+
+ // TODO: If we know we visited all returned values, thus none are assumed
+ // dead, we can take the known information from the state T.
+
+ IntegerRangeState S(getBitWidth());
+
+ clampReturnedValueStates<AAValueConstantRange, IntegerRangeState>(A, *this,
+ S);
+ return clampStateAndIndicateChange<StateType>(this->getState(), S);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(value_range)
+ }
+};
+
+struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
+ AAValueConstantRangeFloating(const IRPosition &IRP)
+ : AAValueConstantRangeImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAValueConstantRange::initialize(A);
+ Value &V = getAssociatedValue();
+
+ if (auto *C = dyn_cast<ConstantInt>(&V)) {
+ unionAssumed(ConstantRange(C->getValue()));
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ if (isa<UndefValue>(&V)) {
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ if (auto *I = dyn_cast<Instruction>(&V))
+ if (isa<BinaryOperator>(I) || isa<CmpInst>(I)) {
+ Value *LHS = I->getOperand(0);
+ Value *RHS = I->getOperand(1);
+
+ if (LHS->getType()->isIntegerTy() && RHS->getType()->isIntegerTy())
+ return;
+ }
+
+ // If it is a load instruction with range metadata, use it.
+ if (LoadInst *LI = dyn_cast<LoadInst>(&V))
+ if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range)) {
+ intersectKnown(getConstantRangeFromMetadata(*RangeMD));
+ return;
+ }
+
+ // Otherwise we give up.
+ indicatePessimisticFixpoint();
+
+ LLVM_DEBUG(dbgs() << "[Attributor][AAValueConstantRange] We give up: "
+ << getAssociatedValue());
+ }
+
+ bool calculateBinaryOperator(Attributor &A, BinaryOperator *BinOp,
+ IntegerRangeState &T, Instruction *CtxI) {
+ Value *LHS = BinOp->getOperand(0);
+ Value *RHS = BinOp->getOperand(1);
+
+ auto &LHSAA =
+ A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS));
+ auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
+
+ auto &RHSAA =
+ A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS));
+ auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI);
+
+ auto AssumedRange = LHSAARange.binaryOp(BinOp->getOpcode(), RHSAARange);
+
+ T.unionAssumed(AssumedRange);
+
+ // TODO: Track a known state too.
+
+ return T.isValidState();
+ }
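A worked instance of the interval arithmetic that binaryOp performs (illustrative values):

    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/Instruction.h"

    llvm::ConstantRange L(llvm::APInt(32, 0), llvm::APInt(32, 4)); // [0, 4)
    llvm::ConstantRange R(llvm::APInt(32, 1), llvm::APInt(32, 3)); // [1, 3)
    llvm::ConstantRange S = L.binaryOp(llvm::Instruction::Add, R); // [1, 6)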
+
+ bool calculateCmpInst(Attributor &A, CmpInst *CmpI, IntegerRangeState &T,
+ Instruction *CtxI) {
+ Value *LHS = CmpI->getOperand(0);
+ Value *RHS = CmpI->getOperand(1);
+
+ auto &LHSAA =
+ A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS));
+ auto &RHSAA =
+ A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS));
+
+ auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
+ auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI);
+
+ // If one of them is the empty set, we cannot decide.
+ if (LHSAARange.isEmptySet() || RHSAARange.isEmptySet())
+ return true;
+
+ bool MustTrue = false, MustFalse = false;
+
+ auto AllowedRegion =
+ ConstantRange::makeAllowedICmpRegion(CmpI->getPredicate(), RHSAARange);
+
+ auto SatisfyingRegion = ConstantRange::makeSatisfyingICmpRegion(
+ CmpI->getPredicate(), RHSAARange);
+
+ if (AllowedRegion.intersectWith(LHSAARange).isEmptySet())
+ MustFalse = true;
+
+ if (SatisfyingRegion.contains(LHSAARange))
+ MustTrue = true;
+
+ assert((!MustTrue || !MustFalse) &&
+ "Either MustTrue or MustFalse should be false!");
+
+ if (MustTrue)
+ T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 1)));
+ else if (MustFalse)
+ T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 0)));
+ else
+ T.unionAssumed(ConstantRange(/* BitWidth */ 1, /* isFullSet */ true));
+
+ LLVM_DEBUG(dbgs() << "[AAValueConstantRange] " << *CmpI << " " << LHSAA
+ << " " << RHSAA << "\n");
+
+ // TODO: Track a known state too.
+ return T.isValidState();
+ }
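A worked instance of the region test for a comparison that must be true (illustrative ranges):

    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/InstrTypes.h"

    // LHS in [0, 5), RHS in [10, 20): "icmp ult LHS, RHS" always holds.
    llvm::ConstantRange LHS(llvm::APInt(32, 0), llvm::APInt(32, 5));
    llvm::ConstantRange RHS(llvm::APInt(32, 10), llvm::APInt(32, 20));
    auto Satisfying = llvm::ConstantRange::makeSatisfyingICmpRegion(
        llvm::CmpInst::ICMP_ULT, RHS);        // here: [0, 10)
    bool MustTrue = Satisfying.contains(LHS); // true -> assumed range {1}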
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ Instruction *CtxI = getCtxI();
+ auto VisitValueCB = [&](Value &V, IntegerRangeState &T,
+ bool Stripped) -> bool {
+ Instruction *I = dyn_cast<Instruction>(&V);
+ if (!I) {
+
+ // If the value is not an instruction, we query the Attributor for an AA.
+ const auto &AA =
+ A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(V));
+
+ // The clamp operator is not used here so we can utilize the program
+ // point CtxI.
+ T.unionAssumed(AA.getAssumedConstantRange(A, CtxI));
+
+ return T.isValidState();
+ }
+
+ if (auto *BinOp = dyn_cast<BinaryOperator>(I))
+ return calculateBinaryOperator(A, BinOp, T, CtxI);
+ else if (auto *CmpI = dyn_cast<CmpInst>(I))
+ return calculateCmpInst(A, CmpI, T, CtxI);
+ else {
+ // Give up with other instructions.
+ // TODO: Add other instructions
+ T.indicatePessimisticFixpoint();
+ return false;
+ }
+ };
+
+ IntegerRangeState T(getBitWidth());
+
+ if (!genericValueTraversal<AAValueConstantRange, IntegerRangeState>(
+ A, getIRPosition(), *this, T, VisitValueCB))
+ return indicatePessimisticFixpoint();
+
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(value_range)
+ }
+};
+
+struct AAValueConstantRangeFunction : AAValueConstantRangeImpl {
+ AAValueConstantRangeFunction(const IRPosition &IRP)
+ : AAValueConstantRangeImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("AAValueConstantRange(Function|CallSite)::updateImpl will "
+ "not be called");
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(value_range) }
+};
+
+struct AAValueConstantRangeCallSite : AAValueConstantRangeFunction {
+ AAValueConstantRangeCallSite(const IRPosition &IRP)
+ : AAValueConstantRangeFunction(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(value_range) }
+};
+
+struct AAValueConstantRangeCallSiteReturned : AAValueConstantRangeReturned {
+ AAValueConstantRangeCallSiteReturned(const IRPosition &IRP)
+ : AAValueConstantRangeReturned(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // If it is a call instruction with range metadata, use the metadata.
+ if (CallInst *CI = dyn_cast<CallInst>(&getAssociatedValue()))
+ if (auto *RangeMD = CI->getMetadata(LLVMContext::MD_range))
+ intersectKnown(getConstantRangeFromMetadata(*RangeMD));
+
+ AAValueConstantRangeReturned::initialize(A);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(value_range)
+ }
+};
+
+struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating {
+ AAValueConstantRangeCallSiteArgument(const IRPosition &IRP)
+ : AAValueConstantRangeFloating(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(value_range)
+ }
+};
/// ----------------------------------------------------------------------------
/// Attributor
/// ----------------------------------------------------------------------------
@@ -4234,6 +5568,8 @@ bool Attributor::isAssumedDead(const AbstractAttribute &AA,
if (!CtxI)
return false;
+  // TODO: Find a good way to utilize fine- and coarse-grained liveness
+  // information.
if (!LivenessAA)
LivenessAA =
&getAAFor<AAIsDead>(AA, IRPosition::function(*CtxI->getFunction()),
@@ -4247,7 +5583,58 @@ bool Attributor::isAssumedDead(const AbstractAttribute &AA,
return false;
// We actually used liveness information so we have to record a dependence.
- recordDependence(*LivenessAA, AA);
+ recordDependence(*LivenessAA, AA, DepClassTy::OPTIONAL);
+
+ return true;
+}
+
+bool Attributor::checkForAllUses(
+ const function_ref<bool(const Use &, bool &)> &Pred,
+ const AbstractAttribute &QueryingAA, const Value &V) {
+ const IRPosition &IRP = QueryingAA.getIRPosition();
+ SmallVector<const Use *, 16> Worklist;
+ SmallPtrSet<const Use *, 16> Visited;
+
+ for (const Use &U : V.uses())
+ Worklist.push_back(&U);
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Got " << Worklist.size()
+ << " initial uses to check\n");
+
+ if (Worklist.empty())
+ return true;
+
+ bool AnyDead = false;
+ const Function *ScopeFn = IRP.getAnchorScope();
+ const auto *LivenessAA =
+ ScopeFn ? &getAAFor<AAIsDead>(QueryingAA, IRPosition::function(*ScopeFn),
+ /* TrackDependence */ false)
+ : nullptr;
+
+ while (!Worklist.empty()) {
+ const Use *U = Worklist.pop_back_val();
+ if (!Visited.insert(U).second)
+ continue;
+ LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << "\n");
+ if (Instruction *UserI = dyn_cast<Instruction>(U->getUser()))
+ if (LivenessAA && LivenessAA->isAssumedDead(UserI)) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Dead user: " << *UserI << ": "
+ << *LivenessAA << "\n");
+ AnyDead = true;
+ continue;
+ }
+
+ bool Follow = false;
+ if (!Pred(*U, Follow))
+ return false;
+ if (!Follow)
+ continue;
+ for (const Use &UU : U->getUser()->uses())
+ Worklist.push_back(&UU);
+ }
+
+ if (AnyDead)
+ recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
}
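A caller steers this traversal through the predicate's second parameter:
setting Follow to true asks checkForAllUses() to also visit the transitive
uses of the current user. A minimal sketch of a typical predicate (the lambda
and the final call are illustrative, not part of the patch):

    auto UsePred = [](const Use &U, bool &Follow) -> bool {
      const User *UserI = U.getUser();
      // Look through casts and GEPs by following their uses as well.
      if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI)) {
        Follow = true;
        return true;
      }
      // Accept loads; any other user aborts the walk by returning false.
      return isa<LoadInst>(UserI);
    };
    // bool AllUsesOK = A.checkForAllUses(UsePred, QueryingAA, V);

Uses whose user is assumed dead are skipped without consulting the predicate,
and a dependence on the liveness AA is recorded only if that actually
happened.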
@@ -4284,10 +5671,12 @@ bool Attributor::checkForAllCallSites(
for (const Use &U : Fn.uses()) {
AbstractCallSite ACS(&U);
if (!ACS) {
- LLVM_DEBUG(dbgs() << "[Attributor] Function "
- << Fn.getName()
+ LLVM_DEBUG(dbgs() << "[Attributor] Function " << Fn.getName()
<< " has non call site use " << *U.get() << " in "
<< *U.getUser() << "\n");
+ // BlockAddress users are allowed.
+ if (isa<BlockAddress>(U.getUser()))
+ continue;
return false;
}
@@ -4296,14 +5685,14 @@ bool Attributor::checkForAllCallSites(
const auto *LivenessAA =
lookupAAFor<AAIsDead>(IRPosition::function(*Caller), QueryingAA,
- /* TrackDependence */ false);
+ /* TrackDependence */ false);
// Skip dead calls.
if (LivenessAA && LivenessAA->isAssumedDead(I)) {
// We actually used liveness information so we have to record a
// dependence.
if (QueryingAA)
- recordDependence(*LivenessAA, *QueryingAA);
+ recordDependence(*LivenessAA, *QueryingAA, DepClassTy::OPTIONAL);
continue;
}
@@ -4313,8 +5702,7 @@ bool Attributor::checkForAllCallSites(
if (!RequireAllCallSites)
continue;
LLVM_DEBUG(dbgs() << "[Attributor] User " << EffectiveUse->getUser()
- << " is an invalid use of "
- << Fn.getName() << "\n");
+ << " is an invalid use of " << Fn.getName() << "\n");
return false;
}
@@ -4417,7 +5805,7 @@ bool Attributor::checkForAllInstructions(
   // If we actually used liveness information, we have to record a dependence.
if (AnyDead)
- recordDependence(LivenessAA, QueryingAA);
+ recordDependence(LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
}
@@ -4451,7 +5839,7 @@ bool Attributor::checkForAllReadWriteInstructions(
   // If we actually used liveness information, we have to record a dependence.
if (AnyDead)
- recordDependence(LivenessAA, QueryingAA);
+ recordDependence(LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
}
@@ -4467,7 +5855,7 @@ ChangeStatus Attributor::run(Module &M) {
unsigned IterationCounter = 1;
SmallVector<AbstractAttribute *, 64> ChangedAAs;
- SetVector<AbstractAttribute *> Worklist;
+ SetVector<AbstractAttribute *> Worklist, InvalidAAs;
Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end());
bool RecomputeDependences = false;
@@ -4478,6 +5866,29 @@ ChangeStatus Attributor::run(Module &M) {
LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter
<< ", Worklist size: " << Worklist.size() << "\n");
+ // For invalid AAs we can fix dependent AAs that have a required dependence,
+ // thereby folding long dependence chains in a single step without the need
+ // to run updates.
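+  // For example, if B recorded a REQUIRED dependence on the now invalid C,
+  // B is moved to a pessimistic fixpoint below without being updated; if
+  // that renders B invalid as well, B is appended to InvalidAAs and its own
+  // required dependents are folded in a later round of this loop.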
+ for (unsigned u = 0; u < InvalidAAs.size(); ++u) {
+ AbstractAttribute *InvalidAA = InvalidAAs[u];
+ auto &QuerriedAAs = QueryMap[InvalidAA];
+ LLVM_DEBUG(dbgs() << "[Attributor] InvalidAA: " << *InvalidAA << " has "
+ << QuerriedAAs.RequiredAAs.size() << "/"
+ << QuerriedAAs.OptionalAAs.size()
+ << " required/optional dependences\n");
+ for (AbstractAttribute *DepOnInvalidAA : QuerriedAAs.RequiredAAs) {
+ AbstractState &DOIAAState = DepOnInvalidAA->getState();
+ DOIAAState.indicatePessimisticFixpoint();
+ ++NumAttributesFixedDueToRequiredDependences;
+ assert(DOIAAState.isAtFixpoint() && "Expected fixpoint state!");
+ if (!DOIAAState.isValidState())
+ InvalidAAs.insert(DepOnInvalidAA);
+ }
+ if (!RecomputeDependences)
+ Worklist.insert(QuerriedAAs.OptionalAAs.begin(),
+ QuerriedAAs.OptionalAAs.end());
+ }
+
// If dependences (=QueryMap) are recomputed we have to look at all abstract
// attributes again, regardless of what changed in the last iteration.
if (RecomputeDependences) {
@@ -4493,22 +5904,35 @@ ChangeStatus Attributor::run(Module &M) {
// changed to the work list.
for (AbstractAttribute *ChangedAA : ChangedAAs) {
auto &QuerriedAAs = QueryMap[ChangedAA];
- Worklist.insert(QuerriedAAs.begin(), QuerriedAAs.end());
+ Worklist.insert(QuerriedAAs.OptionalAAs.begin(),
+ QuerriedAAs.OptionalAAs.end());
+ Worklist.insert(QuerriedAAs.RequiredAAs.begin(),
+ QuerriedAAs.RequiredAAs.end());
}
LLVM_DEBUG(dbgs() << "[Attributor] #Iteration: " << IterationCounter
<< ", Worklist+Dependent size: " << Worklist.size()
<< "\n");
- // Reset the changed set.
+  // Reset the changed and invalid sets.
ChangedAAs.clear();
+ InvalidAAs.clear();
   // Update all abstract attributes in the work list and record the ones that
// changed.
for (AbstractAttribute *AA : Worklist)
- if (!isAssumedDead(*AA, nullptr))
- if (AA->update(*this) == ChangeStatus::CHANGED)
+ if (!AA->getState().isAtFixpoint() && !isAssumedDead(*AA, nullptr)) {
+ QueriedNonFixAA = false;
+ if (AA->update(*this) == ChangeStatus::CHANGED) {
ChangedAAs.push_back(AA);
+ if (!AA->getState().isValidState())
+ InvalidAAs.insert(AA);
+ } else if (!QueriedNonFixAA) {
+ // If the attribute did not query any non-fix information, the state
+ // will not change and we can indicate that right away.
+ AA->getState().indicateOptimisticFixpoint();
+ }
+ }
// Check if we recompute the dependences in the next iteration.
RecomputeDependences = (DepRecomputeInterval > 0 &&
@@ -4552,7 +5976,10 @@ ChangeStatus Attributor::run(Module &M) {
}
auto &QuerriedAAs = QueryMap[ChangedAA];
- ChangedAAs.append(QuerriedAAs.begin(), QuerriedAAs.end());
+ ChangedAAs.append(QuerriedAAs.OptionalAAs.begin(),
+ QuerriedAAs.OptionalAAs.end());
+ ChangedAAs.append(QuerriedAAs.RequiredAAs.begin(),
+ QuerriedAAs.RequiredAAs.end());
}
LLVM_DEBUG({
@@ -4611,27 +6038,85 @@ ChangeStatus Attributor::run(Module &M) {
LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least "
<< ToBeDeletedFunctions.size() << " functions and "
<< ToBeDeletedBlocks.size() << " blocks and "
- << ToBeDeletedInsts.size() << " instructions\n");
+ << ToBeDeletedInsts.size() << " instructions and "
+ << ToBeChangedUses.size() << " uses\n");
+
+ SmallVector<Instruction *, 32> DeadInsts;
+ SmallVector<Instruction *, 32> TerminatorsToFold;
+
+ for (auto &It : ToBeChangedUses) {
+ Use *U = It.first;
+ Value *NewV = It.second;
+ Value *OldV = U->get();
+ LLVM_DEBUG(dbgs() << "Use " << *NewV << " in " << *U->getUser()
+ << " instead of " << *OldV << "\n");
+ U->set(NewV);
+ if (Instruction *I = dyn_cast<Instruction>(OldV))
+ if (!isa<PHINode>(I) && !ToBeDeletedInsts.count(I) &&
+ isInstructionTriviallyDead(I)) {
+ DeadInsts.push_back(I);
+ }
+ if (isa<Constant>(NewV) && isa<BranchInst>(U->getUser())) {
+ Instruction *UserI = cast<Instruction>(U->getUser());
+ if (isa<UndefValue>(NewV)) {
+ ToBeChangedToUnreachableInsts.insert(UserI);
+ } else {
+ TerminatorsToFold.push_back(UserI);
+ }
+ }
+ }
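+  // Replace invokes with dead successors: if the unwind destination is dead
+  // and asynchronous exceptions cannot be caught, the invoke is demoted to a
+  // call; a dead normal destination becomes unreachable, after splitting the
+  // block if it has additional predecessors.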
+ for (auto &V : InvokeWithDeadSuccessor)
+ if (InvokeInst *II = dyn_cast_or_null<InvokeInst>(V)) {
+ bool UnwindBBIsDead = II->hasFnAttr(Attribute::NoUnwind);
+ bool NormalBBIsDead = II->hasFnAttr(Attribute::NoReturn);
+ bool Invoke2CallAllowed =
+ !AAIsDeadFunction::mayCatchAsynchronousExceptions(
+ *II->getFunction());
+ assert((UnwindBBIsDead || NormalBBIsDead) &&
+ "Invoke does not have dead successors!");
+ BasicBlock *BB = II->getParent();
+ BasicBlock *NormalDestBB = II->getNormalDest();
+ if (UnwindBBIsDead) {
+ Instruction *NormalNextIP = &NormalDestBB->front();
+ if (Invoke2CallAllowed) {
+ changeToCall(II);
+ NormalNextIP = BB->getTerminator();
+ }
+ if (NormalBBIsDead)
+ ToBeChangedToUnreachableInsts.insert(NormalNextIP);
+ } else {
+ assert(NormalBBIsDead && "Broken invariant!");
+ if (!NormalDestBB->getUniquePredecessor())
+ NormalDestBB = SplitBlockPredecessors(NormalDestBB, {BB}, ".dead");
+ ToBeChangedToUnreachableInsts.insert(&NormalDestBB->front());
+ }
+ }
+ for (auto &V : ToBeChangedToUnreachableInsts)
+ if (Instruction *I = dyn_cast_or_null<Instruction>(V))
+ changeToUnreachable(I, /* UseLLVMTrap */ false);
+ for (Instruction *I : TerminatorsToFold)
+ ConstantFoldTerminator(I->getParent());
+
for (Instruction *I : ToBeDeletedInsts) {
- if (!I->use_empty())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- I->eraseFromParent();
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ if (!isa<PHINode>(I) && isInstructionTriviallyDead(I))
+ DeadInsts.push_back(I);
+ else
+ I->eraseFromParent();
}
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+
if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) {
SmallVector<BasicBlock *, 8> ToBeDeletedBBs;
ToBeDeletedBBs.reserve(NumDeadBlocks);
ToBeDeletedBBs.append(ToBeDeletedBlocks.begin(), ToBeDeletedBlocks.end());
- DeleteDeadBlocks(ToBeDeletedBBs);
- STATS_DECLTRACK(AAIsDead, BasicBlock,
- "Number of dead basic blocks deleted.");
- }
-
- STATS_DECL(AAIsDead, Function, "Number of dead functions deleted.");
- for (Function *Fn : ToBeDeletedFunctions) {
- Fn->replaceAllUsesWith(UndefValue::get(Fn->getType()));
- Fn->eraseFromParent();
- STATS_TRACK(AAIsDead, Function);
+    // We do not actually delete the blocks; we squash them into a single
+    // unreachable instruction. Untangling the branches that jump here is
+    // something we still need to do in a more generic way.
+ DetatchDeadBlocks(ToBeDeletedBBs, nullptr);
+ STATS_DECL(AAIsDead, BasicBlock, "Number of dead basic blocks deleted.");
+ BUILD_STAT_NAME(AAIsDead, BasicBlock) += ToBeDeletedBlocks.size();
}
// Identify dead internal functions and delete them. This happens outside
@@ -4651,22 +6136,33 @@ ChangeStatus Attributor::run(Module &M) {
if (!F)
continue;
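+    // An internal function is considered dead if every call site lives in a
+    // function that is itself scheduled for deletion.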
- const auto *LivenessAA =
- lookupAAFor<AAIsDead>(IRPosition::function(*F));
- if (LivenessAA &&
- !checkForAllCallSites([](AbstractCallSite ACS) { return false; },
- *LivenessAA, true))
+ if (!checkForAllCallSites(
+ [this](AbstractCallSite ACS) {
+ return ToBeDeletedFunctions.count(
+ ACS.getInstruction()->getFunction());
+ },
+ *F, true, nullptr))
continue;
- STATS_TRACK(AAIsDead, Function);
- F->replaceAllUsesWith(UndefValue::get(F->getType()));
- F->eraseFromParent();
+ ToBeDeletedFunctions.insert(F);
InternalFns[u] = nullptr;
FoundDeadFn = true;
}
}
}
+ STATS_DECL(AAIsDead, Function, "Number of dead functions deleted.");
+ BUILD_STAT_NAME(AAIsDead, Function) += ToBeDeletedFunctions.size();
+
+ // Rewrite the functions as requested during manifest.
+ ManifestChange = ManifestChange | rewriteFunctionSignatures();
+
+ for (Function *Fn : ToBeDeletedFunctions) {
+ Fn->deleteBody();
+ Fn->replaceAllUsesWith(UndefValue::get(Fn->getType()));
+ Fn->eraseFromParent();
+ }
+
if (VerifyMaxFixpointIterations &&
IterationCounter != MaxFixpointIterations) {
errs() << "\n[Attributor] Fixpoint iteration done after: "
@@ -4679,6 +6175,252 @@ ChangeStatus Attributor::run(Module &M) {
return ManifestChange;
}
+bool Attributor::registerFunctionSignatureRewrite(
+ Argument &Arg, ArrayRef<Type *> ReplacementTypes,
+ ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
+ ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB) {
+
+ auto CallSiteCanBeChanged = [](AbstractCallSite ACS) {
+ // Forbid must-tail calls for now.
+ return !ACS.isCallbackCall() && !ACS.getCallSite().isMustTailCall();
+ };
+
+ Function *Fn = Arg.getParent();
+ // Avoid var-arg functions for now.
+ if (Fn->isVarArg()) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite var-args functions\n");
+ return false;
+ }
+
+ // Avoid functions with complicated argument passing semantics.
+ AttributeList FnAttributeList = Fn->getAttributes();
+ if (FnAttributeList.hasAttrSomewhere(Attribute::Nest) ||
+ FnAttributeList.hasAttrSomewhere(Attribute::StructRet) ||
+ FnAttributeList.hasAttrSomewhere(Attribute::InAlloca)) {
+ LLVM_DEBUG(
+ dbgs() << "[Attributor] Cannot rewrite due to complex attribute\n");
+ return false;
+ }
+
+ // Avoid callbacks for now.
+ if (!checkForAllCallSites(CallSiteCanBeChanged, *Fn, true, nullptr)) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite all call sites\n");
+ return false;
+ }
+
+ auto InstPred = [](Instruction &I) {
+ if (auto *CI = dyn_cast<CallInst>(&I))
+ return !CI->isMustTailCall();
+ return true;
+ };
+
+ // Forbid must-tail calls for now.
+  // TODO: Allow must-tail calls.
+  bool AnyDead = false;
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn);
+ if (!checkForAllInstructionsImpl(OpcodeInstMap, InstPred, nullptr, AnyDead,
+ {Instruction::Call})) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite due to instructions\n");
+ return false;
+ }
+
+ SmallVectorImpl<ArgumentReplacementInfo *> &ARIs = ArgumentReplacementMap[Fn];
+ if (ARIs.size() == 0)
+ ARIs.resize(Fn->arg_size());
+
+  // If there already is a replacement with at most as many new arguments,
+  // ignore this request.
+ ArgumentReplacementInfo *&ARI = ARIs[Arg.getArgNo()];
+ if (ARI && ARI->getNumReplacementArgs() <= ReplacementTypes.size()) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Existing rewrite is preferred\n");
+ return false;
+ }
+
+ // If we have a replacement already but we like the new one better, delete
+ // the old.
+ if (ARI)
+ delete ARI;
+
+ // Remember the replacement.
+ ARI = new ArgumentReplacementInfo(*this, Arg, ReplacementTypes,
+ std::move(CalleeRepairCB),
+ std::move(ACSRepairCB));
+
+ return true;
+}
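A pass requests a rewrite by handing over the replacement types together with
two repair callbacks. A minimal sketch, assuming the ArgumentReplacementInfo
callback typedefs nested in the Attributor and replacing one argument with a
single i64 (the helper and the undef operand are purely illustrative):

    static bool requestInt64Rewrite(Attributor &A, Argument &Arg) {
      Type *I64Ty = Type::getInt64Ty(Arg.getContext());

      // Invoked once on the new function; wire the replacement argument,
      // starting at NewArgIt, into the moved-over body here.
      Attributor::ArgumentReplacementInfo::CalleeRepairCBTy CalleeRepairCB =
          [](const Attributor::ArgumentReplacementInfo &ARI, Function &NewFn,
             Function::arg_iterator NewArgIt) {};

      // Invoked once per call site; push exactly one operand, matching the
      // one registered replacement type.
      Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB =
          [](const Attributor::ArgumentReplacementInfo &ARI,
             AbstractCallSite ACS, SmallVectorImpl<Value *> &NewArgOperands) {
            LLVMContext &Ctx = ACS.getInstruction()->getContext();
            NewArgOperands.push_back(UndefValue::get(Type::getInt64Ty(Ctx)));
          };

      return A.registerFunctionSignatureRewrite(
          Arg, {I64Ty}, std::move(CalleeRepairCB), std::move(ACSRepairCB));
    }

The actual rewiring then happens in rewriteFunctionSignatures() below, which
invokes these callbacks with the shapes visible at its call sites.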
+
+ChangeStatus Attributor::rewriteFunctionSignatures() {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ for (auto &It : ArgumentReplacementMap) {
+ Function *OldFn = It.getFirst();
+
+ // Deleted functions do not require rewrites.
+ if (ToBeDeletedFunctions.count(OldFn))
+ continue;
+
+ const SmallVectorImpl<ArgumentReplacementInfo *> &ARIs = It.getSecond();
+ assert(ARIs.size() == OldFn->arg_size() && "Inconsistent state!");
+
+ SmallVector<Type *, 16> NewArgumentTypes;
+ SmallVector<AttributeSet, 16> NewArgumentAttributes;
+
+ // Collect replacement argument types and copy over existing attributes.
+ AttributeList OldFnAttributeList = OldFn->getAttributes();
+ for (Argument &Arg : OldFn->args()) {
+ if (ArgumentReplacementInfo *ARI = ARIs[Arg.getArgNo()]) {
+ NewArgumentTypes.append(ARI->ReplacementTypes.begin(),
+ ARI->ReplacementTypes.end());
+ NewArgumentAttributes.append(ARI->getNumReplacementArgs(),
+ AttributeSet());
+ } else {
+ NewArgumentTypes.push_back(Arg.getType());
+ NewArgumentAttributes.push_back(
+ OldFnAttributeList.getParamAttributes(Arg.getArgNo()));
+ }
+ }
+
+ FunctionType *OldFnTy = OldFn->getFunctionType();
+ Type *RetTy = OldFnTy->getReturnType();
+
+    // Construct the new function type using the new argument types.
+ FunctionType *NewFnTy =
+ FunctionType::get(RetTy, NewArgumentTypes, OldFnTy->isVarArg());
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Function rewrite '" << OldFn->getName()
+ << "' from " << *OldFn->getFunctionType() << " to "
+ << *NewFnTy << "\n");
+
+ // Create the new function body and insert it into the module.
+ Function *NewFn = Function::Create(NewFnTy, OldFn->getLinkage(),
+ OldFn->getAddressSpace(), "");
+ OldFn->getParent()->getFunctionList().insert(OldFn->getIterator(), NewFn);
+ NewFn->takeName(OldFn);
+ NewFn->copyAttributesFrom(OldFn);
+
+ // Patch the pointer to LLVM function in debug info descriptor.
+ NewFn->setSubprogram(OldFn->getSubprogram());
+ OldFn->setSubprogram(nullptr);
+
+ // Recompute the parameter attributes list based on the new arguments for
+ // the function.
+ LLVMContext &Ctx = OldFn->getContext();
+ NewFn->setAttributes(AttributeList::get(
+ Ctx, OldFnAttributeList.getFnAttributes(),
+ OldFnAttributeList.getRetAttributes(), NewArgumentAttributes));
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NewFn->getBasicBlockList().splice(NewFn->begin(),
+ OldFn->getBasicBlockList());
+
+ // Set of all "call-like" instructions that invoke the old function mapped
+ // to their new replacements.
+ SmallVector<std::pair<CallBase *, CallBase *>, 8> CallSitePairs;
+
+ // Callback to create a new "call-like" instruction for a given one.
+ auto CallSiteReplacementCreator = [&](AbstractCallSite ACS) {
+ CallBase *OldCB = cast<CallBase>(ACS.getInstruction());
+ const AttributeList &OldCallAttributeList = OldCB->getAttributes();
+
+ // Collect the new argument operands for the replacement call site.
+ SmallVector<Value *, 16> NewArgOperands;
+ SmallVector<AttributeSet, 16> NewArgOperandAttributes;
+ for (unsigned OldArgNum = 0; OldArgNum < ARIs.size(); ++OldArgNum) {
+ unsigned NewFirstArgNum = NewArgOperands.size();
+ (void)NewFirstArgNum; // only used inside assert.
+ if (ArgumentReplacementInfo *ARI = ARIs[OldArgNum]) {
+ if (ARI->ACSRepairCB)
+ ARI->ACSRepairCB(*ARI, ACS, NewArgOperands);
+ assert(ARI->getNumReplacementArgs() + NewFirstArgNum ==
+ NewArgOperands.size() &&
+                 "ACS repair callback did not provide as many operands as "
+                 "new types were registered!");
+          // TODO: Expose the attribute set to the ACS repair callback.
+ NewArgOperandAttributes.append(ARI->ReplacementTypes.size(),
+ AttributeSet());
+ } else {
+ NewArgOperands.push_back(ACS.getCallArgOperand(OldArgNum));
+ NewArgOperandAttributes.push_back(
+ OldCallAttributeList.getParamAttributes(OldArgNum));
+ }
+ }
+
+ assert(NewArgOperands.size() == NewArgOperandAttributes.size() &&
+ "Mismatch # argument operands vs. # argument operand attributes!");
+ assert(NewArgOperands.size() == NewFn->arg_size() &&
+ "Mismatch # argument operands vs. # function arguments!");
+
+ SmallVector<OperandBundleDef, 4> OperandBundleDefs;
+ OldCB->getOperandBundlesAsDefs(OperandBundleDefs);
+
+ // Create a new call or invoke instruction to replace the old one.
+ CallBase *NewCB;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(OldCB)) {
+ NewCB =
+ InvokeInst::Create(NewFn, II->getNormalDest(), II->getUnwindDest(),
+ NewArgOperands, OperandBundleDefs, "", OldCB);
+ } else {
+ auto *NewCI = CallInst::Create(NewFn, NewArgOperands, OperandBundleDefs,
+ "", OldCB);
+ NewCI->setTailCallKind(cast<CallInst>(OldCB)->getTailCallKind());
+ NewCB = NewCI;
+ }
+
+ // Copy over various properties and the new attributes.
+ uint64_t W;
+ if (OldCB->extractProfTotalWeight(W))
+ NewCB->setProfWeight(W);
+ NewCB->setCallingConv(OldCB->getCallingConv());
+ NewCB->setDebugLoc(OldCB->getDebugLoc());
+ NewCB->takeName(OldCB);
+ NewCB->setAttributes(AttributeList::get(
+ Ctx, OldCallAttributeList.getFnAttributes(),
+ OldCallAttributeList.getRetAttributes(), NewArgOperandAttributes));
+
+ CallSitePairs.push_back({OldCB, NewCB});
+ return true;
+ };
+
+ // Use the CallSiteReplacementCreator to create replacement call sites.
+ bool Success =
+ checkForAllCallSites(CallSiteReplacementCreator, *OldFn, true, nullptr);
+ (void)Success;
+ assert(Success && "Assumed call site replacement to succeed!");
+
+ // Rewire the arguments.
+ auto OldFnArgIt = OldFn->arg_begin();
+ auto NewFnArgIt = NewFn->arg_begin();
+ for (unsigned OldArgNum = 0; OldArgNum < ARIs.size();
+ ++OldArgNum, ++OldFnArgIt) {
+ if (ArgumentReplacementInfo *ARI = ARIs[OldArgNum]) {
+ if (ARI->CalleeRepairCB)
+ ARI->CalleeRepairCB(*ARI, *NewFn, NewFnArgIt);
+ NewFnArgIt += ARI->ReplacementTypes.size();
+ } else {
+ NewFnArgIt->takeName(&*OldFnArgIt);
+ OldFnArgIt->replaceAllUsesWith(&*NewFnArgIt);
+ ++NewFnArgIt;
+ }
+ }
+
+    // Eliminate the instructions *after* we have visited all of them.
+ for (auto &CallSitePair : CallSitePairs) {
+ CallBase &OldCB = *CallSitePair.first;
+ CallBase &NewCB = *CallSitePair.second;
+ OldCB.replaceAllUsesWith(&NewCB);
+ OldCB.eraseFromParent();
+ }
+
+ ToBeDeletedFunctions.insert(OldFn);
+
+ Changed = ChangeStatus::CHANGED;
+ }
+
+ return Changed;
+}
+
void Attributor::initializeInformationCache(Function &F) {
// Walk all instructions to find interesting instructions that might be
@@ -4710,6 +6452,9 @@ void Attributor::initializeInformationCache(Function &F) {
case Instruction::Invoke:
case Instruction::CleanupRet:
case Instruction::CatchSwitch:
+ case Instruction::AtomicRMW:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::Br:
case Instruction::Resume:
case Instruction::Ret:
IsInterestingOpcode = true;
@@ -4721,9 +6466,26 @@ void Attributor::initializeInformationCache(Function &F) {
}
}
+void Attributor::recordDependence(const AbstractAttribute &FromAA,
+ const AbstractAttribute &ToAA,
+ DepClassTy DepClass) {
+ if (FromAA.getState().isAtFixpoint())
+ return;
+
+ if (DepClass == DepClassTy::REQUIRED)
+ QueryMap[&FromAA].RequiredAAs.insert(
+ const_cast<AbstractAttribute *>(&ToAA));
+ else
+ QueryMap[&FromAA].OptionalAAs.insert(
+ const_cast<AbstractAttribute *>(&ToAA));
+ QueriedNonFixAA = true;
+}
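The QueriedNonFixAA flag ties this into the optimistic-fixpoint shortcut in
run() above: an update that only ever queried attributes already at a
fixpoint leaves the flag false and is itself fixed right away. A sketch of
the contract (AAExample is hypothetical):

    // Inside some AAExample::updateImpl(Attributor &A):
    //   const auto &NoRecurseAA =
    //       A.getAAFor<AANoRecurse>(*this, getIRPosition());
    // If NoRecurseAA is already at a fixpoint, recordDependence() returns
    // early and QueriedNonFixAA stays false. If that holds for every query
    // made during the update, run() calls indicateOptimisticFixpoint() on
    // this attribute as soon as the update returns UNCHANGED.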
+
void Attributor::identifyDefaultAbstractAttributes(Function &F) {
if (!VisitedFunctions.insert(&F).second)
return;
+ if (F.isDeclaration())
+ return;
IRPosition FPos = IRPosition::function(F);
@@ -4735,6 +6497,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every function might be "will-return".
getOrCreateAAFor<AAWillReturn>(FPos);
+ // Every function might contain instructions that cause "undefined behavior".
+ getOrCreateAAFor<AAUndefinedBehavior>(FPos);
+
// Every function can be nounwind.
getOrCreateAAFor<AANoUnwind>(FPos);
@@ -4766,6 +6531,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
IRPosition RetPos = IRPosition::returned(F);
+ // Every returned value might be dead.
+ getOrCreateAAFor<AAIsDead>(RetPos);
+
   // Every returned value might be simplified.
getOrCreateAAFor<AAValueSimplify>(RetPos);
@@ -4811,16 +6579,41 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every argument with pointer type might be marked
// "readnone/readonly/writeonly/..."
getOrCreateAAFor<AAMemoryBehavior>(ArgPos);
+
+ // Every argument with pointer type might be marked nofree.
+ getOrCreateAAFor<AANoFree>(ArgPos);
}
}
auto CallSitePred = [&](Instruction &I) -> bool {
CallSite CS(&I);
- if (CS.getCalledFunction()) {
- for (int i = 0, e = CS.getCalledFunction()->arg_size(); i < e; i++) {
+ if (Function *Callee = CS.getCalledFunction()) {
+      // Skip declarations unless annotations on their call sites were
+      // explicitly requested.
+ if (!AnnotateDeclarationCallSites && Callee->isDeclaration() &&
+ !Callee->hasMetadata(LLVMContext::MD_callback))
+ return true;
+
+ if (!Callee->getReturnType()->isVoidTy() && !CS->use_empty()) {
+
+ IRPosition CSRetPos = IRPosition::callsite_returned(CS);
+
+ // Call site return values might be dead.
+ getOrCreateAAFor<AAIsDead>(CSRetPos);
+
+ // Call site return integer values might be limited by a constant range.
+ if (Callee->getReturnType()->isIntegerTy()) {
+ getOrCreateAAFor<AAValueConstantRange>(CSRetPos);
+ }
+ }
+
+ for (int i = 0, e = CS.getNumArgOperands(); i < e; i++) {
IRPosition CSArgPos = IRPosition::callsite_argument(CS, i);
+ // Every call site argument might be dead.
+ getOrCreateAAFor<AAIsDead>(CSArgPos);
+
// Call site argument might be simplified.
getOrCreateAAFor<AAValueSimplify>(CSArgPos);
@@ -4838,6 +6631,13 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Call site argument attribute "align".
getOrCreateAAFor<AAAlign>(CSArgPos);
+
+ // Call site argument attribute
+ // "readnone/readonly/writeonly/..."
+ getOrCreateAAFor<AAMemoryBehavior>(CSArgPos);
+
+ // Call site argument attribute "nofree".
+ getOrCreateAAFor<AANoFree>(CSArgPos);
}
}
return true;
@@ -4903,11 +6703,24 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) {
<< Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}";
}
-raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerState &S) {
+template <typename base_ty, base_ty BestState, base_ty WorstState>
+raw_ostream &
+llvm::operator<<(raw_ostream &OS,
+ const IntegerStateBase<base_ty, BestState, WorstState> &S) {
return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")"
<< static_cast<const AbstractState &>(S);
}
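+
+// Example rendering for a 32-bit range state known in [0,16) and assumed in
+// [0,4):  range-state(32)<[0,16) / [0,4)>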
+raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerRangeState &S) {
+ OS << "range-state(" << S.getBitWidth() << ")<";
+ S.getKnown().print(OS);
+ OS << " / ";
+ S.getAssumed().print(OS);
+ OS << ">";
+
+ return OS << static_cast<const AbstractState &>(S);
+}
+
raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractState &S) {
return OS << (!S.isValidState() ? "top" : (S.isAtFixpoint() ? "fix" : ""));
}
@@ -4963,7 +6776,9 @@ static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) {
A.identifyDefaultAbstractAttributes(F);
}
- return A.run(M) == ChangeStatus::CHANGED;
+ bool Changed = A.run(M) == ChangeStatus::CHANGED;
+ assert(!verifyModule(M, &errs()) && "Module verification failed!");
+ return Changed;
}
PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
@@ -5011,7 +6826,9 @@ const char AANoFree::ID = 0;
const char AANonNull::ID = 0;
const char AANoRecurse::ID = 0;
const char AAWillReturn::ID = 0;
+const char AAUndefinedBehavior::ID = 0;
const char AANoAlias::ID = 0;
+const char AAReachability::ID = 0;
const char AANoReturn::ID = 0;
const char AAIsDead::ID = 0;
const char AADereferenceable::ID = 0;
@@ -5020,6 +6837,7 @@ const char AANoCapture::ID = 0;
const char AAValueSimplify::ID = 0;
const char AAHeapToStack::ID = 0;
const char AAMemoryBehavior::ID = 0;
+const char AAValueConstantRange::ID = 0;
// Macro magic to create the static generator function for attributes that
// follow the naming scheme.
@@ -5115,11 +6933,9 @@ const char AAMemoryBehavior::ID = 0;
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync)
-CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn)
-CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
@@ -5127,10 +6943,15 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
+CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
+CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
+CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReachability)
+CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior)
CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
index 6b68aa90c567..b49a92ad16b3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -17,6 +17,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/BlockExtractor.cpp
index de80c88c1591..aec470ffadc4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/BlockExtractor.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index 20cb3213628e..f28a399b1779 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -21,6 +21,8 @@
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 3cf839e397f8..ea1278aa108f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/IPO.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
index e20159ba0db5..2fe9a59ad210 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 968a13110b16..61d519d8ae88 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index fc52db562c62..7f138d206fac 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index b38cb6d0ed3f..2cb184e8d4f4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -11,6 +11,8 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index b174c63a577b..b6d0b2e35694 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -48,6 +48,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 3f5cc078d75f..be0446a946ec 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IRReader/IRReader.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
@@ -231,7 +232,8 @@ selectCallee(const ModuleSummaryIndex &Index,
return false;
}
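+  // Functions with the AlwaysInline summary flag are exempt from the size
+  // threshold so that cross-module calls to them can still be inlined.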
- if (Summary->instCount() > Threshold) {
+ if ((Summary->instCount() > Threshold) &&
+ !Summary->fflags().AlwaysInline) {
Reason = FunctionImporter::ImportFailureReason::TooLarge;
return false;
}
@@ -280,7 +282,8 @@ updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) {
}
static void computeImportForReferencedGlobals(
- const FunctionSummary &Summary, const GVSummaryMapTy &DefinedGVSummaries,
+ const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
+ const GVSummaryMapTy &DefinedGVSummaries,
FunctionImporter::ImportMapTy &ImportList,
StringMap<FunctionImporter::ExportSetTy> *ExportLists) {
for (auto &VI : Summary.refs()) {
@@ -303,16 +306,28 @@ static void computeImportForReferencedGlobals(
RefSummary->modulePath() != Summary.modulePath();
};
+ auto MarkExported = [&](const ValueInfo &VI, const GlobalValueSummary *S) {
+ if (ExportLists)
+ (*ExportLists)[S->modulePath()].insert(VI);
+ };
+
for (auto &RefSummary : VI.getSummaryList())
if (isa<GlobalVarSummary>(RefSummary.get()) &&
- canImportGlobalVar(RefSummary.get()) &&
+ Index.canImportGlobalVar(RefSummary.get(), /* AnalyzeRefs */ true) &&
!LocalNotInModule(RefSummary.get())) {
auto ILI = ImportList[RefSummary->modulePath()].insert(VI.getGUID());
// Only update stat if we haven't already imported this variable.
if (ILI.second)
NumImportedGlobalVarsThinLink++;
- if (ExportLists)
- (*ExportLists)[RefSummary->modulePath()].insert(VI.getGUID());
+ MarkExported(VI, RefSummary.get());
+      // Promote referenced functions and variables. We don't promote
+      // objects referenced by a writeonly variable's initializer, because
+      // we convert such initializers to "zeroinitializer".
+      // See processGlobalForThinLTO.
+ if (!Index.isWriteOnly(cast<GlobalVarSummary>(RefSummary.get())))
+ for (const auto &VI : RefSummary->refs())
+ for (const auto &RefFn : VI.getSummaryList())
+ MarkExported(VI, RefFn.get());
break;
}
}
@@ -351,8 +366,8 @@ static void computeImportForFunction(
FunctionImporter::ImportMapTy &ImportList,
StringMap<FunctionImporter::ExportSetTy> *ExportLists,
FunctionImporter::ImportThresholdsTy &ImportThresholds) {
- computeImportForReferencedGlobals(Summary, DefinedGVSummaries, ImportList,
- ExportLists);
+ computeImportForReferencedGlobals(Summary, Index, DefinedGVSummaries,
+ ImportList, ExportLists);
static int ImportCount = 0;
for (auto &Edge : Summary.calls()) {
ValueInfo VI = Edge.first;
@@ -462,7 +477,8 @@ static void computeImportForFunction(
CalleeSummary = CalleeSummary->getBaseObject();
ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
- assert(ResolvedCalleeSummary->instCount() <= NewThreshold &&
+ assert((ResolvedCalleeSummary->fflags().AlwaysInline ||
+ (ResolvedCalleeSummary->instCount() <= NewThreshold)) &&
"selectCallee() didn't honor the threshold");
auto ExportModulePath = ResolvedCalleeSummary->modulePath();
@@ -481,7 +497,7 @@ static void computeImportForFunction(
// Make exports in the source module.
if (ExportLists) {
auto &ExportList = (*ExportLists)[ExportModulePath];
- ExportList.insert(VI.getGUID());
+ ExportList.insert(VI);
if (!PreviouslyImported) {
// This is the first time this function was exported from its source
// module, so mark all functions and globals it references as exported
@@ -489,14 +505,11 @@ static void computeImportForFunction(
// For efficiency, we unconditionally add all the referenced GUIDs
// to the ExportList for this module, and will prune out any not
// defined in the module later in a single pass.
- for (auto &Edge : ResolvedCalleeSummary->calls()) {
- auto CalleeGUID = Edge.first.getGUID();
- ExportList.insert(CalleeGUID);
- }
- for (auto &Ref : ResolvedCalleeSummary->refs()) {
- auto GUID = Ref.getGUID();
- ExportList.insert(GUID);
- }
+ for (auto &Edge : ResolvedCalleeSummary->calls())
+ ExportList.insert(Edge.first);
+
+ for (auto &Ref : ResolvedCalleeSummary->refs())
+ ExportList.insert(Ref);
}
}
}
@@ -591,29 +604,64 @@ static void ComputeImportForModule(
}
#ifndef NDEBUG
+static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
+ auto SL = VI.getSummaryList();
+ return SL.empty()
+ ? false
+ : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
+}
+
static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
GlobalValue::GUID G) {
- if (const auto &VI = Index.getValueInfo(G)) {
- auto SL = VI.getSummaryList();
- if (!SL.empty())
- return SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
- }
+ if (const auto &VI = Index.getValueInfo(G))
+ return isGlobalVarSummary(Index, VI);
return false;
}
-static GlobalValue::GUID getGUID(GlobalValue::GUID G) { return G; }
-
template <class T>
static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index,
T &Cont) {
unsigned NumGVS = 0;
for (auto &V : Cont)
- if (isGlobalVarSummary(Index, getGUID(V)))
+ if (isGlobalVarSummary(Index, V))
++NumGVS;
return NumGVS;
}
#endif
+#ifndef NDEBUG
+static bool
+checkVariableImport(const ModuleSummaryIndex &Index,
+ StringMap<FunctionImporter::ImportMapTy> &ImportLists,
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
+
+ DenseSet<GlobalValue::GUID> FlattenedImports;
+
+ for (auto &ImportPerModule : ImportLists)
+ for (auto &ExportPerModule : ImportPerModule.second)
+ FlattenedImports.insert(ExportPerModule.second.begin(),
+ ExportPerModule.second.end());
+
+ // Checks that all GUIDs of read/writeonly vars we see in export lists
+  // are also in the import lists. Otherwise we may face linker undefs,
+ // because readonly and writeonly vars are internalized in their
+ // source modules.
+ auto IsReadOrWriteOnlyVar = [&](StringRef ModulePath, const ValueInfo &VI) {
+ auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
+ Index.findSummaryInModule(VI, ModulePath));
+ return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS));
+ };
+
+ for (auto &ExportPerModule : ExportLists)
+ for (auto &VI : ExportPerModule.second)
+ if (!FlattenedImports.count(VI.getGUID()) &&
+ IsReadOrWriteOnlyVar(ExportPerModule.first(), VI))
+ return false;
+
+ return true;
+}
+#endif
+
/// Compute all the import and export for every module using the Index.
void llvm::ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
@@ -639,13 +687,14 @@ void llvm::ComputeCrossModuleImport(
const auto &DefinedGVSummaries =
ModuleToDefinedGVSummaries.lookup(ELI.first());
for (auto EI = ELI.second.begin(); EI != ELI.second.end();) {
- if (!DefinedGVSummaries.count(*EI))
- EI = ELI.second.erase(EI);
+ if (!DefinedGVSummaries.count(EI->getGUID()))
+ ELI.second.erase(EI++);
else
++EI;
}
}
+ assert(checkVariableImport(Index, ImportLists, ExportLists));
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
<< " modules:\n");
@@ -852,18 +901,8 @@ void llvm::computeDeadSymbolsWithConstProp(
function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
bool ImportEnabled) {
computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
- if (ImportEnabled) {
+ if (ImportEnabled)
Index.propagateAttributes(GUIDPreservedSymbols);
- } else {
- // If import is disabled we should drop read/write-only attribute
- // from all summaries to prevent internalization.
- for (auto &P : Index)
- for (auto &S : P.second.SummaryList)
- if (auto *GVS = dyn_cast<GlobalVarSummary>(S.get())) {
- GVS->setReadOnly(false);
- GVS->setWriteOnly(false);
- }
- }
}
/// Compute the set of summaries needed for a ThinLTO backend compilation of
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index f010f7b703a6..72b8d7522f04 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -22,7 +22,9 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 819715b9f8da..0fd966457ece 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -25,7 +25,6 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -53,6 +52,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
@@ -65,6 +65,7 @@
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <utility>
@@ -432,6 +433,20 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
return true;
}
+static bool CanDoGlobalSRA(GlobalVariable *GV) {
+ Constant *Init = GV->getInitializer();
+
+ if (isa<StructType>(Init->getType())) {
+    // Nothing to check.
+ } else if (SequentialType *STy = dyn_cast<SequentialType>(Init->getType())) {
+ if (STy->getNumElements() > 16 && GV->hasNUsesOrMore(16))
+ return false; // It's not worth it.
+ } else
+ return false;
+
+ return GlobalUsersSafeToSRA(GV);
+}
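+
+// For example, for `@g = internal global { i32, i32 } zeroinitializer` whose
+// only users are GEPs of the form
+//   getelementptr ({ i32, i32 }, { i32, i32 }* @g, i32 0, i32 <Idx>)
+// SRAGlobal() below creates one i32 global (@g.0, @g.1, ...) per element
+// index that is actually used and redirects each GEP to the matching new
+// global.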
+
/// Copy over the debug info for a variable to its SRA replacements.
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
uint64_t FragmentOffsetInBits,
@@ -461,88 +476,94 @@ static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
/// insert so that the caller can reprocess it.
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Make sure this global only has simple uses that we can SRA.
- if (!GlobalUsersSafeToSRA(GV))
+ if (!CanDoGlobalSRA(GV))
return nullptr;
assert(GV->hasLocalLinkage());
Constant *Init = GV->getInitializer();
Type *Ty = Init->getType();
- std::vector<GlobalVariable *> NewGlobals;
- Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
+ std::map<unsigned, GlobalVariable *> NewGlobals;
// Get the alignment of the global, either explicit or target-specific.
unsigned StartAlignment = GV->getAlignment();
if (StartAlignment == 0)
StartAlignment = DL.getABITypeAlignment(GV->getType());
- if (StructType *STy = dyn_cast<StructType>(Ty)) {
- unsigned NumElements = STy->getNumElements();
- NewGlobals.reserve(NumElements);
- const StructLayout &Layout = *DL.getStructLayout(STy);
- for (unsigned i = 0, e = NumElements; i != e; ++i) {
- Constant *In = Init->getAggregateElement(i);
- assert(In && "Couldn't get element of initializer?");
- GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
- GlobalVariable::InternalLinkage,
- In, GV->getName()+"."+Twine(i),
- GV->getThreadLocalMode(),
- GV->getType()->getAddressSpace());
- NGV->setExternallyInitialized(GV->isExternallyInitialized());
- NGV->copyAttributesFrom(GV);
- Globals.push_back(NGV);
- NewGlobals.push_back(NGV);
+ // Loop over all users and create replacement variables for used aggregate
+ // elements.
+ for (User *GEP : GV->users()) {
+ assert(((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode() ==
+ Instruction::GetElementPtr) ||
+ isa<GetElementPtrInst>(GEP)) &&
+ "NonGEP CE's are not SRAable!");
+
+    // Operand 1 has to be zero, or else the program is quite broken
+    // (undefined). Operand 2 is the structure or array index.
+ unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
+ if (NewGlobals.count(ElementIdx) == 1)
+      continue; // we've already created a replacement variable
+ assert(NewGlobals.count(ElementIdx) == 0);
+
+ Type *ElTy = nullptr;
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ ElTy = STy->getElementType(ElementIdx);
+ else if (SequentialType *STy = dyn_cast<SequentialType>(Ty))
+ ElTy = STy->getElementType();
+ assert(ElTy);
+
+ Constant *In = Init->getAggregateElement(ElementIdx);
+ assert(In && "Couldn't get element of initializer?");
+
+ GlobalVariable *NGV = new GlobalVariable(
+ ElTy, false, GlobalVariable::InternalLinkage, In,
+ GV->getName() + "." + Twine(ElementIdx), GV->getThreadLocalMode(),
+ GV->getType()->getAddressSpace());
+ NGV->setExternallyInitialized(GV->isExternallyInitialized());
+ NGV->copyAttributesFrom(GV);
+ NewGlobals.insert(std::make_pair(ElementIdx, NGV));
+
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout &Layout = *DL.getStructLayout(STy);
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
- uint64_t FieldOffset = Layout.getElementOffset(i);
+ uint64_t FieldOffset = Layout.getElementOffset(ElementIdx);
Align NewAlign(MinAlign(StartAlignment, FieldOffset));
- if (NewAlign > Align(DL.getABITypeAlignment(STy->getElementType(i))))
+ if (NewAlign >
+ Align(DL.getABITypeAlignment(STy->getElementType(ElementIdx))))
NGV->setAlignment(NewAlign);
// Copy over the debug info for the variable.
uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
- uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(i);
- transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, NumElements);
- }
- } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
- unsigned NumElements = STy->getNumElements();
- if (NumElements > 16 && GV->hasNUsesOrMore(16))
- return nullptr; // It's not worth it.
- NewGlobals.reserve(NumElements);
- auto ElTy = STy->getElementType();
- uint64_t EltSize = DL.getTypeAllocSize(ElTy);
- Align EltAlign(DL.getABITypeAlignment(ElTy));
- uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);
- for (unsigned i = 0, e = NumElements; i != e; ++i) {
- Constant *In = Init->getAggregateElement(i);
- assert(In && "Couldn't get element of initializer?");
-
- GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
- GlobalVariable::InternalLinkage,
- In, GV->getName()+"."+Twine(i),
- GV->getThreadLocalMode(),
- GV->getType()->getAddressSpace());
- NGV->setExternallyInitialized(GV->isExternallyInitialized());
- NGV->copyAttributesFrom(GV);
- Globals.push_back(NGV);
- NewGlobals.push_back(NGV);
+ uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx);
+ transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size,
+ STy->getNumElements());
+ } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+ uint64_t EltSize = DL.getTypeAllocSize(ElTy);
+ Align EltAlign(DL.getABITypeAlignment(ElTy));
+ uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
- Align NewAlign(MinAlign(StartAlignment, EltSize * i));
+ Align NewAlign(MinAlign(StartAlignment, EltSize * ElementIdx));
if (NewAlign > EltAlign)
NGV->setAlignment(NewAlign);
- transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits,
- NumElements);
+ transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx,
+ FragmentSizeInBits, STy->getNumElements());
}
}
if (NewGlobals.empty())
return nullptr;
+ Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
+ for (auto NewGlobalVar : NewGlobals)
+ Globals.push_back(NewGlobalVar.second);
+
LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -558,11 +579,11 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
     // Operand 1 has to be zero, or else the program is quite broken
     // (undefined). Operand 2 is the structure or array index.
- unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
- if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
+ unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
+ assert(NewGlobals.count(ElementIdx) == 1);
- Value *NewPtr = NewGlobals[Val];
- Type *NewTy = NewGlobals[Val]->getValueType();
+ Value *NewPtr = NewGlobals[ElementIdx];
+ Type *NewTy = NewGlobals[ElementIdx]->getValueType();
// Form a shorter GEP if needed.
if (GEP->getNumOperands() > 3) {
@@ -580,7 +601,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
Idxs.push_back(GEPI->getOperand(i));
NewPtr = GetElementPtrInst::Create(
- NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI);
+ NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(ElementIdx),
+ GEPI);
}
}
GEP->replaceAllUsesWith(NewPtr);
@@ -595,17 +617,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
Globals.erase(GV);
++NumSRA;
- // Loop over the new globals array deleting any globals that are obviously
- // dead. This can arise due to scalarization of a structure or an array that
- // has elements that are dead.
- unsigned FirstGlobal = 0;
- for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
- if (NewGlobals[i]->use_empty()) {
- Globals.erase(NewGlobals[i]);
- if (FirstGlobal == i) ++FirstGlobal;
- }
-
- return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
+ assert(NewGlobals.size() > 0);
+ return NewGlobals.begin()->second;
}
/// Return true if all users of the specified value will trap if the value is
@@ -2285,10 +2298,14 @@ OptimizeFunctions(Module &M,
// So, remove unreachable blocks from the function, because a) there's
// no point in analyzing them and b) GlobalOpt should otherwise grow
// some more complicated logic to break these cycles.
+      // Removing unreachable blocks might invalidate the dominator tree, so
+      // we recalculate it.
if (!F->isDeclaration()) {
- auto &DT = LookupDomTree(*F);
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
- Changed |= removeUnreachableBlocks(*F, &DTU);
+ if (removeUnreachableBlocks(*F)) {
+ auto &DT = LookupDomTree(*F);
+ DT.recalculate(*F);
+ Changed = true;
+ }
}
Changed |= processGlobal(*F, GetTLI, LookupDomTree);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalSplit.cpp
index 060043a40b89..4a319ead23c0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalSplit.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/IPO.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index cfdcc8db7f50..5e690714bfdf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -25,6 +25,7 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO/HotColdSplitting.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -53,13 +54,14 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/HotColdSplitting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -205,6 +207,11 @@ bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
if (F.hasFnAttribute(Attribute::NoInline))
return false;
+ // A function marked `noreturn` may contain unreachable terminators: these
+ // should not be considered cold, as the function may be a trampoline.
+ if (F.hasFnAttribute(Attribute::NoReturn))
+ return false;
+
if (F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
F.hasFnAttribute(Attribute::SanitizeThread) ||
@@ -326,6 +333,9 @@ Function *HotColdSplitting::extractColdRegion(
}
CI->setIsNoInline();
+ if (OrigF->hasSection())
+ OutF->setSection(OrigF->getSection());
+
markFunctionCold(*OutF, BFI != nullptr);
LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
index 7dc4d9ee9e34..1bda13a9bdd8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
@@ -254,7 +255,7 @@ static bool PropagateConstantReturn(Function &F) {
// Find the index of the retval to replace with
int index = -1;
if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins))
- if (EV->hasIndices())
+ if (EV->getNumIndices() == 1)
index = *EV->idx_begin();
// If this use uses a specific return value, and we have a replacement,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp
index bddf75211599..8a15800cbdb5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp
@@ -43,7 +43,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeBlockExtractorPass(Registry);
initializeSingleLoopExtractorPass(Registry);
initializeLowerTypeTestsPass(Registry);
- initializeMergeFunctionsPass(Registry);
+ initializeMergeFunctionsLegacyPassPass(Registry);
initializePartialInlinerLegacyPassPass(Registry);
initializeAttributorLegacyPassPass(Registry);
initializePostOrderFunctionAttrsLegacyPassPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
index d1a68b28bd33..685f8f7d7a00 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -11,6 +11,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/InlineSimple.cpp
index efb71b73cbb7..e818743544e6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Inliner.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp
index 2e269604e379..e1644819af61 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index add2ae053735..f7108e8002ac 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 2dec366d70e2..fa664966faf7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -54,6 +54,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
@@ -1810,7 +1811,7 @@ bool LowerTypeTestsModule::lower() {
// reference them. This is used to partition the set of type identifiers in
// the module into disjoint sets.
using GlobalClassesTy = EquivalenceClasses<
- PointerUnion3<GlobalTypeMember *, Metadata *, ICallBranchFunnel *>>;
+ PointerUnion<GlobalTypeMember *, Metadata *, ICallBranchFunnel *>>;
GlobalClassesTy GlobalClasses;
// Verify the type metadata and build a few data structures to let us
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 8b9abaddc84c..06d2a2f31941 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -115,12 +115,14 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/Utils/FunctionComparator.h"
#include <algorithm>
#include <cassert>
@@ -195,16 +197,12 @@ public:
/// by considering all pointer types to be equivalent. Once identified,
/// MergeFunctions will fold them by replacing a call to one to a call to a
/// bitcast of the other.
-class MergeFunctions : public ModulePass {
+class MergeFunctions {
public:
- static char ID;
-
- MergeFunctions()
- : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)) {
- initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+ MergeFunctions() : FnTree(FunctionNodeCmp(&GlobalNumbers)) {
}
- bool runOnModule(Module &M) override;
+ bool runOnModule(Module &M);
private:
// The function comparison operator is provided here so that FunctionNodes do
@@ -297,14 +295,39 @@ private:
DenseMap<AssertingVH<Function>, FnTreeType::iterator> FNodesInTree;
};
-} // end anonymous namespace
+class MergeFunctionsLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ MergeFunctionsLegacyPass(): ModulePass(ID) {
+ initializeMergeFunctionsLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
-char MergeFunctions::ID = 0;
+ MergeFunctions MF;
+ return MF.runOnModule(M);
+ }
+};
-INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
+} // end anonymous namespace
+
+char MergeFunctionsLegacyPass::ID = 0;
+INITIALIZE_PASS(MergeFunctionsLegacyPass, "mergefunc",
+ "Merge Functions", false, false)
ModulePass *llvm::createMergeFunctionsPass() {
- return new MergeFunctions();
+ return new MergeFunctionsLegacyPass();
+}
+
+PreservedAnalyses MergeFunctionsPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ MergeFunctions MF;
+ if (!MF.runOnModule(M))
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
}
#ifndef NDEBUG
@@ -386,9 +409,6 @@ static bool isEligibleForMerging(Function &F) {
}
bool MergeFunctions::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
-
bool Changed = false;
// All functions in the module, ordered by hash. Functions with a unique
@@ -449,28 +469,10 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
++UI;
CallSite CS(U->getUser());
if (CS && CS.isCallee(U)) {
- // Transfer the called function's attributes to the call site. Due to the
- // bitcast we will 'lose' ABI changing attributes because the 'called
- // function' is no longer a Function* but the bitcast. Code that looks up
- // the attributes from the called function will fail.
-
- // FIXME: This is not actually true, at least not anymore. The callsite
- // will always have the same ABI affecting attributes as the callee,
- // because otherwise the original input has UB. Note that Old and New
- // always have matching ABI, so no attributes need to be changed.
- // Transferring other attributes may help other optimizations, but that
- // should be done uniformly and not in this ad-hoc way.
- auto &Context = New->getContext();
- auto NewPAL = New->getAttributes();
- SmallVector<AttributeSet, 4> NewArgAttrs;
- for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++)
- NewArgAttrs.push_back(NewPAL.getParamAttributes(argIdx));
- // Don't transfer attributes from the function to the callee. Function
- // attributes typically aren't relevant to the calling convention or ABI.
- CS.setAttributes(AttributeList::get(Context, /*FnAttrs=*/AttributeSet(),
- NewPAL.getRetAttributes(),
- NewArgAttrs));
-
+ // Do not copy attributes from the called function to the call-site.
+ // Function comparison ensures that the attributes are the same up to
+ // type congruences in byval(), in which case we need to keep the byval
+ // type of the call-site, not the callee function.
remove(CS.getInstruction()->getFunction());
U->set(BitcastNew);
}
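
[Note] The MergeFunctions restructuring above is the standard recipe for porting a legacy pass to the new pass manager: the transformation moves into a plain class, and two thin adapters drive it; the skipModule check stays in the legacy adapter only, which is why the impl's runOnModule no longer performs it. A compilable toy of the pattern (Module and PreservedAnalyses are stubs here, not LLVM's types):

    struct Module {};                 // stand-in for llvm::Module
    struct PreservedAnalyses {
      static PreservedAnalyses all()  { return {true}; }
      static PreservedAnalyses none() { return {false}; }
      bool Preserved;
    };

    // Core logic, independent of either pass manager.
    class MergeFunctionsImpl {
    public:
      bool runOnModule(Module &) { /* merge logic */ return false; }
    };

    // Legacy-PM adapter: forwards to the impl (after skip checks).
    struct MergeFunctionsLegacy {
      bool runOnModule(Module &M) {
        MergeFunctionsImpl Impl;
        return Impl.runOnModule(M);
      }
    };

    // New-PM adapter: maps "changed" onto preserved-analysis sets.
    struct MergeFunctionsPass {
      PreservedAnalyses run(Module &M) {
        MergeFunctionsImpl Impl;
        return Impl.runOnModule(M) ? PreservedAnalyses::none()
                                   : PreservedAnalyses::all();
      }
    };
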
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
index e193074884af..cd3701e90308 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
@@ -701,7 +702,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
return OutliningInfo;
}
-// Check if there is PGO data or user annoated branch data:
+// Check if there is PGO data or user annotated branch data:
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
if (F->hasProfileData())
return true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 5314a8219b1e..9c992830879a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -147,6 +147,10 @@ cl::opt<bool> EnableOrderFileInstrumentation(
"enable-order-file-instrumentation", cl::init(false), cl::Hidden,
cl::desc("Enable order file instrumentation (default = off)"));
+static cl::opt<bool>
+ EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
+ cl::desc("Enable lowering of the matrix intrinsics"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -320,19 +324,26 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
legacy::PassManagerBase &MPM) {
// Start of function pass.
// Break up aggregate allocas, using SSAUpdater.
+ assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!");
MPM.add(createSROAPass());
MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies
- if (EnableGVNHoist)
- MPM.add(createGVNHoistPass());
- if (EnableGVNSink) {
- MPM.add(createGVNSinkPass());
- MPM.add(createCFGSimplificationPass());
+
+ if (OptLevel > 1) {
+ if (EnableGVNHoist)
+ MPM.add(createGVNHoistPass());
+ if (EnableGVNSink) {
+ MPM.add(createGVNSinkPass());
+ MPM.add(createCFGSimplificationPass());
+ }
}
- // Speculative execution if the target has divergent branches; otherwise nop.
- MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
- MPM.add(createJumpThreadingPass()); // Thread jumps.
- MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
+ if (OptLevel > 1) {
+ // Speculative execution if the target has divergent branches; otherwise nop.
+ MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
+
+ MPM.add(createJumpThreadingPass()); // Thread jumps.
+ MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
+ }
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
// Combine silly seq's
if (OptLevel > 2)
@@ -346,8 +357,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (SizeLevel == 0)
MPM.add(createPGOMemOPSizeOptLegacyPass());
- MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ // TODO: Investigate the cost/benefit of tail call elimination on debugging.
+ if (OptLevel > 1)
+ MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
// Begin the loop pass pipeline.
@@ -360,6 +373,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
}
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+ // TODO: Investigate promotion cap for O1.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
if (EnableSimpleLoopUnswitch)
MPM.add(createSimpleLoopUnswitchLegacyPass());
@@ -402,16 +416,19 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// opened up by them.
addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createJumpThreadingPass()); // Thread jumps
- MPM.add(createCorrelatedValuePropagationPass());
- MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ if (OptLevel > 1) {
+ MPM.add(createJumpThreadingPass()); // Thread jumps
+ MPM.add(createCorrelatedValuePropagationPass());
+ MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ }
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
if (RerollLoops)
MPM.add(createLoopRerollPass());
+ // TODO: Investigate if this is too expensive at O1.
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
// Clean up after everything.
@@ -656,6 +673,14 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createFloat2IntPass());
MPM.add(createLowerConstantIntrinsicsPass());
+ if (EnableMatrix) {
+ MPM.add(createLowerMatrixIntrinsicsPass());
+ // CSE the pointer arithmetic of the column vectors. This allows alias
+ // analysis to establish no-aliasing between loads and stores of different
+ // columns of the same matrix.
+ MPM.add(createEarlyCSEPass(false));
+ }
+
addExtensionsToPM(EP_VectorizerStart, MPM);
// Re-rotate loops in all our loop nests. These may have fallout out of
@@ -899,7 +924,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// LTO provides additional opportunities for tailcall elimination due to
// link-time inlining, and visibility of nocapture attribute.
- PM.add(createTailCallEliminationPass());
+ if (OptLevel > 1)
+ PM.add(createTailCallEliminationPass());
// Infer attributes on declarations, call sites, arguments, etc.
PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
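
[Note] Most of the PassManagerBuilder hunks gate existing passes behind OptLevel > 1, so -O1 runs a leaner, more debuggable pipeline. A sketch of the shape of that change, with a toy pipeline in place of legacy::PassManagerBase:

    #include <string>
    #include <utility>
    #include <vector>

    // Toy pipeline: a pass is just a name here.
    struct Pipeline {
      std::vector<std::string> Passes;
      void add(std::string P) { Passes.push_back(std::move(P)); }
    };

    void addFunctionSimplificationPasses(Pipeline &MPM, int OptLevel) {
      MPM.add("sroa");
      MPM.add("early-cse");
      if (OptLevel > 1) {          // skipped at -O1
        MPM.add("jump-threading");
        MPM.add("correlated-propagation");
        MPM.add("tailcallelim");
      }
      MPM.add("simplifycfg");      // still run at every level
    }
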
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PruneEH.cpp
index cb3915dfb678..45a0ce20eb17 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -18,15 +18,16 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp
index 307690729b14..fdffffba0c2d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -2,6 +2,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar/SCCP.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 6184681db8a2..a1fbb1adc412 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -26,13 +26,17 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -57,6 +61,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
@@ -90,6 +95,12 @@ using namespace llvm;
using namespace sampleprof;
using ProfileCount = Function::ProfileCount;
#define DEBUG_TYPE "sample-profile"
+#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
+
+STATISTIC(NumCSInlined,
+ "Number of functions inlined with context sensitive profile");
+STATISTIC(NumCSNotInlined,
+ "Number of functions not inlined with context sensitive profile");
// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
@@ -136,6 +147,25 @@ static cl::opt<bool> ProfileAccurateForSymsInList(
cl::desc("For symbols in profile symbol list, regard their profiles to "
"be accurate. It may be overriden by profile-sample-accurate. "));
+static cl::opt<bool> ProfileMergeInlinee(
+ "sample-profile-merge-inlinee", cl::Hidden, cl::init(false),
+ cl::desc("Merge past inlinee's profile to outline version if sample "
+ "profile loader decided not to inline a call site."));
+
+static cl::opt<bool> ProfileTopDownLoad(
+ "sample-profile-top-down-load", cl::Hidden, cl::init(false),
+ cl::desc("Do profile annotation and inlining for functions in top-down "
+ "order of call graph during sample profile loading."));
+
+static cl::opt<bool> ProfileSizeInline(
+ "sample-profile-inline-size", cl::Hidden, cl::init(false),
+ cl::desc("Inline cold call sites in profile loader if it's beneficial "
+ "for code size."));
+
+static cl::opt<int> SampleColdCallSiteThreshold(
+ "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
+ cl::desc("Threshold for inlining cold callsites"));
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -285,7 +315,7 @@ public:
bool doInitialization(Module &M);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
- ProfileSummaryInfo *_PSI);
+ ProfileSummaryInfo *_PSI, CallGraph *CG);
void dump() { Reader->dump(); }
@@ -305,6 +335,10 @@ protected:
bool inlineCallInstruction(Instruction *I);
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ // Inline cold/small functions in addition to hot ones
+ bool shouldInlineColdCallee(Instruction &CallInst);
+ void emitOptimizationRemarksForInlineCandidates(
+ const SmallVector<Instruction *, 10> &Candidates, const Function &F, bool Hot);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -317,6 +351,7 @@ protected:
void propagateWeights(Function &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
+ std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
void clearFunctionData();
@@ -869,21 +904,52 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC,
None, nullptr, nullptr);
if (Cost.isNever()) {
- ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
+ ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
<< "incompatible inlining");
return false;
}
InlineFunctionInfo IFI(nullptr, &GetAC);
if (InlineFunction(CS, IFI)) {
// The call to InlineFunction erases I, so we can't pass it here.
- ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)
- << "inlined hot callee '" << ore::NV("Callee", CalledFunction)
+ ORE->emit(OptimizationRemark(CSINLINE_DEBUG, "InlineSuccess", DLoc, BB)
+ << "inlined callee '" << ore::NV("Callee", CalledFunction)
<< "' into '" << ore::NV("Caller", BB->getParent()) << "'");
return true;
}
return false;
}
+bool SampleProfileLoader::shouldInlineColdCallee(Instruction &CallInst) {
+ if (!ProfileSizeInline)
+ return false;
+
+ Function *Callee = CallSite(&CallInst).getCalledFunction();
+ if (Callee == nullptr)
+ return false;
+
+ InlineCost Cost =
+ getInlineCost(cast<CallBase>(CallInst), getInlineParams(),
+ GetTTI(*Callee), GetAC, None, nullptr, nullptr);
+
+ return Cost.getCost() <= SampleColdCallSiteThreshold;
+}
+
+void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
+ const SmallVector<Instruction *, 10> &Candidates, const Function &F,
+ bool Hot) {
+ for (auto I : Candidates) {
+ Function *CalledFunction = CallSite(I).getCalledFunction();
+ if (CalledFunction) {
+ ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
+ I->getDebugLoc(), I->getParent())
+ << "previous inlining reattempted for "
+ << (Hot ? "hotness: '" : "size: '")
+ << ore::NV("Callee", CalledFunction) << "' into '"
+ << ore::NV("Caller", &F) << "'");
+ }
+ }
+}
+
/// Iteratively inline hot callsites of a function.
///
/// Iteratively traverse all callsites of the function \p F, and find if
@@ -916,20 +982,28 @@ bool SampleProfileLoader::inlineHotFunctions(
SmallVector<Instruction *, 10> CIS;
for (auto &BB : F) {
bool Hot = false;
- SmallVector<Instruction *, 10> Candidates;
+ SmallVector<Instruction *, 10> AllCandidates;
+ SmallVector<Instruction *, 10> ColdCandidates;
for (auto &I : BB.getInstList()) {
const FunctionSamples *FS = nullptr;
if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
- Candidates.push_back(&I);
+ AllCandidates.push_back(&I);
if (FS->getEntrySamples() > 0)
localNotInlinedCallSites.try_emplace(&I, FS);
if (callsiteIsHot(FS, PSI))
Hot = true;
+ else if (shouldInlineColdCallee(I))
+ ColdCandidates.push_back(&I);
}
}
if (Hot) {
- CIS.insert(CIS.begin(), Candidates.begin(), Candidates.end());
+ CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
+ emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
+ } else {
+ CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
+ emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
}
}
for (auto I : CIS) {
@@ -975,6 +1049,7 @@ bool SampleProfileLoader::inlineHotFunctions(
inlineCallInstruction(DI)) {
localNotInlinedCallSites.erase(I);
LocalChanged = true;
+ ++NumCSInlined;
}
} else {
LLVM_DEBUG(dbgs()
@@ -987,6 +1062,7 @@ bool SampleProfileLoader::inlineHotFunctions(
if (inlineCallInstruction(I)) {
localNotInlinedCallSites.erase(I);
LocalChanged = true;
+ ++NumCSInlined;
}
} else if (IsThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions(
@@ -1006,10 +1082,35 @@ bool SampleProfileLoader::inlineHotFunctions(
Function *Callee = CallSite(I).getCalledFunction();
if (!Callee || Callee->isDeclaration())
continue;
+
+ ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline",
+ I->getDebugLoc(), I->getParent())
+ << "previous inlining not repeated: '"
+ << ore::NV("Callee", Callee) << "' into '"
+ << ore::NV("Caller", &F) << "'");
+
+ ++NumCSNotInlined;
const FunctionSamples *FS = Pair.getSecond();
- auto pair =
- notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
- pair.first->second.entryCount += FS->getEntrySamples();
+ if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
+ continue;
+ }
+
+ if (ProfileMergeInlinee) {
+ // Use entry samples as head samples during the merge, as inlinees
+ // don't have head samples.
+ assert(FS->getHeadSamples() == 0 && "Expect 0 head sample for inlinee");
+ const_cast<FunctionSamples *>(FS)->addHeadSamples(FS->getEntrySamples());
+
+ // Note that we have to do the merge right after processing the function.
+ // This allows OutlineFS's profile to be used for annotation during
+ // top-down processing of the remaining functions.
+ FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
+ OutlineFS->merge(*FS);
+ } else {
+ auto pair =
+ notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
+ pair.first->second.entryCount += FS->getEntrySamples();
+ }
}
return Changed;
}
@@ -1673,6 +1774,33 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
+std::vector<Function *>
+SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
+ std::vector<Function *> FunctionOrderList;
+ FunctionOrderList.reserve(M.size());
+
+ if (!ProfileTopDownLoad || CG == nullptr) {
+ for (Function &F : M)
+ if (!F.isDeclaration())
+ FunctionOrderList.push_back(&F);
+ return FunctionOrderList;
+ }
+
+ assert(&CG->getModule() == &M);
+ scc_iterator<CallGraph *> CGI = scc_begin(CG);
+ while (!CGI.isAtEnd()) {
+ for (CallGraphNode *node : *CGI) {
+ auto F = node->getFunction();
+ if (F && !F->isDeclaration())
+ FunctionOrderList.push_back(F);
+ }
+ ++CGI;
+ }
+
+ std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
+ return FunctionOrderList;
+}
+
bool SampleProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();
@@ -1710,7 +1838,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
}
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
- ProfileSummaryInfo *_PSI) {
+ ProfileSummaryInfo *_PSI, CallGraph *CG) {
GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
if (!ProfileIsValid)
return false;
@@ -1745,11 +1873,11 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
}
bool retval = false;
- for (auto &F : M)
- if (!F.isDeclaration()) {
- clearFunctionData();
- retval |= runOnFunction(F, AM);
- }
+ for (auto F : buildFunctionOrder(M, CG)) {
+ assert(!F->isDeclaration());
+ clearFunctionData();
+ retval |= runOnFunction(*F, AM);
+ }
// Account for cold calls not inlined....
for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
@@ -1764,7 +1892,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
ProfileSummaryInfo *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- return SampleLoader.runOnModule(M, nullptr, PSI);
+ return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
@@ -1845,10 +1973,12 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
: ProfileRemappingFileName,
IsThinLTOPreLink, GetAssumptionCache, GetTTI);
- SampleLoader.doInitialization(M);
+ if (!SampleLoader.doInitialization(M))
+ return PreservedAnalyses::all();
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
- if (!SampleLoader.runOnModule(M, &AM, PSI))
+ CallGraph &CG = AM.getResult<CallGraphAnalysis>(M);
+ if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
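
[Note] buildFunctionOrder above relies on scc_iterator visiting the call graph bottom-up (callees before callers), so reversing its output yields the top-down order in which a not-inlined callee's merged profile is already in place when the callee itself is annotated. A self-contained sketch over an acyclic toy graph (LLVM's SCC iterator also handles cycles, which this does not):

    #include <algorithm>
    #include <functional>
    #include <vector>

    // CG[i] lists the functions that function i calls.
    std::vector<int> topDownOrder(const std::vector<std::vector<int>> &CG) {
      std::vector<int> Order;
      std::vector<bool> Seen(CG.size(), false);
      std::function<void(int)> Visit = [&](int N) {
        if (Seen[N]) return;
        Seen[N] = true;
        for (int Callee : CG[N]) Visit(Callee);
        Order.push_back(N);                      // post-order: callees first
      };
      for (int N = 0; N < (int)CG.size(); ++N) Visit(N);
      std::reverse(Order.begin(), Order.end());  // reversed: callers first
      return Order;
    }
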
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 106db3c8bd9d..655a7a404951 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -16,6 +16,7 @@
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 67a473612fc1..6ce00714523b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -20,7 +20,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
@@ -28,8 +27,10 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
namespace {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 690b5e8bf49e..87a18171787f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/Support/ScopedPrinter.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index f0cf5581ba8a..5ccfb29b01a1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -80,10 +80,12 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
@@ -486,7 +488,7 @@ struct DevirtModule {
bool areRemarksEnabled();
- void scanTypeTestUsers(Function *TypeTestFunc, Function *AssumeFunc);
+ void scanTypeTestUsers(Function *TypeTestFunc);
void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
void buildTypeIdentifierMap(
@@ -615,8 +617,8 @@ struct WholeProgramDevirt : public ModulePass {
bool UseCommandLine = false;
- ModuleSummaryIndex *ExportSummary;
- const ModuleSummaryIndex *ImportSummary;
+ ModuleSummaryIndex *ExportSummary = nullptr;
+ const ModuleSummaryIndex *ImportSummary = nullptr;
WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) {
initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
@@ -709,7 +711,7 @@ void runWholeProgramDevirtOnIndex(
void updateIndexWPDForExports(
ModuleSummaryIndex &Summary,
- function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+ function_ref<bool(StringRef, ValueInfo)> isExported,
std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
for (auto &T : LocalWPDTargetsMap) {
auto &VI = T.first;
@@ -717,7 +719,7 @@ void updateIndexWPDForExports(
assert(VI.getSummaryList().size() == 1 &&
"Devirt of local target has more than one copy");
auto &S = VI.getSummaryList()[0];
- if (!isExported(S->modulePath(), VI.getGUID()))
+ if (!isExported(S->modulePath(), VI))
continue;
// It's been exported by a cross module import.
@@ -840,18 +842,31 @@ bool DevirtModule::tryFindVirtualCallTargets(
bool DevirtIndex::tryFindVirtualCallTargets(
std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo,
uint64_t ByteOffset) {
- for (const TypeIdOffsetVtableInfo P : TIdInfo) {
- // VTable initializer should have only one summary, or all copies must be
- // linkonce/weak ODR.
- assert(P.VTableVI.getSummaryList().size() == 1 ||
- llvm::all_of(
- P.VTableVI.getSummaryList(),
- [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
- return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) ||
- GlobalValue::isWeakODRLinkage(Summary->linkage());
- }));
- const auto *VS = cast<GlobalVarSummary>(P.VTableVI.getSummaryList()[0].get());
- if (!P.VTableVI.getSummaryList()[0]->isLive())
+ for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
+ // Find the first non-available_externally linkage vtable initializer.
+ // We can have multiple available_externally, linkonce_odr and weak_odr
+ // vtable initializers, however we want to skip available_externally as they
+ // do not have type metadata attached, and therefore the summary will not
+ // contain any vtable functions. We can also have multiple external
+ // vtable initializers in the case of comdats, which we cannot check here.
+ // The linker should give an error in this case.
+ //
+ // Also, handle the case of same-named local vtables with the same path
+ // and therefore the same GUID. This can happen if the path isn't
+ // distinctive enough when compiling the source file. In that case we
+ // conservatively return false early.
+ const GlobalVarSummary *VS = nullptr;
+ bool LocalFound = false;
+ for (auto &S : P.VTableVI.getSummaryList()) {
+ if (GlobalValue::isLocalLinkage(S->linkage())) {
+ if (LocalFound)
+ return false;
+ LocalFound = true;
+ }
+ if (!GlobalValue::isAvailableExternallyLinkage(S->linkage()))
+ VS = cast<GlobalVarSummary>(S->getBaseObject());
+ }
+ if (!VS->isLive())
continue;
for (auto VTP : VS->vTableFuncs()) {
if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
@@ -1233,8 +1248,7 @@ std::string DevirtModule::getGlobalName(VTableSlot Slot,
bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
Triple T(M.getTargetTriple());
- return (T.getArch() == Triple::x86 || T.getArch() == Triple::x86_64) &&
- T.getObjectFormat() == Triple::ELF;
+ return T.isX86() && T.getObjectFormat() == Triple::ELF;
}
void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
@@ -1548,8 +1562,7 @@ bool DevirtModule::areRemarksEnabled() {
return false;
}
-void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
- Function *AssumeFunc) {
+void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc) {
// Find all virtual calls via a virtual table pointer %p under an assumption
// of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
// points to a member of the type identifier %md. Group calls by (type ID,
@@ -1784,7 +1797,7 @@ bool DevirtModule::run() {
return false;
if (TypeTestFunc && AssumeFunc)
- scanTypeTestUsers(TypeTestFunc, AssumeFunc);
+ scanTypeTestUsers(TypeTestFunc);
if (TypeCheckedLoadFunc)
scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
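
[Note] The new summary-selection loop in DevirtIndex::tryFindVirtualCallTargets can be read in isolation: prefer any non-available_externally copy, and bail out when two local copies collide on one GUID. A toy model, with invented Linkage and Summary types:

    #include <vector>

    enum class Linkage { External, LinkOnceODR, WeakODR,
                         AvailableExternally, Internal };
    struct Summary { Linkage L; };

    // Returns the summary to use for a vtable, or nullptr to give up
    // conservatively.
    const Summary *pickVTableSummary(const std::vector<Summary> &Copies) {
      const Summary *VS = nullptr;
      bool LocalFound = false;
      for (const Summary &S : Copies) {
        if (S.L == Linkage::Internal) {
          if (LocalFound)          // two same-named locals share a GUID:
            return nullptr;        // ambiguous, so refuse to devirtualize
          LocalFound = true;
        }
        // available_externally copies carry no type metadata, so prefer
        // a copy with any other linkage.
        if (S.L != Linkage::AvailableExternally)
          VS = &S;
      }
      return VS;
    }
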
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 8bc34825f8a7..ec976a971e3c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -890,6 +890,10 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
if (match(Op0, m_ZExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
return SelectInst::Create(X, AddOne(Op1C), Op1);
+ // sext(bool) + C -> bool ? C - 1 : C
+ if (match(Op0, m_SExt(m_Value(X))) &&
+ X->getType()->getScalarSizeInBits() == 1)
+ return SelectInst::Create(X, SubOne(Op1C), Op1);
// ~X + C --> (C-1) - X
if (match(Op0, m_Not(m_Value(X))))
@@ -1288,12 +1292,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(RHS, A);
}
- // Canonicalize sext to zext for better value tracking potential.
- // add A, sext(B) --> sub A, zext(B)
- if (match(&I, m_c_Add(m_Value(A), m_OneUse(m_SExt(m_Value(B))))) &&
- B->getType()->isIntOrIntVectorTy(1))
- return BinaryOperator::CreateSub(A, Builder.CreateZExt(B, Ty));
-
// A + -B --> A - B
if (match(RHS, m_Neg(m_Value(B))))
return BinaryOperator::CreateSub(LHS, B);
@@ -1587,7 +1585,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
- Type *Ty) {
+ Type *Ty, bool IsNUW) {
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
@@ -1655,6 +1653,15 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
// Emit the offset of the GEP and an intptr_t.
Value *Result = EmitGEPOffset(GEP1);
+ // If this is a single inbounds GEP and the original sub was nuw,
+ // then the final multiplication is also nuw. We match an extra add zero
+ // here, because that's what EmitGEPOffset() generates.
+ Instruction *I;
+ if (IsNUW && !GEP2 && !Swapped && GEP1->isInBounds() &&
+ match(Result, m_Add(m_Instruction(I), m_Zero())) &&
+ I->getOpcode() == Instruction::Mul)
+ I->setHasNoUnsignedWrap();
+
// If we had a constant expression GEP on the other side offsetting the
// pointer, subtract it from the offset we have.
if (GEP2) {
@@ -1881,6 +1888,74 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Y, Builder.CreateNot(Op1, Op1->getName() + ".not"));
}
+ {
+ // (sub (and Op1, (neg X)), Op1) --> neg (and Op1, (add X, -1))
+ Value *X;
+ if (match(Op0, m_OneUse(m_c_And(m_Specific(Op1),
+ m_OneUse(m_Neg(m_Value(X))))))) {
+ return BinaryOperator::CreateNeg(Builder.CreateAnd(
+ Op1, Builder.CreateAdd(X, Constant::getAllOnesValue(I.getType()))));
+ }
+ }
+
+ {
+ // (sub (and Op1, C), Op1) --> neg (and Op1, ~C)
+ Constant *C;
+ if (match(Op0, m_OneUse(m_And(m_Specific(Op1), m_Constant(C))))) {
+ return BinaryOperator::CreateNeg(
+ Builder.CreateAnd(Op1, Builder.CreateNot(C)));
+ }
+ }
+
+ {
+ // If we have a subtraction between some value and a select between
+ // said value and something else, sink subtraction into select hands, i.e.:
+ // sub (select %Cond, %TrueVal, %FalseVal), %Op1
+ // ->
+ // select %Cond, (sub %TrueVal, %Op1), (sub %FalseVal, %Op1)
+ // or
+ // sub %Op0, (select %Cond, %TrueVal, %FalseVal)
+ // ->
+ // select %Cond, (sub %Op0, %TrueVal), (sub %Op0, %FalseVal)
+ // This will result in select between new subtraction and 0.
+ auto SinkSubIntoSelect =
+ [Ty = I.getType()](Value *Select, Value *OtherHandOfSub,
+ auto SubBuilder) -> Instruction * {
+ Value *Cond, *TrueVal, *FalseVal;
+ if (!match(Select, m_OneUse(m_Select(m_Value(Cond), m_Value(TrueVal),
+ m_Value(FalseVal)))))
+ return nullptr;
+ if (OtherHandOfSub != TrueVal && OtherHandOfSub != FalseVal)
+ return nullptr;
+ // While it is really tempting to just create two subtractions and let
+ // InstCombine fold one of those to 0, it isn't possible to do so
+ // because of worklist visitation order. So ugly it is.
+ bool OtherHandOfSubIsTrueVal = OtherHandOfSub == TrueVal;
+ Value *NewSub = SubBuilder(OtherHandOfSubIsTrueVal ? FalseVal : TrueVal);
+ Constant *Zero = Constant::getNullValue(Ty);
+ SelectInst *NewSel =
+ SelectInst::Create(Cond, OtherHandOfSubIsTrueVal ? Zero : NewSub,
+ OtherHandOfSubIsTrueVal ? NewSub : Zero);
+ // Preserve prof metadata if any.
+ NewSel->copyMetadata(cast<Instruction>(*Select));
+ return NewSel;
+ };
+ if (Instruction *NewSel = SinkSubIntoSelect(
+ /*Select=*/Op0, /*OtherHandOfSub=*/Op1,
+ [Builder = &Builder, Op1](Value *OtherHandOfSelect) {
+ return Builder->CreateSub(OtherHandOfSelect,
+ /*OtherHandOfSub=*/Op1);
+ }))
+ return NewSel;
+ if (Instruction *NewSel = SinkSubIntoSelect(
+ /*Select=*/Op1, /*OtherHandOfSub=*/Op0,
+ [Builder = &Builder, Op0](Value *OtherHandOfSelect) {
+ return Builder->CreateSub(/*OtherHandOfSub=*/Op0,
+ OtherHandOfSelect);
+ }))
+ return NewSel;
+ }
+
if (Op1->hasOneUse()) {
Value *X = nullptr, *Y = nullptr, *Z = nullptr;
Constant *C = nullptr;
@@ -1896,14 +1971,16 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Builder.CreateNot(Y, Y->getName() + ".not"));
// 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow.
- // TODO: This could be extended to match arbitrary vector constants.
- const APInt *DivC;
- if (match(Op0, m_Zero()) && match(Op1, m_SDiv(m_Value(X), m_APInt(DivC))) &&
- !DivC->isMinSignedValue() && *DivC != 1) {
- Constant *NegDivC = ConstantInt::get(I.getType(), -(*DivC));
- Instruction *BO = BinaryOperator::CreateSDiv(X, NegDivC);
- BO->setIsExact(cast<BinaryOperator>(Op1)->isExact());
- return BO;
+ if (match(Op0, m_Zero())) {
+ Constant *Op11C;
+ if (match(Op1, m_SDiv(m_Value(X), m_Constant(Op11C))) &&
+ !Op11C->containsUndefElement() && Op11C->isNotMinSignedValue() &&
+ Op11C->isNotOneValue()) {
+ Instruction *BO =
+ BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(Op11C));
+ BO->setIsExact(cast<BinaryOperator>(Op1)->isExact());
+ return BO;
+ }
}
// 0 - (X << Y) -> (-X << Y) when X is freely negatable.
@@ -1921,6 +1998,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Add->setHasNoSignedWrap(I.hasNoSignedWrap());
return Add;
}
+ // sub [nsw] X, zext(bool Y) -> add [nsw] X, sext(bool Y)
+ // 'nuw' is dropped in favor of the canonical form.
+ if (match(Op1, m_ZExt(m_Value(Y))) && Y->getType()->isIntOrIntVectorTy(1)) {
+ Value *Sext = Builder.CreateSExt(Y, I.getType());
+ BinaryOperator *Add = BinaryOperator::CreateAdd(Op0, Sext);
+ Add->setHasNoSignedWrap(I.hasNoSignedWrap());
+ return Add;
+ }
// X - A*-B -> X + A*B
// X - -A*B -> X + A*B
@@ -1975,13 +2060,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *LHSOp, *RHSOp;
if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
match(Op1, m_PtrToInt(m_Value(RHSOp))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(),
+ I.hasNoUnsignedWrap()))
return replaceInstUsesWith(I, Res);
// trunc(p)-trunc(q) -> trunc(p-q)
if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(),
+ /* IsNUW */ false))
return replaceInstUsesWith(I, Res);
// Canonicalize a shifty way to code absolute value to the common pattern.
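
[Note] The new sext(bool) folds above are easy to sanity-check by exhausting the one-bit input: sign-extending a bool gives 0 or -1, so adding it to C yields C or C - 1. A minimal runnable check (plain C++, not combiner code):

    #include <cassert>
    #include <cstdint>

    // Sign-extend an i1 the way sext does.
    int32_t sextBool(bool B) { return B ? -1 : 0; }

    int main() {
      const int32_t C = 42;
      for (bool B : {false, true}) {
        // sext(bool) + C --> bool ? C - 1 : C
        assert(sextBool(B) + C == (B ? C - 1 : C));
        // sub nsw X, zext(bool) --> add nsw X, sext(bool)
        assert(C - (B ? 1 : 0) == C + sextBool(B));
      }
      return 0;
    }
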
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 4a30b60ca931..cc0a9127f8b1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3279,6 +3279,23 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
NotLHS, NotRHS);
}
}
+
+ // Pull 'not' into operands of select if both operands are one-use compares.
+ // Inverting the predicates eliminates the 'not' operation.
+ // Example:
+ // not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?)) -->
+ // select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
+ // TODO: Canonicalize by hoisting 'not' into an arm of the select if only
+ // 1 select operand is a cmp?
+ if (auto *Sel = dyn_cast<SelectInst>(Op0)) {
+ auto *CmpT = dyn_cast<CmpInst>(Sel->getTrueValue());
+ auto *CmpF = dyn_cast<CmpInst>(Sel->getFalseValue());
+ if (CmpT && CmpF && CmpT->hasOneUse() && CmpF->hasOneUse()) {
+ CmpT->setPredicate(CmpT->getInversePredicate());
+ CmpF->setPredicate(CmpF->getInversePredicate());
+ return replaceInstUsesWith(I, Sel);
+ }
+ }
}
if (Instruction *NewXor = sinkNotIntoXor(I, Builder))
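
[Note] The select-of-compares fold above absorbs an outer not by inverting both predicates. Exhaustively checking one instantiation of the pattern (plain C++, not the combiner itself):

    #include <cassert>

    int main() {
      for (bool C : {false, true})
        for (int X = -2; X <= 2; ++X)
          for (int Y = -2; Y <= 2; ++Y) {
            // not (select C, (X < Y), (X == Y))
            bool Orig = !(C ? (X < Y) : (X == Y));
            // select C, (X >= Y), (X != Y)  -- predicates inverted
            bool Folded = C ? (X >= Y) : (X != Y);
            assert(Orig == Folded);
          }
      return 0;
    }
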
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c650d242cd50..f463c5fa1138 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -40,6 +40,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
@@ -2279,6 +2285,35 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::copysign: {
+ if (SignBitMustBeZero(II->getArgOperand(1), &TLI)) {
+ // If we know that the sign argument is positive, reduce to FABS:
+ // copysign X, Pos --> fabs X
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
+ II->getArgOperand(0), II);
+ return replaceInstUsesWith(*II, Fabs);
+ }
+ // TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
+ const APFloat *C;
+ if (match(II->getArgOperand(1), m_APFloat(C)) && C->isNegative()) {
+ // If we know that the sign argument is negative, reduce to FNABS:
+ // copysign X, Neg --> fneg (fabs X)
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
+ II->getArgOperand(0), II);
+ return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
+ }
+
+ // Propagate sign argument through nested calls:
+ // copysign X, (copysign ?, SignArg) --> copysign X, SignArg
+ Value *SignArg;
+ if (match(II->getArgOperand(1),
+ m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(SignArg)))) {
+ II->setArgOperand(1, SignArg);
+ return II;
+ }
+
+ break;
+ }
case Intrinsic::fabs: {
Value *Cond;
Constant *LHS, *RHS;
@@ -2452,6 +2487,64 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// TODO should we convert this to an AND if the RHS is constant?
}
break;
+ case Intrinsic::x86_bmi_pext_32:
+ case Intrinsic::x86_bmi_pext_64:
+ if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ if (MaskC->isNullValue())
+ return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
+ if (MaskC->isAllOnesValue())
+ return replaceInstUsesWith(CI, II->getArgOperand(0));
+
+ if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
+ uint64_t Src = SrcC->getZExtValue();
+ uint64_t Mask = MaskC->getZExtValue();
+ uint64_t Result = 0;
+ uint64_t BitToSet = 1;
+
+ while (Mask) {
+ // Isolate lowest set bit.
+ uint64_t BitToTest = Mask & -Mask;
+ if (BitToTest & Src)
+ Result |= BitToSet;
+
+ BitToSet <<= 1;
+ // Clear lowest set bit.
+ Mask &= Mask - 1;
+ }
+
+ return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
+ }
+ }
+ break;
+ case Intrinsic::x86_bmi_pdep_32:
+ case Intrinsic::x86_bmi_pdep_64:
+ if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ if (MaskC->isNullValue())
+ return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
+ if (MaskC->isAllOnesValue())
+ return replaceInstUsesWith(CI, II->getArgOperand(0));
+
+ if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
+ uint64_t Src = SrcC->getZExtValue();
+ uint64_t Mask = MaskC->getZExtValue();
+ uint64_t Result = 0;
+ uint64_t BitToTest = 1;
+
+ while (Mask) {
+ // Isolate lowest set bit.
+ uint64_t BitToSet = Mask & -Mask;
+ if (BitToTest & Src)
+ Result |= BitToSet;
+
+ BitToTest <<= 1;
+ // Clear lowest set bit.
+ Mask &= Mask - 1;
+ }
+
+ return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
+ }
+ }
+ break;
case Intrinsic::x86_vcvtph2ps_128:
case Intrinsic::x86_vcvtph2ps_256: {
@@ -3308,6 +3401,60 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
}
+ case Intrinsic::arm_mve_pred_i2v: {
+ Value *Arg = II->getArgOperand(0);
+ Value *ArgArg;
+ if (match(Arg, m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(m_Value(ArgArg))) &&
+ II->getType() == ArgArg->getType())
+ return replaceInstUsesWith(*II, ArgArg);
+ Constant *XorMask;
+ if (match(Arg,
+ m_Xor(m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(m_Value(ArgArg)),
+ m_Constant(XorMask))) &&
+ II->getType() == ArgArg->getType()) {
+ if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
+ if (CI->getValue().trunc(16).isAllOnesValue()) {
+ auto TrueVector = Builder.CreateVectorSplat(
+ II->getType()->getVectorNumElements(), Builder.getTrue());
+ return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);
+ }
+ }
+ }
+ KnownBits ScalarKnown(32);
+ if (SimplifyDemandedBits(II, 0, APInt::getLowBitsSet(32, 16),
+ ScalarKnown, 0))
+ return II;
+ break;
+ }
+ case Intrinsic::arm_mve_pred_v2i: {
+ Value *Arg = II->getArgOperand(0);
+ Value *ArgArg;
+ if (match(Arg, m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(m_Value(ArgArg))))
+ return replaceInstUsesWith(*II, ArgArg);
+ if (!II->getMetadata(LLVMContext::MD_range)) {
+ Type *IntTy32 = Type::getInt32Ty(II->getContext());
+ Metadata *M[] = {
+ ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)),
+ ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0xFFFF))
+ };
+ II->setMetadata(LLVMContext::MD_range, MDNode::get(II->getContext(), M));
+ return II;
+ }
+ break;
+ }
+ case Intrinsic::arm_mve_vadc:
+ case Intrinsic::arm_mve_vadc_predicated: {
+ unsigned CarryOp =
+ (II->getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
+ assert(II->getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
+ "Bad type for intrinsic!");
+
+ KnownBits CarryKnown(32);
+ if (SimplifyDemandedBits(II, CarryOp, APInt::getOneBitSet(32, 29),
+ CarryKnown))
+ return II;
+ break;
+ }
case Intrinsic::amdgcn_rcp: {
Value *Src = II->getArgOperand(0);
@@ -3317,7 +3464,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
const APFloat &ArgVal = C->getValueAPF();
- APFloat Val(ArgVal.getSemantics(), 1.0);
+ APFloat Val(ArgVal.getSemantics(), 1);
APFloat::opStatus Status = Val.divide(ArgVal,
APFloat::rmNearestTiesToEven);
// Only do this if it was exact and therefore not dependent on the
@@ -3872,7 +4019,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return eraseInstFromFunction(CI);
// Bail if we cross over an intrinsic with side effects, such as
- // llvm.stacksave, llvm.read_register, or llvm.setjmp.
+ // llvm.stacksave, or llvm.read_register.
if (II2->mayHaveSideEffects()) {
CannotRemove = true;
break;
@@ -4019,12 +4166,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Is this guard followed by another guard? We scan forward over a small
// fixed window of instructions to handle common cases with conditions
// computed between guards.
- Instruction *NextInst = II->getNextNode();
+ Instruction *NextInst = II->getNextNonDebugInstruction();
for (unsigned i = 0; i < GuardWideningWindow; i++) {
// Note: Using context-free form to avoid compile time blow up
if (!isSafeToSpeculativelyExecute(NextInst))
break;
- NextInst = NextInst->getNextNode();
+ NextInst = NextInst->getNextNonDebugInstruction();
}
Value *NextCond = nullptr;
if (match(NextInst,
@@ -4032,18 +4179,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *CurrCond = II->getArgOperand(0);
// Remove a guard that it is immediately preceded by an identical guard.
- if (CurrCond == NextCond)
- return eraseInstFromFunction(*NextInst);
-
// Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
- Instruction* MoveI = II->getNextNode();
- while (MoveI != NextInst) {
- auto *Temp = MoveI;
- MoveI = MoveI->getNextNode();
- Temp->moveBefore(II);
+ if (CurrCond != NextCond) {
+ Instruction *MoveI = II->getNextNonDebugInstruction();
+ while (MoveI != NextInst) {
+ auto *Temp = MoveI;
+ MoveI = MoveI->getNextNonDebugInstruction();
+ Temp->moveBefore(II);
+ }
+ II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
}
- II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
- return eraseInstFromFunction(*NextInst);
+ eraseInstFromFunction(*NextInst);
+ return II;
}
break;
}
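
[Note] The pext/pdep constant folding added above walks the mask one set bit at a time, using Mask & -Mask to isolate the lowest set bit and Mask & (Mask - 1) to clear it. The same loops as a runnable emulation with a worked example:

    #include <cassert>
    #include <cstdint>

    // Parallel bit extract: gather the Src bits selected by Mask into
    // the low bits of the result.
    uint64_t pext(uint64_t Src, uint64_t Mask) {
      uint64_t Result = 0, BitToSet = 1;
      while (Mask) {
        uint64_t BitToTest = Mask & -Mask;   // isolate lowest set bit
        if (Src & BitToTest)
          Result |= BitToSet;
        BitToSet <<= 1;
        Mask &= Mask - 1;                    // clear lowest set bit
      }
      return Result;
    }

    // Parallel bit deposit: scatter the low bits of Src into the
    // positions selected by Mask.
    uint64_t pdep(uint64_t Src, uint64_t Mask) {
      uint64_t Result = 0, BitToTest = 1;
      while (Mask) {
        uint64_t BitToSet = Mask & -Mask;
        if (Src & BitToTest)
          Result |= BitToSet;
        BitToTest <<= 1;
        Mask &= Mask - 1;
      }
      return Result;
    }

    int main() {
      assert(pext(0b101101, 0b001100) == 0b11);   // bits 2,3 -> bits 0,1
      assert(pdep(0b11, 0b001100) == 0b001100);   // bits 0,1 -> bits 2,3
      return 0;
    }
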
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 65aaef28d87a..71b7f279e5fa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
+#include <numeric>
using namespace llvm;
using namespace PatternMatch;
@@ -843,33 +844,33 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
return nullptr;
}
-Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
bool DoTransform) {
// If we are just checking for a icmp eq of a single bit and zext'ing it
// to an integer, then shift the bit to the appropriate place and then
// cast to integer to avoid the comparison.
const APInt *Op1CV;
- if (match(ICI->getOperand(1), m_APInt(Op1CV))) {
+ if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
- if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
- (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
- if (!DoTransform) return ICI;
+ if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
+ (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
+ if (!DoTransform) return Cmp;
- Value *In = ICI->getOperand(0);
+ Value *In = Cmp->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getScalarSizeInBits() - 1);
In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
- if (In->getType() != CI.getType())
- In = Builder.CreateIntCast(In, CI.getType(), false /*ZExt*/);
+ if (In->getType() != Zext.getType())
+ In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
- if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+ if (Cmp->getPredicate() == ICmpInst::ICMP_SGT) {
Constant *One = ConstantInt::get(In->getType(), 1);
In = Builder.CreateXor(In, One, In->getName() + ".not");
}
- return replaceInstUsesWith(CI, In);
+ return replaceInstUsesWith(Zext, In);
}
// zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
@@ -882,24 +883,24 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
// zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) &&
// This only works for EQ and NE
- ICI->isEquality()) {
+ Cmp->isEquality()) {
// If Op1C some other power of two, convert:
- KnownBits Known = computeKnownBits(ICI->getOperand(0), 0, &CI);
+ KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
APInt KnownZeroMask(~Known.Zero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
- if (!DoTransform) return ICI;
+ if (!DoTransform) return Cmp;
- bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+ bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
// (X&4) == 2 --> false
// (X&4) != 2 --> true
- Constant *Res = ConstantInt::get(CI.getType(), isNE);
- return replaceInstUsesWith(CI, Res);
+ Constant *Res = ConstantInt::get(Zext.getType(), isNE);
+ return replaceInstUsesWith(Zext, Res);
}
uint32_t ShAmt = KnownZeroMask.logBase2();
- Value *In = ICI->getOperand(0);
+ Value *In = Cmp->getOperand(0);
if (ShAmt) {
// Perform a logical shr by shiftamt.
// Insert the shift to put the result in the low bit.
@@ -912,11 +913,11 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
In = Builder.CreateXor(In, One);
}
- if (CI.getType() == In->getType())
- return replaceInstUsesWith(CI, In);
+ if (Zext.getType() == In->getType())
+ return replaceInstUsesWith(Zext, In);
- Value *IntCast = Builder.CreateIntCast(In, CI.getType(), false);
- return replaceInstUsesWith(CI, IntCast);
+ Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
+ return replaceInstUsesWith(Zext, IntCast);
}
}
}
@@ -924,19 +925,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
// icmp ne A, B is equal to xor A, B when A and B only really have one bit.
// It is also profitable to transform icmp eq into not(xor(A, B)) because that
// may lead to additional simplifications.
- if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
- if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
- Value *LHS = ICI->getOperand(0);
- Value *RHS = ICI->getOperand(1);
+ if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) {
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
- KnownBits KnownLHS = computeKnownBits(LHS, 0, &CI);
- KnownBits KnownRHS = computeKnownBits(RHS, 0, &CI);
+ KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
+ KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);
if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
APInt UnknownBit = ~KnownBits;
if (UnknownBit.countPopulation() == 1) {
- if (!DoTransform) return ICI;
+ if (!DoTransform) return Cmp;
Value *Result = Builder.CreateXor(LHS, RHS);
@@ -949,10 +950,10 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
Result = Builder.CreateLShr(
Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
- if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1));
- Result->takeName(ICI);
- return replaceInstUsesWith(CI, Result);
+ Result->takeName(Cmp);
+ return replaceInstUsesWith(Zext, Result);
}
}
}
@@ -1172,8 +1173,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
}
}
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
- return transformZExtICmp(ICI, CI);
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
+ return transformZExtICmp(Cmp, CI);
BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
if (SrcI && SrcI->getOpcode() == Instruction::Or) {
@@ -1188,7 +1189,9 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName());
Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName());
- BinaryOperator *Or = BinaryOperator::Create(Instruction::Or, LCast, RCast);
+ Value *Or = Builder.CreateOr(LCast, RCast, CI.getName());
+ if (auto *OrInst = dyn_cast<Instruction>(Or))
+ Builder.SetInsertPoint(OrInst);
// Perform the elimination.
if (auto *LZExt = dyn_cast<ZExtInst>(LCast))
@@ -1196,7 +1199,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
if (auto *RZExt = dyn_cast<ZExtInst>(RCast))
transformZExtICmp(RHS, *RZExt);
- return Or;
+ return replaceInstUsesWith(CI, Or);
}
}
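For reference, the rewritten fold relies on zext distributing over an i1 or: widening the disjunction equals the or of the widened operands. A standalone C++ sketch of that identity (illustration only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B) {
      uint32_t Widened = static_cast<uint32_t>(A || B); // zext (or icmp, icmp)
      uint32_t OrOfWide = static_cast<uint32_t>(A) |    // or (zext icmp),
                          static_cast<uint32_t>(B);     //    (zext icmp)
      assert(Widened == OrOfWide);
    }
  return 0;
}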
@@ -1621,6 +1624,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
Value *X;
Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
if (Op && Op->hasOneUse()) {
+ // FIXME: The FMF should propagate from the fptrunc, not the source op.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ if (isa<FPMathOperator>(Op))
+ Builder.setFastMathFlags(Op->getFastMathFlags());
+
if (match(Op, m_FNeg(m_Value(X)))) {
Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
@@ -1630,6 +1638,24 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
return BinaryOperator::CreateFNegFMF(InnerTrunc, Op);
return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
}
+
+ // If we are truncating a select that has an extended operand, we can
+ // narrow the other operand and do the select as a narrow op.
+ Value *Cond, *X, *Y;
+ if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
+ X->getType() == Ty) {
+      // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
+ Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
+ Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
+ return replaceInstUsesWith(FPT, Sel);
+ }
+ if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
+ X->getType() == Ty) {
+      // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
+ Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
+ Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
+ return replaceInstUsesWith(FPT, Sel);
+ }
}
if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
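The select-narrowing fold above is exact because fptrunc(fpext X) round-trips to X, and the other arm is truncated on both sides. A minimal standalone sketch of the two arms (assumes default rounding; not part of the patch):

#include <cassert>

int main() {
  float X = 2.5f;
  double Y = 3.141592653589793;
  for (int Cond = 0; Cond <= 1; ++Cond) {
    // fptrunc (select Cond, (fpext X), Y)
    float Wide = static_cast<float>(Cond ? static_cast<double>(X) : Y);
    // select Cond, X, (fptrunc Y)
    float Narrow = Cond ? X : static_cast<float>(Y);
    assert(Wide == Narrow);
  }
  return 0;
}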
@@ -1808,7 +1834,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
Type *Ty = CI.getType();
unsigned AS = CI.getPointerAddressSpace();
- if (Ty->getScalarSizeInBits() == DL.getIndexSizeInBits(AS))
+ if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
return commonPointerCastTransforms(CI);
Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
@@ -1820,12 +1846,24 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
}
/// This input value (which is known to have vector type) is being zero extended
-/// or truncated to the specified vector type.
+/// or truncated to the specified vector type. Since the zext/trunc is done
+/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, so
+/// endianness will impact which end of the vector is extended or
+/// truncated.
+///
+/// A vector is always stored with index 0 at the lowest address, which
+/// corresponds to the most significant bits for a big endian stored integer and
+/// the least significant bits for little endian. A trunc/zext of an integer
+/// impacts the big end of the integer. Thus, we need to add/remove elements at
+/// the front of the vector for big endian targets, and the back of the vector
+/// for little endian targets.
+///
/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
///
/// The source and destination vector types may have different element types.
-static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
- InstCombiner &IC) {
+static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal,
+ VectorType *DestTy,
+ InstCombiner &IC) {
// We can only do this optimization if the output is a multiple of the input
// element size, or the input is a multiple of the output element size.
// Convert the input type to have the same element type as the output.
@@ -1844,31 +1882,53 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
}
+ bool IsBigEndian = IC.getDataLayout().isBigEndian();
+ unsigned SrcElts = SrcTy->getNumElements();
+ unsigned DestElts = DestTy->getNumElements();
+
+ assert(SrcElts != DestElts && "Element counts should be different.");
+
// Now that the element types match, get the shuffle mask and RHS of the
// shuffle to use, which depends on whether we're increasing or decreasing the
// size of the input.
- SmallVector<uint32_t, 16> ShuffleMask;
+ SmallVector<uint32_t, 16> ShuffleMaskStorage;
+ ArrayRef<uint32_t> ShuffleMask;
Value *V2;
- if (SrcTy->getNumElements() > DestTy->getNumElements()) {
- // If we're shrinking the number of elements, just shuffle in the low
- // elements from the input and use undef as the second shuffle input.
- V2 = UndefValue::get(SrcTy);
- for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
- ShuffleMask.push_back(i);
+  // Produce an identity shuffle mask for the src vector.
+ ShuffleMaskStorage.resize(SrcElts);
+ std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
+ if (SrcElts > DestElts) {
+ // If we're shrinking the number of elements (rewriting an integer
+ // truncate), just shuffle in the elements corresponding to the least
+ // significant bits from the input and use undef as the second shuffle
+ // input.
+ V2 = UndefValue::get(SrcTy);
+ // Make sure the shuffle mask selects the "least significant bits" by
+    // keeping elements from the back of the src vector for big endian, and
+    // from the front for little endian.
+ ShuffleMask = ShuffleMaskStorage;
+ if (IsBigEndian)
+ ShuffleMask = ShuffleMask.take_back(DestElts);
+ else
+ ShuffleMask = ShuffleMask.take_front(DestElts);
} else {
- // If we're increasing the number of elements, shuffle in all of the
- // elements from InVal and fill the rest of the result elements with zeros
- // from a constant zero.
+ // If we're increasing the number of elements (rewriting an integer zext),
+ // shuffle in all of the elements from InVal. Fill the rest of the result
+ // elements with zeros from a constant zero.
V2 = Constant::getNullValue(SrcTy);
- unsigned SrcElts = SrcTy->getNumElements();
- for (unsigned i = 0, e = SrcElts; i != e; ++i)
- ShuffleMask.push_back(i);
-
- // The excess elements reference the first element of the zero input.
- for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
- ShuffleMask.push_back(SrcElts);
+    // Use the first elt from V2 when indicating zero in the shuffle mask.
+ uint32_t NullElt = SrcElts;
+ // Extend with null values in the "most significant bits" by adding elements
+ // in front of the src vector for big endian, and at the back for little
+ // endian.
+ unsigned DeltaElts = DestElts - SrcElts;
+ if (IsBigEndian)
+ ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
+ else
+ ShuffleMaskStorage.append(DeltaElts, NullElt);
+ ShuffleMask = ShuffleMaskStorage;
}
return new ShuffleVectorInst(InVal, V2,
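The mask construction above can be hard to visualize. A standalone C++ sketch that mirrors the iota-then-trim/pad logic, where std::vector stands in for SmallVector/ArrayRef (illustration only):

#include <cstdio>
#include <numeric>
#include <vector>

// Start from an identity mask over the source elements, then trim (trunc) or
// pad with the zero-vector element (zext) at the end that holds the most
// significant bits.
static std::vector<unsigned> BuildMask(unsigned SrcElts, unsigned DestElts,
                                       bool IsBigEndian) {
  std::vector<unsigned> Mask(SrcElts);
  std::iota(Mask.begin(), Mask.end(), 0u);
  if (SrcElts > DestElts) {
    // Shrinking: keep the least significant elements.
    if (IsBigEndian)
      Mask.erase(Mask.begin(), Mask.end() - DestElts); // take_back
    else
      Mask.resize(DestElts);                           // take_front
  } else {
    unsigned NullElt = SrcElts; // first element of the zero vector
    unsigned Delta = DestElts - SrcElts;
    Mask.insert(IsBigEndian ? Mask.begin() : Mask.end(), Delta, NullElt);
  }
  return Mask;
}

int main() {
  for (bool BE : {false, true}) {
    std::vector<unsigned> M = BuildMask(4, 2, BE);
    std::printf("%s trunc mask: %u %u\n", BE ? "BE" : "LE", M[0], M[1]);
  }
  return 0;
}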
@@ -2217,6 +2277,31 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
}
}
+ // Check that each user of each old PHI node is something that we can
+ // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
+ for (auto *OldPN : OldPhiNodes) {
+ for (User *V : OldPN->users()) {
+ if (auto *SI = dyn_cast<StoreInst>(V)) {
+ if (!SI->isSimple() || SI->getOperand(0) != OldPN)
+ return nullptr;
+ } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
+ // Verify it's a B->A cast.
+ Type *TyB = BCI->getOperand(0)->getType();
+ Type *TyA = BCI->getType();
+ if (TyA != DestTy || TyB != SrcTy)
+ return nullptr;
+ } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+ // As long as the user is another old PHI node, then even if we don't
+ // rewrite it, the PHI web we're considering won't have any users
+ // outside itself, so it'll be dead.
+ if (OldPhiNodes.count(PHI) == 0)
+ return nullptr;
+ } else {
+ return nullptr;
+ }
+ }
+ }
+
// For each old PHI node, create a corresponding new PHI node with a type A.
SmallDenseMap<PHINode *, PHINode *> NewPNodes;
for (auto *OldPN : OldPhiNodes) {
@@ -2234,9 +2319,14 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
if (auto *C = dyn_cast<Constant>(V)) {
NewV = ConstantExpr::getBitCast(C, DestTy);
} else if (auto *LI = dyn_cast<LoadInst>(V)) {
- Builder.SetInsertPoint(LI->getNextNode());
- NewV = Builder.CreateBitCast(LI, DestTy);
- Worklist.Add(LI);
+ // Explicitly perform load combine to make sure no opposing transform
+ // can remove the bitcast in the meantime and trigger an infinite loop.
+ Builder.SetInsertPoint(LI);
+ NewV = combineLoadToNewType(*LI, DestTy);
+ // Remove the old load and its use in the old phi, which itself becomes
+ // dead once the whole transform finishes.
+ replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
+ eraseInstFromFunction(*LI);
} else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
NewV = BCI->getOperand(0);
} else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
@@ -2259,26 +2349,33 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
Instruction *RetVal = nullptr;
for (auto *OldPN : OldPhiNodes) {
PHINode *NewPN = NewPNodes[OldPN];
- for (User *V : OldPN->users()) {
+ for (auto It = OldPN->user_begin(), End = OldPN->user_end(); It != End; ) {
+ User *V = *It;
+      // We may remove this user; advance now to avoid iterator invalidation.
+ ++It;
if (auto *SI = dyn_cast<StoreInst>(V)) {
- if (SI->isSimple() && SI->getOperand(0) == OldPN) {
- Builder.SetInsertPoint(SI);
- auto *NewBC =
- cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
- SI->setOperand(0, NewBC);
- Worklist.Add(SI);
- assert(hasStoreUsersOnly(*NewBC));
- }
+ assert(SI->isSimple() && SI->getOperand(0) == OldPN);
+ Builder.SetInsertPoint(SI);
+ auto *NewBC =
+ cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
+ SI->setOperand(0, NewBC);
+ Worklist.Add(SI);
+ assert(hasStoreUsersOnly(*NewBC));
}
else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
- // Verify it's a B->A cast.
Type *TyB = BCI->getOperand(0)->getType();
Type *TyA = BCI->getType();
- if (TyA == DestTy && TyB == SrcTy) {
- Instruction *I = replaceInstUsesWith(*BCI, NewPN);
- if (BCI == &CI)
- RetVal = I;
- }
+ assert(TyA == DestTy && TyB == SrcTy);
+ (void) TyA;
+ (void) TyB;
+ Instruction *I = replaceInstUsesWith(*BCI, NewPN);
+ if (BCI == &CI)
+ RetVal = I;
+ } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+ assert(OldPhiNodes.count(PHI) > 0);
+ (void) PHI;
+ } else {
+ llvm_unreachable("all uses should be handled");
}
}
}
@@ -2374,8 +2471,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
CastInst *SrcCast = cast<CastInst>(Src);
if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
if (isa<VectorType>(BCIn->getOperand(0)->getType()))
- if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
- cast<VectorType>(DestTy), *this))
+ if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
+ BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
return I;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index a9f64feb600c..f38dc436722d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2566,9 +2566,6 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
Type *Ty = Add->getType();
CmpInst::Predicate Pred = Cmp.getPredicate();
- if (!Add->hasOneUse())
- return nullptr;
-
// If the add does not wrap, we can always adjust the compare by subtracting
// the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
// are canonicalized to SGT/SLT/UGT/ULT.
@@ -2602,6 +2599,9 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
}
+ if (!Add->hasOneUse())
+ return nullptr;
+
// X+C <u C2 -> (X & -C2) == C
// iff C & (C2-1) == 0
// C2 is a power of 2
@@ -3364,6 +3364,23 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
llvm_unreachable("All possible folds are handled.");
}
+ // The mask value may be a vector constant that has undefined elements. But it
+ // may not be safe to propagate those undefs into the new compare, so replace
+ // those elements by copying an existing, defined, and safe scalar constant.
+ Type *OpTy = M->getType();
+ auto *VecC = dyn_cast<Constant>(M);
+ if (OpTy->isVectorTy() && VecC && VecC->containsUndefElement()) {
+ Constant *SafeReplacementConstant = nullptr;
+ for (unsigned i = 0, e = OpTy->getVectorNumElements(); i != e; ++i) {
+ if (!isa<UndefValue>(VecC->getAggregateElement(i))) {
+ SafeReplacementConstant = VecC->getAggregateElement(i);
+ break;
+ }
+ }
+ assert(SafeReplacementConstant && "Failed to find undef replacement");
+ M = Constant::replaceUndefsWith(VecC, SafeReplacementConstant);
+ }
+
return Builder.CreateICmp(DstPred, X, M);
}
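A toy model of the undef-lane replacement above, using std::optional lanes instead of LLVM Constants (illustration only, not the LLVM API):

#include <cassert>
#include <optional>
#include <vector>

int main() {
  // Lanes model a vector constant; nullopt stands in for an undef element.
  std::vector<std::optional<int>> Lanes = {std::nullopt, 7, std::nullopt, 7};
  // Find the first defined lane, as the loop above does.
  std::optional<int> Safe;
  for (const auto &L : Lanes)
    if (L) { Safe = L; break; }
  assert(Safe && "Failed to find undef replacement");
  // Replace every undef lane with the safe value.
  for (auto &L : Lanes)
    if (!L) L = Safe;
  for (const auto &L : Lanes)
    assert(L.has_value());
  return 0;
}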
@@ -4930,7 +4947,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
// Get scalar or pointer size.
unsigned BitWidth = Ty->isIntOrIntVectorTy()
? Ty->getScalarSizeInBits()
- : DL.getIndexTypeSizeInBits(Ty->getScalarType());
+ : DL.getPointerTypeSizeInBits(Ty->getScalarType());
if (!BitWidth)
return nullptr;
@@ -5167,6 +5184,7 @@ llvm::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned);
};
+ Constant *SafeReplacementConstant = nullptr;
if (auto *CI = dyn_cast<ConstantInt>(C)) {
// Bail out if the constant can't be safely incremented/decremented.
if (!ConstantIsOk(CI))
@@ -5186,12 +5204,23 @@ llvm::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
auto *CI = dyn_cast<ConstantInt>(Elt);
if (!CI || !ConstantIsOk(CI))
return llvm::None;
+
+ if (!SafeReplacementConstant)
+ SafeReplacementConstant = CI;
}
} else {
// ConstantExpr?
return llvm::None;
}
+ // It may not be safe to change a compare predicate in the presence of
+ // undefined elements, so replace those elements with the first safe constant
+ // that we found.
+ if (C->containsUndefElement()) {
+ assert(SafeReplacementConstant && "Replacement constant not set");
+ C = Constant::replaceUndefsWith(C, SafeReplacementConstant);
+ }
+
CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred);
// Increment or decrement the constant.
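A standalone sketch of the strictness flip being applied, e.g. x <s C becomes x <=s C-1; this is valid precisely because the ConstantIsOk check above rejected constants that would wrap:

#include <cassert>

int main() {
  // x <s C  <=>  x <=s C-1, provided C-1 does not wrap; C == INT_MIN was
  // already rejected before this point.
  for (int C = -3; C <= 3; ++C)
    for (int X = -5; X <= 5; ++X)
      assert((X < C) == (X <= C - 1));
  return 0;
}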
@@ -5374,6 +5403,36 @@ static Instruction *foldVectorCmp(CmpInst &Cmp,
return nullptr;
}
+// extract(uadd.with.overflow(A, B), 0) ult A
+// -> extract(uadd.with.overflow(A, B), 1)
+static Instruction *foldICmpOfUAddOv(ICmpInst &I) {
+ CmpInst::Predicate Pred = I.getPredicate();
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ Value *UAddOv;
+ Value *A, *B;
+ auto UAddOvResultPat = m_ExtractValue<0>(
+ m_Intrinsic<Intrinsic::uadd_with_overflow>(m_Value(A), m_Value(B)));
+ if (match(Op0, UAddOvResultPat) &&
+ ((Pred == ICmpInst::ICMP_ULT && (Op1 == A || Op1 == B)) ||
+ (Pred == ICmpInst::ICMP_EQ && match(Op1, m_ZeroInt()) &&
+ (match(A, m_One()) || match(B, m_One()))) ||
+ (Pred == ICmpInst::ICMP_NE && match(Op1, m_AllOnes()) &&
+ (match(A, m_AllOnes()) || match(B, m_AllOnes())))))
+ // extract(uadd.with.overflow(A, B), 0) < A
+ // extract(uadd.with.overflow(A, 1), 0) == 0
+ // extract(uadd.with.overflow(A, -1), 0) != -1
+ UAddOv = cast<ExtractValueInst>(Op0)->getAggregateOperand();
+ else if (match(Op1, UAddOvResultPat) &&
+ Pred == ICmpInst::ICMP_UGT && (Op0 == A || Op0 == B))
+ // A > extract(uadd.with.overflow(A, B), 0)
+ UAddOv = cast<ExtractValueInst>(Op1)->getAggregateOperand();
+ else
+ return nullptr;
+
+ return ExtractValueInst::Create(UAddOv, 1);
+}
+
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -5562,6 +5621,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpEquality(I))
return Res;
+ if (Instruction *Res = foldICmpOfUAddOv(I))
+ return Res;
+
// The 'cmpxchg' instruction returns an aggregate containing the old value and
// an i1 which indicates whether or not we successfully did the swap.
//
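The uadd.with.overflow fold above rests on the fact that an unsigned wrapping sum is smaller than either operand exactly when a carry occurred. A standalone sketch using the GCC/Clang __builtin_add_overflow builtin as a stand-in for the intrinsic (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Samples[] = {0, 1, 2, 127, 128, 200, 254, 255};
  for (uint8_t A : Samples)
    for (uint8_t B : Samples) {
      uint8_t Sum;
      bool Ov = __builtin_add_overflow(A, B, &Sum); // uadd.with.overflow
      // extract(uadd.with.overflow(A, B), 0) ult A  <=>  the overflow bit.
      assert((Sum < A) == Ov);
      // extract(uadd.with.overflow(A, 1), 0) == 0   <=>  the overflow bit.
      if (B == 1)
        assert((Sum == 0) == Ov);
    }
  return 0;
}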
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 1dbc06d92e7a..1a746cb87abb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -369,7 +369,8 @@ public:
Instruction *visitFNeg(UnaryOperator &I);
Instruction *visitAdd(BinaryOperator &I);
Instruction *visitFAdd(BinaryOperator &I);
- Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty);
+ Value *OptimizePointerDifference(
+ Value *LHS, Value *RHS, Type *Ty, bool isNUW);
Instruction *visitSub(BinaryOperator &I);
Instruction *visitFSub(BinaryOperator &I);
Instruction *visitMul(BinaryOperator &I);
@@ -446,6 +447,7 @@ public:
Instruction *visitLandingPadInst(LandingPadInst &LI);
Instruction *visitVAStartInst(VAStartInst &I);
Instruction *visitVACopyInst(VACopyInst &I);
+ Instruction *visitFreeze(FreezeInst &I);
/// Specify what to return for unhandled instructions.
Instruction *visitInstruction(Instruction &I) { return nullptr; }
@@ -465,6 +467,9 @@ public:
/// \return true if successful.
bool replacePointer(Instruction &I, Value *V);
+ LoadInst *combineLoadToNewType(LoadInst &LI, Type *NewTy,
+ const Twine &Suffix = "");
+
private:
bool shouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
bool shouldChangeType(Type *From, Type *To) const;
@@ -705,7 +710,7 @@ public:
Instruction *eraseInstFromFunction(Instruction &I) {
LLVM_DEBUG(dbgs() << "IC: ERASE " << I << '\n');
assert(I.use_empty() && "Cannot erase instruction that is used!");
- salvageDebugInfo(I);
+ salvageDebugInfoOrMarkUndef(I);
// Make sure that we reprocess all operands now that we reduced their
// use counts.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 3a0e05832fcb..ebf9d24eecc4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -449,8 +449,8 @@ static bool isSupportedAtomicType(Type *Ty) {
///
/// Note that this will create all of the instructions with whatever insert
/// point the \c InstCombiner currently is using.
-static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
- const Twine &Suffix = "") {
+LoadInst *InstCombiner::combineLoadToNewType(LoadInst &LI, Type *NewTy,
+ const Twine &Suffix) {
assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
"can't fold an atomic load to requested type");
@@ -460,10 +460,17 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
NewPtr->getType()->getPointerElementType() == NewTy &&
NewPtr->getType()->getPointerAddressSpace() == AS))
- NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
+ NewPtr = Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
- LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
- NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
+ unsigned Align = LI.getAlignment();
+ if (!Align)
+    // If the old load did not have an explicit alignment specified,
+    // manually preserve the implied (ABI) alignment of the load.
+    // Otherwise we may inadvertently over-promise alignment.
+ Align = getDataLayout().getABITypeAlignment(LI.getType());
+
+ LoadInst *NewLoad = Builder.CreateAlignedLoad(
+ NewTy, NewPtr, Align, LI.isVolatile(), LI.getName() + Suffix);
NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
copyMetadataForLoad(*NewLoad, LI);
return NewLoad;
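The alignment pin above matters because the implied guarantee of a no-align load is the ABI alignment of the old type, which may be smaller than the new type's. A tiny host-side illustration (host alignments shown for flavor; the real values come from the module's DataLayout):

#include <cstdint>
#include <cstdio>

int main() {
  // A no-align load of i32 only guarantees the i32 ABI alignment; re-typing
  // it to i64 must not silently promise the larger i64 ABI alignment.
  std::printf("abi align i32=%zu, i64=%zu\n",
              alignof(uint32_t), alignof(uint64_t));
  return 0;
}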
@@ -526,7 +533,7 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
/// Returns true if instruction represent minmax pattern like:
/// select ((cmp load V1, load V2), V1, V2).
-static bool isMinMaxWithLoads(Value *V) {
+static bool isMinMaxWithLoads(Value *V, Type *&LoadTy) {
assert(V->getType()->isPointerTy() && "Expected pointer type.");
// Ignore possible ty* to ixx* bitcast.
V = peekThroughBitcast(V);
@@ -540,6 +547,7 @@ static bool isMinMaxWithLoads(Value *V) {
if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
m_Value(LHS), m_Value(RHS))))
return false;
+ LoadTy = L1->getType();
return (match(L1, m_Load(m_Specific(LHS))) &&
match(L2, m_Load(m_Specific(RHS)))) ||
(match(L1, m_Load(m_Specific(RHS))) &&
@@ -585,20 +593,22 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
// size is a legal integer type.
// Do not perform canonicalization if minmax pattern is found (to avoid
// infinite loop).
+ Type *Dummy;
if (!Ty->isIntegerTy() && Ty->isSized() &&
+ !(Ty->isVectorTy() && Ty->getVectorIsScalable()) &&
DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
DL.typeSizeEqualsStoreSize(Ty) &&
!DL.isNonIntegralPointerType(Ty) &&
!isMinMaxWithLoads(
- peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
+ peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true),
+ Dummy)) {
if (all_of(LI.users(), [&LI](User *U) {
auto *SI = dyn_cast<StoreInst>(U);
return SI && SI->getPointerOperand() != &LI &&
!SI->getPointerOperand()->isSwiftError();
})) {
- LoadInst *NewLoad = combineLoadToNewType(
- IC, LI,
- Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
+ LoadInst *NewLoad = IC.combineLoadToNewType(
+ LI, Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
// Replace all the stores with stores of the newly loaded value.
for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
auto *SI = cast<StoreInst>(*UI++);
@@ -620,7 +630,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
if (CI->isNoopCast(DL))
if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
- LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
+ LoadInst *NewLoad = IC.combineLoadToNewType(LI, CI->getDestTy());
CI->replaceAllUsesWith(NewLoad);
IC.eraseInstFromFunction(*CI);
return &LI;
@@ -648,8 +658,8 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
// If the struct only has one element, we unpack.
auto NumElements = ST->getNumElements();
if (NumElements == 1) {
- LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
- ".unpack");
+ LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
+ ".unpack");
AAMDNodes AAMD;
LI.getAAMetadata(AAMD);
NewLoad->setAAMetadata(AAMD);
@@ -698,7 +708,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
auto *ET = AT->getElementType();
auto NumElements = AT->getNumElements();
if (NumElements == 1) {
- LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
+ LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
AAMDNodes AAMD;
LI.getAAMetadata(AAMD);
NewLoad->setAAMetadata(AAMD);
@@ -1322,7 +1332,14 @@ static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
auto *LI = cast<LoadInst>(SI.getValueOperand());
if (!LI->getType()->isIntegerTy())
return false;
- if (!isMinMaxWithLoads(LoadAddr))
+ Type *CmpLoadTy;
+ if (!isMinMaxWithLoads(LoadAddr, CmpLoadTy))
+ return false;
+
+ // Make sure we're not changing the size of the load/store.
+ const auto &DL = IC.getDataLayout();
+ if (DL.getTypeStoreSizeInBits(LI->getType()) !=
+ DL.getTypeStoreSizeInBits(CmpLoadTy))
return false;
if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
@@ -1334,8 +1351,7 @@ static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
return false;
IC.Builder.SetInsertPoint(LI);
- LoadInst *NewLI = combineLoadToNewType(
- IC, *LI, LoadAddr->getType()->getPointerElementType());
+ LoadInst *NewLI = IC.combineLoadToNewType(*LI, CmpLoadTy);
// Replace all the stores with stores of the newly loaded value.
for (auto *UI : LI->users()) {
auto *USI = cast<StoreInst>(UI);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 0b9128a9f5a1..2774e46151fa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1239,6 +1239,14 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
Value *YZ = Builder.CreateFMulFMF(Y, Op0, &I);
return BinaryOperator::CreateFDivFMF(YZ, X, &I);
}
+ // Z / (1.0 / Y) => (Y * Z)
+ //
+ // This is a special case of Z / (X / Y) => (Y * Z) / X, with X = 1.0. The
+  // m_OneUse check is avoided because even in the case of multiple uses
+  // for 1.0/Y, the number of instructions remains the same and a division is
+ // replaced by a multiplication.
+ if (match(Op1, m_FDiv(m_SpecificFP(1.0), m_Value(Y))))
+ return BinaryOperator::CreateFMulFMF(Y, Op0, &I);
}
if (I.hasAllowReassoc() && Op0->hasOneUse() && Op1->hasOneUse()) {
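A standalone numeric spot-check of the new reciprocal fold; the operands here are powers of two, so both forms happen to be bit-exact, which is not guaranteed in general (hence the allow-reassoc fast-math gate):

#include <cassert>

int main() {
  double Y = 4.0, Z = 10.0;
  assert(Z / (1.0 / Y) == Y * Z); // Z / (1.0 / Y) => (Y * Z)
  return 0;
}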
@@ -1368,8 +1376,10 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
}
// 1 urem X -> zext(X != 1)
- if (match(Op0, m_One()))
- return CastInst::CreateZExtOrBitCast(Builder.CreateICmpNE(Op1, Op0), Ty);
+ if (match(Op0, m_One())) {
+ Value *Cmp = Builder.CreateICmpNE(Op1, ConstantInt::get(Ty, 1));
+ return CastInst::CreateZExtOrBitCast(Cmp, Ty);
+ }
// X urem C -> X < C ? X : X - C, where C >= signbit.
if (match(Op1, m_Negative())) {
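A standalone range check of the rewritten urem fold (X == 0 is skipped, since urem by zero is undefined):

#include <cassert>
#include <cstdint>

int main() {
  // 1 urem X -> zext(X != 1): the remainder of 1 is 0 only when X is 1.
  for (uint32_t X = 1; X < 1000; ++X)
    assert(1u % X == static_cast<uint32_t>(X != 1u));
  return 0;
}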
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index e0376b7582f3..74e015a4f1d4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -14,9 +14,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -180,13 +181,14 @@ Instruction *InstCombiner::FoldIntegerTypedPHI(PHINode &PN) {
"Not enough available ptr typed incoming values");
PHINode *MatchingPtrPHI = nullptr;
unsigned NumPhis = 0;
- for (auto II = BB->begin(), EI = BasicBlock::iterator(BB->getFirstNonPHI());
- II != EI; II++, NumPhis++) {
+ for (auto II = BB->begin(); II != BB->end(); II++, NumPhis++) {
// FIXME: consider handling this in AggressiveInstCombine
+ PHINode *PtrPHI = dyn_cast<PHINode>(II);
+ if (!PtrPHI)
+ break;
if (NumPhis > MaxNumPhis)
return nullptr;
- PHINode *PtrPHI = dyn_cast<PHINode>(II);
- if (!PtrPHI || PtrPHI == &PN || PtrPHI->getType() != IntToPtr->getType())
+ if (PtrPHI == &PN || PtrPHI->getType() != IntToPtr->getType())
continue;
MatchingPtrPHI = PtrPHI;
for (unsigned i = 0; i != PtrPHI->getNumIncomingValues(); ++i) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 9fc871e49b30..05a624fde86b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -704,16 +704,24 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
assert((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) &&
"Unexpected isUnsigned predicate!");
- // Account for swapped form of subtraction: ((a > b) ? b - a : 0).
+ // Ensure the sub is of the form:
+ // (a > b) ? a - b : 0 -> usub.sat(a, b)
+ // (a > b) ? b - a : 0 -> -usub.sat(a, b)
+  // We check for both a-b and a+(-b), where the negated operand is a constant.
bool IsNegative = false;
- if (match(TrueVal, m_Sub(m_Specific(B), m_Specific(A))))
+ const APInt *C;
+ if (match(TrueVal, m_Sub(m_Specific(B), m_Specific(A))) ||
+ (match(A, m_APInt(C)) &&
+ match(TrueVal, m_Add(m_Specific(B), m_SpecificInt(-*C)))))
IsNegative = true;
- else if (!match(TrueVal, m_Sub(m_Specific(A), m_Specific(B))))
+ else if (!match(TrueVal, m_Sub(m_Specific(A), m_Specific(B))) &&
+ !(match(B, m_APInt(C)) &&
+ match(TrueVal, m_Add(m_Specific(A), m_SpecificInt(-*C)))))
return nullptr;
- // If sub is used anywhere else, we wouldn't be able to eliminate it
- // afterwards.
- if (!TrueVal->hasOneUse())
+ // If we are adding a negate and the sub and icmp are used anywhere else, we
+ // would end up with more instructions.
+ if (IsNegative && !TrueVal->hasOneUse() && !ICI->hasOneUse())
return nullptr;
// (a > b) ? a - b : 0 -> usub.sat(a, b)
@@ -781,6 +789,13 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
return Builder.CreateBinaryIntrinsic(
Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
}
+  // The overflow may be detected via the add wrapping around.
+ if (match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) &&
+ match(FVal, m_c_Add(m_Specific(Cmp1), m_Specific(Y)))) {
+ // ((X + Y) u< X) ? -1 : (X + Y) --> uadd.sat(X, Y)
+ // ((X + Y) u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
+ return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp1, Y);
+ }
return nullptr;
}
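The wrap-around form recognized above is equivalent to uadd.sat. A standalone sketch at i16 width, checked against a widened reference (not the LLVM API):

#include <cassert>
#include <cstdint>

static uint16_t UAddSatRef(uint16_t X, uint16_t Y) {
  uint32_t Wide = uint32_t(X) + uint32_t(Y);
  return Wide > 0xFFFFu ? uint16_t(0xFFFFu) : uint16_t(Wide);
}

int main() {
  const uint16_t S[] = {0, 1, 2, 0x7FFF, 0x8000, 0xFFFE, 0xFFFF};
  for (uint16_t X : S)
    for (uint16_t Y : S) {
      uint16_t Sum = uint16_t(X + Y);          // wrapping add
      uint16_t Sel = (Sum < X) ? 0xFFFF : Sum; // ((X+Y) u< X) ? -1 : (X+Y)
      assert(Sel == UAddSatRef(X, Y));
    }
  return 0;
}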
@@ -1725,6 +1740,128 @@ static Instruction *foldAddSubSelect(SelectInst &SI,
return nullptr;
}
+/// Turn X + Y overflows ? -1 : X + Y -> uadd_sat X, Y
+/// And X - Y overflows ? 0 : X - Y -> usub_sat X, Y
+/// Along with a number of patterns similar to:
+/// X + Y overflows ? (X < 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+/// X - Y overflows ? (X > 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+static Instruction *
+foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ WithOverflowInst *II;
+ if (!match(CondVal, m_ExtractValue<1>(m_WithOverflowInst(II))) ||
+ !match(FalseVal, m_ExtractValue<0>(m_Specific(II))))
+ return nullptr;
+
+ Value *X = II->getLHS();
+ Value *Y = II->getRHS();
+
+ auto IsSignedSaturateLimit = [&](Value *Limit, bool IsAdd) {
+ Type *Ty = Limit->getType();
+
+ ICmpInst::Predicate Pred;
+ Value *TrueVal, *FalseVal, *Op;
+ const APInt *C;
+ if (!match(Limit, m_Select(m_ICmp(Pred, m_Value(Op), m_APInt(C)),
+ m_Value(TrueVal), m_Value(FalseVal))))
+ return false;
+
+ auto IsZeroOrOne = [](const APInt &C) {
+ return C.isNullValue() || C.isOneValue();
+ };
+ auto IsMinMax = [&](Value *Min, Value *Max) {
+ APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
+ APInt MaxVal = APInt::getSignedMaxValue(Ty->getScalarSizeInBits());
+ return match(Min, m_SpecificInt(MinVal)) &&
+ match(Max, m_SpecificInt(MaxVal));
+ };
+
+ if (Op != X && Op != Y)
+ return false;
+
+ if (IsAdd) {
+ // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ if (Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) &&
+ IsMinMax(TrueVal, FalseVal))
+ return true;
+ // X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ if (Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) &&
+ IsMinMax(FalseVal, TrueVal))
+ return true;
+ } else {
+ // X - Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+ // X - Y overflows ? (X <s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+ if (Op == X && Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C + 1) &&
+ IsMinMax(TrueVal, FalseVal))
+ return true;
+ // X - Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+ // X - Y overflows ? (X >s -2 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+ if (Op == X && Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 2) &&
+ IsMinMax(FalseVal, TrueVal))
+ return true;
+ // X - Y overflows ? (Y <s 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+ // X - Y overflows ? (Y <s 1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+ if (Op == Y && Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) &&
+ IsMinMax(FalseVal, TrueVal))
+ return true;
+ // X - Y overflows ? (Y >s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+ // X - Y overflows ? (Y >s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+ if (Op == Y && Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) &&
+ IsMinMax(TrueVal, FalseVal))
+ return true;
+ }
+
+ return false;
+ };
+
+ Intrinsic::ID NewIntrinsicID;
+ if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow &&
+ match(TrueVal, m_AllOnes()))
+ // X + Y overflows ? -1 : X + Y -> uadd_sat X, Y
+ NewIntrinsicID = Intrinsic::uadd_sat;
+ else if (II->getIntrinsicID() == Intrinsic::usub_with_overflow &&
+ match(TrueVal, m_Zero()))
+ // X - Y overflows ? 0 : X - Y -> usub_sat X, Y
+ NewIntrinsicID = Intrinsic::usub_sat;
+ else if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow &&
+ IsSignedSaturateLimit(TrueVal, /*IsAdd=*/true))
+ // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ // X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+ NewIntrinsicID = Intrinsic::sadd_sat;
+ else if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow &&
+ IsSignedSaturateLimit(TrueVal, /*IsAdd=*/false))
+ // X - Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+ // X - Y overflows ? (X <s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+ // X - Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+ // X - Y overflows ? (X >s -2 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+ // X - Y overflows ? (Y <s 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+ // X - Y overflows ? (Y <s 1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+ // X - Y overflows ? (Y >s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+ // X - Y overflows ? (Y >s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+ NewIntrinsicID = Intrinsic::ssub_sat;
+ else
+ return nullptr;
+
+ Function *F =
+ Intrinsic::getDeclaration(SI.getModule(), NewIntrinsicID, SI.getType());
+ return CallInst::Create(F, {X, Y});
+}
+
Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) {
Constant *C;
if (!match(Sel.getTrueValue(), m_Constant(C)) &&
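An exhaustive i8 check of one of the sadd_sat select shapes listed above, with the GCC/Clang __builtin_add_overflow builtin standing in for sadd.with.overflow (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X <= 127; ++X)
    for (int Y = -128; Y <= 127; ++Y) {
      int8_t Sum;
      bool Ov = __builtin_add_overflow(int8_t(X), int8_t(Y), &Sum);
      // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y
      int Sel = Ov ? (X < 0 ? -128 : 127) : Sum;
      // Reference sadd_sat, computed in a wider type.
      int Wide = X + Y;
      int Sat = Wide > 127 ? 127 : (Wide < -128 ? -128 : Wide);
      assert(Sel == Sat);
    }
  return 0;
}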
@@ -2296,7 +2433,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// See if we are selecting two values based on a comparison of the two values.
if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
- if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
+ Value *Cmp0 = FCI->getOperand(0), *Cmp1 = FCI->getOperand(1);
+ if ((Cmp0 == TrueVal && Cmp1 == FalseVal) ||
+ (Cmp0 == FalseVal && Cmp1 == TrueVal)) {
// Canonicalize to use ordered comparisons by swapping the select
// operands.
//
@@ -2305,30 +2444,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ // FIXME: The FMF should propagate from the select, not the fcmp.
Builder.setFastMathFlags(FCI->getFastMathFlags());
- Value *NewCond = Builder.CreateFCmp(InvPred, TrueVal, FalseVal,
- FCI->getName() + ".inv");
-
- return SelectInst::Create(NewCond, FalseVal, TrueVal,
- SI.getName() + ".p");
- }
-
- // NOTE: if we wanted to, this is where to detect MIN/MAX
- } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
- // Canonicalize to use ordered comparisons by swapping the select
- // operands.
- //
- // e.g.
- // (X ugt Y) ? X : Y -> (X ole Y) ? X : Y
- if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
- FCmpInst::Predicate InvPred = FCI->getInversePredicate();
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- Builder.setFastMathFlags(FCI->getFastMathFlags());
- Value *NewCond = Builder.CreateFCmp(InvPred, FalseVal, TrueVal,
+ Value *NewCond = Builder.CreateFCmp(InvPred, Cmp0, Cmp1,
FCI->getName() + ".inv");
-
- return SelectInst::Create(NewCond, FalseVal, TrueVal,
- SI.getName() + ".p");
+ Value *NewSel = Builder.CreateSelect(NewCond, FalseVal, TrueVal);
+ return replaceInstUsesWith(SI, NewSel);
}
// NOTE: if we wanted to, this is where to detect MIN/MAX
@@ -2391,6 +2512,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *Add = foldAddSubSelect(SI, Builder))
return Add;
+ if (Instruction *Add = foldOverflowingAddSubSelect(SI, Builder))
+ return Add;
// Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
auto *TI = dyn_cast<Instruction>(TrueVal);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 64294838644f..fbff5dd4a8cd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -138,24 +138,6 @@ Value *InstCombiner::reassociateShiftAmtsOfTwoSameDirectionShifts(
return Ret;
}
-// Try to replace `undef` constants in C with Replacement.
-static Constant *replaceUndefsWith(Constant *C, Constant *Replacement) {
- if (C && match(C, m_Undef()))
- return Replacement;
-
- if (auto *CV = dyn_cast<ConstantVector>(C)) {
- llvm::SmallVector<Constant *, 32> NewOps(CV->getNumOperands());
- for (unsigned i = 0, NumElts = NewOps.size(); i != NumElts; ++i) {
- Constant *EltC = CV->getOperand(i);
- NewOps[i] = EltC && match(EltC, m_Undef()) ? Replacement : EltC;
- }
- return ConstantVector::get(NewOps);
- }
-
- // Don't know how to deal with this constant.
- return C;
-}
-
// If we have some pattern that leaves only some low bits set, and then performs
// left-shift of those bits, if none of the bits that are left after the final
// shift are modified by the mask, we can omit the mask.
@@ -180,10 +162,20 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
"The input must be 'shl'!");
Value *Masked, *ShiftShAmt;
- match(OuterShift, m_Shift(m_Value(Masked), m_Value(ShiftShAmt)));
+ match(OuterShift,
+ m_Shift(m_Value(Masked), m_ZExtOrSelf(m_Value(ShiftShAmt))));
+
+  // *If* there is a truncation between an outer shift and a possibly-masked
+  // value, then said truncation *must* be one-use, else we can't perform the
+  // fold.
+ Value *Trunc;
+ if (match(Masked, m_CombineAnd(m_Trunc(m_Value(Masked)), m_Value(Trunc))) &&
+ !Trunc->hasOneUse())
+ return nullptr;
Type *NarrowestTy = OuterShift->getType();
Type *WidestTy = Masked->getType();
+ bool HadTrunc = WidestTy != NarrowestTy;
+
// The mask must be computed in a type twice as wide to ensure
// that no bits are lost if the sum-of-shifts is wider than the base type.
Type *ExtendedTy = WidestTy->getExtendedType();
@@ -204,6 +196,14 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
Constant *NewMask;
if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) {
+ // Peek through an optional zext of the shift amount.
+ match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
+
+ // We have two shift amounts from two different shifts. The types of those
+  // shift amounts may not match. If that's the case, let's bail out now.
+ if (MaskShAmt->getType() != ShiftShAmt->getType())
+ return nullptr;
+
// Can we simplify (MaskShAmt+ShiftShAmt) ?
auto *SumOfShAmts = dyn_cast_or_null<Constant>(SimplifyAddInst(
MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
@@ -216,7 +216,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// completely unknown. Replace the `undef` shift amounts with the final
// shift bitwidth to ensure that the value remains undef when creating the
// subsequent shift op.
- SumOfShAmts = replaceUndefsWith(
+ SumOfShAmts = Constant::replaceUndefsWith(
SumOfShAmts, ConstantInt::get(SumOfShAmts->getType()->getScalarType(),
ExtendedTy->getScalarSizeInBits()));
auto *ExtendedSumOfShAmts = ConstantExpr::getZExt(SumOfShAmts, ExtendedTy);
@@ -228,6 +228,14 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
} else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) ||
match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)),
m_Deferred(MaskShAmt)))) {
+ // Peek through an optional zext of the shift amount.
+ match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
+
+ // We have two shift amounts from two different shifts. The types of those
+  // shift amounts may not match. If that's the case, let's bail out now.
+ if (MaskShAmt->getType() != ShiftShAmt->getType())
+ return nullptr;
+
// Can we simplify (ShiftShAmt-MaskShAmt) ?
auto *ShAmtsDiff = dyn_cast_or_null<Constant>(SimplifySubInst(
ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
@@ -241,7 +249,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// bitwidth of innermost shift to ensure that the value remains undef when
// creating the subsequent shift op.
unsigned WidestTyBitWidth = WidestTy->getScalarSizeInBits();
- ShAmtsDiff = replaceUndefsWith(
+ ShAmtsDiff = Constant::replaceUndefsWith(
ShAmtsDiff, ConstantInt::get(ShAmtsDiff->getType()->getScalarType(),
-WidestTyBitWidth));
auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt(
@@ -272,10 +280,15 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
return nullptr;
}
+ // If we need to apply truncation, let's do it first, since we can.
+ // We have already ensured that the old truncation will go away.
+ if (HadTrunc)
+ X = Builder.CreateTrunc(X, NarrowestTy);
+
// No 'NUW'/'NSW'! We no longer know that we won't shift-out non-0 bits.
+ // We didn't change the Type of this outermost shift, so we can just do it.
auto *NewShift = BinaryOperator::Create(OuterShift->getOpcode(), X,
OuterShift->getOperand(1));
-
if (!NeedMask)
return NewShift;
@@ -283,6 +296,50 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
return BinaryOperator::Create(Instruction::And, NewShift, NewMask);
}
+/// If we have a shift-by-constant of a bitwise logic op that itself has a
+/// shift-by-constant operand with identical opcode, we may be able to convert
+/// that into 2 independent shifts followed by the logic op. This eliminates
+/// a use of an intermediate value (reduces the dependency chain).
+static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert(I.isShift() && "Expected a shift as input");
+ auto *LogicInst = dyn_cast<BinaryOperator>(I.getOperand(0));
+ if (!LogicInst || !LogicInst->isBitwiseLogicOp() || !LogicInst->hasOneUse())
+ return nullptr;
+
+ const APInt *C0, *C1;
+ if (!match(I.getOperand(1), m_APInt(C1)))
+ return nullptr;
+
+ Instruction::BinaryOps ShiftOpcode = I.getOpcode();
+ Type *Ty = I.getType();
+
+ // Find a matching one-use shift by constant. The fold is not valid if the sum
+ // of the shift values equals or exceeds bitwidth.
+ // TODO: Remove the one-use check if the other logic operand (Y) is constant.
+ Value *X, *Y;
+ auto matchFirstShift = [&](Value *V) {
+ return !isa<ConstantExpr>(V) &&
+ match(V, m_OneUse(m_Shift(m_Value(X), m_APInt(C0)))) &&
+ cast<BinaryOperator>(V)->getOpcode() == ShiftOpcode &&
+ (*C0 + *C1).ult(Ty->getScalarSizeInBits());
+ };
+
+ // Logic ops are commutative, so check each operand for a match.
+ if (matchFirstShift(LogicInst->getOperand(0)))
+ Y = LogicInst->getOperand(1);
+ else if (matchFirstShift(LogicInst->getOperand(1)))
+ Y = LogicInst->getOperand(0);
+ else
+ return nullptr;
+
+ // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+ Constant *ShiftSumC = ConstantInt::get(Ty, *C0 + *C1);
+ Value *NewShift1 = Builder.CreateBinOp(ShiftOpcode, X, ShiftSumC);
+ Value *NewShift2 = Builder.CreateBinOp(ShiftOpcode, Y, I.getOperand(1));
+ return BinaryOperator::Create(LogicInst->getOpcode(), NewShift1, NewShift2);
+}
+
Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
assert(Op0->getType() == Op1->getType());
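The new fold is justified by shifts distributing over bitwise logic when the shift sum stays in range, which is exactly what matchFirstShift checks. A standalone spot-check for the shl/or case at i32 (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  // shift (logic (shift X, C0), Y), C1
  //   --> logic (shift X, C0+C1), (shift Y, C1)
  const unsigned C0 = 3, C1 = 5;
  static_assert(C0 + C1 < 32, "shift sum must stay below the bitwidth");
  for (uint32_t X = 0; X < 4096; X += 37)
    for (uint32_t Y = 0; Y < 4096; Y += 41) {
      uint32_t Before = ((X << C0) | Y) << C1;
      uint32_t After = (X << (C0 + C1)) | (Y << C1);
      assert(Before == After);
    }
  return 0;
}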
@@ -335,6 +392,9 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
return &I;
}
+ if (Instruction *Logic = foldShiftOfShiftedLogic(I, Builder))
+ return Logic;
+
return nullptr;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index d30ab8001897..47ce83974c8d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -14,6 +14,8 @@
#include "InstCombineInternal.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
@@ -348,8 +350,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
- ShrinkDemandedConstant(I, 2, DemandedMask))
+ // This is similar to ShrinkDemandedConstant, but for a select we want to
+ // try to keep the selected constants the same as icmp value constants, if
+    // we can. This helps avoid breaking apart (and helps put back together)
+ // canonical patterns like min and max.
+ auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo,
+ APInt DemandedMask) {
+ const APInt *SelC;
+ if (!match(I->getOperand(OpNo), m_APInt(SelC)))
+ return false;
+
+ // Get the constant out of the ICmp, if there is one.
+ const APInt *CmpC;
+ ICmpInst::Predicate Pred;
+ if (!match(I->getOperand(0), m_c_ICmp(Pred, m_APInt(CmpC), m_Value())) ||
+ CmpC->getBitWidth() != SelC->getBitWidth())
+ return ShrinkDemandedConstant(I, OpNo, DemandedMask);
+
+ // If the constant is already the same as the ICmp, leave it as-is.
+ if (*CmpC == *SelC)
+ return false;
+ // If the constants are not already the same, but can be with the demand
+ // mask, use the constant value from the ICmp.
+ if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) {
+ I->setOperand(OpNo, ConstantInt::get(I->getType(), *CmpC));
+ return true;
+ }
+ return ShrinkDemandedConstant(I, OpNo, DemandedMask);
+ };
+ if (CanonicalizeSelectConstant(I, 1, DemandedMask) ||
+ CanonicalizeSelectConstant(I, 2, DemandedMask))
return I;
// Only known if known in both the LHS and RHS.
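A toy model of CanonicalizeSelectConstant's demanded-bits test, with made-up constants (illustration only, not the LLVM API):

#include <cassert>
#include <cstdint>

int main() {
  // Only the low byte is demanded, and the icmp constant agrees with the
  // select constant on those bits, so prefer the icmp's value to keep
  // min/max-style patterns intact. The constants here are hypothetical.
  uint32_t DemandedMask = 0xFF;
  uint32_t CmpC = 0x1040, SelC = 0x0040;
  if ((CmpC & DemandedMask) == (SelC & DemandedMask))
    SelC = CmpC; // keep the select constant equal to the icmp constant
  assert(SelC == CmpC);
  return 0;
}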
@@ -1247,30 +1277,57 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
case Instruction::ShuffleVector: {
- ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
- unsigned LHSVWidth =
- Shuffle->getOperand(0)->getType()->getVectorNumElements();
- APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
+ auto *Shuffle = cast<ShuffleVectorInst>(I);
+ assert(Shuffle->getOperand(0)->getType() ==
+ Shuffle->getOperand(1)->getType() &&
+ "Expected shuffle operands to have same type");
+ unsigned OpWidth =
+ Shuffle->getOperand(0)->getType()->getVectorNumElements();
+ APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0);
for (unsigned i = 0; i < VWidth; i++) {
if (DemandedElts[i]) {
unsigned MaskVal = Shuffle->getMaskValue(i);
if (MaskVal != -1u) {
- assert(MaskVal < LHSVWidth * 2 &&
+ assert(MaskVal < OpWidth * 2 &&
"shufflevector mask index out of range!");
- if (MaskVal < LHSVWidth)
+ if (MaskVal < OpWidth)
LeftDemanded.setBit(MaskVal);
else
- RightDemanded.setBit(MaskVal - LHSVWidth);
+ RightDemanded.setBit(MaskVal - OpWidth);
}
}
}
- APInt LHSUndefElts(LHSVWidth, 0);
+ APInt LHSUndefElts(OpWidth, 0);
simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
- APInt RHSUndefElts(LHSVWidth, 0);
+ APInt RHSUndefElts(OpWidth, 0);
simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);
+ // If this shuffle does not change the vector length and the elements
+ // demanded by this shuffle are an identity mask, then this shuffle is
+ // unnecessary.
+ //
+ // We are assuming canonical form for the mask, so the source vector is
+ // operand 0 and operand 1 is not used.
+ //
+ // Note that if an element is demanded and this shuffle mask is undefined
+ // for that element, then the shuffle is not considered an identity
+ // operation. The shuffle prevents poison from the operand vector from
+ // leaking to the result by replacing poison with an undefined value.
+ if (VWidth == OpWidth) {
+ bool IsIdentityShuffle = true;
+ for (unsigned i = 0; i < VWidth; i++) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (DemandedElts[i] && i != MaskVal) {
+ IsIdentityShuffle = false;
+ break;
+ }
+ }
+ if (IsIdentityShuffle)
+ return Shuffle->getOperand(0);
+ }
+
bool NewUndefElts = false;
unsigned LHSIdx = -1u, LHSValIdx = -1u;
unsigned RHSIdx = -1u, RHSValIdx = -1u;
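A toy model of the demanded-lane identity test added above, with std::vector standing in for the shuffle mask (illustration only):

#include <cassert>
#include <vector>

// A same-width shuffle is a no-op if every demanded lane i reads source
// lane i; undemanded lanes may read anything.
static bool IsDemandedIdentity(const std::vector<int> &Mask,
                               const std::vector<bool> &Demanded) {
  for (unsigned i = 0; i != Mask.size(); ++i)
    if (Demanded[i] && Mask[i] != int(i))
      return false;
  return true;
}

int main() {
  // Lane 2 is mangled, but nobody demands it, so the shuffle still counts
  // as an identity for the demanded lanes.
  assert(IsDemandedIdentity({0, 1, 7, 3}, {true, true, false, true}));
  assert(!IsDemandedIdentity({0, 1, 7, 3}, {true, true, true, true}));
  return 0;
}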
@@ -1283,23 +1340,23 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
} else if (!DemandedElts[i]) {
NewUndefElts = true;
UndefElts.setBit(i);
- } else if (MaskVal < LHSVWidth) {
+ } else if (MaskVal < OpWidth) {
if (LHSUndefElts[MaskVal]) {
NewUndefElts = true;
UndefElts.setBit(i);
} else {
- LHSIdx = LHSIdx == -1u ? i : LHSVWidth;
- LHSValIdx = LHSValIdx == -1u ? MaskVal : LHSVWidth;
+ LHSIdx = LHSIdx == -1u ? i : OpWidth;
+ LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
LHSUniform = LHSUniform && (MaskVal == i);
}
} else {
- if (RHSUndefElts[MaskVal - LHSVWidth]) {
+ if (RHSUndefElts[MaskVal - OpWidth]) {
NewUndefElts = true;
UndefElts.setBit(i);
} else {
- RHSIdx = RHSIdx == -1u ? i : LHSVWidth;
- RHSValIdx = RHSValIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
- RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i);
+ RHSIdx = RHSIdx == -1u ? i : OpWidth;
+ RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
+ RHSUniform = RHSUniform && (MaskVal - OpWidth == i);
}
}
}
@@ -1308,20 +1365,20 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// this constant vector to single insertelement instruction.
// shufflevector V, C, <v1, v2, .., ci, .., vm> ->
// insertelement V, C[ci], ci-n
- if (LHSVWidth == Shuffle->getType()->getNumElements()) {
+ if (OpWidth == Shuffle->getType()->getNumElements()) {
Value *Op = nullptr;
Constant *Value = nullptr;
unsigned Idx = -1u;
// Find constant vector with the single element in shuffle (LHS or RHS).
- if (LHSIdx < LHSVWidth && RHSUniform) {
+ if (LHSIdx < OpWidth && RHSUniform) {
if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
Op = Shuffle->getOperand(1);
Value = CV->getOperand(LHSValIdx);
Idx = LHSIdx;
}
}
- if (RHSIdx < LHSVWidth && LHSUniform) {
+ if (RHSIdx < OpWidth && LHSUniform) {
if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
Op = Shuffle->getOperand(0);
Value = CV->getOperand(RHSValIdx);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 9c890748e5ab..f604c9dc32ca 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1390,20 +1390,6 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
llvm_unreachable("failed to reorder elements of vector instruction!");
}
-static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
- bool &isLHSID, bool &isRHSID) {
- isLHSID = isRHSID = true;
-
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] < 0) continue; // Ignore undef values.
- // Is this an identity shuffle of the LHS value?
- isLHSID &= (Mask[i] == (int)i);
-
- // Is this an identity shuffle of the RHS value?
- isRHSID &= (Mask[i]-e == i);
- }
-}
-
// Returns true if the shuffle is extracting a contiguous range of values from
// LHS, for example:
// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
@@ -1560,9 +1546,11 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
if (!Shuf.isSelect())
return nullptr;
- // Canonicalize to choose from operand 0 first.
+ // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
+ // Commuting undef to operand 0 conflicts with another canonicalization.
unsigned NumElts = Shuf.getType()->getVectorNumElements();
- if (Shuf.getMaskValue(0) >= (int)NumElts) {
+ if (!isa<UndefValue>(Shuf.getOperand(1)) &&
+ Shuf.getMaskValue(0) >= (int)NumElts) {
// TODO: Can we assert that both operands of a shuffle-select are not undef
// (otherwise, it would have been folded by instsimplify)?
Shuf.commute();
@@ -1753,7 +1741,8 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
}
-/// Try to replace a shuffle with an insertelement.
+/// Try to replace a shuffle with an insertelement or try to replace a shuffle
+/// operand with the operand of an insertelement.
static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
SmallVector<int, 16> Mask = Shuf.getShuffleMask();
@@ -1765,6 +1754,31 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
if (NumElts != (int)(V0->getType()->getVectorNumElements()))
return nullptr;
+ // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
+ // not be able to handle it there if the insertelement has >1 use.
+ // If the shuffle has an insertelement operand but does not choose the
+ // inserted scalar element from that value, then we can replace that shuffle
+ // operand with the source vector of the insertelement.
+ Value *X;
+ uint64_t IdxC;
+ if (match(V0, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
+ // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
+ if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; })) {
+ Shuf.setOperand(0, X);
+ return &Shuf;
+ }
+ }
+ if (match(V1, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
+ // Offset the index constant by the vector width because we are checking for
+ // accesses to the 2nd vector input of the shuffle.
+ IdxC += NumElts;
+ // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
+ if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; })) {
+ Shuf.setOperand(1, X);
+ return &Shuf;
+ }
+ }
+
// shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
// We need an insertelement with a constant index.
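
The mask test in the added block above has a simple standalone shape: the inserted lane is dead if no mask element selects it, in which case the shuffle can read the insertelement's source vector instead. A plain C++ sketch of that check (illustrative only; the in-tree code runs none_of over the LLVM mask):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // True if lane IdxC (already offset by the vector width for operand 1)
    // is never read by the shuffle, so the insertelement can be bypassed.
    static bool insertedLaneIsDead(const std::vector<int> &Mask, uint64_t IdxC) {
      return std::none_of(Mask.begin(), Mask.end(),
                          [IdxC](int Elt) { return Elt == (int)IdxC; });
    }

    int main() {
      // shuf (inselt X, ?, 2), ?, <0,1,3,3>: lane 2 is never read.
      return insertedLaneIsDead({0, 1, 3, 3}, 2) ? 0 : 1;
    }
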
@@ -1891,29 +1905,21 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
return replaceInstUsesWith(SVI, V);
- // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
- // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
+ // shuffle x, x, mask --> shuffle x, undef, mask'
unsigned VWidth = SVI.getType()->getVectorNumElements();
unsigned LHSWidth = LHS->getType()->getVectorNumElements();
SmallVector<int, 16> Mask = SVI.getShuffleMask();
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
- if (LHS == RHS || isa<UndefValue>(LHS)) {
+ if (LHS == RHS) {
+ assert(!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?");
// Remap any references to RHS to use LHS.
SmallVector<Constant*, 16> Elts;
- for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
- if (Mask[i] < 0) {
- Elts.push_back(UndefValue::get(Int32Ty));
- continue;
- }
-
- if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
- (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
- Mask[i] = -1; // Turn into undef.
+ for (unsigned i = 0; i != VWidth; ++i) {
+ // Propagate undef elements or force mask to LHS.
+ if (Mask[i] < 0)
Elts.push_back(UndefValue::get(Int32Ty));
- } else {
- Mask[i] = Mask[i] % e; // Force to LHS.
- Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
- }
+ else
+ Elts.push_back(ConstantInt::get(Int32Ty, Mask[i] % LHSWidth));
}
SVI.setOperand(0, SVI.getOperand(1));
SVI.setOperand(1, UndefValue::get(RHS->getType()));
@@ -1921,6 +1927,12 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return &SVI;
}
+ // shuffle undef, x, mask --> shuffle x, undef, mask'
+ if (isa<UndefValue>(LHS)) {
+ SVI.commute();
+ return &SVI;
+ }
+
if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
return I;
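
When both shuffle operands are the same value, every mask element can be rewritten to read operand 0 by reducing it modulo the operand width; undef elements pass through. A standalone sketch of that remapping (plain C++):

    #include <vector>

    // shuffle x, x, <1,5,2,6> --> shuffle x, undef, <1,1,2,2>
    static std::vector<int> remapMaskToLHS(const std::vector<int> &Mask,
                                           int LHSWidth) {
      std::vector<int> Out;
      for (int Elt : Mask)
        Out.push_back(Elt < 0 ? -1 : Elt % LHSWidth);
      return Out;
    }

    int main() {
      auto Out = remapMaskToLHS({1, 5, 2, 6}, 4);
      return Out == std::vector<int>{1, 1, 2, 2} ? 0 : 1;
    }
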
@@ -1948,16 +1960,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Instruction *I = foldIdentityPaddedShuffles(SVI))
return I;
- if (VWidth == LHSWidth) {
- // Analyze the shuffle, are the LHS or RHS and identity shuffles?
- bool isLHSID, isRHSID;
- recognizeIdentityMask(Mask, isLHSID, isRHSID);
-
- // Eliminate identity shuffles.
- if (isLHSID) return replaceInstUsesWith(SVI, LHS);
- if (isRHSID) return replaceInstUsesWith(SVI, RHS);
- }
-
if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
Value *V = evaluateInDifferentElementOrder(LHS, Mask);
return replaceInstUsesWith(SVI, V);
@@ -2235,12 +2237,5 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
}
- // If the result mask is an identity, replace uses of this instruction with
- // corresponding argument.
- bool isLHSID, isRHSID;
- recognizeIdentityMask(newMask, isLHSID, isRHSID);
- if (isLHSID && VWidth == LHSOp0Width) return replaceInstUsesWith(SVI, newLHS);
- if (isRHSID && VWidth == RHSOp0Width) return replaceInstUsesWith(SVI, newRHS);
-
return MadeChange ? &SVI : nullptr;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index ecb486c544e0..801c09a317a7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -86,6 +86,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Casting.h"
@@ -121,6 +122,9 @@ STATISTIC(NumReassoc , "Number of reassociations");
DEBUG_COUNTER(VisitCounter, "instcombine-visit",
"Controls which instructions are visited");
+static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
+static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
+
static cl::opt<bool>
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
cl::init(true));
@@ -129,6 +133,17 @@ static cl::opt<bool>
EnableExpensiveCombines("expensive-combines",
cl::desc("Enable expensive instruction combines"));
+static cl::opt<unsigned> LimitMaxIterations(
+ "instcombine-max-iterations",
+ cl::desc("Limit the maximum number of instruction combining iterations"),
+ cl::init(InstCombineDefaultMaxIterations));
+
+static cl::opt<unsigned> InfiniteLoopDetectionThreshold(
+ "instcombine-infinite-loop-threshold",
+ cl::desc("Number of instruction combining iterations considered an "
+ "infinite loop"),
+ cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden);
+
static cl::opt<unsigned>
MaxArraySize("instcombine-maxarray-size", cl::init(1024),
cl::desc("Maximum array size considered when doing a combine"));
@@ -759,35 +774,52 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Value *InstCombiner::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
Value *LHS, Value *RHS) {
- Instruction::BinaryOps Opcode = I.getOpcode();
- // (op (select (a, b, c)), (select (a, d, e))) -> (select (a, (op b, d), (op
- // c, e)))
- Value *A, *B, *C, *D, *E;
- Value *SI = nullptr;
- if (match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C))) &&
- match(RHS, m_Select(m_Specific(A), m_Value(D), m_Value(E)))) {
- bool SelectsHaveOneUse = LHS->hasOneUse() && RHS->hasOneUse();
-
- FastMathFlags FMF;
- BuilderTy::FastMathFlagGuard Guard(Builder);
- if (isa<FPMathOperator>(&I)) {
- FMF = I.getFastMathFlags();
- Builder.setFastMathFlags(FMF);
- }
+ Value *A, *B, *C, *D, *E, *F;
+ bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
+ bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
+ if (!LHSIsSelect && !RHSIsSelect)
+ return nullptr;
- Value *V1 = SimplifyBinOp(Opcode, C, E, FMF, SQ.getWithInstruction(&I));
- Value *V2 = SimplifyBinOp(Opcode, B, D, FMF, SQ.getWithInstruction(&I));
- if (V1 && V2)
- SI = Builder.CreateSelect(A, V2, V1);
- else if (V2 && SelectsHaveOneUse)
- SI = Builder.CreateSelect(A, V2, Builder.CreateBinOp(Opcode, C, E));
- else if (V1 && SelectsHaveOneUse)
- SI = Builder.CreateSelect(A, Builder.CreateBinOp(Opcode, B, D), V1);
+ FastMathFlags FMF;
+ BuilderTy::FastMathFlagGuard Guard(Builder);
+ if (isa<FPMathOperator>(&I)) {
+ FMF = I.getFastMathFlags();
+ Builder.setFastMathFlags(FMF);
+ }
- if (SI)
- SI->takeName(&I);
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ SimplifyQuery Q = SQ.getWithInstruction(&I);
+
+ Value *Cond, *True = nullptr, *False = nullptr;
+ if (LHSIsSelect && RHSIsSelect && A == D) {
+ // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
+ Cond = A;
+ True = SimplifyBinOp(Opcode, B, E, FMF, Q);
+ False = SimplifyBinOp(Opcode, C, F, FMF, Q);
+
+ if (LHS->hasOneUse() && RHS->hasOneUse()) {
+ if (False && !True)
+ True = Builder.CreateBinOp(Opcode, B, E);
+ else if (True && !False)
+ False = Builder.CreateBinOp(Opcode, C, F);
+ }
+ } else if (LHSIsSelect && LHS->hasOneUse()) {
+ // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
+ Cond = A;
+ True = SimplifyBinOp(Opcode, B, RHS, FMF, Q);
+ False = SimplifyBinOp(Opcode, C, RHS, FMF, Q);
+ } else if (RHSIsSelect && RHS->hasOneUse()) {
+ // X op (D ? E : F) -> D ? (X op E) : (X op F)
+ Cond = D;
+ True = SimplifyBinOp(Opcode, LHS, E, FMF, Q);
+ False = SimplifyBinOp(Opcode, LHS, F, FMF, Q);
}
+ if (!True || !False)
+ return nullptr;
+
+ Value *SI = Builder.CreateSelect(Cond, True, False);
+ SI->takeName(&I);
return SI;
}
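
The rewrite rests on the distributive identity for selects: a binop applied to a select distributes into both arms, and when both operands are selects on the same condition the arms pair up. A trivial standalone check of the identity on concrete integers (plain C++; the real code additionally gates on one-use and on SimplifyBinOp proving at least one arm simplifies):

    #include <cassert>

    int main() {
      const int B = 7, C = -3, E = 11, F = 2;
      for (bool A : {false, true}) {
        // (A ? B : C) + (A ? E : F) == A ? (B + E) : (C + F)
        assert((A ? B : C) + (A ? E : F) == (A ? B + E : C + F));
        // (A ? B : C) + F == A ? (B + F) : (C + F)
        assert((A ? B : C) + F == (A ? B + F : C + F));
      }
      return 0;
    }
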
@@ -1526,11 +1558,13 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
// If this is a widening shuffle, we must be able to extend with undef
// elements. If the original binop does not produce an undef in the high
// lanes, then this transform is not safe.
+ // Similarly, for undef lanes due to the shuffle mask, we can only
+ // transform binops that preserve undef.
// TODO: We could shuffle those non-undef constant values into the
// result by using a constant vector (rather than an undef vector)
// as operand 1 of the new binop, but that might be too aggressive
// for target-independent shuffle creation.
- if (I >= SrcVecNumElts) {
+ if (I >= SrcVecNumElts || ShMask[I] < 0) {
Constant *MaybeUndef =
ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
: ConstantExpr::get(Opcode, CElt, UndefScalar);
@@ -1615,6 +1649,15 @@ Instruction *InstCombiner::narrowMathIfNoOverflow(BinaryOperator &BO) {
return CastInst::Create(CastOpc, NarrowBO, BO.getType());
}
+static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
+ // At least one GEP must be inbounds.
+ if (!GEP1.isInBounds() && !GEP2.isInBounds())
+ return false;
+
+ return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
+ (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
+}
+
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
Type *GEPType = GEP.getType();
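
The new helper encodes when inbounds can survive a GEP merge: at least one source GEP must be inbounds, and each must be either inbounds or a pure zero-index (address-preserving) GEP. A standalone restatement over booleans (plain C++):

    // InB* = the GEP is inbounds; AllZero* = all of its indices are zero.
    constexpr bool mergedGEPInBounds(bool InB1, bool AllZero1,
                                     bool InB2, bool AllZero2) {
      return (InB1 || InB2) &&
             (InB1 || AllZero1) && (InB2 || AllZero2);
    }
    static_assert(mergedGEPInBounds(true, false, false, true),
                  "inbounds GEP merged with a zero-index GEP stays inbounds");
    static_assert(!mergedGEPInBounds(false, true, false, true),
                  "neither GEP inbounds: the merge is not inbounds");
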
@@ -1724,8 +1767,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// The first two arguments can vary for any GEP; the rest have to be
// static for struct slots.
- if (J > 1 && CurTy->isStructTy())
- return nullptr;
+ if (J > 1) {
+ assert(CurTy && "No current type?");
+ if (CurTy->isStructTy())
+ return nullptr;
+ }
DI = J;
} else {
@@ -1885,6 +1931,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Update the GEP in place if possible.
if (Src->getNumOperands() == 2) {
+ GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
GEP.setOperand(0, Src->getOperand(0));
GEP.setOperand(1, Sum);
return &GEP;
@@ -1901,7 +1948,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
if (!Indices.empty())
- return GEP.isInBounds() && Src->isInBounds()
+ return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
? GetElementPtrInst::CreateInBounds(
Src->getSourceElementType(), Src->getOperand(0), Indices,
GEP.getName())
@@ -2154,15 +2201,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// of a bitcasted pointer to vector or array of the same dimensions:
// gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
// gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
- auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy) {
+ auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
+ const DataLayout &DL) {
return ArrTy->getArrayElementType() == VecTy->getVectorElementType() &&
- ArrTy->getArrayNumElements() == VecTy->getVectorNumElements();
+ ArrTy->getArrayNumElements() == VecTy->getVectorNumElements() &&
+ DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
};
if (GEP.getNumOperands() == 3 &&
((GEPEltType->isArrayTy() && SrcEltType->isVectorTy() &&
- areMatchingArrayAndVecTypes(GEPEltType, SrcEltType)) ||
+ areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
(GEPEltType->isVectorTy() && SrcEltType->isArrayTy() &&
- areMatchingArrayAndVecTypes(SrcEltType, GEPEltType)))) {
+ areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
// Create a new GEP here, as using `setOperand()` followed by
// `setSourceElementType()` won't actually update the type of the
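
Matching element type and count is not enough for the bitcast fold above: with a typical DataLayout, a 3-element vector is padded out to its 16-byte alignment while the 3-element array is tightly packed, so indexing through the cast would compute different addresses. A standalone illustration with made-up but representative sizes (plain C++):

    #include <cassert>

    int main() {
      // Representative DataLayout numbers; actual values are target-defined.
      const unsigned ArrayAllocSize = 3 * 4; // [3 x float]: 12 bytes.
      const unsigned VectorAllocSize = 16;   // <3 x float>: padded to 16.
      const bool FoldAllowed = (ArrayAllocSize == VectorAllocSize);
      assert(!FoldAllowed && "sizes differ, so the GEP fold is skipped");
      return 0;
    }
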
@@ -2401,12 +2450,13 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
replaceInstUsesWith(*C,
ConstantInt::get(Type::getInt1Ty(C->getContext()),
C->isFalseWhenEqual()));
- } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I) ||
- isa<AddrSpaceCastInst>(I)) {
- replaceInstUsesWith(*I, UndefValue::get(I->getType()));
} else if (auto *SI = dyn_cast<StoreInst>(I)) {
for (auto *DII : DIIs)
ConvertDebugDeclareToDebugValue(DII, SI, *DIB);
+ } else {
+ // Casts, GEP, or anything else: we're about to delete this instruction,
+ // so it cannot have any valid uses.
+ replaceInstUsesWith(*I, UndefValue::get(I->getType()));
}
eraseInstFromFunction(*I);
}
@@ -3111,6 +3161,15 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
return nullptr;
}
+Instruction *InstCombiner::visitFreeze(FreezeInst &I) {
+ Value *Op0 = I.getOperand(0);
+
+ if (Value *V = SimplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
+ return replaceInstUsesWith(I, V);
+
+ return nullptr;
+}
+
/// Try to move the specified instruction from its current block into the
/// beginning of DestBlock, which can only happen if it's safe to move the
/// instruction past all of the instructions between it and the end of its
@@ -3322,10 +3381,6 @@ bool InstCombiner::run() {
// Move the name to the new instruction first.
Result->takeName(I);
- // Push the new instruction and any users onto the worklist.
- Worklist.AddUsersToWorkList(*Result);
- Worklist.Add(Result);
-
// Insert the new instruction into the basic block...
BasicBlock *InstParent = I->getParent();
BasicBlock::iterator InsertPos = I->getIterator();
@@ -3337,6 +3392,10 @@ bool InstCombiner::run() {
InstParent->getInstList().insert(InsertPos, Result);
+ // Push the new instruction and any users onto the worklist.
+ Worklist.AddUsersToWorkList(*Result);
+ Worklist.Add(Result);
+
eraseInstFromFunction(*I);
} else {
LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
@@ -3392,8 +3451,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
if (isInstructionTriviallyDead(Inst, TLI)) {
++NumDeadInst;
LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
- if (!salvageDebugInfo(*Inst))
- replaceDbgUsesWithUndef(Inst);
+ salvageDebugInfoOrMarkUndef(*Inst);
Inst->eraseFromParent();
MadeIRChange = true;
continue;
@@ -3507,10 +3565,11 @@ static bool combineInstructionsOverFunction(
Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
- LoopInfo *LI = nullptr) {
+ ProfileSummaryInfo *PSI, bool ExpensiveCombines, unsigned MaxIterations,
+ LoopInfo *LI) {
auto &DL = F.getParent()->getDataLayout();
ExpensiveCombines |= EnableExpensiveCombines;
+ MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
@@ -3529,9 +3588,23 @@ static bool combineInstructionsOverFunction(
MadeIRChange = LowerDbgDeclare(F);
// Iterate while there is work to do.
- int Iteration = 0;
+ unsigned Iteration = 0;
while (true) {
++Iteration;
+
+ if (Iteration > InfiniteLoopDetectionThreshold) {
+ report_fatal_error(
+ "Instruction Combining seems stuck in an infinite loop after " +
+ Twine(InfiniteLoopDetectionThreshold) + " iterations.");
+ }
+
+ if (Iteration > MaxIterations) {
+ LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
+ << " on " << F.getName()
+ << " reached; stopping before reaching a fixpoint\n");
+ break;
+ }
+
LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
<< F.getName() << "\n");
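
The loop above now runs under two limits: a hard threshold that aborts compilation on a suspected infinite combine loop (via report_fatal_error), and a soft cap that stops iterating before a fixpoint. A standalone sketch of the control flow (plain C++, with the pass body abstracted to a callback and the fatal error modeled as an exception):

    #include <stdexcept>

    static bool combineUntilLimit(unsigned MaxIterations,
                                  unsigned InfiniteLoopThreshold,
                                  bool (*runOnePass)()) {
      bool MadeIRChange = false;
      for (unsigned Iteration = 1;; ++Iteration) {
        if (Iteration > InfiniteLoopThreshold)
          throw std::runtime_error("instcombine stuck in an infinite loop");
        if (Iteration > MaxIterations)
          break; // Give up before reaching a fixpoint.
        if (!runOnePass())
          break; // Fixpoint reached: no more changes.
        MadeIRChange = true;
      }
      return MadeIRChange;
    }
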
@@ -3543,11 +3616,19 @@ static bool combineInstructionsOverFunction(
if (!IC.run())
break;
+
+ MadeIRChange = true;
}
- return MadeIRChange || Iteration > 1;
+ return MadeIRChange;
}
+InstCombinePass::InstCombinePass(bool ExpensiveCombines)
+ : ExpensiveCombines(ExpensiveCombines), MaxIterations(LimitMaxIterations) {}
+
+InstCombinePass::InstCombinePass(bool ExpensiveCombines, unsigned MaxIterations)
+ : ExpensiveCombines(ExpensiveCombines), MaxIterations(MaxIterations) {}
+
PreservedAnalyses InstCombinePass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &AC = AM.getResult<AssumptionAnalysis>(F);
@@ -3565,8 +3646,9 @@ PreservedAnalyses InstCombinePass::run(Function &F,
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
- if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
- BFI, PSI, ExpensiveCombines, LI))
+ if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, BFI,
+ PSI, ExpensiveCombines, MaxIterations,
+ LI))
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
@@ -3615,12 +3697,26 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
&getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
nullptr;
- return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
- BFI, PSI, ExpensiveCombines, LI);
+ return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, BFI,
+ PSI, ExpensiveCombines, MaxIterations,
+ LI);
}
char InstructionCombiningPass::ID = 0;
+InstructionCombiningPass::InstructionCombiningPass(bool ExpensiveCombines)
+ : FunctionPass(ID), ExpensiveCombines(ExpensiveCombines),
+ MaxIterations(InstCombineDefaultMaxIterations) {
+ initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
+}
+
+InstructionCombiningPass::InstructionCombiningPass(bool ExpensiveCombines,
+ unsigned MaxIterations)
+ : FunctionPass(ID), ExpensiveCombines(ExpensiveCombines),
+ MaxIterations(MaxIterations) {
+ initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
+}
+
INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
"Combine redundant instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
@@ -3647,6 +3743,11 @@ FunctionPass *llvm::createInstructionCombiningPass(bool ExpensiveCombines) {
return new InstructionCombiningPass(ExpensiveCombines);
}
+FunctionPass *llvm::createInstructionCombiningPass(bool ExpensiveCombines,
+ unsigned MaxIterations) {
+ return new InstructionCombiningPass(ExpensiveCombines, MaxIterations);
+}
+
void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createInstructionCombiningPass());
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index d92ee11c2e1a..79c119489a65 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -59,6 +59,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -1808,6 +1809,8 @@ bool ModuleAddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
if (GlobalsMD.get(G).IsBlacklisted) return false;
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
+ // Only instrument globals in the default address space.
+ if (G->getAddressSpace()) return false;
if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals.
// Two problems with thread-locals:
// - The address of the main thread's copy can't be computed at link-time.
@@ -2898,15 +2901,14 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() {
for (Argument &Arg : F.args()) {
if (Arg.hasByValAttr()) {
Type *Ty = Arg.getType()->getPointerElementType();
- unsigned Alignment = Arg.getParamAlignment();
- if (Alignment == 0)
- Alignment = DL.getABITypeAlignment(Ty);
+ const Align Alignment =
+ DL.getValueOrABITypeAlignment(Arg.getParamAlign(), Ty);
AllocaInst *AI = IRB.CreateAlloca(
Ty, nullptr,
(Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) +
".byval");
- AI->setAlignment(Align(Alignment));
+ AI->setAlignment(Alignment);
Arg.replaceAllUsesWith(AI);
uint64_t AllocSize = DL.getTypeAllocSize(Ty);
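
The alignment selection above collapses the old two-step pattern into one call: use the explicit parameter alignment when present, otherwise the type's ABI alignment. A standalone restatement of that fallback (plain C++, std::optional standing in for MaybeAlign):

    #include <cstdint>
    #include <optional>

    static uint64_t valueOrABITypeAlignment(std::optional<uint64_t> ParamAlign,
                                            uint64_t ABITypeAlign) {
      return ParamAlign ? *ParamAlign : ABITypeAlign;
    }
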
@@ -2993,7 +2995,6 @@ void FunctionStackPoisoner::processStaticAllocas() {
Instruction *InsBefore = AllocaVec[0];
IRBuilder<> IRB(InsBefore);
- IRB.SetCurrentDebugLocation(EntryDebugLocation);
// Make sure non-instrumented allocas stay in the entry block. Otherwise,
// debug info is broken, because only entry-block allocas are treated as
@@ -3088,14 +3089,12 @@ void FunctionStackPoisoner::processStaticAllocas() {
Instruction *Term =
SplitBlockAndInsertIfThen(UseAfterReturnIsEnabled, InsBefore, false);
IRBuilder<> IRBIf(Term);
- IRBIf.SetCurrentDebugLocation(EntryDebugLocation);
StackMallocIdx = StackMallocSizeClass(LocalStackSize);
assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass);
Value *FakeStackValue =
IRBIf.CreateCall(AsanStackMallocFunc[StackMallocIdx],
ConstantInt::get(IntptrTy, LocalStackSize));
IRB.SetInsertPoint(InsBefore);
- IRB.SetCurrentDebugLocation(EntryDebugLocation);
FakeStack = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, Term,
ConstantInt::get(IntptrTy, 0));
@@ -3103,14 +3102,11 @@ void FunctionStackPoisoner::processStaticAllocas() {
IRB.CreateICmpEQ(FakeStack, Constant::getNullValue(IntptrTy));
Term = SplitBlockAndInsertIfThen(NoFakeStack, InsBefore, false);
IRBIf.SetInsertPoint(Term);
- IRBIf.SetCurrentDebugLocation(EntryDebugLocation);
Value *AllocaValue =
DoDynamicAlloca ? createAllocaForLayout(IRBIf, L, true) : StaticAlloca;
IRB.SetInsertPoint(InsBefore);
- IRB.SetCurrentDebugLocation(EntryDebugLocation);
LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack);
- IRB.SetCurrentDebugLocation(EntryDebugLocation);
IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca);
DIExprFlags |= DIExpression::DerefBefore;
} else {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index ae34be986537..9abb62ac788c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 55c64fa4b727..d35abb92dd08 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -27,7 +27,9 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -623,6 +625,10 @@ static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
assert(SumWt >= TrueWt && SumWt >= FalseWt &&
"Overflow calculating branch probabilities.");
+ // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
+ if (SumWt == 0)
+ return false;
+
TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
return true;
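
Branch weights of zero on both edges would make SumWt zero and the probability computation divide by zero; the new guard rejects such metadata instead. A standalone sketch of the guarded computation (plain C++):

    #include <cstdint>
    #include <optional>

    static std::optional<double> trueProbability(uint64_t TrueWt,
                                                 uint64_t FalseWt) {
      const uint64_t SumWt = TrueWt + FalseWt;
      if (SumWt == 0)
        return std::nullopt; // 0-to-0 weights: no probability to compute.
      return double(TrueWt) / double(SumWt);
    }
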
@@ -1061,6 +1067,7 @@ static bool shouldSplit(Instruction *InsertPoint,
DenseSet<Value *> &ConditionValues,
DominatorTree &DT,
DenseSet<Instruction *> &Unhoistables) {
+ assert(InsertPoint && "Null InsertPoint");
CHR_DEBUG(
dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
for (Value *V : PrevConditionValues) {
@@ -1071,7 +1078,6 @@ static bool shouldSplit(Instruction *InsertPoint,
dbgs() << *V << ", ";
}
dbgs() << "\n");
- assert(InsertPoint && "Null InsertPoint");
// If any of Bases isn't hoistable to the hoist point, split.
for (Value *V : ConditionValues) {
DenseMap<Instruction *, bool> Visited;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index c0353cba0b2f..cf9a6a321c7a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -55,7 +55,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -83,13 +82,16 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SpecialCaseList.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -479,7 +481,9 @@ DataFlowSanitizer::DataFlowSanitizer(
std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(),
ClABIListFiles.end());
- ABIList.set(SpecialCaseList::createOrDie(AllABIListFiles));
+ // FIXME: should we propagate vfs::FileSystem to this constructor?
+ ABIList.set(
+ SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
}
FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index ac6082441eae..bf3e4ed3e31f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -128,9 +129,9 @@ private:
// Checksum, produced by hash of EdgeDestinations
SmallVector<uint32_t, 4> FileChecksums;
- Module *M;
+ Module *M = nullptr;
std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
- LLVMContext *Ctx;
+ LLVMContext *Ctx = nullptr;
SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
std::vector<Regex> FilterRe;
std::vector<Regex> ExcludeRe;
@@ -384,7 +385,7 @@ namespace {
return EdgeDestinations;
}
- uint32_t getFuncChecksum() {
+ uint32_t getFuncChecksum() const {
return FuncChecksum;
}
@@ -713,7 +714,10 @@ void GCOVProfiler::emitProfileNotes() {
// to have a counter for the function definition.
uint32_t Line = SP->getLine();
auto Filename = getFilename(SP);
- Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
+
+ // Skip artificial functions such as global initializers.
+ if (!SP->isArtificial())
+ Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
for (auto &BB : F) {
GCOVBlock &Block = Func.getBlock(&BB);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index f87132ee4758..7e8f8e27a97b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -221,7 +222,7 @@ public:
Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
bool instrumentStack(
SmallVectorImpl<AllocaInst *> &Allocas,
- DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
+ DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
Value *readRegister(IRBuilder<> &IRB, StringRef Name);
bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
@@ -284,7 +285,6 @@ private:
FunctionCallee HwasanTagMemoryFunc;
FunctionCallee HwasanGenerateTagFunc;
- FunctionCallee HwasanThreadEnterFunc;
Constant *ShadowGlobal;
@@ -473,9 +473,6 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
HWAsanHandleVfork =
M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
-
- HwasanThreadEnterFunc =
- M.getOrInsertFunction("__hwasan_thread_enter", IRB.getVoidTy());
}
Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
@@ -792,7 +789,7 @@ bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
// llvm.memset right here into either a sequence of stores, or a call to
// hwasan_tag_memory.
if (ShadowSize)
- IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
+ IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align::None());
if (Size != AlignedSize) {
IRB.CreateStore(
ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()),
@@ -934,34 +931,13 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
assert(SlotPtr);
- Instruction *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
-
- Function *F = IRB.GetInsertBlock()->getParent();
- if (F->getFnAttribute("hwasan-abi").getValueAsString() == "interceptor") {
- Value *ThreadLongEqZero =
- IRB.CreateICmpEQ(ThreadLong, ConstantInt::get(IntptrTy, 0));
- auto *Br = cast<BranchInst>(SplitBlockAndInsertIfThen(
- ThreadLongEqZero, cast<Instruction>(ThreadLongEqZero)->getNextNode(),
- false, MDBuilder(*C).createBranchWeights(1, 100000)));
-
- IRB.SetInsertPoint(Br);
- // FIXME: This should call a new runtime function with a custom calling
- // convention to avoid needing to spill all arguments here.
- IRB.CreateCall(HwasanThreadEnterFunc);
- LoadInst *ReloadThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
-
- IRB.SetInsertPoint(&*Br->getSuccessor(0)->begin());
- PHINode *ThreadLongPhi = IRB.CreatePHI(IntptrTy, 2);
- ThreadLongPhi->addIncoming(ThreadLong, ThreadLong->getParent());
- ThreadLongPhi->addIncoming(ReloadThreadLong, ReloadThreadLong->getParent());
- ThreadLong = ThreadLongPhi;
- }
-
+ Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
// Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
Value *ThreadLongMaybeUntagged =
TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
if (WithFrameRecord) {
+ Function *F = IRB.GetInsertBlock()->getParent();
StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
// Prepare ring buffer data.
@@ -1040,7 +1016,7 @@ bool HWAddressSanitizer::instrumentLandingPads(
bool HWAddressSanitizer::instrumentStack(
SmallVectorImpl<AllocaInst *> &Allocas,
- DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
+ DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
// Ideally, we want to calculate tagged stack base pointer, and rewrite all
// alloca addresses using that. Unfortunately, offsets are not known yet
@@ -1062,11 +1038,15 @@ bool HWAddressSanitizer::instrumentStack(
AI->replaceUsesWithIf(Replacement,
[AILong](Use &U) { return U.getUser() != AILong; });
- for (auto *DDI : AllocaDeclareMap.lookup(AI)) {
- DIExpression *OldExpr = DDI->getExpression();
- DIExpression *NewExpr = DIExpression::append(
- OldExpr, {dwarf::DW_OP_LLVM_tag_offset, RetagMask(N)});
- DDI->setArgOperand(2, MetadataAsValue::get(*C, NewExpr));
+ for (auto *DDI : AllocaDbgMap.lookup(AI)) {
+ // Prepend "tag_offset, N" to the dwarf expression.
+ // Tag offset logically applies to the alloca pointer, and it makes sense
+ // to put it at the beginning of the expression.
+ SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
+ RetagMask(N)};
+ DDI->setArgOperand(
+ 2, MetadataAsValue::get(*C, DIExpression::prependOpcodes(
+ DDI->getExpression(), NewOps)));
}
size_t Size = getAllocaSizeInBytes(*AI);
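
Prepending rather than appending matters here because the tag offset applies to the alloca pointer before the rest of the location expression runs. A standalone sketch of the opcode splice (plain C++ over raw opcode vectors, not the DIExpression API, which also handles fragments and similar details):

    #include <cstdint>
    #include <vector>

    static std::vector<uint64_t>
    prependOps(const std::vector<uint64_t> &NewOps,
               const std::vector<uint64_t> &OldExpr) {
      std::vector<uint64_t> Out(NewOps); // {DW_OP_LLVM_tag_offset, N} first,
      Out.insert(Out.end(), OldExpr.begin(), OldExpr.end());
      return Out;                        // then the original expression ops.
    }
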
@@ -1113,7 +1093,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
SmallVector<AllocaInst*, 8> AllocasToInstrument;
SmallVector<Instruction*, 8> RetVec;
SmallVector<Instruction*, 8> LandingPadVec;
- DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> AllocaDeclareMap;
+ DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap;
for (auto &BB : F) {
for (auto &Inst : BB) {
if (ClInstrumentStack)
@@ -1127,9 +1107,10 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
isa<CleanupReturnInst>(Inst))
RetVec.push_back(&Inst);
- if (auto *DDI = dyn_cast<DbgDeclareInst>(&Inst))
- if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress()))
- AllocaDeclareMap[Alloca].push_back(DDI);
+ if (auto *DDI = dyn_cast<DbgVariableIntrinsic>(&Inst))
+ if (auto *Alloca =
+ dyn_cast_or_null<AllocaInst>(DDI->getVariableLocation()))
+ AllocaDbgMap[Alloca].push_back(DDI);
if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
LandingPadVec.push_back(&Inst);
@@ -1172,7 +1153,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
if (!AllocasToInstrument.empty()) {
Value *StackTag =
ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
- Changed |= instrumentStack(AllocasToInstrument, AllocaDeclareMap, RetVec,
+ Changed |= instrumentStack(AllocasToInstrument, AllocaDbgMap, RetVec,
StackTag);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 74d6e76eceb6..d5787c8f62a1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
index 93d3a8a14d5c..518b8895e836 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -9,6 +9,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -19,6 +20,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/ProfileData/InstrProf.h"
@@ -28,7 +30,6 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
#include <fstream>
#include <map>
#include <mutex>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 1f092a5f3103..04c7e856b5d4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 69c9020e060b..80acab307578 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -170,12 +170,14 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueMap.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
@@ -202,8 +204,8 @@ using namespace llvm;
#define DEBUG_TYPE "msan"
static const unsigned kOriginSize = 4;
-static const unsigned kMinOriginAlignment = 4;
-static const unsigned kShadowTLSAlignment = 8;
+static const Align kMinOriginAlignment = Align(4);
+static const Align kShadowTLSAlignment = Align(8);
// These constants must be kept in sync with the ones in msan.h.
static const unsigned kParamTLSSize = 800;
@@ -1086,15 +1088,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Fill memory range with the given origin value.
void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
- unsigned Size, unsigned Alignment) {
+ unsigned Size, Align Alignment) {
const DataLayout &DL = F.getParent()->getDataLayout();
- unsigned IntptrAlignment = DL.getABITypeAlignment(MS.IntptrTy);
+ const Align IntptrAlignment = Align(DL.getABITypeAlignment(MS.IntptrTy));
unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
assert(IntptrAlignment >= kMinOriginAlignment);
assert(IntptrSize >= kOriginSize);
unsigned Ofs = 0;
- unsigned CurrentAlignment = Alignment;
+ Align CurrentAlignment = Alignment;
if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
Value *IntptrOrigin = originToIntptr(IRB, Origin);
Value *IntptrOriginPtr =
@@ -1102,7 +1104,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
for (unsigned i = 0; i < Size / IntptrSize; ++i) {
Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
: IntptrOriginPtr;
- IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
+ IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment.value());
Ofs += IntptrSize / kOriginSize;
CurrentAlignment = IntptrAlignment;
}
@@ -1111,23 +1113,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
Value *GEP =
i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
- IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
+ IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment.value());
CurrentAlignment = kMinOriginAlignment;
}
}
void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
- Value *OriginPtr, unsigned Alignment, bool AsCall) {
+ Value *OriginPtr, Align Alignment, bool AsCall) {
const DataLayout &DL = F.getParent()->getDataLayout();
- unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
+ const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
if (Shadow->getType()->isAggregateType()) {
paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
OriginAlignment);
} else {
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
- Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
- if (ConstantShadow) {
+ if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
OriginAlignment);
@@ -1164,12 +1165,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
Value *ShadowPtr, *OriginPtr;
Type *ShadowTy = Shadow->getType();
- unsigned Alignment = SI->getAlignment();
- unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
+ const Align Alignment = assumeAligned(SI->getAlignment());
+ const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
std::tie(ShadowPtr, OriginPtr) =
getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
- StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
+ StoreInst *NewSI =
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment.value());
LLVM_DEBUG(dbgs() << " STORE: " << *NewSI << "\n");
(void)NewSI;
@@ -1207,8 +1209,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
LLVM_DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
- Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
- if (ConstantShadow) {
+ if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
insertWarningFn(IRB, Origin);
}
@@ -1403,10 +1404,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
///
/// Shadow = ShadowBase + Offset
/// Origin = (OriginBase + Offset) & ~3ULL
- std::pair<Value *, Value *> getShadowOriginPtrUserspace(Value *Addr,
- IRBuilder<> &IRB,
- Type *ShadowTy,
- unsigned Alignment) {
+ std::pair<Value *, Value *>
+ getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
+ MaybeAlign Alignment) {
Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
Value *ShadowLong = ShadowOffset;
uint64_t ShadowBase = MS.MapParams->ShadowBase;
@@ -1424,8 +1424,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (OriginBase != 0)
OriginLong = IRB.CreateAdd(OriginLong,
ConstantInt::get(MS.IntptrTy, OriginBase));
- if (Alignment < kMinOriginAlignment) {
- uint64_t Mask = kMinOriginAlignment - 1;
+ if (!Alignment || *Alignment < kMinOriginAlignment) {
+ uint64_t Mask = kMinOriginAlignment.value() - 1;
OriginLong =
IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
}
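
When the access alignment is unknown or below the 4-byte origin granularity, the origin address is rounded down to a 4-byte boundary, matching the `& ~3ULL` in the scheme's comment. A standalone sketch (plain C++, std::optional standing in for MaybeAlign):

    #include <cstdint>
    #include <optional>

    static uint64_t alignOriginAddress(uint64_t OriginLong,
                                       std::optional<uint64_t> Alignment) {
      const uint64_t kMinOriginAlignment = 4;
      if (!Alignment || *Alignment < kMinOriginAlignment)
        OriginLong &= ~(kMinOriginAlignment - 1); // i.e. & ~3ULL
      return OriginLong;
    }
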
@@ -1435,9 +1435,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return std::make_pair(ShadowPtr, OriginPtr);
}
- std::pair<Value *, Value *>
- getShadowOriginPtrKernel(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
- unsigned Alignment, bool isStore) {
+ std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
+ IRBuilder<> &IRB,
+ Type *ShadowTy,
+ bool isStore) {
Value *ShadowOriginPtrs;
const DataLayout &DL = F.getParent()->getDataLayout();
int Size = DL.getTypeStoreSize(ShadowTy);
@@ -1462,14 +1463,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
Type *ShadowTy,
- unsigned Alignment,
+ MaybeAlign Alignment,
bool isStore) {
- std::pair<Value *, Value *> ret;
if (MS.CompileKernel)
- ret = getShadowOriginPtrKernel(Addr, IRB, ShadowTy, Alignment, isStore);
- else
- ret = getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
- return ret;
+ return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
+ return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
}
/// Compute the shadow address for a given function argument.
@@ -1619,11 +1617,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// ByVal pointer itself has clean shadow. We copy the actual
// argument shadow to the underlying memory.
// Figure out maximal valid memcpy alignment.
- unsigned ArgAlign = FArg.getParamAlignment();
- if (ArgAlign == 0) {
- Type *EltType = A->getType()->getPointerElementType();
- ArgAlign = DL.getABITypeAlignment(EltType);
- }
+ const Align ArgAlign = DL.getValueOrABITypeAlignment(
+ MaybeAlign(FArg.getParamAlignment()),
+ A->getType()->getPointerElementType());
Value *CpShadowPtr =
getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
/*isStore*/ true)
@@ -1635,7 +1631,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
Size, ArgAlign);
} else {
- unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
+ const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
CopyAlign, Size);
LLVM_DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
@@ -1647,8 +1643,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// ParamTLS overflow.
*ShadowPtr = getCleanShadow(V);
} else {
- *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
- kShadowTLSAlignment);
+ *ShadowPtr = EntryIRB.CreateAlignedLoad(
+ getShadowTy(&FArg), Base, kShadowTLSAlignment.value());
}
}
LLVM_DEBUG(dbgs()
@@ -1782,13 +1778,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(I.getNextNode());
Type *ShadowTy = getShadowTy(&I);
Value *Addr = I.getPointerOperand();
- Value *ShadowPtr, *OriginPtr;
- unsigned Alignment = I.getAlignment();
+ Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
+ const Align Alignment = assumeAligned(I.getAlignment());
if (PropagateShadow) {
std::tie(ShadowPtr, OriginPtr) =
getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
- setShadow(&I,
- IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
+ setShadow(&I, IRB.CreateAlignedLoad(ShadowTy, ShadowPtr,
+ Alignment.value(), "_msld"));
} else {
setShadow(&I, getCleanShadow(&I));
}
@@ -1801,9 +1797,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (MS.TrackOrigins) {
if (PropagateShadow) {
- unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
- setOrigin(
- &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
+ const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
+ setOrigin(&I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr,
+ OriginAlignment.value()));
} else {
setOrigin(&I, getCleanOrigin());
}
@@ -1825,8 +1821,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(&I);
Value *Addr = I.getOperand(0);
- Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(),
- /*Alignment*/ 1, /*isStore*/ true)
+ Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(), Align::None(),
+ /*isStore*/ true)
.first;
if (ClCheckAccessAddress)
@@ -2458,7 +2454,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// We don't know the pointer alignment (could be unaligned SSE store!).
// Have to assume the worst case.
std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
- Addr, IRB, Shadow->getType(), /*Alignment*/ 1, /*isStore*/ true);
+ Addr, IRB, Shadow->getType(), Align::None(), /*isStore*/ true);
IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);
if (ClCheckAccessAddress)
@@ -2478,15 +2474,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Addr = I.getArgOperand(0);
Type *ShadowTy = getShadowTy(&I);
- Value *ShadowPtr, *OriginPtr;
+ Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
if (PropagateShadow) {
// We don't know the pointer alignment (could be unaligned SSE load!).
// Have to assume the worst case.
- unsigned Alignment = 1;
+ const Align Alignment = Align::None();
std::tie(ShadowPtr, OriginPtr) =
getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
- setShadow(&I,
- IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
+ setShadow(&I, IRB.CreateAlignedLoad(ShadowTy, ShadowPtr,
+ Alignment.value(), "_msld"));
} else {
setShadow(&I, getCleanShadow(&I));
}
@@ -2873,7 +2869,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value* Addr = I.getArgOperand(0);
Type *Ty = IRB.getInt32Ty();
Value *ShadowPtr =
- getShadowOriginPtr(Addr, IRB, Ty, /*Alignment*/ 1, /*isStore*/ true)
+ getShadowOriginPtr(Addr, IRB, Ty, Align::None(), /*isStore*/ true)
.first;
IRB.CreateStore(getCleanShadow(Ty),
@@ -2889,7 +2885,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(&I);
Value *Addr = I.getArgOperand(0);
Type *Ty = IRB.getInt32Ty();
- unsigned Alignment = 1;
+ const Align Alignment = Align::None();
Value *ShadowPtr, *OriginPtr;
std::tie(ShadowPtr, OriginPtr) =
getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
@@ -2897,7 +2893,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (ClCheckAccessAddress)
insertShadowCheck(Addr, &I);
- Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
+ Value *Shadow =
+ IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment.value(), "_ldmxcsr");
Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
: getCleanOrigin();
insertShadowCheck(Shadow, Origin, &I);
@@ -2907,14 +2904,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(&I);
Value *V = I.getArgOperand(0);
Value *Addr = I.getArgOperand(1);
- unsigned Align = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ const MaybeAlign Alignment(
+ cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
Value *Mask = I.getArgOperand(3);
Value *Shadow = getShadow(V);
Value *ShadowPtr;
Value *OriginPtr;
std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
- Addr, IRB, Shadow->getType(), Align, /*isStore*/ true);
+ Addr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);
if (ClCheckAccessAddress) {
insertShadowCheck(Addr, &I);
@@ -2923,20 +2921,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
insertShadowCheck(Mask, &I);
}
- IRB.CreateMaskedStore(Shadow, ShadowPtr, Align, Mask);
+ IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment ? Alignment->value() : 0,
+ Mask);
if (MS.TrackOrigins) {
auto &DL = F.getParent()->getDataLayout();
paintOrigin(IRB, getOrigin(V), OriginPtr,
DL.getTypeStoreSize(Shadow->getType()),
- std::max(Align, kMinOriginAlignment));
+ llvm::max(Alignment, kMinOriginAlignment));
}
}
bool handleMaskedLoad(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
Value *Addr = I.getArgOperand(0);
- unsigned Align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ const MaybeAlign Alignment(
+ cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
Value *Mask = I.getArgOperand(2);
Value *PassThru = I.getArgOperand(3);
@@ -2944,9 +2944,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *ShadowPtr, *OriginPtr;
if (PropagateShadow) {
std::tie(ShadowPtr, OriginPtr) =
- getShadowOriginPtr(Addr, IRB, ShadowTy, Align, /*isStore*/ false);
- setShadow(&I, IRB.CreateMaskedLoad(ShadowPtr, Align, Mask,
- getShadow(PassThru), "_msmaskedld"));
+ getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
+ setShadow(&I, IRB.CreateMaskedLoad(
+ ShadowPtr, Alignment ? Alignment->value() : 0, Mask,
+ getShadow(PassThru), "_msmaskedld"));
} else {
setShadow(&I, getCleanShadow(&I));
}
@@ -3278,7 +3279,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Clear out memory and speculation attributes.
AttrBuilder B;
B.addAttribute(Attribute::ReadOnly)
- .addAttribute(Attribute::ReadNone);
+ .addAttribute(Attribute::ReadNone)
+ .addAttribute(Attribute::WriteOnly)
+ .addAttribute(Attribute::ArgMemOnly)
+ .addAttribute(Attribute::Speculatable);
Func->removeAttributes(AttributeList::FunctionIndex, B);
}
@@ -3312,8 +3316,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
"ByVal argument is not a pointer!");
Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
if (ArgOffset + Size > kParamTLSSize) break;
- unsigned ParamAlignment = CS.getParamAlignment(i);
- unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
+ const MaybeAlign ParamAlignment(CS.getParamAlignment(i));
+ MaybeAlign Alignment = llvm::None;
+ if (ParamAlignment)
+ Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
Value *AShadowPtr =
getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
/*isStore*/ false)
@@ -3326,7 +3332,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Size = DL.getTypeAllocSize(A->getType());
if (ArgOffset + Size > kParamTLSSize) break;
Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
- kShadowTLSAlignment);
+ kShadowTLSAlignment.value());
Constant *Cst = dyn_cast<Constant>(ArgShadow);
if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
}
@@ -3352,7 +3358,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRBBefore(&I);
// Until we have full dynamic coverage, make sure the retval shadow is 0.
Value *Base = getShadowPtrForRetval(&I, IRBBefore);
- IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
+ IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base,
+ kShadowTLSAlignment.value());
BasicBlock::iterator NextInsn;
if (CS.isCall()) {
NextInsn = ++I.getIterator();
@@ -3376,7 +3383,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRBAfter(&*NextInsn);
Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
getShadowTy(&I), getShadowPtrForRetval(&I, IRBAfter),
- kShadowTLSAlignment, "_msret");
+ kShadowTLSAlignment.value(), "_msret");
setShadow(&I, RetvalShadow);
if (MS.TrackOrigins)
setOrigin(&I, IRBAfter.CreateLoad(MS.OriginTy,
@@ -3403,10 +3410,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (CheckReturnValue) {
insertShadowCheck(RetVal, &I);
Value *Shadow = getCleanShadow(RetVal);
- IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment.value());
} else {
Value *Shadow = getShadow(RetVal);
- IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment.value());
if (MS.TrackOrigins)
IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
}
@@ -3447,11 +3454,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
{IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
} else {
Value *ShadowBase, *OriginBase;
- std::tie(ShadowBase, OriginBase) =
- getShadowOriginPtr(&I, IRB, IRB.getInt8Ty(), 1, /*isStore*/ true);
+ std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
+ &I, IRB, IRB.getInt8Ty(), Align::None(), /*isStore*/ true);
Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
- IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlignment());
+ IRB.CreateMemSet(ShadowBase, PoisonValue, Len,
+ MaybeAlign(I.getAlignment()));
}
if (PoisonStack && MS.TrackOrigins) {
@@ -3863,7 +3871,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
if (!ShadowBase)
continue;
Value *Shadow = MSV.getShadow(A);
- IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
+ IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment.value());
if (MS.TrackOrigins) {
Value *Origin = MSV.getOrigin(A);
unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
@@ -3904,7 +3912,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
IRBuilder<> IRB(&I);
Value *VAListTag = I.getArgOperand(0);
Value *ShadowPtr, *OriginPtr;
- unsigned Alignment = 8;
+ const Align Alignment = Align(8);
std::tie(ShadowPtr, OriginPtr) =
MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
/*isStore*/ true);
@@ -3942,10 +3950,11 @@ struct VarArgAMD64Helper : public VarArgHelper {
IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
VAArgOverflowSize);
VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
if (MS.TrackOrigins) {
VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSOriginCopy, 8, MS.VAArgOriginTLS, 8, CopySize);
+ IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
+ Align(8), CopySize);
}
}
@@ -3964,7 +3973,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
Value *RegSaveAreaPtr =
IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
- unsigned Alignment = 16;
+ const Align Alignment = Align(16);
std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
Alignment, /*isStore*/ true);
@@ -4032,7 +4041,8 @@ struct VarArgMIPS64Helper : public VarArgHelper {
VAArgOffset = alignTo(VAArgOffset, 8);
if (!Base)
continue;
- IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base,
+ kShadowTLSAlignment.value());
}
Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
@@ -4058,7 +4068,7 @@ struct VarArgMIPS64Helper : public VarArgHelper {
VAStartInstrumentationList.push_back(&I);
Value *VAListTag = I.getArgOperand(0);
Value *ShadowPtr, *OriginPtr;
- unsigned Alignment = 8;
+ const Align Alignment = Align(8);
std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
@@ -4070,7 +4080,7 @@ struct VarArgMIPS64Helper : public VarArgHelper {
VAStartInstrumentationList.push_back(&I);
Value *VAListTag = I.getArgOperand(0);
Value *ShadowPtr, *OriginPtr;
- unsigned Alignment = 8;
+ const Align Alignment = Align(8);
std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
@@ -4089,7 +4099,7 @@ struct VarArgMIPS64Helper : public VarArgHelper {
// If there is a va_start in this function, make a backup copy of
// va_arg_tls somewhere in the function entry block.
VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
}
// Instrument va_start.
@@ -4105,7 +4115,7 @@ struct VarArgMIPS64Helper : public VarArgHelper {
Value *RegSaveAreaPtr =
IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
- unsigned Alignment = 8;
+ const Align Alignment = Align(8);
std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
Alignment, /*isStore*/ true);
@@ -4203,7 +4213,8 @@ struct VarArgAArch64Helper : public VarArgHelper {
continue;
if (!Base)
continue;
- IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base,
+ kShadowTLSAlignment.value());
}
Constant *OverflowSize =
ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
@@ -4227,7 +4238,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
VAStartInstrumentationList.push_back(&I);
Value *VAListTag = I.getArgOperand(0);
Value *ShadowPtr, *OriginPtr;
- unsigned Alignment = 8;
+ const Align Alignment = Align(8);
std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
@@ -4239,7 +4250,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
VAStartInstrumentationList.push_back(&I);
Value *VAListTag = I.getArgOperand(0);
Value *ShadowPtr, *OriginPtr;
- unsigned Alignment = 8;
+ const Align Alignment = Align(8);
std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
@@ -4280,7 +4291,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
VAArgOverflowSize);
VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
}
Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
@@ -4331,14 +4342,15 @@ struct VarArgAArch64Helper : public VarArgHelper {
Value *GrRegSaveAreaShadowPtr =
MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
- /*Alignment*/ 8, /*isStore*/ true)
+ Align(8), /*isStore*/ true)
.first;
Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
GrRegSaveAreaShadowPtrOff);
Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
- IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, 8, GrSrcPtr, 8, GrCopySize);
+ IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
+ GrCopySize);
// Again, but for FP/SIMD values.
Value *VrRegSaveAreaShadowPtrOff =
@@ -4346,7 +4358,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
Value *VrRegSaveAreaShadowPtr =
MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
- /*Alignment*/ 8, /*isStore*/ true)
+ Align(8), /*isStore*/ true)
.first;
Value *VrSrcPtr = IRB.CreateInBoundsGEP(
@@ -4356,20 +4368,21 @@ struct VarArgAArch64Helper : public VarArgHelper {
VrRegSaveAreaShadowPtrOff);
Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
- IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, 8, VrSrcPtr, 8, VrCopySize);
+ IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
+ VrCopySize);
// And finally for remaining arguments.
Value *StackSaveAreaShadowPtr =
MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
- /*Alignment*/ 16, /*isStore*/ true)
+ Align(16), /*isStore*/ true)
.first;
Value *StackSrcPtr =
IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
IRB.getInt32(AArch64VAEndOffset));
- IRB.CreateMemCpy(StackSaveAreaShadowPtr, 16, StackSrcPtr, 16,
- VAArgOverflowSize);
+ IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
+ Align(16), VAArgOverflowSize);
}
}
};
@@ -4461,7 +4474,8 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
Base = getShadowPtrForVAArgument(A->getType(), IRB,
VAArgOffset - VAArgBase, ArgSize);
if (Base)
- IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base,
+ kShadowTLSAlignment.value());
}
VAArgOffset += ArgSize;
VAArgOffset = alignTo(VAArgOffset, 8);
@@ -4494,7 +4508,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
VAStartInstrumentationList.push_back(&I);
Value *VAListTag = I.getArgOperand(0);
Value *ShadowPtr, *OriginPtr;
- unsigned Alignment = 8;
+ const Align Alignment = Align(8);
std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
@@ -4505,7 +4519,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
IRBuilder<> IRB(&I);
Value *VAListTag = I.getArgOperand(0);
Value *ShadowPtr, *OriginPtr;
- unsigned Alignment = 8;
+ const Align Alignment = Align(8);
std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
// Unpoison the whole __va_list_tag.
@@ -4526,7 +4540,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
// If there is a va_start in this function, make a backup copy of
// va_arg_tls somewhere in the function entry block.
VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
}
// Instrument va_start.
@@ -4542,7 +4556,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
Value *RegSaveAreaPtr =
IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
- unsigned Alignment = 8;
+ const Align Alignment = Align(8);
std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
Alignment, /*isStore*/ true);
@@ -4595,7 +4609,10 @@ bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
// Clear out readonly/readnone attributes.
AttrBuilder B;
B.addAttribute(Attribute::ReadOnly)
- .addAttribute(Attribute::ReadNone);
+ .addAttribute(Attribute::ReadNone)
+ .addAttribute(Attribute::WriteOnly)
+ .addAttribute(Attribute::ArgMemOnly)
+ .addAttribute(Attribute::Speculatable);
F.removeAttributes(AttributeList::FunctionIndex, B);
return Visitor.runOnFunction();
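
After instrumentation the function reads and writes shadow memory, so the hunk above widens the set of attributes that promise limited memory effects (or safe speculation) and must be dropped first. A minimal sketch of the pattern, grounded in the same LLVM 10-era attribute API; illustrative only:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
using namespace llvm;

static void stripMemoryEffectAttrs(Function &F) {
  AttrBuilder B;
  B.addAttribute(Attribute::ReadOnly)
      .addAttribute(Attribute::ReadNone)
      .addAttribute(Attribute::WriteOnly)
      .addAttribute(Attribute::ArgMemOnly)
      .addAttribute(Attribute::Speculatable);
  // Removing an attribute that is absent is a no-op, so this can run
  // unconditionally before the visitor rewrites the body.
  F.removeAttributes(AttributeList::FunctionIndex, B);
}
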
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index ca1bb62389e9..cc96bdd1d516 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -47,6 +47,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "CFGMST.h"
#include "ValueProfileCollector.h"
#include "llvm/ADT/APInt.h"
@@ -92,6 +93,7 @@
#include "llvm/IR/ProfileSummary.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
@@ -106,7 +108,6 @@
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/MisExpect.h"
#include <algorithm>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index 9f81bb16d0a7..d0afe2959b39 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/PassSupport.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
index 81d92e724c7d..71ecfd9a2642 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
@@ -65,10 +65,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index f8fa9cad03b8..e6dc684c2e77 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index ac274a155a80..9b7edad3444b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -26,7 +26,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -37,6 +36,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -45,6 +45,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index b341dd807508..7a01ec967fb5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -26,6 +26,7 @@
#include "ObjCARC.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 36aa513ec554..ecf8220ae95d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -34,6 +34,8 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Operator.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index 04e98d8f5577..205d8ddf151d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/PassRegistry.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 6653ff0bb91a..b80c1675050b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -58,8 +58,10 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -506,6 +508,20 @@ namespace {
ARCInstKind &Class);
void OptimizeIndividualCalls(Function &F);
+  /// Optimize an individual call, optionally passing in the result of
+  /// GetArgRCIdentityRoot if it has already been computed.
+ void OptimizeIndividualCallImpl(
+ Function &F, DenseMap<BasicBlock *, ColorVector> &BlockColors,
+ Instruction *Inst, ARCInstKind Class, const Value *Arg);
+
+ /// Try to optimize an AutoreleaseRV with a RetainRV or ClaimRV. If the
+ /// optimization occurs, returns true to indicate that the caller should
+ /// assume the instructions are dead.
+ bool OptimizeInlinedAutoreleaseRVCall(
+ Function &F, DenseMap<BasicBlock *, ColorVector> &BlockColors,
+ Instruction *Inst, const Value *&Arg, ARCInstKind Class,
+ Instruction *AutoreleaseRV, const Value *&AutoreleaseRVArg);
+
void CheckForCFGHazards(const BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
BBState &MyStates) const;
@@ -589,8 +605,7 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
}
/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
-/// not a return value. Or, if it can be paired with an
-/// objc_autoreleaseReturnValue, delete the pair and return true.
+/// not a return value.
bool
ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
// Check for the argument being from an immediately preceding call or invoke.
@@ -616,39 +631,6 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
}
}
- // Track PHIs which are equivalent to our Arg.
- SmallDenseSet<const Value*, 2> EquivalentArgs;
- EquivalentArgs.insert(Arg);
-
- // Add PHIs that are equivalent to Arg to ArgUsers.
- if (const PHINode *PN = dyn_cast<PHINode>(Arg)) {
- SmallVector<const Value *, 2> ArgUsers;
- getEquivalentPHIs(*PN, ArgUsers);
- EquivalentArgs.insert(ArgUsers.begin(), ArgUsers.end());
- }
-
- // Check for being preceded by an objc_autoreleaseReturnValue on the same
- // pointer. In this case, we can delete the pair.
- BasicBlock::iterator I = RetainRV->getIterator(),
- Begin = RetainRV->getParent()->begin();
- if (I != Begin) {
- do
- --I;
- while (I != Begin && IsNoopInstruction(&*I));
- if (GetBasicARCInstKind(&*I) == ARCInstKind::AutoreleaseRV &&
- EquivalentArgs.count(GetArgRCIdentityRoot(&*I))) {
- Changed = true;
- ++NumPeeps;
-
- LLVM_DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n"
- << "Erasing " << *RetainRV << "\n");
-
- EraseInstruction(&*I);
- EraseInstruction(RetainRV);
- return true;
- }
- }
-
// Turn it to a plain objc_retain.
Changed = true;
++NumPeeps;
@@ -666,6 +648,62 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
return false;
}
+bool ObjCARCOpt::OptimizeInlinedAutoreleaseRVCall(
+ Function &F, DenseMap<BasicBlock *, ColorVector> &BlockColors,
+ Instruction *Inst, const Value *&Arg, ARCInstKind Class,
+ Instruction *AutoreleaseRV, const Value *&AutoreleaseRVArg) {
+ // Must be in the same basic block.
+ assert(Inst->getParent() == AutoreleaseRV->getParent());
+
+ // Must operate on the same root.
+ Arg = GetArgRCIdentityRoot(Inst);
+ AutoreleaseRVArg = GetArgRCIdentityRoot(AutoreleaseRV);
+ if (Arg != AutoreleaseRVArg) {
+ // If there isn't an exact match, check if we have equivalent PHIs.
+ const PHINode *PN = dyn_cast<PHINode>(Arg);
+ if (!PN)
+ return false;
+
+ SmallVector<const Value *, 4> ArgUsers;
+ getEquivalentPHIs(*PN, ArgUsers);
+ if (llvm::find(ArgUsers, AutoreleaseRVArg) == ArgUsers.end())
+ return false;
+ }
+
+ // Okay, this is a match. Merge them.
+ ++NumPeeps;
+ LLVM_DEBUG(dbgs() << "Found inlined objc_autoreleaseReturnValue '"
+ << *AutoreleaseRV << "' paired with '" << *Inst << "'\n");
+
+ // Delete the RV pair, starting with the AutoreleaseRV.
+ AutoreleaseRV->replaceAllUsesWith(
+ cast<CallInst>(AutoreleaseRV)->getArgOperand(0));
+ EraseInstruction(AutoreleaseRV);
+ if (Class == ARCInstKind::RetainRV) {
+ // AutoreleaseRV and RetainRV cancel out. Delete the RetainRV.
+ Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
+ EraseInstruction(Inst);
+ return true;
+ }
+
+ // ClaimRV is a frontend peephole for RetainRV + Release. Since the
+ // AutoreleaseRV and RetainRV cancel out, replace the ClaimRV with a Release.
+ assert(Class == ARCInstKind::ClaimRV);
+ Value *CallArg = cast<CallInst>(Inst)->getArgOperand(0);
+ CallInst *Release = CallInst::Create(
+ EP.get(ARCRuntimeEntryPointKind::Release), CallArg, "", Inst);
+ assert(IsAlwaysTail(ARCInstKind::ClaimRV) &&
+ "Expected ClaimRV to be safe to tail call");
+ Release->setTailCall();
+ Inst->replaceAllUsesWith(CallArg);
+ EraseInstruction(Inst);
+
+ // Run the normal optimizations on Release.
+ OptimizeIndividualCallImpl(F, BlockColors, Release, ARCInstKind::Release,
+ Arg);
+ return true;
+}
+
/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
/// used as a return value.
void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
@@ -752,286 +790,370 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
BlockColors = colorEHFunclets(F);
+ // Store any delayed AutoreleaseRV intrinsics, so they can be easily paired
+ // with RetainRV and ClaimRV.
+ Instruction *DelayedAutoreleaseRV = nullptr;
+ const Value *DelayedAutoreleaseRVArg = nullptr;
+ auto setDelayedAutoreleaseRV = [&](Instruction *AutoreleaseRV) {
+ assert(!DelayedAutoreleaseRV || !AutoreleaseRV);
+ DelayedAutoreleaseRV = AutoreleaseRV;
+ DelayedAutoreleaseRVArg = nullptr;
+ };
+ auto optimizeDelayedAutoreleaseRV = [&]() {
+ if (!DelayedAutoreleaseRV)
+ return;
+ OptimizeIndividualCallImpl(F, BlockColors, DelayedAutoreleaseRV,
+ ARCInstKind::AutoreleaseRV,
+ DelayedAutoreleaseRVArg);
+ setDelayedAutoreleaseRV(nullptr);
+ };
+ auto shouldDelayAutoreleaseRV = [&](Instruction *NonARCInst) {
+ // Nothing to delay, but we may as well skip the logic below.
+ if (!DelayedAutoreleaseRV)
+ return true;
+
+    // If we hit the end of the basic block, we're not going to find an RV-pair.
+ // Stop delaying.
+ if (NonARCInst->isTerminator())
+ return false;
+
+ // Given the frontend rules for emitting AutoreleaseRV, RetainRV, and
+ // ClaimRV, it's probably safe to skip over even opaque function calls
+ // here since OptimizeInlinedAutoreleaseRVCall will confirm that they
+ // have the same RCIdentityRoot. However, what really matters is
+ // skipping instructions or intrinsics that the inliner could leave behind;
+ // be conservative for now and don't skip over opaque calls, which could
+ // potentially include other ARC calls.
+ auto *CB = dyn_cast<CallBase>(NonARCInst);
+ if (!CB)
+ return true;
+ return CB->getIntrinsicID() != Intrinsic::not_intrinsic;
+ };
+
// Visit all objc_* calls in F.
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
Instruction *Inst = &*I++;
ARCInstKind Class = GetBasicARCInstKind(Inst);
- LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
-
- // Some of the ARC calls can be deleted if their arguments are global
- // variables that are inert in ARC.
- if (IsNoopOnGlobal(Class)) {
- Value *Opnd = Inst->getOperand(0);
- if (auto *GV = dyn_cast<GlobalVariable>(Opnd->stripPointerCasts()))
- if (GV->hasAttribute("objc_arc_inert")) {
- if (!Inst->getType()->isVoidTy())
- Inst->replaceAllUsesWith(Opnd);
- Inst->eraseFromParent();
- continue;
- }
- }
-
+    // Skip the rest of this loop iteration if the instruction isn't itself
+    // an ARC intrinsic.
+ const Value *Arg = nullptr;
switch (Class) {
- default: break;
-
- // Delete no-op casts. These function calls have special semantics, but
- // the semantics are entirely implemented via lowering in the front-end,
- // so by the time they reach the optimizer, they are just no-op calls
- // which return their argument.
- //
- // There are gray areas here, as the ability to cast reference-counted
- // pointers to raw void* and back allows code to break ARC assumptions,
- // however these are currently considered to be unimportant.
- case ARCInstKind::NoopCast:
- Changed = true;
- ++NumNoops;
- LLVM_DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
- EraseInstruction(Inst);
- continue;
-
- // If the pointer-to-weak-pointer is null, it's undefined behavior.
- case ARCInstKind::StoreWeak:
- case ARCInstKind::LoadWeak:
- case ARCInstKind::LoadWeakRetained:
- case ARCInstKind::InitWeak:
- case ARCInstKind::DestroyWeak: {
- CallInst *CI = cast<CallInst>(Inst);
- if (IsNullOrUndef(CI->getArgOperand(0))) {
- Changed = true;
- Type *Ty = CI->getArgOperand(0)->getType();
- new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
- Constant::getNullValue(Ty),
- CI);
- Value *NewValue = UndefValue::get(CI->getType());
- LLVM_DEBUG(
- dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
- "\nOld = "
- << *CI << "\nNew = " << *NewValue << "\n");
- CI->replaceAllUsesWith(NewValue);
- CI->eraseFromParent();
- continue;
- }
- break;
- }
- case ARCInstKind::CopyWeak:
- case ARCInstKind::MoveWeak: {
- CallInst *CI = cast<CallInst>(Inst);
- if (IsNullOrUndef(CI->getArgOperand(0)) ||
- IsNullOrUndef(CI->getArgOperand(1))) {
- Changed = true;
- Type *Ty = CI->getArgOperand(0)->getType();
- new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
- Constant::getNullValue(Ty),
- CI);
-
- Value *NewValue = UndefValue::get(CI->getType());
- LLVM_DEBUG(
- dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
- "\nOld = "
- << *CI << "\nNew = " << *NewValue << "\n");
-
- CI->replaceAllUsesWith(NewValue);
- CI->eraseFromParent();
- continue;
- }
- break;
- }
- case ARCInstKind::RetainRV:
- if (OptimizeRetainRVCall(F, Inst))
- continue;
+ default:
+ optimizeDelayedAutoreleaseRV();
break;
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ // This is a non-ARC instruction. If we're delaying an AutoreleaseRV,
+ // check if it's safe to skip over it; if not, optimize the AutoreleaseRV
+ // now.
+ if (!shouldDelayAutoreleaseRV(Inst))
+ optimizeDelayedAutoreleaseRV();
+ continue;
case ARCInstKind::AutoreleaseRV:
- OptimizeAutoreleaseRVCall(F, Inst, Class);
+ optimizeDelayedAutoreleaseRV();
+ setDelayedAutoreleaseRV(Inst);
+ continue;
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::ClaimRV:
+ if (DelayedAutoreleaseRV) {
+ // We have a potential RV pair. Check if they cancel out.
+ if (OptimizeInlinedAutoreleaseRVCall(F, BlockColors, Inst, Arg, Class,
+ DelayedAutoreleaseRV,
+ DelayedAutoreleaseRVArg)) {
+ setDelayedAutoreleaseRV(nullptr);
+ continue;
+ }
+ optimizeDelayedAutoreleaseRV();
+ }
break;
}
- // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
- if (IsAutorelease(Class) && Inst->use_empty()) {
- CallInst *Call = cast<CallInst>(Inst);
- const Value *Arg = Call->getArgOperand(0);
- Arg = FindSingleUseIdentifiedObject(Arg);
- if (Arg) {
- Changed = true;
- ++NumAutoreleases;
-
- // Create the declaration lazily.
- LLVMContext &C = Inst->getContext();
-
- Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
- CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "",
- Call);
- NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
- MDNode::get(C, None));
-
- LLVM_DEBUG(
- dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
- "since x is otherwise unused.\nOld: "
- << *Call << "\nNew: " << *NewCall << "\n");
-
- EraseInstruction(Call);
- Inst = NewCall;
- Class = ARCInstKind::Release;
+ OptimizeIndividualCallImpl(F, BlockColors, Inst, Class, Arg);
+ }
+
+ // Catch the final delayed AutoreleaseRV.
+ optimizeDelayedAutoreleaseRV();
+}
+
+void ObjCARCOpt::OptimizeIndividualCallImpl(
+ Function &F, DenseMap<BasicBlock *, ColorVector> &BlockColors,
+ Instruction *Inst, ARCInstKind Class, const Value *Arg) {
+ LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
+
+ // Some of the ARC calls can be deleted if their arguments are global
+ // variables that are inert in ARC.
+ if (IsNoopOnGlobal(Class)) {
+ Value *Opnd = Inst->getOperand(0);
+ if (auto *GV = dyn_cast<GlobalVariable>(Opnd->stripPointerCasts()))
+ if (GV->hasAttribute("objc_arc_inert")) {
+ if (!Inst->getType()->isVoidTy())
+ Inst->replaceAllUsesWith(Opnd);
+ Inst->eraseFromParent();
+ return;
}
- }
+ }
- // For functions which can never be passed stack arguments, add
- // a tail keyword.
- if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) {
+ switch (Class) {
+ default:
+ break;
+
+ // Delete no-op casts. These function calls have special semantics, but
+ // the semantics are entirely implemented via lowering in the front-end,
+ // so by the time they reach the optimizer, they are just no-op calls
+ // which return their argument.
+ //
+  // There are gray areas here, as the ability to cast reference-counted
+  // pointers to raw void* and back allows code to break ARC assumptions;
+  // however, these are currently considered to be unimportant.
+ case ARCInstKind::NoopCast:
+ Changed = true;
+ ++NumNoops;
+ LLVM_DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
+ EraseInstruction(Inst);
+ return;
+
+ // If the pointer-to-weak-pointer is null, it's undefined behavior.
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::DestroyWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (IsNullOrUndef(CI->getArgOperand(0))) {
Changed = true;
+ Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty), CI);
+ Value *NewValue = UndefValue::get(CI->getType());
LLVM_DEBUG(
- dbgs() << "Adding tail keyword to function since it can never be "
- "passed stack args: "
- << *Inst << "\n");
- cast<CallInst>(Inst)->setTailCall();
+ dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+ "\nOld = "
+ << *CI << "\nNew = " << *NewValue << "\n");
+ CI->replaceAllUsesWith(NewValue);
+ CI->eraseFromParent();
+ return;
}
-
- // Ensure that functions that can never have a "tail" keyword due to the
- // semantics of ARC truly do not do so.
- if (IsNeverTail(Class)) {
+ break;
+ }
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::MoveWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (IsNullOrUndef(CI->getArgOperand(0)) ||
+ IsNullOrUndef(CI->getArgOperand(1))) {
Changed = true;
- LLVM_DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst
- << "\n");
- cast<CallInst>(Inst)->setTailCall(false);
+ Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty), CI);
+
+ Value *NewValue = UndefValue::get(CI->getType());
+ LLVM_DEBUG(
+ dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+ "\nOld = "
+ << *CI << "\nNew = " << *NewValue << "\n");
+
+ CI->replaceAllUsesWith(NewValue);
+ CI->eraseFromParent();
+ return;
}
+ break;
+ }
+ case ARCInstKind::RetainRV:
+ if (OptimizeRetainRVCall(F, Inst))
+ return;
+ break;
+ case ARCInstKind::AutoreleaseRV:
+ OptimizeAutoreleaseRVCall(F, Inst, Class);
+ break;
+ }
- // Set nounwind as needed.
- if (IsNoThrow(Class)) {
+ // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
+ if (IsAutorelease(Class) && Inst->use_empty()) {
+ CallInst *Call = cast<CallInst>(Inst);
+ const Value *Arg = Call->getArgOperand(0);
+ Arg = FindSingleUseIdentifiedObject(Arg);
+ if (Arg) {
Changed = true;
- LLVM_DEBUG(dbgs() << "Found no throw class. Setting nounwind on: "
- << *Inst << "\n");
- cast<CallInst>(Inst)->setDoesNotThrow();
- }
+ ++NumAutoreleases;
- if (!IsNoopOnNull(Class)) {
- UsedInThisFunction |= 1 << unsigned(Class);
- continue;
- }
+ // Create the declaration lazily.
+ LLVMContext &C = Inst->getContext();
- const Value *Arg = GetArgRCIdentityRoot(Inst);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
+ CallInst *NewCall =
+ CallInst::Create(Decl, Call->getArgOperand(0), "", Call);
+ NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
+ MDNode::get(C, None));
- // ARC calls with null are no-ops. Delete them.
- if (IsNullOrUndef(Arg)) {
- Changed = true;
- ++NumNoops;
- LLVM_DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst
- << "\n");
- EraseInstruction(Inst);
- continue;
+ LLVM_DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
+ "since x is otherwise unused.\nOld: "
+ << *Call << "\nNew: " << *NewCall << "\n");
+
+ EraseInstruction(Call);
+ Inst = NewCall;
+ Class = ARCInstKind::Release;
}
+ }
+
+ // For functions which can never be passed stack arguments, add
+ // a tail keyword.
+ if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) {
+ Changed = true;
+ LLVM_DEBUG(
+ dbgs() << "Adding tail keyword to function since it can never be "
+ "passed stack args: "
+ << *Inst << "\n");
+ cast<CallInst>(Inst)->setTailCall();
+ }
+
+ // Ensure that functions that can never have a "tail" keyword due to the
+ // semantics of ARC truly do not do so.
+ if (IsNeverTail(Class)) {
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst
+ << "\n");
+ cast<CallInst>(Inst)->setTailCall(false);
+ }
+
+ // Set nounwind as needed.
+ if (IsNoThrow(Class)) {
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst
+ << "\n");
+ cast<CallInst>(Inst)->setDoesNotThrow();
+ }
- // Keep track of which of retain, release, autorelease, and retain_block
- // are actually present in this function.
+ // Note: This catches instructions unrelated to ARC.
+ if (!IsNoopOnNull(Class)) {
UsedInThisFunction |= 1 << unsigned(Class);
+ return;
+ }
+
+ // If we haven't already looked up the root, look it up now.
+ if (!Arg)
+ Arg = GetArgRCIdentityRoot(Inst);
+
+ // ARC calls with null are no-ops. Delete them.
+ if (IsNullOrUndef(Arg)) {
+ Changed = true;
+ ++NumNoops;
+ LLVM_DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst
+ << "\n");
+ EraseInstruction(Inst);
+ return;
+ }
+
+ // Keep track of which of retain, release, autorelease, and retain_block
+ // are actually present in this function.
+ UsedInThisFunction |= 1 << unsigned(Class);
+
+  // If Arg is a PHI, one or more of the PHI's incoming values are null, the
+  // call is control-equivalent to the PHI, there are no relevant side effects
+  // between the PHI and the call, and the call is not a release lacking the
+  // clang.imprecise_release tag, then the call could be pushed up to just
+  // those paths with non-null incoming values. For now, don't bother
+  // splitting critical edges for this.
+ if (Class == ARCInstKind::Release &&
+ !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease)))
+ return;
+
+ SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
+ Worklist.push_back(std::make_pair(Inst, Arg));
+ do {
+ std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
+ Inst = Pair.first;
+ Arg = Pair.second;
- // If Arg is a PHI, and one or more incoming values to the
- // PHI are null, and the call is control-equivalent to the PHI, and there
- // are no relevant side effects between the PHI and the call, and the call
- // is not a release that doesn't have the clang.imprecise_release tag, the
- // call could be pushed up to just those paths with non-null incoming
- // values. For now, don't bother splitting critical edges for this.
- if (Class == ARCInstKind::Release &&
- !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease)))
+ const PHINode *PN = dyn_cast<PHINode>(Arg);
+ if (!PN)
continue;
- SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
- Worklist.push_back(std::make_pair(Inst, Arg));
- do {
- std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
- Inst = Pair.first;
- Arg = Pair.second;
-
- const PHINode *PN = dyn_cast<PHINode>(Arg);
- if (!PN) continue;
-
- // Determine if the PHI has any null operands, or any incoming
- // critical edges.
- bool HasNull = false;
- bool HasCriticalEdges = false;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming =
- GetRCIdentityRoot(PN->getIncomingValue(i));
- if (IsNullOrUndef(Incoming))
- HasNull = true;
- else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() !=
- 1) {
- HasCriticalEdges = true;
- break;
- }
+ // Determine if the PHI has any null operands, or any incoming
+ // critical edges.
+ bool HasNull = false;
+ bool HasCriticalEdges = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i));
+ if (IsNullOrUndef(Incoming))
+ HasNull = true;
+ else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() !=
+ 1) {
+ HasCriticalEdges = true;
+ break;
}
- // If we have null operands and no critical edges, optimize.
- if (!HasCriticalEdges && HasNull) {
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
-
- // Check that there is nothing that cares about the reference
- // count between the call and the phi.
- switch (Class) {
- case ARCInstKind::Retain:
- case ARCInstKind::RetainBlock:
- // These can always be moved up.
- break;
- case ARCInstKind::Release:
- // These can't be moved across things that care about the retain
- // count.
- FindDependencies(NeedsPositiveRetainCount, Arg,
- Inst->getParent(), Inst,
- DependingInstructions, Visited, PA);
- break;
- case ARCInstKind::Autorelease:
- // These can't be moved across autorelease pool scope boundaries.
- FindDependencies(AutoreleasePoolBoundary, Arg,
- Inst->getParent(), Inst,
- DependingInstructions, Visited, PA);
- break;
- case ARCInstKind::ClaimRV:
- case ARCInstKind::RetainRV:
- case ARCInstKind::AutoreleaseRV:
- // Don't move these; the RV optimization depends on the autoreleaseRV
- // being tail called, and the retainRV being immediately after a call
- // (which might still happen if we get lucky with codegen layout, but
- // it's not worth taking the chance).
- continue;
- default:
- llvm_unreachable("Invalid dependence flavor");
- }
+ }
+ // If we have null operands and no critical edges, optimize.
+ if (HasCriticalEdges)
+ continue;
+ if (!HasNull)
+ continue;
- if (DependingInstructions.size() == 1 &&
- *DependingInstructions.begin() == PN) {
- Changed = true;
- ++NumPartialNoops;
- // Clone the call into each predecessor that has a non-null value.
- CallInst *CInst = cast<CallInst>(Inst);
- Type *ParamTy = CInst->getArgOperand(0)->getType();
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming =
- GetRCIdentityRoot(PN->getIncomingValue(i));
- if (!IsNullOrUndef(Incoming)) {
- Value *Op = PN->getIncomingValue(i);
- Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
- CallInst *Clone = cast<CallInst>(CloneCallInstForBB(
- *CInst, *InsertPos->getParent(), BlockColors));
- if (Op->getType() != ParamTy)
- Op = new BitCastInst(Op, ParamTy, "", InsertPos);
- Clone->setArgOperand(0, Op);
- Clone->insertBefore(InsertPos);
-
- LLVM_DEBUG(dbgs() << "Cloning " << *CInst
- << "\n"
- "And inserting clone at "
- << *InsertPos << "\n");
- Worklist.push_back(std::make_pair(Clone, Incoming));
- }
- }
- // Erase the original call.
- LLVM_DEBUG(dbgs() << "Erasing: " << *CInst << "\n");
- EraseInstruction(CInst);
- continue;
- }
- }
- } while (!Worklist.empty());
- }
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+
+ // Check that there is nothing that cares about the reference
+ // count between the call and the phi.
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainBlock:
+ // These can always be moved up.
+ break;
+ case ARCInstKind::Release:
+ // These can't be moved across things that care about the retain
+ // count.
+ FindDependencies(NeedsPositiveRetainCount, Arg, Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case ARCInstKind::Autorelease:
+ // These can't be moved across autorelease pool scope boundaries.
+ FindDependencies(AutoreleasePoolBoundary, Arg, Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case ARCInstKind::ClaimRV:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::AutoreleaseRV:
+ // Don't move these; the RV optimization depends on the autoreleaseRV
+ // being tail called, and the retainRV being immediately after a call
+ // (which might still happen if we get lucky with codegen layout, but
+ // it's not worth taking the chance).
+ continue;
+ default:
+ llvm_unreachable("Invalid dependence flavor");
+ }
+
+ if (DependingInstructions.size() != 1)
+ continue;
+ if (*DependingInstructions.begin() != PN)
+ continue;
+
+ Changed = true;
+ ++NumPartialNoops;
+ // Clone the call into each predecessor that has a non-null value.
+ CallInst *CInst = cast<CallInst>(Inst);
+ Type *ParamTy = CInst->getArgOperand(0)->getType();
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i));
+ if (IsNullOrUndef(Incoming))
+ continue;
+ Value *Op = PN->getIncomingValue(i);
+ Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
+ CallInst *Clone = cast<CallInst>(
+ CloneCallInstForBB(*CInst, *InsertPos->getParent(), BlockColors));
+ if (Op->getType() != ParamTy)
+ Op = new BitCastInst(Op, ParamTy, "", InsertPos);
+ Clone->setArgOperand(0, Op);
+ Clone->insertBefore(InsertPos);
+
+ LLVM_DEBUG(dbgs() << "Cloning " << *CInst << "\n"
+ "And inserting clone at "
+ << *InsertPos << "\n");
+ Worklist.push_back(std::make_pair(Clone, Incoming));
+ }
+ // Erase the original call.
+ LLVM_DEBUG(dbgs() << "Erasing: " << *CInst << "\n");
+ EraseInstruction(CInst);
+ } while (!Worklist.empty());
}
/// If we have a top down pointer in the S_Use state, make sure that there are
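
A standalone toy model of the deferred-pairing scan above, assuming a simplified instruction stream; illustrative only, not the LLVM API:

#include <cassert>
#include <cstdio>
#include <vector>

enum class Kind { AutoreleaseRV, RetainRV, ClaimRV, InlinerIntrinsic, OpaqueCall };

int main() {
  // What inlining typically leaves behind: the callee's autoreleaseRV, some
  // intrinsic debris, then the caller's retainRV.
  std::vector<Kind> BB = {Kind::AutoreleaseRV, Kind::InlinerIntrinsic,
                          Kind::RetainRV};
  bool Delayed = false;
  for (Kind K : BB) {
    switch (K) {
    case Kind::AutoreleaseRV:
      Delayed = true; // hold it, hoping a matching RV call follows
      break;
    case Kind::RetainRV:
    case Kind::ClaimRV:
      if (Delayed) {
        std::puts("paired: the RV calls cancel out");
        Delayed = false;
      }
      break;
    case Kind::InlinerIntrinsic:
      break; // safe to skip over intrinsics the inliner leaves behind
    case Kind::OpaqueCall:
      Delayed = false; // opaque code: give up and optimize the RV call now
      break;
    }
  }
  assert(!Delayed && "a leftover AutoreleaseRV is optimized at block end");
  return 0;
}
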
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index b768f7973b87..99a2055aba94 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
index 7f7460c5746a..cc3d3bf7cdbf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 0e9f03a06061..06deaf3c4f9a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -15,6 +15,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/InitializePasses.h"
#define AA_NAME "alignment-from-assumptions"
#define DEBUG_TYPE AA_NAME
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp
index 9bd387c33e80..0fa38fa80b17 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -19,13 +19,14 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "bdce"
@@ -101,7 +102,7 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
(I.getType()->isIntOrIntVectorTy() &&
DB.getDemandedBits(&I).isNullValue() &&
wouldInstructionBeTriviallyDead(&I))) {
- salvageDebugInfo(I);
+ salvageDebugInfoOrMarkUndef(I);
Worklist.push_back(&I);
I.dropAllReferences();
Changed = true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index c3fba923104f..e34c011b1c87 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -59,13 +59,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 9f340afbf7c2..5bfece010bec 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -14,7 +14,7 @@
// cost. If the constant can be folded into the instruction (the cost is
// TCC_Free) or the cost is just a simple operation (TCC_BASIC), then we don't
// consider it expensive and leave it alone. This is the default behavior and
-// the default implementation of getIntImmCost will always return TCC_Free.
+// the default implementation of getIntImmCostInst will always return TCC_Free.
//
// If the cost is more than TCC_BASIC, then the integer constant can't be folded
// into the instruction and it might be beneficial to hoist the constant.
@@ -43,7 +43,6 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -54,6 +53,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/Casting.h"
@@ -61,6 +61,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
@@ -361,11 +362,11 @@ void ConstantHoistingPass::collectConstantCandidates(
// Ask the target about the cost of materializing the constant for the given
// instruction and operand index.
if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst))
- Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(), Idx,
- ConstInt->getValue(), ConstInt->getType());
+ Cost = TTI->getIntImmCostIntrin(IntrInst->getIntrinsicID(), Idx,
+ ConstInt->getValue(), ConstInt->getType());
else
- Cost = TTI->getIntImmCost(Inst->getOpcode(), Idx, ConstInt->getValue(),
- ConstInt->getType());
+ Cost = TTI->getIntImmCostInst(Inst->getOpcode(), Idx, ConstInt->getValue(),
+ ConstInt->getType());
// Ignore cheap integer constants.
if (Cost > TargetTransformInfo::TCC_Basic) {
@@ -415,7 +416,7 @@ void ConstantHoistingPass::collectConstantCandidates(
// usually lowered to a load from constant pool. Such operation is unlikely
// to be cheaper than compute it by <Base + Offset>, which can be lowered to
// an ADD instruction or folded into Load/Store instruction.
- int Cost = TTI->getIntImmCost(Instruction::Add, 1, Offset, PtrIntTy);
+ int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy);
ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV];
ConstCandMapType::iterator Itr;
bool Inserted;
@@ -486,9 +487,10 @@ void ConstantHoistingPass::collectConstantCandidates(
// Scan all operands.
for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
// The cost of materializing the constants (defined in
- // `TargetTransformInfo::getIntImmCost`) for instructions which only take
- // constant variables is lower than `TargetTransformInfo::TCC_Basic`. So
- // it's safe for us to collect constant candidates from all IntrinsicInsts.
+ // `TargetTransformInfo::getIntImmCostInst`) for instructions which only
+ // take constant variables is lower than `TargetTransformInfo::TCC_Basic`.
+ // So it's safe for us to collect constant candidates from all
+ // IntrinsicInsts.
if (canReplaceOperandWithVariable(Inst, Idx) || isa<IntrinsicInst>(Inst)) {
collectConstantCandidates(ConstCandMap, Inst, Idx);
}
@@ -499,9 +501,13 @@ void ConstantHoistingPass::collectConstantCandidates(
/// into an instruction itself.
void ConstantHoistingPass::collectConstantCandidates(Function &Fn) {
ConstCandMapType ConstCandMap;
- for (BasicBlock &BB : Fn)
+ for (BasicBlock &BB : Fn) {
+ // Ignore unreachable basic blocks.
+ if (!DT->isReachableFromEntry(&BB))
+ continue;
for (Instruction &Inst : BB)
collectConstantCandidates(ConstCandMap, &Inst);
+ }
}
// This helper function is necessary to deal with values that have different
@@ -552,7 +558,8 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
unsigned NumUses = 0;
bool OptForSize = Entry->getParent()->hasOptSize() ||
- llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI);
+ llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI,
+ PGSOQueryType::IRPass);
if (!OptForSize || std::distance(S,E) > 100) {
for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
NumUses += ConstCand->Uses.size();
@@ -575,7 +582,7 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
for (auto User : ConstCand->Uses) {
unsigned Opcode = User.Inst->getOpcode();
unsigned OpndIdx = User.OpndIdx;
- Cost += TTI->getIntImmCost(Opcode, OpndIdx, Value, Ty);
+ Cost += TTI->getIntImmCostInst(Opcode, OpndIdx, Value, Ty);
LLVM_DEBUG(dbgs() << "Cost: " << Cost << "\n");
for (auto C2 = S; C2 != E; ++C2) {
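
These renames split the immediate-cost query by context: getIntImmCostInst prices a constant used by an ordinary instruction operand, while getIntImmCostIntrin prices one used by an intrinsic. A sketch of the two call shapes, using only the signatures visible in these hunks:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static int immCost(const TargetTransformInfo &TTI, Instruction *Inst,
                   unsigned Idx, const ConstantInt *C) {
  if (auto *II = dyn_cast<IntrinsicInst>(Inst))
    return TTI.getIntImmCostIntrin(II->getIntrinsicID(), Idx, C->getValue(),
                                   C->getType());
  return TTI.getIntImmCostInst(Inst->getOpcode(), Idx, C->getValue(),
                               C->getType());
}
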
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index e9e6afe3fdd4..73bf1d521b1d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 2ef85268df48..3435bc7f5eaa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -194,7 +195,14 @@ static bool simplifyCommonValuePhi(PHINode *P, LazyValueInfo *LVI,
}
// All constant incoming values map to the same variable along the incoming
- // edges of the phi. The phi is unnecessary.
+ // edges of the phi. The phi is unnecessary. However, we must drop all
+ // poison-generating flags to ensure that no poison is propagated to the phi
+ // location by performing this substitution.
+ // Warning: If the underlying analysis changes, this may not be enough to
+ // guarantee that poison is not propagated.
+ // TODO: We may be able to re-infer flags by re-analyzing the instruction.
+ if (auto *CommonInst = dyn_cast<Instruction>(CommonValue))
+ CommonInst->dropPoisonGeneratingFlags();
P->replaceAllUsesWith(CommonValue);
P->eraseFromParent();
++NumPhiCommon;
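
Why the flags must be dropped, sketched with a hypothetical hazard; the helper below mirrors the hunk above and uses only calls that appear in it:

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Suppose the phi merges %x (= add nuw i8 %a, 1; poison when %a == 255) with a
// constant that the analysis proved equal to %x along its edge. The phi never
// produced poison on the constant edge, but a direct use of %x at the phi's
// location could, so poison-generating flags (nuw/nsw/exact/fast-math) are
// stripped before the substitution.
static void replacePhiWithCommonValue(PHINode *P, Value *CommonValue) {
  if (auto *CommonInst = dyn_cast<Instruction>(CommonValue))
    CommonInst->dropPoisonGeneratingFlags();
  P->replaceAllUsesWith(CommonValue);
  P->eraseFromParent();
}
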
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp
index a79d775aa7f3..a4b0c8df98f6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -19,12 +19,14 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "dce"
@@ -80,6 +82,43 @@ Pass *llvm::createDeadInstEliminationPass() {
return new DeadInstElimination();
}
+//===--------------------------------------------------------------------===//
+// RedundantDbgInstElimination pass implementation
+//
+
+namespace {
+struct RedundantDbgInstElimination : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ RedundantDbgInstElimination() : FunctionPass(ID) {
+ initializeRedundantDbgInstEliminationPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ bool Changed = false;
+ for (auto &BB : F)
+ Changed |= RemoveRedundantDbgInstrs(&BB);
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char RedundantDbgInstElimination::ID = 0;
+INITIALIZE_PASS(RedundantDbgInstElimination, "redundant-dbg-inst-elim",
+ "Redundant Dbg Instruction Elimination", false, false)
+
+Pass *llvm::createRedundantDbgInstEliminationPass() {
+ return new RedundantDbgInstElimination();
+}
+
+//===--------------------------------------------------------------------===//
+// DeadCodeElimination pass implementation
+//
+
static bool DCEInstruction(Instruction *I,
SmallSetVector<Instruction *, 16> &WorkList,
const TargetLibraryInfo *TLI) {
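
The INITIALIZE_PASS line registers the new pass under the name redundant-dbg-inst-elim, so it can be driven from opt as well as from C++. A hedged sketch of scheduling it with the legacy pass manager, assuming the factory is declared alongside the other DCE factories (e.g. in llvm/Transforms/Scalar.h):

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

static void addRedundantDbgElim(legacy::FunctionPassManager &FPM) {
  // Cheap CFG-preserving cleanup: per block, drop debug intrinsics that
  // repeat what an adjacent debug intrinsic already records.
  FPM.add(createRedundantDbgInstEliminationPass());
}
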
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 685de82810ed..1ba4aab999e1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -48,6 +49,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -99,6 +101,7 @@ static void
deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB,
+ MapVector<Instruction *, bool> &ThrowableInst,
SmallSetVector<const Value *, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
@@ -112,6 +115,10 @@ deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
// Before we touch this instruction, remove it from memdep!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
+ // Mark the DeadInst as dead in the list of throwable instructions.
+ auto It = ThrowableInst.find(DeadInst);
+ if (It != ThrowableInst.end())
+ ThrowableInst[It->first] = false;
++NumFastOther;
// Try to preserve debug information attached to the dead instruction.
@@ -144,6 +151,9 @@ deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
DeadInst->eraseFromParent();
} while (!NowDeadInsts.empty());
*BBI = NewIter;
+  // Pop dead entries off the back of ThrowableInst until a live entry is found.
+ while (!ThrowableInst.empty() && !ThrowableInst.back().second)
+ ThrowableInst.pop_back();
}
/// Does this instruction write some memory? This only returns true for things
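
MapVector keeps insertion order next to the lookup table, which is what makes the mark-dead-then-pop scheme above cheap: the most recent throwing instruction is always at the back. A minimal standalone model of the bookkeeping; the int keys stand in for Instruction pointers:

#include <cassert>

#include "llvm/ADT/MapVector.h"

int main() {
  llvm::MapVector<int, bool> ThrowableInst; // key: instruction id, value: alive?
  ThrowableInst[1] = true;
  ThrowableInst[2] = true;
  ThrowableInst[2] = false; // deleting an instruction marks its entry dead in place
  // Pop dead entries off the back so back() is the most recent live throwing
  // instruction (or the container is empty).
  while (!ThrowableInst.empty() && !ThrowableInst.back().second)
    ThrowableInst.pop_back();
  assert(!ThrowableInst.empty() && ThrowableInst.back().first == 1);
  return 0;
}
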
@@ -169,15 +179,18 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
}
if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
- StringRef FnName = F->getName();
- if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy))
- return true;
- if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy))
- return true;
- if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat))
- return true;
- if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat))
- return true;
+ LibFunc LF;
+ if (TLI.getLibFunc(*F, LF) && TLI.has(LF)) {
+ switch (LF) {
+ case LibFunc_strcpy:
+ case LibFunc_strncpy:
+ case LibFunc_strcat:
+ case LibFunc_strncat:
+ return true;
+ default:
+ return false;
+ }
+ }
}
}
return false;
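
The rewritten check trades four name-string comparisons for one TLI lookup; getLibFunc also validates the callee's prototype, which a raw name match does not. A sketch of the resulting predicate, using only calls visible in this hunk:

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;

static bool isAnalyzableStringWrite(const Function &F,
                                    const TargetLibraryInfo &TLI) {
  LibFunc LF;
  if (!TLI.getLibFunc(F, LF) || !TLI.has(LF))
    return false;
  switch (LF) {
  case LibFunc_strcpy:
  case LibFunc_strncpy:
  case LibFunc_strcat:
  case LibFunc_strncat:
    return true; // each writes through its first argument
  default:
    return false;
  }
}
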
@@ -656,7 +669,8 @@ static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
static bool handleFree(CallInst *F, AliasAnalysis *AA,
MemoryDependenceResults *MD, DominatorTree *DT,
const TargetLibraryInfo *TLI,
- InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB) {
+ InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB,
+ MapVector<Instruction *, bool> &ThrowableInst) {
bool MadeChange = false;
MemoryLocation Loc = MemoryLocation(F->getOperand(0));
@@ -690,7 +704,8 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
// DCE instructions only used to calculate that store.
BasicBlock::iterator BBI(Dependency);
- deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, OBB,
+ ThrowableInst);
++NumFastStores;
MadeChange = true;
@@ -747,8 +762,8 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
MemoryDependenceResults *MD,
const TargetLibraryInfo *TLI,
- InstOverlapIntervalsTy &IOL,
- OrderedBasicBlock &OBB) {
+ InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB,
+ MapVector<Instruction *, bool> &ThrowableInst) {
bool MadeChange = false;
// Keep track of all of the stack objects that are dead at the end of the
@@ -809,7 +824,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
<< '\n');
// DCE instructions only used to calculate that store.
- deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, OBB,
+ deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, OBB, ThrowableInst,
&DeadStackObjects);
++NumFastStores;
MadeChange = true;
@@ -821,7 +836,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
if (isInstructionTriviallyDead(&*BBI, TLI)) {
LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: "
<< *&*BBI << '\n');
- deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, OBB,
+ deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, OBB, ThrowableInst,
&DeadStackObjects);
++NumFastOther;
MadeChange = true;
@@ -1028,7 +1043,8 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
InstOverlapIntervalsTy &IOL,
- OrderedBasicBlock &OBB) {
+ OrderedBasicBlock &OBB,
+ MapVector<Instruction *, bool> &ThrowableInst) {
// Must be a store instruction.
StoreInst *SI = dyn_cast<StoreInst>(Inst);
if (!SI)
@@ -1044,7 +1060,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: "
<< *DepLoad << "\n STORE: " << *SI << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB, ThrowableInst);
++NumRedundantStores;
return true;
}
@@ -1062,7 +1078,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
<< *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB, ThrowableInst);
++NumRedundantStores;
return true;
}
@@ -1077,7 +1093,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
bool MadeChange = false;
OrderedBasicBlock OBB(&BB);
- Instruction *LastThrowing = nullptr;
+ MapVector<Instruction *, bool> ThrowableInst;
// A map of interval maps representing partially-overwritten value parts.
InstOverlapIntervalsTy IOL;
@@ -1086,7 +1102,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(&*BBI, TLI)) {
- MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, OBB);
+ MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, OBB, ThrowableInst);
// Increment BBI after handleFree has potentially deleted instructions.
// This ensures we maintain a valid iterator.
++BBI;
@@ -1096,7 +1112,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
Instruction *Inst = &*BBI++;
if (Inst->mayThrow()) {
- LastThrowing = Inst;
+ ThrowableInst[Inst] = true;
continue;
}
@@ -1105,7 +1121,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
continue;
// eliminateNoopStore will update in iterator, if necessary.
- if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, OBB)) {
+ if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, OBB,
+ ThrowableInst)) {
MadeChange = true;
continue;
}
@@ -1148,6 +1165,12 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
if (!DepLoc.Ptr)
break;
+    // Find the last throwable instruction not removed by a call to
+    // deleteDeadInstruction.
+ Instruction *LastThrowing = nullptr;
+ if (!ThrowableInst.empty())
+ LastThrowing = ThrowableInst.back().first;
+
// Make sure we don't look past a call which might throw. This is an
// issue because MemoryDependenceAnalysis works in the wrong direction:
// it finds instructions which dominate the current instruction, rather than
@@ -1187,7 +1210,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
<< "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB,
+ ThrowableInst);
++NumFastStores;
MadeChange = true;
@@ -1269,8 +1293,10 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
OBB.replaceInstruction(DepWrite, SI);
// Delete the old stores and now-dead instructions that feed them.
- deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, OBB);
- deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, OBB,
+ ThrowableInst);
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB,
+ ThrowableInst);
MadeChange = true;
// We erased DepWrite and Inst (Loc); start over.
@@ -1305,7 +1331,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
if (BB.getTerminator()->getNumSuccessors() == 0)
- MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, OBB);
+ MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, OBB, ThrowableInst);
return MadeChange;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
index 934853507478..132dfc8f6da1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index ce540683dae2..40c1ba88354f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -45,6 +44,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -55,6 +55,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <deque>
#include <memory>
@@ -906,8 +907,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
- if (!salvageDebugInfo(*Inst))
- replaceDbgUsesWithUndef(Inst);
+
+ salvageDebugInfoOrMarkUndef(*Inst);
removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
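The two-step sequence removed here (try salvageDebugInfo, fall back to replaceDbgUsesWithUndef) is folded into a single helper. A toy model of that fold, assuming the helper keeps the old fallback semantics; its actual definition lives outside this diff:

    #include <functional>
    #include <iostream>

    // Try the salvage path first; only degrade debug info to undef on failure.
    void salvageOrMarkUndef(const std::function<bool()> &Salvage,
                            const std::function<void()> &MarkUndef) {
      if (!Salvage())
        MarkUndef();
    }

    int main() {
      salvageOrMarkUndef([] { return false; },
                         [] { std::cout << "dbg uses -> undef\n"; });
      return 0;
    }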
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index e6abf1ceb026..72512430b366 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -13,6 +13,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
index 4d2eac0451df..af223cc837f2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#define DEBUG_TYPE "float2int"
#include "llvm/Transforms/Scalar/Float2Int.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
index 743353eaea22..1e6aab14e7b4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -64,6 +64,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -112,7 +113,7 @@ static cl::opt<uint32_t> MaxNumDeps(
struct llvm::GVN::Expression {
uint32_t opcode;
- Type *type;
+ Type *type = nullptr;
bool commutative = false;
SmallVector<uint32_t, 4> varargs;
@@ -173,7 +174,7 @@ struct llvm::gvn::AvailableValue {
PointerIntPair<Value *, 2, ValType> Val;
/// Offset - The byte offset in Val that is interesting for the load query.
- unsigned Offset;
+ unsigned Offset = 0;
static AvailableValue get(Value *V, unsigned Offset = 0) {
AvailableValue Res;
@@ -237,7 +238,7 @@ struct llvm::gvn::AvailableValue {
/// the associated BasicBlock.
struct llvm::gvn::AvailableValueInBlock {
/// BB - The basic block in question.
- BasicBlock *BB;
+ BasicBlock *BB = nullptr;
/// AV - The actual available value
AvailableValue AV;
@@ -364,6 +365,7 @@ GVN::ValueTable::ValueTable() = default;
GVN::ValueTable::ValueTable(const ValueTable &) = default;
GVN::ValueTable::ValueTable(ValueTable &&) = default;
GVN::ValueTable::~ValueTable() = default;
+GVN::ValueTable &GVN::ValueTable::operator=(const GVN::ValueTable &Arg) = default;
/// add - Insert a value into the table with a specified value number.
void GVN::ValueTable::add(Value *V, uint32_t num) {
@@ -1387,6 +1389,59 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
}
+static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
+ if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_EQ)
+ return true;
+
+ // Floating point comparisons can be equal, but not equivalent. Cases:
+ // NaNs for unordered operators
+  //  +0.0 vs -0.0 for all operators
+ if (Cmp->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
+ (Cmp->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
+ Cmp->getFastMathFlags().noNaNs())) {
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ // If we can prove either side non-zero, then equality must imply
+ // equivalence.
+ // FIXME: We should do this optimization if 'no signed zeros' is
+ // applicable via an instruction-level fast-math-flag or some other
+ // indicator that relaxed FP semantics are being used.
+ if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
+ return true;
+ if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
+      return true;
+ // TODO: Handle vector floating point constants
+ }
+ return false;
+}
+
+static bool impliesEquivalanceIfFalse(CmpInst* Cmp) {
+ if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_NE)
+ return true;
+
+  // Floating point comparisons can be equal, but not equivalent. Cases:
+  //  NaNs for unordered operators
+  //  +0.0 vs -0.0 for all operators
+ if ((Cmp->getPredicate() == CmpInst::Predicate::FCMP_ONE &&
+ Cmp->getFastMathFlags().noNaNs()) ||
+ Cmp->getPredicate() == CmpInst::Predicate::FCMP_UNE) {
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ // If we can prove either side non-zero, then equality must imply
+ // equivalence.
+ // FIXME: We should do this optimization if 'no signed zeros' is
+ // applicable via an instruction-level fast-math-flag or some other
+ // indicator that relaxed FP semantics are being used.
+ if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
+ return true;
+ if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
+      return true;
+ // TODO: Handle vector floating point constants
+ }
+ return false;
+}
+
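A concrete illustration of why these helpers refuse to treat floating-point equality as equivalence: +0.0 and -0.0 compare equal yet are distinguishable, and under UEQ ("unordered or equal") NaN compares true against itself without being a usable stand-in, which is why the noNaNs() fast-math flag is required above. Plain C++, independent of LLVM:

    #include <cmath>
    #include <cstdio>

    int main() {
      double PZ = 0.0, NZ = -0.0;
      // Equal under ==, but not interchangeable: dividing by them differs.
      std::printf("PZ == NZ: %d\n", PZ == NZ ? 1 : 0);
      std::printf("1/PZ = %g, 1/NZ = %g\n", 1.0 / PZ, 1.0 / NZ); // inf, -inf

      // UEQ holds for NaN vs NaN (the comparison is unordered)...
      double QNaN = std::nan("");
      bool UEQ = std::isunordered(QNaN, QNaN) || QNaN == QNaN;
      std::printf("NaN ueq NaN: %d\n", UEQ ? 1 : 0);
      // ...yet NaN == NaN is false, so equality-based substitution is unsound.
      return 0;
    }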
static bool hasUsersIn(Value *V, BasicBlock *BB) {
for (User *U : V->users())
if (isa<Instruction>(U) &&
@@ -1451,10 +1506,7 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
// call void @llvm.assume(i1 %cmp)
// ret float %load ; will change it to ret float %0
if (auto *CmpI = dyn_cast<CmpInst>(V)) {
- if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ ||
- CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
- (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
- CmpI->getFastMathFlags().noNaNs())) {
+ if (impliesEquivalanceIfTrue(CmpI)) {
Value *CmpLHS = CmpI->getOperand(0);
Value *CmpRHS = CmpI->getOperand(1);
// Heuristically pick the better replacement -- the choice of heuristic
@@ -1481,12 +1533,6 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS))
return Changed;
- // +0.0 and -0.0 compare equal, but do not imply equivalence. Unless we
- // can prove equivalence, bail.
- if (CmpRHS->getType()->isFloatTy() &&
- (!isa<ConstantFP>(CmpRHS) || cast<ConstantFP>(CmpRHS)->isZero()))
- return Changed;
-
LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
<< *CmpLHS << " with "
<< *CmpRHS << " in block "
@@ -1875,27 +1921,12 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
// If "A == B" is known true, or "A != B" is known false, then replace
- // A with B everywhere in the scope.
- if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
- (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE))
+ // A with B everywhere in the scope. For floating point operations, we
+  // have to be careful since equality does not always imply equivalence.
+ if ((isKnownTrue && impliesEquivalanceIfTrue(Cmp)) ||
+ (isKnownFalse && impliesEquivalanceIfFalse(Cmp)))
Worklist.push_back(std::make_pair(Op0, Op1));
- // Handle the floating point versions of equality comparisons too.
- if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) ||
- (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) {
-
- // Floating point -0.0 and 0.0 compare equal, so we can only
- // propagate values if we know that we have a constant and that
- // its value is non-zero.
-
- // FIXME: We should do this optimization if 'no signed zeros' is
- // applicable via an instruction-level fast-math-flag or some other
- // indicator that relaxed FP semantics are being used.
-
- if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero())
- Worklist.push_back(std::make_pair(Op0, Op1));
- }
-
// If "A >= B" is known true, replace "A < B" with false everywhere.
CmpInst::Predicate NotPred = Cmp->getInversePredicate();
Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index c87e41484b13..e1796f6bf05a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -47,7 +47,6 @@
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -65,6 +64,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -72,6 +72,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -956,7 +957,8 @@ private:
if (MoveAccess && NewMemAcc) {
// The definition of this ld/st will not change: ld/st hoisting is
// legal when the ld/st is not moved past its current definition.
- MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::End);
+ MSSAUpdater->moveToPlace(NewMemAcc, DestBB,
+ MemorySSA::BeforeTerminator);
}
// Replace all other instructions with Repl with memory access NewMemAcc.
@@ -1067,6 +1069,9 @@ private:
++NI;
}
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
NumHoisted += NL + NS + NC + NI;
NumRemoved += NR;
NumLoadsHoisted += NL;
@@ -1168,6 +1173,7 @@ public:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
}
};
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 054025755c69..6d0a4975e266 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -47,7 +47,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -59,6 +58,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
@@ -71,6 +71,7 @@
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 2697d7809568..a3eba27a4d90 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -39,7 +39,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/GuardWidening.h"
-#include <functional>
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -53,11 +52,15 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include <functional>
using namespace llvm;
@@ -66,22 +69,6 @@ using namespace llvm;
STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
-static cl::opt<bool> WidenFrequentBranches(
- "guard-widening-widen-frequent-branches", cl::Hidden,
- cl::desc("Widen conditions of explicit branches into dominating guards in "
- "case if their taken frequency exceeds threshold set by "
- "guard-widening-frequent-branch-threshold option"),
- cl::init(false));
-
-static cl::opt<unsigned> FrequentBranchThreshold(
- "guard-widening-frequent-branch-threshold", cl::Hidden,
- cl::desc("When WidenFrequentBranches is set to true, this option is used "
- "to determine which branches are frequently taken. The criteria "
- "that a branch is taken more often than "
- "((FrequentBranchThreshold - 1) / FrequentBranchThreshold), then "
- "it is considered frequently taken"),
- cl::init(1000));
-
static cl::opt<bool>
WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
cl::desc("Whether or not we should widen guards "
@@ -97,15 +84,16 @@ static Value *getCondition(Instruction *I) {
"Bad guard intrinsic?");
return GI->getArgOperand(0);
}
- if (isGuardAsWidenableBranch(I)) {
- auto *Cond = cast<BranchInst>(I)->getCondition();
- return cast<BinaryOperator>(Cond)->getOperand(0);
- }
+ Value *Cond, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB))
+ return Cond;
+
return cast<BranchInst>(I)->getCondition();
}
// Set the condition for \p I to \p NewCond. \p I can either be a guard or a
-// conditional branch.
+// conditional branch.
static void setCondition(Instruction *I, Value *NewCond) {
if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
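Rather than hand-peeling the `and` that feeds a widenable branch, the rewritten getCondition above asks parseWidenableBranch (declared in Transforms/Utils/GuardUtils.h) to decompose the br (and %cond, %widenable_cond) shape and returns %cond. A toy decomposition of that shape, with all names illustrative:

    #include <cstdio>

    // A widenable branch in miniature: taken iff (Cond && WC).
    struct WidenableBranch {
      bool Cond; // the guard's real condition
      bool WC;   // the widenable-condition placeholder
    };

    // What the raw branch evaluates vs. what getCondition now extracts.
    bool branchTaken(const WidenableBranch &B) { return B.Cond && B.WC; }
    bool guardCondition(const WidenableBranch &B) { return B.Cond; }

    int main() {
      WidenableBranch B{true, false};
      std::printf("branch taken: %d, guard condition: %d\n",
                  branchTaken(B) ? 1 : 0, guardCondition(B) ? 1 : 0);
      return 0;
    }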
@@ -126,7 +114,6 @@ class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
LoopInfo &LI;
- BranchProbabilityInfo *BPI;
/// Together, these describe the region of interest. This might be all of
/// the blocks within a function, or only a given loop's blocks and preheader.
@@ -271,26 +258,22 @@ class GuardWideningImpl {
void widenGuard(Instruction *ToWiden, Value *NewCondition,
bool InvertCondition) {
Value *Result;
+
widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
InvertCondition);
- Value *WidenableCondition = nullptr;
if (isGuardAsWidenableBranch(ToWiden)) {
- auto *Cond = cast<BranchInst>(ToWiden)->getCondition();
- WidenableCondition = cast<BinaryOperator>(Cond)->getOperand(1);
+ setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
+ return;
}
- if (WidenableCondition)
- Result = BinaryOperator::CreateAnd(Result, WidenableCondition,
- "guard.chk", ToWiden);
setCondition(ToWiden, Result);
}
public:
explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
- LoopInfo &LI, BranchProbabilityInfo *BPI,
- DomTreeNode *Root,
+ LoopInfo &LI, DomTreeNode *Root,
std::function<bool(BasicBlock*)> BlockFilter)
- : DT(DT), PDT(PDT), LI(LI), BPI(BPI), Root(Root), BlockFilter(BlockFilter)
+ : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter)
{}
/// The entry point for this pass.
@@ -309,13 +292,6 @@ static bool isSupportedGuardInstruction(const Instruction *Insn) {
bool GuardWideningImpl::run() {
DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
bool Changed = false;
- Optional<BranchProbability> LikelyTaken = None;
- if (WidenFrequentBranches && BPI) {
- unsigned Threshold = FrequentBranchThreshold;
- assert(Threshold > 0 && "Zero threshold makes no sense!");
- LikelyTaken = BranchProbability(Threshold - 1, Threshold);
- }
-
for (auto DFI = df_begin(Root), DFE = df_end(Root);
DFI != DFE; ++DFI) {
auto *BB = (*DFI)->getBlock();
@@ -330,17 +306,6 @@ bool GuardWideningImpl::run() {
for (auto *II : CurrentList)
Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
- if (WidenFrequentBranches && BPI)
- if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- if (BI->isConditional()) {
- // If one of branches of a conditional is likely taken, try to
- // eliminate it.
- if (BPI->getEdgeProbability(BB, 0U) >= *LikelyTaken)
- Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock);
- else if (BPI->getEdgeProbability(BB, 1U) >= *LikelyTaken)
- Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock,
- /*InvertCondition*/true);
- }
}
assert(EliminatedGuardsAndBranches.empty() || Changed);
@@ -805,10 +770,7 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- BranchProbabilityInfo *BPI = nullptr;
- if (WidenFrequentBranches)
- BPI = AM.getCachedResult<BranchProbabilityAnalysis>(F);
- if (!GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(),
+ if (!GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
[](BasicBlock*) { return true; } ).run())
return PreservedAnalyses::all();
@@ -820,22 +782,13 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
-
- const auto &FAM =
- AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
- Function &F = *L.getHeader()->getParent();
- BranchProbabilityInfo *BPI = nullptr;
- if (WidenFrequentBranches)
- BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(F);
-
BasicBlock *RootBB = L.getLoopPredecessor();
if (!RootBB)
RootBB = L.getHeader();
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L.contains(BB);
};
- if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, BPI,
- AR.DT.getNode(RootBB),
+ if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.DT.getNode(RootBB),
BlockFilter).run())
return PreservedAnalyses::all();
@@ -856,10 +809,7 @@ struct GuardWideningLegacyPass : public FunctionPass {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- BranchProbabilityInfo *BPI = nullptr;
- if (WidenFrequentBranches)
- BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- return GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(),
+ return GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
[](BasicBlock*) { return true; } ).run();
}
@@ -868,8 +818,6 @@ struct GuardWideningLegacyPass : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- if (WidenFrequentBranches)
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
}
};
@@ -895,16 +843,11 @@ struct LoopGuardWideningLegacyPass : public LoopPass {
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L->contains(BB);
};
- BranchProbabilityInfo *BPI = nullptr;
- if (WidenFrequentBranches)
- BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- return GuardWideningImpl(DT, PDT, LI, BPI,
+ return GuardWideningImpl(DT, PDT, LI,
DT.getNode(RootBB), BlockFilter).run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- if (WidenFrequentBranches)
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.setPreservesCFG();
getLoopAnalysisUsage(AU);
AU.addPreserved<PostDominatorTreeWrapperPass>();
@@ -920,8 +863,6 @@ INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-if (WidenFrequentBranches)
- INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
false, false)
@@ -931,8 +872,6 @@ INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening",
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-if (WidenFrequentBranches)
- INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening",
"Widen guards (within a single loop, as a loop pass)",
false, false)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 5519a00c12c9..d8d7acae5c9f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -31,8 +31,8 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -44,7 +44,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
@@ -68,6 +67,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -79,6 +79,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
@@ -125,10 +126,9 @@ DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
cl::desc("Disable Linear Function Test Replace optimization"));
static cl::opt<bool>
-LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false),
+LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));
-
namespace {
struct RewritePhi;
@@ -2663,11 +2663,11 @@ static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
// merge the max and exact information to approximate a version of
// getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
SmallVector<const SCEV*, 4> ExitCounts;
- const SCEV *MaxConstEC = SE.getConstantMaxBackedgeTakenCount(L);
- if (!isa<SCEVCouldNotCompute>(MaxConstEC))
- ExitCounts.push_back(MaxConstEC);
for (BasicBlock *ExitingBB : ExitingBlocks) {
const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ ExitCount = SE.getExitCount(L, ExitingBB,
+ ScalarEvolution::ConstantMaximum);
if (!isa<SCEVCouldNotCompute>(ExitCount)) {
assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
"We should only have known counts for exiting blocks that "
@@ -2835,6 +2835,10 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
!isSafeToExpand(ExactBTC, *SE))
return Changed;
+ // If we end up with a pointer exit count, bail. It may be unsized.
+ if (!ExactBTC->getType()->isIntegerTy())
+ return Changed;
+
auto BadExit = [&](BasicBlock *ExitingBB) {
// If our exiting block exits multiple loops, we can only rewrite the
// innermost one. Otherwise, we're changing how many times the innermost
@@ -2865,6 +2869,10 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
!isSafeToExpand(ExitCount, *SE))
return true;
+ // If we end up with a pointer exit count, bail. It may be unsized.
+ if (!ExitCount->getType()->isIntegerTy())
+ return true;
+
return false;
};
@@ -2936,7 +2944,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// varying check.
Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
IRBuilder<> B(L->getLoopPreheader()->getTerminator());
- Value *ExactBTCV = nullptr; //lazy generated if needed
+ Value *ExactBTCV = nullptr; // Lazily generated if needed.
for (BasicBlock *ExitingBB : ExitingBlocks) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
@@ -2991,15 +2999,15 @@ bool IndVarSimplify::run(Loop *L) {
if (!L->isLoopSimplifyForm())
return false;
- // If there are any floating-point recurrences, attempt to
- // transform them to use integer recurrences.
- Changed |= rewriteNonIntegerIVs(L);
-
#ifndef NDEBUG
// Used below for a consistency check only
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif
+ // If there are any floating-point recurrences, attempt to
+ // transform them to use integer recurrences.
+ Changed |= rewriteNonIntegerIVs(L);
+
// Create a rewriter object which we'll use to transform the code with.
SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
@@ -3026,11 +3034,19 @@ bool IndVarSimplify::run(Loop *L) {
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
// Try to eliminate loop exits based on analyzeable exit counts
- Changed |= optimizeLoopExits(L, Rewriter);
+ if (optimizeLoopExits(L, Rewriter)) {
+ Changed = true;
+ // Given we've changed exit counts, notify SCEV
+ SE->forgetLoop(L);
+ }
// Try to form loop invariant tests for loop exits by changing how many
// iterations of the loop run when that is unobservable.
- Changed |= predicateLoopExits(L, Rewriter);
+ if (predicateLoopExits(L, Rewriter)) {
+ Changed = true;
+ // Given we've changed exit counts, notify SCEV
+ SE->forgetLoop(L);
+ }
// If we have a trip count expression, rewrite the loop's exit condition
// using it.
@@ -3118,7 +3134,8 @@ bool IndVarSimplify::run(Loop *L) {
"Indvars did not preserve LCSSA!");
// Verify that LFTR, and any other change have not interfered with SCEV's
- // ability to compute trip count.
+ // ability to compute trip count. We may have *changed* the exit count, but
+ // only by reducing it.
#ifndef NDEBUG
if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
SE->forgetLoop(L);
@@ -3130,7 +3147,8 @@ bool IndVarSimplify::run(Loop *L) {
else
BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
NewBECount->getType());
- assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
+ assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
+ NewBECount) && "indvars must preserve SCEV");
}
#endif
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 997d68838152..58469749600e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -74,6 +74,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index e7e73a132fbe..dfb1b6bfb739 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -141,11 +141,11 @@ using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
/// InferAddressSpaces
class InferAddressSpaces : public FunctionPass {
- const TargetTransformInfo *TTI;
+ const TargetTransformInfo *TTI = nullptr;
/// Target specific address space which uses of should be replaced if
/// possible.
- unsigned FlatAddrSpace;
+ unsigned FlatAddrSpace = 0;
public:
static char ID;
@@ -791,8 +791,8 @@ static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);
if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
- B.CreateMemSet(NewV, MSI->getValue(),
- MSI->getLength(), MSI->getDestAlignment(),
+ B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(),
+ MaybeAlign(MSI->getDestAlignment()),
false, // isVolatile
TBAA, ScopeMD, NoAliasMD);
} else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
@@ -808,15 +808,13 @@ static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
if (isa<MemCpyInst>(MTI)) {
MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
- B.CreateMemCpy(Dest, MTI->getDestAlignment(),
- Src, MTI->getSourceAlignment(),
+ B.CreateMemCpy(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
MTI->getLength(),
false, // isVolatile
TBAA, TBAAStruct, ScopeMD, NoAliasMD);
} else {
assert(isa<MemMoveInst>(MTI));
- B.CreateMemMove(Dest, MTI->getDestAlignment(),
- Src, MTI->getSourceAlignment(),
+ B.CreateMemMove(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
MTI->getLength(),
false, // isVolatile
TBAA, ScopeMD, NoAliasMD);
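These builder calls now take MaybeAlign/Align wrappers instead of raw unsigneds, making "alignment unknown" (formerly a raw 0) an explicit empty state rather than an in-band magic value. A minimal analogue using std::optional (llvm::MaybeAlign treats 0 this way; the helper name here is made up):

    #include <cstdio>
    #include <optional>

    using MaybeAlignToy = std::optional<unsigned>;

    // Raw 0 historically meant "no alignment known"; make that explicit.
    MaybeAlignToy maybeAlignFromRaw(unsigned Raw) {
      return Raw == 0 ? MaybeAlignToy{} : MaybeAlignToy{Raw};
    }

    int main() {
      for (unsigned Raw : {0u, 16u}) {
        MaybeAlignToy MA = maybeAlignFromRaw(Raw);
        if (MA)
          std::printf("raw %u -> align %u\n", Raw, *MA);
        else
          std::printf("raw %u -> unknown alignment\n", Raw);
      }
      return 0;
    }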
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
index ec28f790f252..e8bbf2936da6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Local.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 0cf00baaa24a..98c2fcb3dae0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -55,6 +55,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
@@ -305,14 +306,13 @@ bool JumpThreading::runOnFunction(Function &F) {
DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
- bool HasProfileData = F.hasProfileData();
- if (HasProfileData) {
+ if (F.hasProfileData()) {
LoopInfo LI{DominatorTree(F)};
BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, HasProfileData,
+ bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
@@ -339,7 +339,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, HasProfileData,
+ bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
if (!Changed)
@@ -1002,49 +1002,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
- if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
- const Instruction *TI = SinglePred->getTerminator();
- if (!TI->isExceptionalTerminator() && TI->getNumSuccessors() == 1 &&
- SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
- // If SinglePred was a loop header, BB becomes one.
- if (LoopHeaders.erase(SinglePred))
- LoopHeaders.insert(BB);
-
- LVI->eraseBlock(SinglePred);
- MergeBasicBlockIntoOnlyPred(BB, DTU);
-
- // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by
- // BB code within one basic block `BB`), we need to invalidate the LVI
- // information associated with BB, because the LVI information need not be
- // true for all of BB after the merge. For example,
- // Before the merge, LVI info and code is as follows:
- // SinglePred: <LVI info1 for %p val>
- // %y = use of %p
- // call @exit() // need not transfer execution to successor.
- // assume(%p) // from this point on %p is true
- // br label %BB
- // BB: <LVI info2 for %p val, i.e. %p is true>
- // %x = use of %p
- // br label exit
- //
- // Note that this LVI info for blocks BB and SinglPred is correct for %p
- // (info2 and info1 respectively). After the merge and the deletion of the
- // LVI info1 for SinglePred. We have the following code:
- // BB: <LVI info2 for %p val>
- // %y = use of %p
- // call @exit()
- // assume(%p)
- // %x = use of %p <-- LVI info2 is correct from here onwards.
- // br label exit
- // LVI info2 for BB is incorrect at the beginning of BB.
-
- // Invalidate LVI information for BB if the LVI is not provably true for
- // all of BB.
- if (!isGuaranteedToTransferExecutionToSuccessor(BB))
- LVI->eraseBlock(BB);
- return true;
- }
- }
+ if (MaybeMergeBasicBlockIntoOnlyPred(BB))
+ return true;
if (TryToUnfoldSelectInCurrBB(BB))
return true;
@@ -1758,7 +1717,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
getSuccessor(GetBestDestForJumpOnUndef(BB));
// Ok, try to thread it!
- return ThreadEdge(BB, PredsToFactor, MostPopularDest);
+ return TryThreadEdge(BB, PredsToFactor, MostPopularDest);
}
/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
@@ -1920,12 +1879,146 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
}
}
-/// ThreadEdge - We have decided that it is safe and profitable to factor the
-/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
-/// across BB. Transform the IR to reflect this change.
-bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock *> &PredBBs,
- BasicBlock *SuccBB) {
+/// Merge basic block BB into its sole predecessor if possible.
+bool JumpThreadingPass::MaybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
+ BasicBlock *SinglePred = BB->getSinglePredecessor();
+ if (!SinglePred)
+ return false;
+
+ const Instruction *TI = SinglePred->getTerminator();
+ if (TI->isExceptionalTerminator() || TI->getNumSuccessors() != 1 ||
+ SinglePred == BB || hasAddressTakenAndUsed(BB))
+ return false;
+
+ // If SinglePred was a loop header, BB becomes one.
+ if (LoopHeaders.erase(SinglePred))
+ LoopHeaders.insert(BB);
+
+ LVI->eraseBlock(SinglePred);
+ MergeBasicBlockIntoOnlyPred(BB, DTU);
+
+ // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
+ // BB code within one basic block `BB`), we need to invalidate the LVI
+ // information associated with BB, because the LVI information need not be
+ // true for all of BB after the merge. For example,
+ // Before the merge, LVI info and code is as follows:
+ // SinglePred: <LVI info1 for %p val>
+ // %y = use of %p
+ // call @exit() // need not transfer execution to successor.
+ // assume(%p) // from this point on %p is true
+ // br label %BB
+ // BB: <LVI info2 for %p val, i.e. %p is true>
+ // %x = use of %p
+ // br label exit
+ //
+  // Note that this LVI info for blocks BB and SinglePred is correct for %p
+  // (info2 and info1 respectively). After the merge and the deletion of the
+  // LVI info1 for SinglePred, we have the following code:
+ // BB: <LVI info2 for %p val>
+ // %y = use of %p
+ // call @exit()
+ // assume(%p)
+ // %x = use of %p <-- LVI info2 is correct from here onwards.
+ // br label exit
+ // LVI info2 for BB is incorrect at the beginning of BB.
+
+ // Invalidate LVI information for BB if the LVI is not provably true for
+ // all of BB.
+ if (!isGuaranteedToTransferExecutionToSuccessor(BB))
+ LVI->eraseBlock(BB);
+ return true;
+}
+
+/// Update the SSA form. NewBB contains instructions that are copied from BB.
+/// ValueMapping maps old values in BB to new ones in NewBB.
+void JumpThreadingPass::UpdateSSA(
+ BasicBlock *BB, BasicBlock *NewBB,
+ DenseMap<Instruction *, Value *> &ValueMapping) {
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+  // the cloned value, or some PHI derived value. This can require arbitrary
+  // PHI insertion, which we are prepared to do; clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use *, 16> UsesToRename;
+
+ for (Instruction &I : *BB) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Use &U : I.uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(U) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&U);
+ }
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+ LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+ // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
+ // with the two values we know.
+ SSAUpdate.Initialize(I.getType(), I.getName());
+ SSAUpdate.AddAvailableValue(BB, &I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ LLVM_DEBUG(dbgs() << "\n");
+ }
+}
+
+/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
+/// arguments that come from PredBB. Return the map from the variables in the
+/// source basic block to the variables in the newly created basic block.
+DenseMap<Instruction *, Value *>
+JumpThreadingPass::CloneInstructions(BasicBlock::iterator BI,
+ BasicBlock::iterator BE, BasicBlock *NewBB,
+ BasicBlock *PredBB) {
+ // We are going to have to map operands from the source basic block to the new
+ // copy of the block 'NewBB'. If there are PHI nodes in the source basic
+ // block, evaluate them to account for entry from PredBB.
+ DenseMap<Instruction *, Value *> ValueMapping;
+
+ // Clone the phi nodes of the source basic block into NewBB. The resulting
+ // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
+ // might need to rewrite the operand of the cloned phi.
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
+ NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
+ ValueMapping[PN] = NewPN;
+ }
+
+ // Clone the non-phi instructions of the source basic block into NewBB,
+ // keeping track of the mapping and using it to remap operands in the cloned
+ // instructions.
+ for (; BI != BE; ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ NewBB->getInstList().push_back(New);
+ ValueMapping[&*BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ DenseMap<Instruction *, Value *>::iterator I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+ }
+
+ return ValueMapping;
+}
+
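The factored-out clone loop works the same way in miniature: as each instruction is copied, the old-to-new map grows, and any operand that already has a clone is redirected through the map (defs precede uses inside a block, so a single forward pass suffices). A self-contained sketch with integer ids standing in for Instruction pointers:

    #include <cstdio>
    #include <map>
    #include <utility>
    #include <vector>

    int main() {
      // id -> operand ids, in program order (defs precede uses).
      std::vector<std::pair<int, std::vector<int>>> Block = {
          {1, {}}, {2, {1}}, {3, {1, 2}}};

      std::map<int, int> ValueMapping; // old id -> cloned id
      int NextId = 101;
      for (const auto &Inst : Block) {
        std::vector<int> NewOps = Inst.second;
        // Redirect operands whose defs were already cloned into the new block.
        for (int &Op : NewOps) {
          auto It = ValueMapping.find(Op);
          if (It != ValueMapping.end())
            Op = It->second;
        }
        int NewId = NextId++;
        ValueMapping[Inst.first] = NewId;
        std::printf("cloned %d -> %d\n", Inst.first, NewId);
      }
      return 0;
    }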
+/// TryThreadEdge - Thread an edge if it's safe and profitable to do so.
+bool JumpThreadingPass::TryThreadEdge(
+ BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
+ BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
@@ -1955,6 +2048,21 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
return false;
}
+ ThreadEdge(BB, PredBBs, SuccBB);
+ return true;
+}
+
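This is the familiar try/do split: TryThreadEdge keeps the bail-out checks and reports success, while ThreadEdge becomes an unconditional worker that asserts the preconditions its caller just established. The shape in isolation (names illustrative):

    #include <cassert>
    #include <cstdio>

    // Unconditional worker: callers must have vetted the edge already.
    void doThreadEdge(int BB, int SuccBB) {
      assert(BB != SuccBB && "Don't create an infinite loop");
      std::printf("threading %d -> %d\n", BB, SuccBB);
    }

    // Checking wrapper: legality and profitability tests live here.
    bool tryThreadEdge(int BB, int SuccBB) {
      if (BB == SuccBB)
        return false; // threading to ourselves would spin forever
      doThreadEdge(BB, SuccBB);
      return true;
    }

    int main() { return tryThreadEdge(1, 2) ? 0 : 1; }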
+/// ThreadEdge - We have decided that it is safe and profitable to factor the
+/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
+/// across BB. Transform the IR to reflect this change.
+void JumpThreadingPass::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs,
+ BasicBlock *SuccBB) {
+ assert(SuccBB != BB && "Don't create an infinite loop");
+
+ assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
+ "Don't thread across loop headers");
+
// And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
@@ -1968,7 +2076,6 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// And finally, do it!
LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
<< "' to '" << SuccBB->getName()
- << "' with cost: " << JumpThreadCost
<< ", across block:\n " << *BB << "\n");
if (DTU->hasPendingDomTreeUpdates())
@@ -1977,11 +2084,6 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
LVI->enableDT();
LVI->threadEdge(PredBB, BB, SuccBB);
- // We are going to have to map operands from the original BB block to the new
- // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
- // account for entry from PredBB.
- DenseMap<Instruction*, Value*> ValueMapping;
-
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
BB->getName()+".thread",
BB->getParent(), BB);
@@ -1994,32 +2096,9 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
}
- BasicBlock::iterator BI = BB->begin();
- // Clone the phi nodes of BB into NewBB. The resulting phi nodes are trivial,
- // since NewBB only has one predecessor, but SSAUpdater might need to rewrite
- // the operand of the cloned phi.
- for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
- PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
- NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
- ValueMapping[PN] = NewPN;
- }
-
- // Clone the non-phi instructions of BB into NewBB, keeping track of the
- // mapping and using it to remap operands in the cloned instructions.
- for (; !BI->isTerminator(); ++BI) {
- Instruction *New = BI->clone();
- New->setName(BI->getName());
- NewBB->getInstList().push_back(New);
- ValueMapping[&*BI] = New;
-
- // Remap operands to patch up intra-block references.
- for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
- if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
- DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
- if (I != ValueMapping.end())
- New->setOperand(i, I->second);
- }
- }
+ // Copy all the instructions from BB to NewBB except the terminator.
+ DenseMap<Instruction *, Value *> ValueMapping =
+ CloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
@@ -2045,44 +2124,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
{DominatorTree::Insert, PredBB, NewBB},
{DominatorTree::Delete, PredBB, BB}});
- // If there were values defined in BB that are used outside the block, then we
- // now have to update all uses of the value to use either the original value,
- // the cloned value, or some PHI derived value. This can require arbitrary
- // PHI insertion, of which we are prepared to do, clean these up now.
- SSAUpdater SSAUpdate;
- SmallVector<Use*, 16> UsesToRename;
-
- for (Instruction &I : *BB) {
- // Scan all uses of this instruction to see if their uses are no longer
- // dominated by the previous def and if so, record them in UsesToRename.
- // Also, skip phi operands from PredBB - we'll remove them anyway.
- for (Use &U : I.uses()) {
- Instruction *User = cast<Instruction>(U.getUser());
- if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (UserPN->getIncomingBlock(U) == BB)
- continue;
- } else if (User->getParent() == BB)
- continue;
-
- UsesToRename.push_back(&U);
- }
-
- // If there are no uses outside the block, we're done with this instruction.
- if (UsesToRename.empty())
- continue;
- LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
-
- // We found a use of I outside of BB. Rename all uses of I that are outside
- // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
- // with the two values we know.
- SSAUpdate.Initialize(I.getType(), I.getName());
- SSAUpdate.AddAvailableValue(BB, &I);
- SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
-
- while (!UsesToRename.empty())
- SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- LLVM_DEBUG(dbgs() << "\n");
- }
+ UpdateSSA(BB, NewBB, ValueMapping);
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
@@ -2094,7 +2136,6 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// Threaded an edge!
++NumThreads;
- return true;
}
/// Create a new basic block that will be the predecessor of BB and successor of
@@ -2366,43 +2407,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
ValueMapping);
- // If there were values defined in BB that are used outside the block, then we
- // now have to update all uses of the value to use either the original value,
- // the cloned value, or some PHI derived value. This can require arbitrary
- // PHI insertion, of which we are prepared to do, clean these up now.
- SSAUpdater SSAUpdate;
- SmallVector<Use*, 16> UsesToRename;
- for (Instruction &I : *BB) {
- // Scan all uses of this instruction to see if it is used outside of its
- // block, and if so, record them in UsesToRename.
- for (Use &U : I.uses()) {
- Instruction *User = cast<Instruction>(U.getUser());
- if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (UserPN->getIncomingBlock(U) == BB)
- continue;
- } else if (User->getParent() == BB)
- continue;
-
- UsesToRename.push_back(&U);
- }
-
- // If there are no uses outside the block, we're done with this instruction.
- if (UsesToRename.empty())
- continue;
-
- LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
-
- // We found a use of I outside of BB. Rename all uses of I that are outside
- // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
- // with the two values we know.
- SSAUpdate.Initialize(I.getType(), I.getName());
- SSAUpdate.AddAvailableValue(BB, &I);
- SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]);
-
- while (!UsesToRename.empty())
- SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- LLVM_DEBUG(dbgs() << "\n");
- }
+ UpdateSSA(BB, PredBB, ValueMapping);
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
index 6ce4831a7359..8c33045c2380 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -63,6 +63,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -137,7 +138,8 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
TargetTransformInfo *TTI, bool &FreeInLoop);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
+ MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
@@ -162,7 +164,7 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU);
+ MemorySSAUpdater *MSSAU, ScalarEvolution *SE);
namespace {
struct LoopInvariantCodeMotion {
@@ -390,8 +392,9 @@ bool LoopInvariantCodeMotion::runOnLoop(
CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
Flags.IsSink = false;
if (Preheader)
- Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
+ Changed |=
+ hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
+ CurAST.get(), MSSAU.get(), SE, &SafetyInfo, Flags, ORE);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -787,6 +790,41 @@ public:
};
} // namespace
+
+/// Return true if we know how to rewrite all uses of the given alloca after
+/// hoisting it out of the loop. The main concerns are a) potential captures
+/// and b) invariant.start markers which don't capture, but are no longer
+/// valid w/o a corresponding invariant.end.
+static bool canRewriteUsesOfAlloca(AllocaInst &AI) {
+ // TODO: This looks a lot like capture tracking, but we need to remove any
+ // invariant starts if we extend the lifetime of the alloca by hoisting it.
+ // We should probably refactor capture tracking into a form which allows us
+ // to reuse the relevant bits and remove the duplicated logic here.
+
+ SmallVector<Use *, 16> Worklist;
+ for (Use &U : AI.uses())
+ Worklist.push_back(&U);
+
+ unsigned NumUsesExplored = 0;
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+ NumUsesExplored++;
+ if (NumUsesExplored > DefaultMaxUsesToExplore)
+ return false;
+ // Non-capturing, terminating uses
+ if (isa<LoadInst>(I) ||
+ (isa<StoreInst>(I) && U->getOperandNo() == 1))
+ continue;
+ // Non-capturing, non-terminating uses
+ if (!isa<BitCastInst>(I) && !isa<GetElementPtrInst>(I))
+ return false;
+ for (Use &U : I->uses())
+ Worklist.push_back(&U);
+ }
+ return true;
+}
+
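One subtlety in the predicate above: a store *to* the alloca is a terminating, non-capturing use, while a store *of* the alloca's address captures it, and the two are told apart purely by the use's operand number. An illustrative fragment, reusing I and U from the worklist loop (the IR lines are examples, not part of the patch):

// For a StoreInst, operand 0 is the stored value and operand 1 is the
// destination address:
//
//   store i32 %v, i32* %a    ; %a is operand 1: writes into the alloca,
//                            ;   does not capture it
//   store i32* %a, i32** %p  ; %a is operand 0: its address escapes,
//                            ;   so canRewriteUsesOfAlloca must bail out
bool IsStoreToAlloca = isa<StoreInst>(I) && U->getOperandNo() == 1;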
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
@@ -795,7 +833,7 @@ public:
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
- ICFLoopSafetyInfo *SafetyInfo,
+ ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
@@ -855,7 +893,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
I, DT, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
- MSSAU, ORE);
+ MSSAU, SE, ORE);
HoistedInstructions.push_back(&I);
Changed = true;
continue;
@@ -882,7 +920,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
- SafetyInfo, MSSAU, ORE);
+ SafetyInfo, MSSAU, SE, ORE);
HoistedInstructions.push_back(ReciprocalDivisor);
Changed = true;
continue;
@@ -901,7 +939,17 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
CurLoop->hasLoopInvariantOperands(&I) &&
MustExecuteWithoutWritesBefore(I)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
- MSSAU, ORE);
+ MSSAU, SE, ORE);
+ HoistedInstructions.push_back(&I);
+ Changed = true;
+ continue;
+ }
+
+ if (isa<AllocaInst>(&I) &&
+ SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
+ canRewriteUsesOfAlloca(cast<AllocaInst>(I))) {
+ hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+ MSSAU, SE, ORE);
HoistedInstructions.push_back(&I);
Changed = true;
continue;
@@ -915,7 +963,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
PN->setIncomingBlock(
i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
- MSSAU, ORE);
+ MSSAU, SE, ORE);
assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
Changed = true;
continue;
@@ -952,7 +1000,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
LLVM_DEBUG(dbgs() << "LICM rehoisting to "
<< HoistPoint->getParent()->getName()
<< ": " << *I << "\n");
- moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
+ moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
HoistPoint = I;
Changed = true;
}
@@ -1142,6 +1190,10 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// Assumes don't actually alias anything or throw
return true;
+ if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
+ // Widenable conditions don't actually alias anything or throw
+ return true;
+
// Handle simple cases by querying alias analysis.
FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == FMRB_DoesNotAccessMemory)
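The added early-out recognizes llvm.experimental.widenable.condition via PatternMatch; like assumes, such calls read no memory and cannot throw, so alias analysis need not be consulted. A minimal sketch of the matching idiom (the helper name is hypothetical):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;

// True for calls to llvm.experimental.widenable.condition.
static bool isWidenableCondition(CallInst *CI) {
  using namespace llvm::PatternMatch;
  return match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>());
}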
@@ -1441,14 +1493,18 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU,
+ ScalarEvolution *SE) {
SafetyInfo.removeInstruction(&I);
SafetyInfo.insertInstructionTo(&I, Dest.getParent());
I.moveBefore(&Dest);
if (MSSAU)
if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
MSSAU->getMemorySSA()->getMemoryAccess(&I)))
- MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
+ MSSAU->moveToPlace(OldMemAcc, Dest.getParent(),
+ MemorySSA::BeforeTerminator);
+ if (SE)
+ SE->forgetValue(&I);
}
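Two details in this hunk are easy to miss: the MemorySSA insertion point changes from End to BeforeTerminator so the moved access cannot land after the block's terminator, and ScalarEvolution is told to forget the moved instruction, since SCEV may have cached facts (such as nowrap flags) that were only justified at the old program point. A minimal sketch of the invalidation pattern (the helper name is illustrative):

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Relocating an instruction across blocks can invalidate position-derived
// SCEV facts, so drop the cached expression for the moved value.
static void moveAndInvalidate(Instruction &I, Instruction &Dest,
                              ScalarEvolution *SE) {
  I.moveBefore(&Dest);
  if (SE)
    SE->forgetValue(&I);
}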
static Instruction *sinkThroughTriviallyReplaceablePHI(
@@ -1662,7 +1718,8 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
///
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
+ MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
<< "\n");
ORE->emit([&]() {
@@ -1683,10 +1740,10 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
if (isa<PHINode>(I))
// Move the new node to the end of the phi list in the destination block.
- moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
+ moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
else
// Move the new node to the destination block, before its terminator.
- moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
+ moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
// Apply line 0 debug locations when we are moving instructions to different
// basic blocks because we want to avoid jumpy line tables.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index a972d6fa2fcd..ab65f56d088f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
+#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "loop-data-prefetch"
#include "llvm/ADT/DepthFirstIterator.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index cee197cf8354..2451572d6171 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index f45e5fd0f50b..8e04e6e0ffe8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -55,6 +55,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 9f93c68e6128..e1738f08eb23 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -55,12 +55,15 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CodeMoverUtils.h"
using namespace llvm;
@@ -88,6 +91,7 @@ STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
STATISTIC(NonIdenticalGuards, "Candidates have different guards");
STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block");
STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block");
+STATISTIC(NotRotated, "Candidate is not rotated");
enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
@@ -163,14 +167,8 @@ struct FusionCandidate {
const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE)
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
- Latch(L->getLoopLatch()), L(L), Valid(true), GuardBranch(nullptr),
- DT(DT), PDT(PDT), ORE(ORE) {
-
- // TODO: This is temporary while we fuse both rotated and non-rotated
- // loops. Once we switch to only fusing rotated loops, the initialization of
- // GuardBranch can be moved into the initialization list above.
- if (isRotated())
- GuardBranch = L->getLoopGuardBranch();
+ Latch(L->getLoopLatch()), L(L), Valid(true),
+ GuardBranch(L->getLoopGuardBranch()), DT(DT), PDT(PDT), ORE(ORE) {
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
@@ -257,15 +255,14 @@ struct FusionCandidate {
: GuardBranch->getSuccessor(0);
}
- bool isRotated() const {
- assert(L && "Expecting loop to be valid.");
- assert(Latch && "Expecting latch to be valid.");
- return L->isLoopExiting(Latch);
- }
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump() const {
- dbgs() << "\tGuardBranch: "
+ dbgs() << "\tGuardBranch: ";
+ if (GuardBranch)
+ dbgs() << *GuardBranch;
+ else
+ dbgs() << "nullptr";
+ dbgs() << "\n"
<< (GuardBranch ? GuardBranch->getName() : "nullptr") << "\n"
<< "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
<< "\n"
@@ -316,6 +313,11 @@ struct FusionCandidate {
return reportInvalidCandidate(NotSimplifiedForm);
}
+ if (!L->isRotatedForm()) {
+ LLVM_DEBUG(dbgs() << "Loop " << L->getName() << " is not rotated!\n");
+ return reportInvalidCandidate(NotRotated);
+ }
+
return true;
}
@@ -591,16 +593,8 @@ private:
const FusionCandidate &FC1) const {
assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
- BasicBlock *FC0EntryBlock = FC0.getEntryBlock();
- BasicBlock *FC1EntryBlock = FC1.getEntryBlock();
-
- if (DT.dominates(FC0EntryBlock, FC1EntryBlock))
- return PDT.dominates(FC1EntryBlock, FC0EntryBlock);
-
- if (DT.dominates(FC1EntryBlock, FC0EntryBlock))
- return PDT.dominates(FC0EntryBlock, FC1EntryBlock);
-
- return false;
+ return ::isControlFlowEquivalent(*FC0.getEntryBlock(), *FC1.getEntryBlock(),
+ DT, PDT);
}
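The removed lines spell out the criterion that the new CodeMoverUtils helper is expected to implement: two blocks are control-flow equivalent when whenever one executes, the other does too, approximated as a symmetric dominance/post-dominance test. A sketch of that test, under the assumption the utility behaves the same way:

#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// A and B are control-flow equivalent if one dominates the other and is
// in turn post-dominated by it.
static bool controlFlowEquivalent(const BasicBlock &A, const BasicBlock &B,
                                  const DominatorTree &DT,
                                  const PostDominatorTree &PDT) {
  if (&A == &B)
    return true;
  return (DT.dominates(&A, &B) && PDT.dominates(&B, &A)) ||
         (DT.dominates(&B, &A) && PDT.dominates(&A, &B));
}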
/// Iterate over all loops in the given loop set and identify the loops that
@@ -1113,6 +1107,29 @@ private:
return FC.ExitBlock->size() == 1;
}
+ /// Simplify the condition of the latch branch of \p FC to true, when both of
+ /// its successors are the same.
+ void simplifyLatchBranch(const FusionCandidate &FC) const {
+ BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator());
+ if (FCLatchBranch) {
+ assert(FCLatchBranch->isConditional() &&
+ FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) &&
+ "Expecting the two successors of FCLatchBranch to be the same");
+ FCLatchBranch->setCondition(
+ llvm::ConstantInt::getTrue(FCLatchBranch->getCondition()->getType()));
+ }
+ }
+
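After fusion rewires the latches, the latch's conditional branch has both arms pointing at the same header, so its condition is dead; simplifyLatchBranch folds it to true so later cleanup can collapse it into an unconditional branch. Roughly, the before/after shape (Branch stands for the latch's BranchInst; the IR lines are illustrative):

// Before: br i1 %cmp, label %fused.header, label %fused.header
// After:  br i1 true, label %fused.header, label %fused.header
Branch->setCondition(
    ConstantInt::getTrue(Branch->getCondition()->getType()));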
+ /// Move instructions from FC0.Latch to FC1.Latch. If FC0.Latch has a unique
+ /// successor, then merge FC0.Latch with its unique successor.
+ void mergeLatch(const FusionCandidate &FC0, const FusionCandidate &FC1) {
+ moveInstsBottomUp(*FC0.Latch, *FC1.Latch, DT, PDT, DI);
+ if (BasicBlock *Succ = FC0.Latch->getUniqueSuccessor()) {
+ MergeBlockIntoPredecessor(Succ, &DTU, &LI);
+ DTU.flush();
+ }
+ }
+
/// Fuse two fusion candidates, creating a new fused loop.
///
/// This method contains the mechanics of fusing two loops, represented by \p
@@ -1246,6 +1263,10 @@ private:
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
+ // Change the condition of FC0 latch branch to true, as both successors of
+ // the branch are the same.
+ simplifyLatchBranch(FC0);
+
// If FC0.Latch and FC0.ExitingBlock are the same then we have already
// performed the updates above.
if (FC0.Latch != FC0.ExitingBlock)
@@ -1268,9 +1289,15 @@ private:
// Is there a way to keep SE up-to-date so we don't need to forget the loops
// and rebuild the information in subsequent passes of fusion?
+ // Note: Need to forget the loops before merging the loop latches, as
+ // mergeLatch may remove the only block in FC1.
SE.forgetLoop(FC1.L);
SE.forgetLoop(FC0.L);
+ // Move instructions from FC0.Latch to FC1.Latch.
+ // Note: mergeLatch requires an updated DT.
+ mergeLatch(FC0, FC1);
+
// Merge the loops.
SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
FC1.L->block_end());
@@ -1490,6 +1517,10 @@ private:
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
+ // Change the condition of FC0 latch branch to true, as both successors of
+ // the branch are the same.
+ simplifyLatchBranch(FC0);
+
// If FC0.Latch and FC0.ExitingBlock are the same then we have already
// performed the updates above.
if (FC0.Latch != FC0.ExitingBlock)
@@ -1521,9 +1552,15 @@ private:
// Is there a way to keep SE up-to-date so we don't need to forget the loops
// and rebuild the information in subsequent passes of fusion?
+ // Note: Need to forget the loops before merging the loop latches, as
+ // mergeLatch may remove the only block in FC1.
SE.forgetLoop(FC1.L);
SE.forgetLoop(FC0.L);
+ // Move instructions from FC0.Latch to FC1.Latch.
+ // Note: mergeLatch requires an updated DT.
+ mergeLatch(FC0, FC1);
+
// Merge the loops.
SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
FC1.L->block_end());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index dd477e800693..b77843d7cd71 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -41,7 +41,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -78,20 +77,17 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -107,7 +103,6 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
-STATISTIC(NumBCmp, "Number of memcmp's formed from loop 2xload+eq-compare");
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
@@ -117,26 +112,6 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
namespace {
-// FIXME: reinventing the wheel much? Is there a cleaner solution?
-struct PMAbstraction {
- virtual void markLoopAsDeleted(Loop *L) = 0;
- virtual ~PMAbstraction() = default;
-};
-struct LegacyPMAbstraction : PMAbstraction {
- LPPassManager &LPM;
- LegacyPMAbstraction(LPPassManager &LPM) : LPM(LPM) {}
- virtual ~LegacyPMAbstraction() = default;
- void markLoopAsDeleted(Loop *L) override { LPM.markLoopAsDeleted(*L); }
-};
-struct NewPMAbstraction : PMAbstraction {
- LPMUpdater &Updater;
- NewPMAbstraction(LPMUpdater &Updater) : Updater(Updater) {}
- virtual ~NewPMAbstraction() = default;
- void markLoopAsDeleted(Loop *L) override {
- Updater.markLoopAsDeleted(*L, L->getName());
- }
-};
-
class LoopIdiomRecognize {
Loop *CurLoop = nullptr;
AliasAnalysis *AA;
@@ -146,7 +121,6 @@ class LoopIdiomRecognize {
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
const DataLayout *DL;
- PMAbstraction &LoopDeleter;
OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;
@@ -155,10 +129,9 @@ public:
LoopInfo *LI, ScalarEvolution *SE,
TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const DataLayout *DL, PMAbstraction &LoopDeleter,
+ const DataLayout *DL,
OptimizationRemarkEmitter &ORE)
- : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL),
- LoopDeleter(LoopDeleter), ORE(ORE) {}
+ : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
bool runOnLoop(Loop *L);
@@ -172,8 +145,6 @@ private:
bool HasMemset;
bool HasMemsetPattern;
bool HasMemcpy;
- bool HasMemCmp;
- bool HasBCmp;
/// Return code for isLegalStore()
enum LegalStoreKind {
@@ -201,7 +172,7 @@ private:
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment, Value *StoredVal,
+ MaybeAlign StoreAlignment, Value *StoredVal,
Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores,
const SCEVAddRecExpr *Ev, const SCEV *BECount,
@@ -216,32 +187,6 @@ private:
bool runOnNoncountableLoop();
- struct CmpLoopStructure {
- Value *BCmpValue, *LatchCmpValue;
- BasicBlock *HeaderBrEqualBB, *HeaderBrUnequalBB;
- BasicBlock *LatchBrFinishBB, *LatchBrContinueBB;
- };
- bool matchBCmpLoopStructure(CmpLoopStructure &CmpLoop) const;
- struct CmpOfLoads {
- ICmpInst::Predicate BCmpPred;
- Value *LoadSrcA, *LoadSrcB;
- Value *LoadA, *LoadB;
- };
- bool matchBCmpOfLoads(Value *BCmpValue, CmpOfLoads &CmpOfLoads) const;
- bool recognizeBCmpLoopControlFlow(const CmpOfLoads &CmpOfLoads,
- CmpLoopStructure &CmpLoop) const;
- bool recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes, CmpOfLoads &CmpOfLoads,
- const SCEV *&SrcA, const SCEV *&SrcB,
- const SCEV *&Iterations) const;
- bool detectBCmpIdiom(ICmpInst *&BCmpInst, CmpInst *&LatchCmpInst,
- LoadInst *&LoadA, LoadInst *&LoadB, const SCEV *&SrcA,
- const SCEV *&SrcB, const SCEV *&NBytes) const;
- BasicBlock *transformBCmpControlFlow(ICmpInst *ComparedEqual);
- void transformLoopToBCmp(ICmpInst *BCmpInst, CmpInst *LatchCmpInst,
- LoadInst *LoadA, LoadInst *LoadB, const SCEV *SrcA,
- const SCEV *SrcB, const SCEV *NBytes);
- bool recognizeBCmp();
-
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
@@ -279,14 +224,13 @@ public:
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent());
const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
- LegacyPMAbstraction LoopDeleter(LPM);
// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
- LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, LoopDeleter, ORE);
+ LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
return LIR.runOnLoop(L);
}
@@ -305,7 +249,7 @@ char LoopIdiomRecognizeLegacyPass::ID = 0;
PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
- LPMUpdater &Updater) {
+ LPMUpdater &) {
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
const auto &FAM =
@@ -319,9 +263,8 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
"LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
"at a higher level");
- NewPMAbstraction LoopDeleter(Updater);
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
- LoopDeleter, *ORE);
+ *ORE);
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();
@@ -358,8 +301,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
// Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
- if (Name == "memset" || Name == "memcpy" || Name == "memcmp" ||
- Name == "bcmp")
+ if (Name == "memset" || Name == "memcpy")
return false;
// Determine if code size heuristics need to be applied.
@@ -369,10 +311,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
HasMemset = TLI->has(LibFunc_memset);
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
HasMemcpy = TLI->has(LibFunc_memcpy);
- HasMemCmp = TLI->has(LibFunc_memcmp);
- HasBCmp = TLI->has(LibFunc_bcmp);
- if (HasMemset || HasMemsetPattern || HasMemcpy || HasMemCmp || HasBCmp)
+ if (HasMemset || HasMemsetPattern || HasMemcpy)
if (SE->hasLoopInvariantBackedgeTakenCount(L))
return runOnCountableLoop();
@@ -791,7 +731,8 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
bool NegStride = StoreSize == -Stride;
- if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
+ if (processLoopStridedStore(StorePtr, StoreSize,
+ MaybeAlign(HeadStore->getAlignment()),
StoredVal, HeadStore, AdjacentStores, StoreEv,
BECount, NegStride)) {
TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
@@ -846,9 +787,9 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
SmallPtrSet<Instruction *, 1> MSIs;
MSIs.insert(MSI);
bool NegStride = SizeInBytes == -Stride;
- return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getDestAlignment(), SplatValue, MSI, MSIs,
- Ev, BECount, NegStride, /*IsLoopMemset=*/true);
+ return processLoopStridedStore(
+ Pointer, (unsigned)SizeInBytes, MaybeAlign(MSI->getDestAlignment()),
+ SplatValue, MSI, MSIs, Ev, BECount, NegStride, /*IsLoopMemset=*/true);
}
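These call sites are part of the LLVM-wide migration from raw unsigned alignments to the MaybeAlign wrapper, where a value of 0 means "alignment unknown" rather than an actual alignment. A minimal sketch of the wrapper's semantics (assuming llvm/Support/Alignment.h):

#include "llvm/Support/Alignment.h"
#include <cassert>
using namespace llvm;

void alignmentExample(unsigned RawAlign) {
  MaybeAlign MA(RawAlign); // RawAlign == 0 => empty: alignment unknown
  if (MA)                  // only dereference when an alignment is known
    assert(MA->value() >= 1 && "Align is always a non-zero power of two");
}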
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -938,7 +879,7 @@ static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
- Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
+ Value *DestPtr, unsigned StoreSize, MaybeAlign StoreAlignment,
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
@@ -960,12 +901,12 @@ bool LoopIdiomRecognize::processLoopStridedStore(
SCEVExpander Expander(*SE, *DL, "loop-idiom");
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
- Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS);
+ Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
const SCEV *Start = Ev->getStart();
// Handle negative strided loops.
if (NegStride)
- Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
+ Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSize, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
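Replacing Builder.getIntPtrTy(*DL, AS) with the data layout's index type matters on targets where pointers are wider than the offset arithmetic performed on them; SCEV expressions describing strides and byte counts should use the index width. A small fragment, assuming DL and DestPtr from the surrounding function:

// Pointer width and index (offset) width may differ, e.g. with fat or
// tagged pointers; GEP-style arithmetic should use the index type.
Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
unsigned IdxBits = DL->getIndexSizeInBits(
    cast<PointerType>(DestPtr->getType())->getAddressSpace());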
@@ -993,7 +934,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// Okay, everything looks good, insert the memset.
const SCEV *NumBytesS =
- getNumBytes(BECount, IntPtr, StoreSize, CurLoop, DL, SE);
+ getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
@@ -1001,12 +942,12 @@ bool LoopIdiomRecognize::processLoopStridedStore(
return false;
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
CallInst *NewCall;
if (SplatValue) {
- NewCall =
- Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
+ NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
+ MaybeAlign(StoreAlignment));
} else {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
@@ -1014,7 +955,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Module *M = TheStore->getModule();
StringRef FuncName = "memset_pattern16";
FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
- Int8PtrTy, Int8PtrTy, IntPtr);
+ Int8PtrTy, Int8PtrTy, IntIdxTy);
inferLibFuncAttributes(M, FuncName, *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
@@ -1081,11 +1022,11 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const SCEV *StrStart = StoreEv->getStart();
unsigned StrAS = SI->getPointerAddressSpace();
- Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
+ Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
// Handle negative strided loops.
if (NegStride)
- StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
+ StrStart = getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSize, SE);
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
@@ -1111,7 +1052,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// Handle negative strided loops.
if (NegStride)
- LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
+ LdStart = getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSize, SE);
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
@@ -1133,18 +1074,18 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// Okay, everything is safe, we can transform this!
const SCEV *NumBytesS =
- getNumBytes(BECount, IntPtrTy, StoreSize, CurLoop, DL, SE);
+ getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
// If the load or store are atomic, then they must necessarily be unordered
// by previous checks.
if (!SI->isAtomic() && !LI->isAtomic())
- NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
- LoadBasePtr, LI->getAlignment(), NumBytes);
+ NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
+ LI->getAlign(), NumBytes);
else {
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
@@ -1211,7 +1152,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
<< "] Noncountable Loop %"
<< CurLoop->getHeader()->getName() << "\n");
- return recognizeBCmp() || recognizePopcount() || recognizeAndInsertFFS();
+ return recognizePopcount() || recognizeAndInsertFFS();
}
/// Check if the given conditional branch is based on the comparison between
@@ -1885,811 +1826,3 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
}
-
-bool LoopIdiomRecognize::matchBCmpLoopStructure(
- CmpLoopStructure &CmpLoop) const {
- ICmpInst::Predicate BCmpPred;
-
- // We are looking for the following basic layout:
- // PreheaderBB: <preheader> ; preds = ???
- // <...>
- // br label %LoopHeaderBB
- // LoopHeaderBB: <header,exiting> ; preds = %PreheaderBB,%LoopLatchBB
- // <...>
- // %BCmpValue = icmp <...>
- // br i1 %BCmpValue, label %LoopLatchBB, label %Successor0
- // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
- // <...>
- // %LatchCmpValue = <are we done, or do next iteration?>
- // br i1 %LatchCmpValue, label %Successor1, label %LoopHeaderBB
- // Successor0: <exit> ; preds = %LoopHeaderBB
- // <...>
- // Successor1: <exit> ; preds = %LoopLatchBB
- // <...>
- //
- // Successor0 and Successor1 may or may not be the same basic block.
-
- // Match basic frame-work of this supposedly-comparison loop.
- // Match the basic framework of this supposed comparison loop.
- if (!match(CurLoop->getHeader()->getTerminator(),
- m_Br(m_CombineAnd(m_ICmp(BCmpPred, m_Value(), m_Value()),
- m_Value(CmpLoop.BCmpValue)),
- CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB)) ||
- !match(CurLoop->getLoopLatch()->getTerminator(),
- m_Br(m_CombineAnd(m_Cmp(), m_Value(CmpLoop.LatchCmpValue)),
- CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB))) {
- LLVM_DEBUG(dbgs() << "Basic control-flow layout unrecognized.\n");
- return false;
- }
- LLVM_DEBUG(dbgs() << "Recognized basic control-flow layout.\n");
- return true;
-}
-
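Everything in matchBCmpLoopStructure above is driven by PatternMatch: m_Br binds the two successor blocks, m_ICmp binds the predicate, and m_CombineAnd matches a sub-pattern while also capturing the matched value. A reduced sketch of the same matcher combination (the helper name is illustrative):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Match `br i1 (icmp pred %a, %b), label %T, label %F`, binding the
// predicate, the condition value, and both successors in one shot.
static bool matchCondBrOnICmp(Instruction *Term, ICmpInst::Predicate &Pred,
                              Value *&Cond, BasicBlock *&T, BasicBlock *&F) {
  return match(Term, m_Br(m_CombineAnd(m_ICmp(Pred, m_Value(), m_Value()),
                                       m_Value(Cond)),
                          T, F));
}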
-bool LoopIdiomRecognize::matchBCmpOfLoads(Value *BCmpValue,
- CmpOfLoads &CmpOfLoads) const {
- using namespace PatternMatch;
- LLVM_DEBUG(dbgs() << "Analyzing header icmp " << *BCmpValue
- << " as bcmp pattern.\n");
-
- // Match bcmp-style loop header cmp. It must be an eq-icmp of loads. Example:
- // %v0 = load <...>, <...>* %LoadSrcA
- // %v1 = load <...>, <...>* %LoadSrcB
- // %CmpLoop.BCmpValue = icmp eq <...> %v0, %v1
- // There won't be any no-op bitcasts between load and icmp,
- // they would have been transformed into a load of a bitcast.
- // FIXME: {b,mem}cmp() calls have the same semantics as icmp. Match them too.
- if (!match(BCmpValue,
- m_ICmp(CmpOfLoads.BCmpPred,
- m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcA)),
- m_Value(CmpOfLoads.LoadA)),
- m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcB)),
- m_Value(CmpOfLoads.LoadB)))) ||
- !ICmpInst::isEquality(CmpOfLoads.BCmpPred)) {
- LLVM_DEBUG(dbgs() << "Loop header icmp did not match bcmp pattern.\n");
- return false;
- }
- LLVM_DEBUG(dbgs() << "Recognized header icmp as bcmp pattern with loads:\n\t"
- << *CmpOfLoads.LoadA << "\n\t" << *CmpOfLoads.LoadB
- << "\n");
- // FIXME: handle memcmp pattern?
- return true;
-}
-
-bool LoopIdiomRecognize::recognizeBCmpLoopControlFlow(
- const CmpOfLoads &CmpOfLoads, CmpLoopStructure &CmpLoop) const {
- BasicBlock *LoopHeaderBB = CurLoop->getHeader();
- BasicBlock *LoopLatchBB = CurLoop->getLoopLatch();
-
- // Be wary, comparisons can be inverted, canonicalize order.
- // If this 'element' comparison passed, we expect to proceed to the next elt.
- if (CmpOfLoads.BCmpPred != ICmpInst::Predicate::ICMP_EQ)
- std::swap(CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB);
- // The predicate on loop latch does not matter, just canonicalize some order.
- if (CmpLoop.LatchBrContinueBB != LoopHeaderBB)
- std::swap(CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB);
-
- SmallVector<BasicBlock *, 2> ExitBlocks;
-
- CurLoop->getUniqueExitBlocks(ExitBlocks);
- assert(ExitBlocks.size() <= 2U && "Can't have more than two exit blocks.");
-
- // Check that control-flow between blocks is as expected.
- if (CmpLoop.HeaderBrEqualBB != LoopLatchBB ||
- CmpLoop.LatchBrContinueBB != LoopHeaderBB ||
- !is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) ||
- !is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB)) {
- LLVM_DEBUG(dbgs() << "Loop control-flow not recognized.\n");
- return false;
- }
-
- assert(!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) &&
- !is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) &&
- "Unexpected exit edges.");
-
- LLVM_DEBUG(dbgs() << "Recognized loop control-flow.\n");
-
- LLVM_DEBUG(dbgs() << "Performing side-effect analysis on the loop.\n");
- assert(CurLoop->isLCSSAForm(*DT) && "Should only get LCSSA-form loops here.");
- // No loop instructions must be used outside of the loop. Since we are in
- // LCSSA form, we only need to check the successor blocks' PHI nodes' incoming
- // values for incoming blocks that are the loop basic blocks.
- for (const BasicBlock *ExitBB : ExitBlocks) {
- for (const PHINode &PHI : ExitBB->phis()) {
- for (const BasicBlock *LoopBB :
- make_filter_range(PHI.blocks(), [this](BasicBlock *PredecessorBB) {
- return CurLoop->contains(PredecessorBB);
- })) {
- const auto *I =
- dyn_cast<Instruction>(PHI.getIncomingValueForBlock(LoopBB));
- if (I && CurLoop->contains(I)) {
- LLVM_DEBUG(dbgs()
- << "Loop contains instruction " << *I
- << " which is used outside of the loop in basic block "
- << ExitBB->getName() << " in phi node " << PHI << "\n");
- return false;
- }
- }
- }
- }
- // Similarly, the loop should not have any observable side-effects
- // other than the final comparison result.
- for (BasicBlock *LoopBB : CurLoop->blocks()) {
- for (Instruction &I : *LoopBB) {
- if (isa<DbgInfoIntrinsic>(I)) // Ignore dbginfo.
- continue; // FIXME: anything else? lifetime info?
- if ((I.mayHaveSideEffects() || I.isAtomic() || I.isFenceLike()) &&
- &I != CmpOfLoads.LoadA && &I != CmpOfLoads.LoadB) {
- LLVM_DEBUG(
- dbgs() << "Loop contains instruction with potential side-effects: "
- << I << "\n");
- return false;
- }
- }
- }
- LLVM_DEBUG(dbgs() << "No loop instructions deemed to have side-effects.\n");
- return true;
-}
-
-bool LoopIdiomRecognize::recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes,
- CmpOfLoads &CmpOfLoads,
- const SCEV *&SrcA,
- const SCEV *&SrcB,
- const SCEV *&Iterations) const {
- // Try to compute SCEV of the loads, for this loop's scope.
- const auto *ScevForSrcA = dyn_cast<SCEVAddRecExpr>(
- SE->getSCEVAtScope(CmpOfLoads.LoadSrcA, CurLoop));
- const auto *ScevForSrcB = dyn_cast<SCEVAddRecExpr>(
- SE->getSCEVAtScope(CmpOfLoads.LoadSrcB, CurLoop));
- if (!ScevForSrcA || !ScevForSrcB) {
- LLVM_DEBUG(dbgs() << "Failed to get SCEV expressions for load sources.\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "Got SCEV expressions (at loop scope) for loads:\n\t"
- << *ScevForSrcA << "\n\t" << *ScevForSrcB << "\n");
-
- // Loads must have the following SCEV exprs: {%ptr,+,BCmpTyBytes}<%LoopHeaderBB>
- const SCEV *RecStepForA = ScevForSrcA->getStepRecurrence(*SE);
- const SCEV *RecStepForB = ScevForSrcB->getStepRecurrence(*SE);
- if (!ScevForSrcA->isAffine() || !ScevForSrcB->isAffine() ||
- ScevForSrcA->getLoop() != CurLoop || ScevForSrcB->getLoop() != CurLoop ||
- RecStepForA != RecStepForB || !isa<SCEVConstant>(RecStepForA) ||
- cast<SCEVConstant>(RecStepForA)->getAPInt() != BCmpTyBytes) {
- LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads. Only support "
- "affine SCEV expressions originating in the loop we "
- "are analysing with identical constant positive step, "
- "equal to the count of bytes compared. Got:\n\t"
- << *RecStepForA << "\n\t" << *RecStepForB << "\n");
- return false;
- // FIXME: can support BCmpTyBytes > Step.
- // But will need to account for the extra bytes compared at the end.
- }
-
- SrcA = ScevForSrcA->getStart();
- SrcB = ScevForSrcB->getStart();
- LLVM_DEBUG(dbgs() << "Got SCEV expressions for load sources:\n\t" << *SrcA
- << "\n\t" << *SrcB << "\n");
-
- // The load sources must be loop invariants that dominate the loop header.
- if (SrcA == SE->getCouldNotCompute() || SrcB == SE->getCouldNotCompute() ||
- !SE->isAvailableAtLoopEntry(SrcA, CurLoop) ||
- !SE->isAvailableAtLoopEntry(SrcB, CurLoop)) {
- LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads, unavailable "
- "prior to loop header.\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "SCEV expressions for loads are acceptable.\n");
-
- // bcmp / memcmp take length argument as size_t, so let's conservatively
- // assume that the iteration count should be not wider than that.
- Type *CmpFuncSizeTy = DL->getIntPtrType(SE->getContext());
-
- // For how many iterations is loop guaranteed not to exit via LoopLatch?
- // This is one less than the maximal number of comparisons, and is: n + -1
- const SCEV *LoopExitCount =
- SE->getExitCount(CurLoop, CurLoop->getLoopLatch());
- LLVM_DEBUG(dbgs() << "Got SCEV expression for loop latch exit count: "
- << *LoopExitCount << "\n");
- // Exit count, similarly, must be a loop invariant that dominates the loop header.
- if (LoopExitCount == SE->getCouldNotCompute() ||
- !LoopExitCount->getType()->isIntOrPtrTy() ||
- LoopExitCount->getType()->getScalarSizeInBits() >
- CmpFuncSizeTy->getScalarSizeInBits() ||
- !SE->isAvailableAtLoopEntry(LoopExitCount, CurLoop)) {
- LLVM_DEBUG(dbgs() << "Unsupported SCEV expression for loop latch exit.\n");
- return false;
- }
-
- // LoopExitCount is always one less than the actual count of iterations.
- // Do this before cast, else we will be stuck with 1 + zext(-1 + n)
- Iterations = SE->getAddExpr(
- LoopExitCount, SE->getOne(LoopExitCount->getType()), SCEV::FlagNUW);
- assert(Iterations != SE->getCouldNotCompute() &&
- "Shouldn't fail to increment by one.");
-
- LLVM_DEBUG(dbgs() << "Computed iteration count: " << *Iterations << "\n");
- return true;
-}
-
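To make the counting above concrete: getExitCount for the latch is the number of times the loop is guaranteed not to exit there, one less than the number of element comparisons. A worked instance with hypothetical numbers, for two arrays of eight i32 elements (BCmpTyBytes = 4):

  LoopExitCount = 8 - 1 = 7   ; latch taken back to the header 7 times
  Iterations    = 7 + 1 = 8   ; total element comparisons
  NBytes        = 8 * 4 = 32  ; byte length handed to bcmp()/memcmp()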
-/// Return true iff the bcmp idiom is detected in the loop.
-///
-/// Additionally:
-/// 1) \p BCmpInst is set to the root byte-comparison instruction.
-/// 2) \p LatchCmpInst is set to the comparison that controls the latch.
-/// 3) \p LoadA is set to the first LoadInst.
-/// 4) \p LoadB is set to the second LoadInst.
-/// 5) \p SrcA is set to the first source location that is being compared.
-/// 6) \p SrcB is set to the second source location that is being compared.
-/// 7) \p NBytes is set to the number of bytes to compare.
-bool LoopIdiomRecognize::detectBCmpIdiom(ICmpInst *&BCmpInst,
- CmpInst *&LatchCmpInst,
- LoadInst *&LoadA, LoadInst *&LoadB,
- const SCEV *&SrcA, const SCEV *&SrcB,
- const SCEV *&NBytes) const {
- LLVM_DEBUG(dbgs() << "Recognizing bcmp idiom\n");
-
- // Give up if the loop is not in normal form, or has more than 2 blocks.
- if (!CurLoop->isLoopSimplifyForm() || CurLoop->getNumBlocks() > 2) {
- LLVM_DEBUG(dbgs() << "Basic loop structure unrecognized.\n");
- return false;
- }
- LLVM_DEBUG(dbgs() << "Recognized basic loop structure.\n");
-
- CmpLoopStructure CmpLoop;
- if (!matchBCmpLoopStructure(CmpLoop))
- return false;
-
- CmpOfLoads CmpOfLoads;
- if (!matchBCmpOfLoads(CmpLoop.BCmpValue, CmpOfLoads))
- return false;
-
- if (!recognizeBCmpLoopControlFlow(CmpOfLoads, CmpLoop))
- return false;
-
- BCmpInst = cast<ICmpInst>(CmpLoop.BCmpValue); // FIXME: is there no
- LatchCmpInst = cast<CmpInst>(CmpLoop.LatchCmpValue); // way to combine
- LoadA = cast<LoadInst>(CmpOfLoads.LoadA); // these casts with
- LoadB = cast<LoadInst>(CmpOfLoads.LoadB); // m_Value() matcher?
-
- Type *BCmpValTy = BCmpInst->getOperand(0)->getType();
- LLVMContext &Context = BCmpValTy->getContext();
- uint64_t BCmpTyBits = DL->getTypeSizeInBits(BCmpValTy);
- static constexpr uint64_t ByteTyBits = 8;
-
- LLVM_DEBUG(dbgs() << "Got comparison between values of type " << *BCmpValTy
- << " of size " << BCmpTyBits
- << " bits (while byte = " << ByteTyBits << " bits).\n");
- // bcmp()/memcmp() minimal unit of work is a byte. Therefore we must check
- // that we are dealing with a multiple of a byte here.
- if (BCmpTyBits % ByteTyBits != 0) {
- LLVM_DEBUG(dbgs() << "Value size is not a multiple of byte.\n");
- return false;
- // FIXME: could still be done under a run-time check that the total bit
- // count is a multiple of a byte, I guess? Or handle the remainder separately?
- }
-
- // Each comparison is done on this many bytes.
- uint64_t BCmpTyBytes = BCmpTyBits / ByteTyBits;
- LLVM_DEBUG(dbgs() << "Size is exactly " << BCmpTyBytes
- << " bytes, eligible for bcmp conversion.\n");
-
- const SCEV *Iterations;
- if (!recognizeBCmpLoopSCEV(BCmpTyBytes, CmpOfLoads, SrcA, SrcB, Iterations))
- return false;
-
- // bcmp / memcmp take length argument as size_t, do promotion now.
- Type *CmpFuncSizeTy = DL->getIntPtrType(Context);
- Iterations = SE->getNoopOrZeroExtend(Iterations, CmpFuncSizeTy);
- assert(Iterations != SE->getCouldNotCompute() && "Promotion failed.");
- // Note that it didn't insert a ptrtoint cast; we will need to do that manually.
-
- // We will be comparing *bytes*, not BCmpTy, so we need to recalculate the size.
- // It's a multiplication, and it *could* overflow. But for it to overflow
- // we'd want to compare more bytes than could be represented by size_t. But
- // allocation functions also take size_t. So how'd you produce such a buffer?
- // FIXME: we likely need to actually check that we know this won't overflow,
- // via llvm::computeOverflowForUnsignedMul().
- NBytes = SE->getMulExpr(
- Iterations, SE->getConstant(CmpFuncSizeTy, BCmpTyBytes), SCEV::FlagNUW);
- assert(NBytes != SE->getCouldNotCompute() &&
- "Shouldn't fail to compute the byte count.");
-
- LLVM_DEBUG(dbgs() << "Computed total byte count: " << *NBytes << "\n");
-
- if (LoadA->getPointerAddressSpace() != LoadB->getPointerAddressSpace() ||
- LoadA->getPointerAddressSpace() != 0 || !LoadA->isSimple() ||
- !LoadB->isSimple()) {
- StringLiteral L("Unsupported loads in idiom - only support identical, "
- "simple loads from address space 0.\n");
- LLVM_DEBUG(dbgs() << L);
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "BCmpIdiomUnsupportedLoads",
- BCmpInst->getDebugLoc(),
- CurLoop->getHeader())
- << L;
- });
- return false; // FIXME: support non-simple loads.
- }
-
- LLVM_DEBUG(dbgs() << "Recognized bcmp idiom\n");
- ORE.emit([&]() {
- return OptimizationRemarkAnalysis(DEBUG_TYPE, "RecognizedBCmpIdiom",
- CurLoop->getStartLoc(),
- CurLoop->getHeader())
- << "Loop recognized as a bcmp idiom";
- });
-
- return true;
-}
-
-BasicBlock *
-LoopIdiomRecognize::transformBCmpControlFlow(ICmpInst *ComparedEqual) {
- LLVM_DEBUG(dbgs() << "Transforming control-flow.\n");
- SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
-
- BasicBlock *PreheaderBB = CurLoop->getLoopPreheader();
- BasicBlock *HeaderBB = CurLoop->getHeader();
- BasicBlock *LoopLatchBB = CurLoop->getLoopLatch();
- SmallString<32> LoopName = CurLoop->getName();
- Function *Func = PreheaderBB->getParent();
- LLVMContext &Context = Func->getContext();
-
- // Before doing anything, drop SCEV info.
- SE->forgetLoop(CurLoop);
-
- // Here we start with: (0/6)
- // PreheaderBB: <preheader> ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // br label %LoopHeaderBB
- // LoopHeaderBB: <header,exiting> ; preds = %PreheaderBB,%LoopLatchBB
- // <...>
- // br i1 %<...>, label %LoopLatchBB, label %Successor0BB
- // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
- // <...>
- // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB
- // Successor0BB: <exit> ; preds = %LoopHeaderBB
- // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ]
- // <...>
- // Successor1BB: <exit> ; preds = %LoopLatchBB
- // %S1PHI = phi <...> [ <...>, %LoopLatchBB ]
- // <...>
- //
- // Successor0 and Successor1 may or may not be the same basic block.
-
- // Decouple the edge between loop preheader basic block and loop header basic
- // block. Thus the loop has become unreachable.
- assert(cast<BranchInst>(PreheaderBB->getTerminator())->isUnconditional() &&
- PreheaderBB->getTerminator()->getSuccessor(0) == HeaderBB &&
- "Preheader bb must end with an unconditional branch to header bb.");
- PreheaderBB->getTerminator()->eraseFromParent();
- DTUpdates.push_back({DominatorTree::Delete, PreheaderBB, HeaderBB});
-
- // Create a new preheader basic block before loop header basic block.
- auto *PhonyPreheaderBB = BasicBlock::Create(
- Context, LoopName + ".phonypreheaderbb", Func, HeaderBB);
- // And insert an unconditional branch from phony preheader basic block to
- // loop header basic block.
- IRBuilder<>(PhonyPreheaderBB).CreateBr(HeaderBB);
- DTUpdates.push_back({DominatorTree::Insert, PhonyPreheaderBB, HeaderBB});
-
- // Create a *single* new empty block that we will substitute as a
- // successor basic block for the loop's exits. This one is temporary.
- // Much like phony preheader basic block, it is not connected.
- auto *PhonySuccessorBB =
- BasicBlock::Create(Context, LoopName + ".phonysuccessorbb", Func,
- LoopLatchBB->getNextNode());
- // That block must have *some* non-PHI instruction, or else deleteDeadLoop()
- // will mess up cleanup of dbginfo, and verifier will complain.
- IRBuilder<>(PhonySuccessorBB).CreateUnreachable();
-
- // Create two new empty blocks that we will use to preserve the original
- // loop exit control-flow, and preserve the incoming values in the PHI nodes
- // in the loop's successor exit blocks. These will live on.
- auto *ComparedUnequalBB =
- BasicBlock::Create(Context, ComparedEqual->getName() + ".unequalbb", Func,
- PhonySuccessorBB->getNextNode());
- auto *ComparedEqualBB =
- BasicBlock::Create(Context, ComparedEqual->getName() + ".equalbb", Func,
- PhonySuccessorBB->getNextNode());
-
- // By now we have: (1/6)
- // PreheaderBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // [no terminator instruction!]
- // PhonyPreheaderBB: <preheader> ; No preds, UNREACHABLE!
- // br label %LoopHeaderBB
- // LoopHeaderBB: <header,exiting> ; preds = %PhonyPreheaderBB, %LoopLatchBB
- // <...>
- // br i1 %<...>, label %LoopLatchBB, label %Successor0BB
- // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
- // <...>
- // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB
- // PhonySuccessorBB: ; No preds, UNREACHABLE!
- // unreachable
- // EqualBB: ; No preds, UNREACHABLE!
- // [no terminator instruction!]
- // UnequalBB: ; No preds, UNREACHABLE!
- // [no terminator instruction!]
- // Successor0BB: <exit> ; preds = %LoopHeaderBB
- // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ]
- // <...>
- // Successor1BB: <exit> ; preds = %LoopLatchBB
- // %S1PHI = phi <...> [ <...>, %LoopLatchBB ]
- // <...>
-
- // What is the mapping/replacement basic block for exiting out of the loop
- // from either of the old loop's basic blocks?
- auto GetReplacementBB = [this, ComparedEqualBB,
- ComparedUnequalBB](const BasicBlock *OldBB) {
- assert(CurLoop->contains(OldBB) && "Only for loop's basic blocks.");
- if (OldBB == CurLoop->getLoopLatch()) // "all elements compared equal".
- return ComparedEqualBB;
- if (OldBB == CurLoop->getHeader()) // "element compared unequal".
- return ComparedUnequalBB;
- llvm_unreachable("Only had two basic blocks in loop.");
- };
-
- // What are the exits out of this loop?
- SmallVector<Loop::Edge, 2> LoopExitEdges;
- CurLoop->getExitEdges(LoopExitEdges);
- assert(LoopExitEdges.size() == 2 && "Should have only two exit edges.");
-
- // Populate new basic blocks, update the exiting control-flow, PHI nodes.
- for (const Loop::Edge &Edge : LoopExitEdges) {
- auto *OldLoopBB = const_cast<BasicBlock *>(Edge.first);
- auto *SuccessorBB = const_cast<BasicBlock *>(Edge.second);
- assert(CurLoop->contains(OldLoopBB) && !CurLoop->contains(SuccessorBB) &&
- "Unexpected edge.");
-
- // If we would exit the loop from this loop's basic block,
- // what semantically would that mean? Did comparison succeed or fail?
- BasicBlock *NewBB = GetReplacementBB(OldLoopBB);
- assert(NewBB->empty() && "Should not get same new basic block here twice.");
- IRBuilder<> Builder(NewBB);
- Builder.SetCurrentDebugLocation(OldLoopBB->getTerminator()->getDebugLoc());
- Builder.CreateBr(SuccessorBB);
- DTUpdates.push_back({DominatorTree::Insert, NewBB, SuccessorBB});
- // Also, be *REALLY* careful with PHI nodes in successor basic block,
- // update them to receive the same input value, but not from the current
- // loop's basic block, but from the new basic block instead.
- SuccessorBB->replacePhiUsesWith(OldLoopBB, NewBB);
- // Also, change loop control-flow. This loop's basic block shall no longer
- // exit from the loop to its original successor basic block, but to our new
- // phony successor basic block. Note that the new successor will be the unique exit.
- OldLoopBB->getTerminator()->replaceSuccessorWith(SuccessorBB,
- PhonySuccessorBB);
- DTUpdates.push_back({DominatorTree::Delete, OldLoopBB, SuccessorBB});
- DTUpdates.push_back({DominatorTree::Insert, OldLoopBB, PhonySuccessorBB});
- }
-
- // Inform DomTree about edge changes. Note that LoopInfo is still out-of-date.
- assert(DTUpdates.size() == 8 && "Update count prediction failed.");
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- DTU.applyUpdates(DTUpdates);
- DTUpdates.clear();
-
- // By now we have: (2/6)
- // PreheaderBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // [no terminator instruction!]
- // PhonyPreheaderBB: <preheader> ; No preds, UNREACHABLE!
- // br label %LoopHeaderBB
- // LoopHeaderBB: <header,exiting> ; preds = %PhonyPreheaderBB, %LoopLatchBB
- // <...>
- // br i1 %<...>, label %LoopLatchBB, label %PhonySuccessorBB
- // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
- // <...>
- // br i1 %<...>, label %PhonySuccessorBB, label %LoopHeaderBB
- // PhonySuccessorBB: <uniq. exit> ; preds = %LoopHeaderBB, %LoopLatchBB
- // unreachable
- // EqualBB: ; No preds, UNREACHABLE!
- // br label %Successor1BB
- // UnequalBB: ; No preds, UNREACHABLE!
- // br label %Successor0BB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- // *Finally*, zap the original loop. Record its parent loop though.
- Loop *ParentLoop = CurLoop->getParentLoop();
- LLVM_DEBUG(dbgs() << "Deleting old loop.\n");
- LoopDeleter.markLoopAsDeleted(CurLoop); // Mark as deleted *BEFORE* deleting!
- deleteDeadLoop(CurLoop, DT, SE, LI); // And actually delete the loop.
- CurLoop = nullptr;
-
- // By now we have: (3/6)
- // PreheaderBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // [no terminator instruction!]
- // PhonyPreheaderBB: ; No preds, UNREACHABLE!
- // br label %PhonySuccessorBB
- // PhonySuccessorBB: ; preds = %PhonyPreheaderBB
- // unreachable
- // EqualBB: ; No preds, UNREACHABLE!
- // br label %Successor1BB
- // UnequalBB: ; No preds, UNREACHABLE!
- // br label %Successor0BB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- // Now, actually restore the CFG.
-
- // Insert an unconditional branch from an actual preheader basic block to
- // phony preheader basic block.
- IRBuilder<>(PreheaderBB).CreateBr(PhonyPreheaderBB);
- DTUpdates.push_back({DominatorTree::Insert, PreheaderBB, PhonyPreheaderBB});
- // Insert proper conditional branch from phony successor basic block to the
- // "dispatch" basic blocks, which were used to preserve incoming values in
- // original loop's successor basic blocks.
- assert(isa<UnreachableInst>(PhonySuccessorBB->getTerminator()) &&
- "Yep, that's the one we created to keep deleteDeadLoop() happy.");
- PhonySuccessorBB->getTerminator()->eraseFromParent();
- {
- IRBuilder<> Builder(PhonySuccessorBB);
- Builder.SetCurrentDebugLocation(ComparedEqual->getDebugLoc());
- Builder.CreateCondBr(ComparedEqual, ComparedEqualBB, ComparedUnequalBB);
- }
- DTUpdates.push_back(
- {DominatorTree::Insert, PhonySuccessorBB, ComparedEqualBB});
- DTUpdates.push_back(
- {DominatorTree::Insert, PhonySuccessorBB, ComparedUnequalBB});
-
- BasicBlock *DispatchBB = PhonySuccessorBB;
- DispatchBB->setName(LoopName + ".bcmpdispatchbb");
-
- assert(DTUpdates.size() == 3 && "Update count prediction failed.");
- DTU.applyUpdates(DTUpdates);
- DTUpdates.clear();
-
- // By now we have: (4/6)
- // PreheaderBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // br label %PhonyPreheaderBB
- // PhonyPreheaderBB: ; preds = %PreheaderBB
- // br label %DispatchBB
- // DispatchBB: ; preds = %PhonyPreheaderBB
- // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
- // EqualBB: ; preds = %DispatchBB
- // br label %Successor1BB
- // UnequalBB: ; preds = %DispatchBB
- // br label %Successor0BB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- // The basic CFG has been restored! Now let's merge redundant basic blocks.
-
- // Merge the phony successor basic block into its only predecessor, the
- // phony preheader basic block. It is completely redundant.
- MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU);
-
- // By now we have: (5/6)
- // PreheaderBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // br label %DispatchBB
- // DispatchBB: ; preds = %PreheaderBB
- // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
- // EqualBB: ; preds = %DispatchBB
- // br label %Successor1BB
- // UnequalBB: ; preds = %DispatchBB
- // br label %Successor0BB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- // Was this loop nested?
- if (!ParentLoop) {
- // If the loop was *NOT* nested, then let's also merge phony successor
- // basic block into its only predecessor, the preheader basic block.
- // Also, here we need to update LoopInfo.
- LI->removeBlock(PreheaderBB);
- MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU);
-
- // By now we have: (6/6)
- // DispatchBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
- // EqualBB: ; preds = %DispatchBB
- // br label %Successor1BB
- // UnequalBB: ; preds = %DispatchBB
- // br label %Successor0BB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- return DispatchBB;
- }
-
- // Otherwise, we need to "preserve" the LoopSimplify form of the deleted loop.
- // To achieve that, we shall keep the preheader basic block (mainly so that
- // the loop header block will be guaranteed to have a predecessor outside of
- // the loop), and create a phony loop with all three of these new basic blocks.
- Loop *PhonyLoop = LI->AllocateLoop();
- ParentLoop->addChildLoop(PhonyLoop);
- PhonyLoop->addBasicBlockToLoop(DispatchBB, *LI);
- PhonyLoop->addBasicBlockToLoop(ComparedEqualBB, *LI);
- PhonyLoop->addBasicBlockToLoop(ComparedUnequalBB, *LI);
-
- // But we only have a preheader basic block, a header basic block, and
- // two exiting basic blocks. For a proper loop we also need a backedge from
- // non-header basic block to header bb.
- // Let's just add a never-taken branch from both of the exiting basic blocks.
- for (BasicBlock *BB : {ComparedEqualBB, ComparedUnequalBB}) {
- BranchInst *OldTerminator = cast<BranchInst>(BB->getTerminator());
- assert(OldTerminator->isUnconditional() && "That's the one we created.");
- BasicBlock *SuccessorBB = OldTerminator->getSuccessor(0);
-
- IRBuilder<> Builder(OldTerminator);
- Builder.SetCurrentDebugLocation(OldTerminator->getDebugLoc());
- Builder.CreateCondBr(ConstantInt::getTrue(Context), SuccessorBB,
- DispatchBB);
- OldTerminator->eraseFromParent();
- // Yes, the backedge will never be taken. The control-flow is redundant.
- // If it can be simplified further, other passes will take care of it.
- DTUpdates.push_back({DominatorTree::Delete, BB, SuccessorBB});
- DTUpdates.push_back({DominatorTree::Insert, BB, SuccessorBB});
- DTUpdates.push_back({DominatorTree::Insert, BB, DispatchBB});
- }
- assert(DTUpdates.size() == 6 && "Update count prediction failed.");
- DTU.applyUpdates(DTUpdates);
- DTUpdates.clear();
-
- // By now we have: (6/6)
- // PreheaderBB: <preheader> ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // br label %BCmpDispatchBB
- // BCmpDispatchBB: <header> ; preds = %PreheaderBB
- // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
- // EqualBB: <latch,exiting> ; preds = %BCmpDispatchBB
- // br i1 %true, label %Successor1BB, label %BCmpDispatchBB
- // UnequalBB: <latch,exiting> ; preds = %BCmpDispatchBB
- // br i1 %true, label %Successor0BB, label %BCmpDispatchBB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- // Finally fully DONE!
- return DispatchBB;
-}
-
-void LoopIdiomRecognize::transformLoopToBCmp(ICmpInst *BCmpInst,
- CmpInst *LatchCmpInst,
- LoadInst *LoadA, LoadInst *LoadB,
- const SCEV *SrcA, const SCEV *SrcB,
- const SCEV *NBytes) {
- // We will be inserting before the terminator instruction of preheader block.
- IRBuilder<> Builder(CurLoop->getLoopPreheader()->getTerminator());
-
- LLVM_DEBUG(dbgs() << "Transforming bcmp loop idiom into a call.\n");
- LLVM_DEBUG(dbgs() << "Emitting new instructions.\n");
-
- // Expand the SCEV expressions for both sources to compare, and produce value
- // for the byte len (beware of Iterations potentially being a pointer, and
-  // account for element size being BCmpTyBytes bytes, which may not be 1 byte)
- Value *PtrA, *PtrB, *Len;
- {
- SCEVExpander SExp(*SE, *DL, "LoopToBCmp");
- SExp.setInsertPoint(&*Builder.GetInsertPoint());
-
- auto HandlePtr = [&SExp](LoadInst *Load, const SCEV *Src) {
- SExp.SetCurrentDebugLocation(DebugLoc());
-      // If the pointer operand of the original load had a dbgloc, use it.
- if (const auto *I = dyn_cast<Instruction>(Load->getPointerOperand()))
- SExp.SetCurrentDebugLocation(I->getDebugLoc());
- return SExp.expandCodeFor(Src);
- };
- PtrA = HandlePtr(LoadA, SrcA);
- PtrB = HandlePtr(LoadB, SrcB);
-
-    // For the len calculation, use the dbgloc of the loop's latch condition.
- Builder.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc());
- SExp.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc());
- Len = SExp.expandCodeFor(NBytes);
-
- Type *CmpFuncSizeTy = DL->getIntPtrType(Builder.getContext());
- assert(SE->getTypeSizeInBits(Len->getType()) ==
- DL->getTypeSizeInBits(CmpFuncSizeTy) &&
- "Len should already have the correct size.");
-
-    // Make sure the iteration count is an integer; insert a ptrtoint cast if not.
- if (Len->getType()->isPointerTy())
- Len = Builder.CreatePtrToInt(Len, CmpFuncSizeTy);
- assert(Len->getType() == CmpFuncSizeTy && "Should have correct type now.");
-
- Len->setName(Len->getName() + ".bytecount");
-
-    // No legality check is needed. We want to check that the memory regions
-    // [PtrA, PtrA+Len) and [PtrB, PtrB+Len) are fully identical. For them to
-    // be equal, they must match bit-by-bit; likewise, for them to *NOT* be
-    // equal, it suffices that they differ in just one bit. The step of the
-    // comparison (how many bits are compared at once) simply does not matter.
- }
-
- // For the rest of new instructions, dbgloc should point at the value cmp.
- Builder.SetCurrentDebugLocation(BCmpInst->getDebugLoc());
-
- // Emit the comparison itself.
- auto *CmpCall =
- cast<CallInst>(HasBCmp ? emitBCmp(PtrA, PtrB, Len, Builder, *DL, TLI)
- : emitMemCmp(PtrA, PtrB, Len, Builder, *DL, TLI));
- // FIXME: add {B,Mem}CmpInst with MemoryCompareInst
- // (based on MemIntrinsicBase) as base?
- // FIXME: propagate metadata from loads? (alignments, AS, TBAA, ...)
-
-  // {b,mem}cmp returns 0 if the regions are equal, and non-zero otherwise.
- auto *ComparedEqual = cast<ICmpInst>(Builder.CreateICmpEQ(
- CmpCall, ConstantInt::get(CmpCall->getType(), 0),
- PtrA->getName() + ".vs." + PtrB->getName() + ".eqcmp"));
-
- BasicBlock *BB = transformBCmpControlFlow(ComparedEqual);
- Builder.ClearInsertionPoint();
-
- // We're done.
- LLVM_DEBUG(dbgs() << "Transformed loop bcmp idiom into a call.\n");
- ORE.emit([&]() {
- return OptimizationRemark(DEBUG_TYPE, "TransformedBCmpIdiomToCall",
- CmpCall->getDebugLoc(), BB)
- << "Transformed bcmp idiom into a call to "
- << ore::NV("NewFunction", CmpCall->getCalledFunction())
- << "() function";
- });
- ++NumBCmp;
-}
-
-/// Recognizes a bcmp idiom in a non-countable loop.
-///
-/// If detected, transforms the relevant code to issue the bcmp (or memcmp)
-/// library function call, and returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeBCmp() {
- if (!HasMemCmp && !HasBCmp)
- return false;
-
- ICmpInst *BCmpInst;
- CmpInst *LatchCmpInst;
- LoadInst *LoadA, *LoadB;
- const SCEV *SrcA, *SrcB, *NBytes;
- if (!detectBCmpIdiom(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB,
- NBytes)) {
- LLVM_DEBUG(dbgs() << "bcmp idiom recognition failed.\n");
- return false;
- }
-
- transformLoopToBCmp(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB, NBytes);
- return true;
-}
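For orientation, a hand-written sketch (illustrative names, not part of the patch) of the loop shape the deleted recognizer matched, and the form the deleted transform rewrote it into:

    #include <cstring>
    #include <cstddef>

    // A non-countable byte-comparison loop: it exits early on the first
    // mismatch, so the trip count is not computable up front.
    bool equalLoop(const unsigned char *A, const unsigned char *B, size_t N) {
      for (size_t I = 0; I != N; ++I)
        if (A[I] != B[I]) // the value comparison (BCmpInst above)
          return false;   // early inequality exit
      return true;        // the latch comparison controls the backedge
    }

    // After the (now removed) transform, the loop collapsed into one library
    // call whose result is compared against zero; bcmp was preferred when the
    // target had it.
    bool equalCall(const unsigned char *A, const unsigned char *B, size_t N) {
      return std::memcmp(A, B, N) == 0;
    }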
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 368b9d4e8df1..901204181a7c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
@@ -226,7 +227,8 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
Optional<MemorySSAUpdater> MSSAU;
if (AR.MSSA) {
MSSAU = MemorySSAUpdater(AR.MSSA);
- AR.MSSA->verifyMemorySSA();
+ if (VerifyMemorySSA)
+ AR.MSSA->verifyMemorySSA();
}
if (!simplifyLoopInst(L, AR.DT, AR.LI, AR.AC, AR.TLI,
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 1af4b21b432e..6ce2d06058cf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -716,22 +717,6 @@ bool LoopInterchangeLegality::findInductionAndReductions(
return true;
}
-static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
- for (PHINode &PHI : Block->phis()) {
-    // A reduction LCSSA PHI will have only 1 incoming block, the loop latch.
- if (PHI.getNumIncomingValues() > 1)
- return false;
- Instruction *Ins = dyn_cast<Instruction>(PHI.getIncomingValue(0));
- if (!Ins)
- return false;
-    // The incoming value of an LCSSA PHI in the outer loop exit can only be
-    // the inner loop exit's LCSSA PHI; otherwise the loops would not be
-    // tightly nested.
- if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
- return false;
- }
- return true;
-}
-
// This function indicates the current limitations in the transform as a result
// of which we do not proceed.
bool LoopInterchangeLegality::currentLimitations() {
@@ -830,21 +815,6 @@ bool LoopInterchangeLegality::currentLimitations() {
return true;
}
- // TODO: We only handle LCSSA PHI's corresponding to reduction for now.
- BasicBlock *InnerExit = InnerLoop->getExitBlock();
- if (!containsSafePHI(InnerExit, false)) {
- LLVM_DEBUG(
- dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
-                 << "Only inner loops with LCSSA PHIs can be interchanged "
-                    "currently.";
- });
- return true;
- }
-
// TODO: Current limitation: Since we split the inner loop latch at the point
// were induction variable is incremented (induction.next); We cannot have
// more than 1 user of induction.next since it would result in broken code
@@ -920,6 +890,28 @@ bool LoopInterchangeLegality::currentLimitations() {
return false;
}
+// We currently only support LCSSA PHI nodes in the inner loop exit, if their
+// users are either reduction PHIs or PHIs outside the outer loop (which means
+// we are only interested in the final value after the loop).
+static bool
+areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
+ SmallPtrSetImpl<PHINode *> &Reductions) {
+ BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
+ for (PHINode &PHI : InnerExit->phis()) {
+    // A reduction LCSSA PHI will have only 1 incoming block, the loop latch.
+ if (PHI.getNumIncomingValues() > 1)
+ return false;
+ if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
+ PHINode *PN = dyn_cast<PHINode>(U);
+ return !PN || (Reductions.find(PN) == Reductions.end() &&
+ OuterL->contains(PN->getParent()));
+ })) {
+ return false;
+ }
+ }
+ return true;
+}
+
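A hand-written IR sketch (illustrative labels, not from the patch) of the case this predicate accepts: an LCSSA PHI in the inner loop's exit block with a single incoming value, whose only user is a reduction PHI:

    inner.exit:                                      ; preds = %inner.latch
      %sum.lcssa = phi i32 [ %sum.next, %inner.latch ]
      br label %outer.latch

    outer.header:
      %sum = phi i32 [ 0, %entry ], [ %sum.lcssa, %outer.latch ]

A PHI with more than one incoming value, or a user that is neither a known reduction PHI nor a PHI outside the loop, makes the function return false.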
// We currently support LCSSA PHI nodes in the outer loop exit, if their
// incoming values do not come from the outer loop latch or if the
// outer loop latch has a single predecessor. In that case, the value will
@@ -927,7 +919,7 @@ bool LoopInterchangeLegality::currentLimitations() {
 // will still be true after interchanging. If we have multiple predecessors,
// that may not be the case, e.g. because the outer loop latch may be executed
// if the inner loop is not executed.
-static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
+static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
for (PHINode &PHI : LoopNestExit->phis()) {
// FIXME: We currently are not able to detect floating point reductions
@@ -1012,7 +1004,19 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
return false;
}
- if (!areLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
+ if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
+ OuterInnerReductions)) {
+ LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Found unsupported PHI node in loop exit.";
+ });
+ return false;
+ }
+
+ if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
@@ -1315,31 +1319,39 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
FromBB->getTerminator()->getIterator());
}
-/// Update BI to jump to NewBB instead of OldBB. Records updates to
-/// the dominator tree in DTUpdates, if DT should be preserved.
+// Update BI to jump to NewBB instead of OldBB. Records updates to the
+// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
+// \p OldBB appears exactly once in BI's successor list.
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
BasicBlock *NewBB,
- std::vector<DominatorTree::UpdateType> &DTUpdates) {
- assert(llvm::count_if(successors(BI),
- [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 &&
- "BI must jump to OldBB at most once.");
- for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) {
- if (BI->getSuccessor(i) == OldBB) {
- BI->setSuccessor(i, NewBB);
-
- DTUpdates.push_back(
- {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
- DTUpdates.push_back(
- {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
- break;
+ std::vector<DominatorTree::UpdateType> &DTUpdates,
+ bool MustUpdateOnce = true) {
+ assert((!MustUpdateOnce ||
+ llvm::count_if(successors(BI),
+ [OldBB](BasicBlock *BB) {
+ return BB == OldBB;
+ }) == 1) && "BI must jump to OldBB exactly once.");
+ bool Changed = false;
+ for (Use &Op : BI->operands())
+ if (Op == OldBB) {
+ Op.set(NewBB);
+ Changed = true;
}
+
+ if (Changed) {
+ DTUpdates.push_back(
+ {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
+ DTUpdates.push_back(
+ {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
}
+ assert(Changed && "Expected a successor to be updated");
}
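The relaxed contract matters because a conditional branch may name the same block twice; rewriting by operand, as above, updates every occurrence, whereas the old successor-index loop broke out after the first match. A minimal sketch (hypothetical helper, not part of the patch) of the duplicated-successor case:

    #include "llvm/IR/IRBuilder.h"

    // Emits "br i1 %C, label %Old, label %Old": both block operands of the
    // branch refer to the same successor, and both must be rewritten.
    static void makeDupBranch(llvm::IRBuilder<> &B, llvm::Value *C,
                              llvm::BasicBlock *Old) {
      B.CreateCondBr(C, /*True=*/Old, /*False=*/Old);
    }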
// Move Lcssa PHIs to the right place.
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
BasicBlock *InnerLatch, BasicBlock *OuterHeader,
- BasicBlock *OuterLatch, BasicBlock *OuterExit) {
+ BasicBlock *OuterLatch, BasicBlock *OuterExit,
+ Loop *InnerLoop, LoopInfo *LI) {
// Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
// defined either in the header or latch. Those blocks will become header and
@@ -1394,19 +1406,17 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
P->moveBefore(InnerExit->getFirstNonPHI());
// Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
- // incoming values from the outer latch or header, we have to add a new PHI
+ // incoming values defined in the outer loop, we have to add a new PHI
// in the inner loop latch, which became the exit block of the outer loop,
// after interchanging.
if (OuterExit) {
for (PHINode &P : OuterExit->phis()) {
if (P.getNumIncomingValues() != 1)
continue;
- // Skip Phis with incoming values not defined in the outer loop's header
- // and latch. Also skip incoming phis defined in the latch. Those should
+ // Skip Phis with incoming values defined in the inner loop. Those should
// already have been updated.
auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
- if (!I || ((I->getParent() != OuterLatch || isa<PHINode>(I)) &&
- I->getParent() != OuterHeader))
+ if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
continue;
PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
@@ -1481,12 +1491,21 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
if (!InnerLoopHeaderSuccessor)
return false;
- // Adjust Loop Preheader and headers
+ // Adjust Loop Preheader and headers.
+ // The branches in the outer loop predecessor and the outer loop header can
+ // be unconditional branches or conditional branches with duplicates. Consider
+ // this when updating the successors.
updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
- InnerLoopPreHeader, DTUpdates);
- updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates);
+ InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
+ // The outer loop header might or might not branch to the outer latch.
+ // We are guaranteed to branch to the inner loop preheader.
+ if (std::find(succ_begin(OuterLoopHeaderBI), succ_end(OuterLoopHeaderBI),
+ OuterLoopLatch) != succ_end(OuterLoopHeaderBI))
+ updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates,
+ /*MustUpdateOnce=*/false);
updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
- InnerLoopHeaderSuccessor, DTUpdates);
+ InnerLoopHeaderSuccessor, DTUpdates,
+ /*MustUpdateOnce=*/false);
// Adjust reduction PHI's now that the incoming block has changed.
InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
@@ -1520,7 +1539,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
OuterLoopPreHeader);
moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
- OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock());
+ OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
+ InnerLoop, LI);
// For PHIs in the exit block of the outer loop, outer's latch has been
// replaced by Inners'.
OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
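For orientation, the source-level shape of the transform this file implements, sketched with illustrative code:

    // Before: the inner loop walks down a column of A, one large stride per
    // step.
    void before(int M, int N, float A[][128], const float B[][128]) {
      for (int j = 0; j < N; ++j)
        for (int i = 0; i < M; ++i)
          A[i][j] += B[i][j];
    }

    // After interchange: the innermost accesses become row-contiguous.
    void after(int M, int N, float A[][128], const float B[][128]) {
      for (int i = 0; i < M; ++i)
        for (int j = 0; j < N; ++j)
          A[i][j] += B[i][j];
    }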
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index e8dc879a184b..4e1b4e87ebc9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -49,6 +49,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -488,7 +489,7 @@ public:
// Filter the candidates further.
SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
unsigned NumForwarding = 0;
- for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
+ for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
LLVM_DEBUG(dbgs() << "Candidate " << Cand);
 // Make sure that the stored value is available everywhere in the loop in
@@ -544,7 +545,8 @@ public:
auto *HeaderBB = L->getHeader();
auto *F = HeaderBB->getParent();
bool OptForSize = F->hasOptSize() ||
- llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
+ llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI,
+ PGSOQueryType::IRPass);
if (OptForSize) {
LLVM_DEBUG(
dbgs() << "Versioning is needed but not allowed when optimizing "
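For context, the idiom this pass targets, sketched with illustrative code: a load that re-reads the value stored one iteration earlier (dependence distance one), which can be forwarded through a PHI instead of reloaded once the loop is versioned with the necessary runtime no-alias checks; the guard above merely declines that versioning when optimizing for size:

    void forwardable(float *A, const float *B, float *C, unsigned N) {
      for (unsigned I = 0; I < N; ++I) {
        A[I + 1] = B[I] + 2.0f; // store in iteration I ...
        C[I] = A[I] * 3.0f;     // ... is re-read by the load in iteration I+1
      }
    }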
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 885c0e8f4b8b..1a42f6b23443 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -191,9 +191,12 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -248,7 +251,9 @@ struct LoopICmp {
class LoopPredication {
AliasAnalysis *AA;
+ DominatorTree *DT;
ScalarEvolution *SE;
+ LoopInfo *LI;
BranchProbabilityInfo *BPI;
Loop *L;
@@ -300,10 +305,13 @@ class LoopPredication {
// within the loop. We identify such unprofitable loops through BPI.
bool isLoopProfitableToPredicate();
+ bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
+
public:
- LoopPredication(AliasAnalysis *AA, ScalarEvolution *SE,
+ LoopPredication(AliasAnalysis *AA, DominatorTree *DT,
+ ScalarEvolution *SE, LoopInfo *LI,
BranchProbabilityInfo *BPI)
- : AA(AA), SE(SE), BPI(BPI){};
+ : AA(AA), DT(DT), SE(SE), LI(LI), BPI(BPI) {};
bool runOnLoop(Loop *L);
};
@@ -323,10 +331,12 @@ public:
if (skipLoop(L))
return false;
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BranchProbabilityInfo &BPI =
getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- LoopPredication LP(AA, SE, &BPI);
+ LoopPredication LP(AA, DT, SE, LI, &BPI);
return LP.runOnLoop(L);
}
};
@@ -352,7 +362,7 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
Function *F = L.getHeader()->getParent();
auto *BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(*F);
- LoopPredication LP(&AR.AA, &AR.SE, BPI);
+ LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, BPI);
if (!LP.runOnLoop(&L))
return PreservedAnalyses::all();
@@ -823,9 +833,9 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
Value *AllChecks = Builder.CreateAnd(Checks);
auto *OldCond = BI->getCondition();
BI->setCondition(AllChecks);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
assert(isGuardAsWidenableBranch(BI) &&
"Stopped being a guard after transform?");
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
return true;
@@ -953,6 +963,233 @@ bool LoopPredication::isLoopProfitableToPredicate() {
return true;
}
+/// If we can (cheaply) find a widenable branch which controls entry into the
+/// loop, return it.
+static BranchInst *FindWidenableTerminatorAboveLoop(Loop *L, LoopInfo &LI) {
+ // Walk back through any unconditional executed blocks and see if we can find
+  // Walk back through any unconditionally executed blocks and see if we can find
+ // that we predict that maythrow calls are likely untaken and thus that it's
+ // profitable to widen a branch before a maythrow call with a condition
+ // afterwards even though that may cause the slow path to run in a case where
+ // it wouldn't have otherwise.
+ BasicBlock *BB = L->getLoopPreheader();
+ if (!BB)
+ return nullptr;
+ do {
+ if (BasicBlock *Pred = BB->getSinglePredecessor())
+ if (BB == Pred->getSingleSuccessor()) {
+ BB = Pred;
+ continue;
+ }
+ break;
+ } while (true);
+
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ auto *Term = Pred->getTerminator();
+
+ Value *Cond, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ if (parseWidenableBranch(Term, Cond, WC, IfTrueBB, IfFalseBB) &&
+ IfTrueBB == BB)
+ return cast<BranchInst>(Term);
+ }
+ return nullptr;
+}
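For reference, a sketch (an assumption based on the GuardUtils helpers, with illustrative names) of the IR pattern parseWidenableBranch recognizes here: a branch whose condition ANDs a real check with the widenable-condition intrinsic:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Emits: %wc = call i1 @llvm.experimental.widenable.condition()
    //        %g  = and i1 %Cond, %wc
    //        br i1 %g, label %Guarded, label %Deopt
    static void emitWidenableBranch(BasicBlock *BB, Value *Cond,
                                    BasicBlock *Guarded, BasicBlock *Deopt) {
      IRBuilder<> B(BB);
      Function *WCFn = Intrinsic::getDeclaration(
          BB->getModule(), Intrinsic::experimental_widenable_condition);
      Value *WC = B.CreateCall(WCFn);   // placeholder check, strengthened later
      Value *G = B.CreateAnd(Cond, WC); // real checks are folded into the WC
      B.CreateCondBr(G, Guarded, Deopt);
    }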
+
+/// Return the minimum of all analyzeable exit counts. This is an upper bound
+/// on the actual exit count. If there are not at least two analyzeable exits,
+/// returns SCEVCouldNotCompute.
+static const SCEV *getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE,
+ DominatorTree &DT,
+ Loop *L) {
+ SmallVector<BasicBlock *, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ SmallVector<const SCEV *, 4> ExitCounts;
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ continue;
+ assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
+ "We should only have known counts for exiting blocks that "
+ "dominate latch!");
+ ExitCounts.push_back(ExitCount);
+ }
+ if (ExitCounts.size() < 2)
+ return SE.getCouldNotCompute();
+ return SE.getUMinFromMismatchedTypes(ExitCounts);
+}
+
+/// Return true if we can be fairly sure that executing block BB will probably
+/// lead to executing an __llvm_deoptimize. This is a profitability heuristic,
+/// not a legality constraint.
+static bool isVeryLikelyToDeopt(BasicBlock *BB) {
+ while (BB->getUniqueSuccessor())
+ // Will skip side effects, that's okay
+ BB = BB->getUniqueSuccessor();
+
+ return BB->getTerminatingDeoptimizeCall();
+}
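A hand-written sketch (illustrative) of what getTerminatingDeoptimizeCall matches at the end of the unique-successor chain: a deoptimize call immediately followed by the return of its result:

    deopt.exit:
      %rv = call i32 (...) @llvm.experimental.deoptimize.i32(i32 7)
                [ "deopt"(i32 %iv) ]
      ret i32 %rv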
+
+/// This implements an analogous, but entirely distinct transform from the main
+/// loop predication transform. This one is phrased in terms of using a
+/// widenable branch *outside* the loop to allow us to simplify loop exits in a
+/// following loop. This is close in spirit to the IndVarSimplify transform
+/// of the same name, but is materially different in that widening loosens
+/// the legality requirements sharply.
+bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
+ // The transformation performed here aims to widen a widenable condition
+  // above the loop such that all analyzeable exits leading to deopt are dead.
+ // It assumes that the latch is the dominant exit for profitability and that
+ // exits branching to deoptimizing blocks are rarely taken. It relies on the
+ // semantics of widenable expressions for legality. (i.e. being able to fall
+ // down the widenable path spuriously allows us to ignore exit order,
+ // unanalyzeable exits, side effects, exceptional exits, and other challenges
+ // which restrict the applicability of the non-WC based version of this
+ // transform in IndVarSimplify.)
+ //
+ // NOTE ON POISON/UNDEF - We're hoisting an expression above guards which may
+ // imply flags on the expression being hoisted and inserting new uses (flags
+ // are only correct for current uses). The result is that we may be
+ // inserting a branch on the value which can be either poison or undef. In
+ // this case, the branch can legally go either way; we just need to avoid
+ // introducing UB. This is achieved through the use of the freeze
+ // instruction.
+
+ SmallVector<BasicBlock *, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ if (ExitingBlocks.empty())
+ return false; // Nothing to do.
+
+ auto *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+
+ auto *WidenableBR = FindWidenableTerminatorAboveLoop(L, *LI);
+ if (!WidenableBR)
+ return false;
+
+ const SCEV *LatchEC = SE->getExitCount(L, Latch);
+ if (isa<SCEVCouldNotCompute>(LatchEC))
+ return false; // profitability - want hot exit in analyzeable set
+
+ // At this point, we have found an analyzeable latch, and a widenable
+ // condition above the loop. If we have a widenable exit within the loop
+ // (for which we can't compute exit counts), drop the ability to further
+  // widen so that we gain the ability to analyze its exit count and perform this
+ // transform. TODO: It'd be nice to know for sure the exit became
+ // analyzeable after dropping widenability.
+ {
+ bool Invalidate = false;
+
+ for (auto *ExitingBB : ExitingBlocks) {
+ if (LI->getLoopFor(ExitingBB) != L)
+ continue;
+
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+
+ Use *Cond, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
+ L->contains(IfTrueBB)) {
+ WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
+ Invalidate = true;
+ }
+ }
+ if (Invalidate)
+ SE->forgetLoop(L);
+ }
+
+ // The use of umin(all analyzeable exits) instead of latch is subtle, but
+ // important for profitability. We may have a loop which hasn't been fully
+ // canonicalized just yet. If the exit we chose to widen is provably never
+ // taken, we want the widened form to *also* be provably never taken. We
+ // can't guarantee this as a current unanalyzeable exit may later become
+ // analyzeable, but we can at least avoid the obvious cases.
+ const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L);
+ if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||
+ !SE->isLoopInvariant(MinEC, L) ||
+ !isSafeToExpandAt(MinEC, WidenableBR, *SE))
+ return false;
+
+ // Subtlety: We need to avoid inserting additional uses of the WC. We know
+ // that it can only have one transitive use at the moment, and thus moving
+ // that use to just before the branch and inserting code before it and then
+ // modifying the operand is legal.
+ auto *IP = cast<Instruction>(WidenableBR->getCondition());
+ IP->moveBefore(WidenableBR);
+ Rewriter.setInsertPoint(IP);
+ IRBuilder<> B(IP);
+
+ bool Changed = false;
+ Value *MinECV = nullptr; // lazily generated if needed
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ // If our exiting block exits multiple loops, we can only rewrite the
+ // innermost one. Otherwise, we're changing how many times the innermost
+ // loop runs before it exits.
+ if (LI->getLoopFor(ExitingBB) != L)
+ continue;
+
+ // Can't rewrite non-branch yet.
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+
+ // If already constant, nothing to do.
+ if (isa<Constant>(BI->getCondition()))
+ continue;
+
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ if (isa<SCEVCouldNotCompute>(ExitCount) ||
+ ExitCount->getType()->isPointerTy() ||
+ !isSafeToExpandAt(ExitCount, WidenableBR, *SE))
+ continue;
+
+ const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+ BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1);
+ if (!isVeryLikelyToDeopt(ExitBB))
+ // Profitability: indicator of rarely/never taken exit
+ continue;
+
+ // If we found a widenable exit condition, do two things:
+ // 1) fold the widened exit test into the widenable condition
+ // 2) fold the branch to untaken - avoids infinite looping
+
+ Value *ECV = Rewriter.expandCodeFor(ExitCount);
+ if (!MinECV)
+ MinECV = Rewriter.expandCodeFor(MinEC);
+ Value *RHS = MinECV;
+ if (ECV->getType() != RHS->getType()) {
+ Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
+ ECV = B.CreateZExt(ECV, WiderTy);
+ RHS = B.CreateZExt(RHS, WiderTy);
+ }
+ assert(!Latch || DT->dominates(ExitingBB, Latch));
+ Value *NewCond = B.CreateICmp(ICmpInst::ICMP_UGT, ECV, RHS);
+ // Freeze poison or undef to an arbitrary bit pattern to ensure we can
+ // branch without introducing UB. See NOTE ON POISON/UNDEF above for
+ // context.
+ NewCond = B.CreateFreeze(NewCond);
+
+ widenWidenableBranch(WidenableBR, NewCond);
+
+ Value *OldCond = BI->getCondition();
+ BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue));
+ Changed = true;
+ }
+
+ if (Changed)
+    // We just mutated a bunch of loop exits, changing their exit counts
+ // widely. We need to force recomputation of the exit counts given these
+ // changes. Note that all of the inserted exits are never taken, and
+ // should be removed next time the CFG is modified.
+ SE->forgetLoop(L);
+ return Changed;
+}
+
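A compact before/after sketch of the rewrite above (hand-written, illustrative labels). Before, with a widenable branch above the loop and a rare analyzeable deopt exit inside it:

    entry:
      %wc = call i1 @llvm.experimental.widenable.condition()
      br i1 %wc, label %preheader, label %deopt
    ...
    exiting:
      %c = icmp ult i64 %iv, %len              ; analyzeable exit count
      br i1 %c, label %backedge, label %deopt.exit

After predication, the exit test is hoisted into the widenable condition as a frozen bound check, and the in-loop branch is pinned to its untaken side:

    entry:
      %cmp = icmp ugt i64 %exit.count, %min.exit.count
      %fr  = freeze i1 %cmp
      %wc  = call i1 @llvm.experimental.widenable.condition()
      %g   = and i1 %fr, %wc
      br i1 %g, label %preheader, label %deopt
    ...
    exiting:
      br i1 true, label %backedge, label %deopt.exit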
bool LoopPredication::runOnLoop(Loop *Loop) {
L = Loop;
@@ -1004,16 +1241,12 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
cast<BranchInst>(BB->getTerminator()));
}
- if (Guards.empty() && GuardsAsWidenableBranches.empty())
- return false;
-
SCEVExpander Expander(*SE, *DL, "loop-predication");
-
bool Changed = false;
for (auto *Guard : Guards)
Changed |= widenGuardConditions(Guard, Expander);
for (auto *Guard : GuardsAsWidenableBranches)
Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
-
+ Changed |= predicateLoopExits(L, Expander);
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 96e2c2a3ac6b..da13a342ae12 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -45,6 +44,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -53,6 +53,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
#include <cstddef>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 94517996df39..0868e742f4ee 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -18,6 +18,8 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 299f3fc5fb19..b27e65e0adb7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -30,6 +30,8 @@
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils.h"
@@ -660,6 +662,9 @@ static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT,
// Merge Succ into Pred and delete it.
MergeBlockIntoPredecessor(Succ, &DTU, &LI, MSSAU);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
Changed = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
index 65e0dee0225a..1c03a4bf6c02 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -41,14 +41,15 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 7f119175c4a8..e9f368628a08 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -74,7 +74,6 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -97,6 +96,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -108,6 +108,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -115,8 +116,8 @@
#include <cstdlib>
#include <iterator>
#include <limits>
-#include <numeric>
#include <map>
+#include <numeric>
#include <utility>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 8d88be420314..92ad8dafa5ab 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -426,51 +427,76 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
return UnrollResult;
}
+static bool tryToUnrollAndJamLoop(Function &F, DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution &SE,
+ const TargetTransformInfo &TTI,
+ AssumptionCache &AC, DependenceInfo &DI,
+ OptimizationRemarkEmitter &ORE,
+ int OptLevel) {
+ bool DidSomething = false;
+
+  // The loop unroll and jam pass requires loops to be in simplified form and
+  // also needs LCSSA. Since simplification may add new inner loops, it has to
+  // run before the legality and profitability checks. This means running the
+  // pass will simplify all loops, regardless of whether anything ends up
+  // being unroll and jammed.
+ for (auto &L : LI) {
+ DidSomething |=
+ simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
+ DidSomething |= formLCSSARecursively(*L, DT, &LI, &SE);
+ }
+
+ SmallPriorityWorklist<Loop *, 4> Worklist;
+ internal::appendLoopsToWorklist(reverse(LI), Worklist);
+ while (!Worklist.empty()) {
+ Loop *L = Worklist.pop_back_val();
+ formLCSSA(*L, DT, &LI, &SE);
+ LoopUnrollResult Result =
+ tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
+ if (Result != LoopUnrollResult::Unmodified)
+ DidSomething = true;
+ }
+
+ return DidSomething;
+}
+
namespace {
-class LoopUnrollAndJam : public LoopPass {
+class LoopUnrollAndJam : public FunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
unsigned OptLevel;
- LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
+ LoopUnrollAndJam(int OptLevel = 2) : FunctionPass(ID), OptLevel(OptLevel) {
initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
return false;
- Function &F = *L->getHeader()->getParent();
-
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
- // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
- // pass. Function analyses need to be preserved across loop transformations
- // but ORE cannot be preserved (see comment before the pass definition).
- OptimizationRemarkEmitter ORE(&F);
-
- LoopUnrollResult Result =
- tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
+ auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- if (Result == LoopUnrollResult::FullyUnrolled)
- LPM.markLoopAsDeleted(*L);
-
- return Result != LoopUnrollResult::Unmodified;
+ return tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
}
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DependenceAnalysisWrapperPass>();
- getLoopAnalysisUsage(AU);
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
};
@@ -480,10 +506,13 @@ char LoopUnrollAndJam::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
"Unroll and Jam loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
"Unroll and Jam loops", false, false)
@@ -491,26 +520,18 @@ Pass *llvm::createLoopUnrollAndJamPass(int OptLevel) {
return new LoopUnrollAndJam(OptLevel);
}
-PreservedAnalyses LoopUnrollAndJamPass::run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
- const auto &FAM =
- AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
- Function *F = L.getHeader()->getParent();
-
- auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
- // FIXME: This should probably be optional rather than required.
- if (!ORE)
- report_fatal_error(
- "LoopUnrollAndJamPass: OptimizationRemarkEmitterAnalysis not cached at "
- "a higher level");
-
- DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
-
- LoopUnrollResult Result = tryToUnrollAndJamLoop(
- &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, DI, *ORE, OptLevel);
-
- if (Result == LoopUnrollResult::Unmodified)
+PreservedAnalyses LoopUnrollAndJamPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+ TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+ AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
+ DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ DependenceInfo &DI = AM.getResult<DependenceAnalysis>(F);
+ OptimizationRemarkEmitter &ORE =
+ AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+ if (!tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
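With the new-PM entry point now taking a Function, the pass is scheduled in a function pipeline rather than a loop pipeline; a minimal sketch (hypothetical pipeline-construction fragment):

    #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"

    llvm::FunctionPassManager FPM;
    FPM.addPass(llvm::LoopUnrollAndJamPass(/*OptLevel=*/2));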
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index a6d4164c3645..4c2b079c6bb5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -46,6 +46,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -212,7 +213,8 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
// Apply size attributes
bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
- llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI);
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
+ PGSOQueryType::IRPass);
if (OptForSize) {
UP.Threshold = UP.OptSizeThreshold;
UP.PartialThreshold = UP.PartialOptSizeThreshold;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index b410df0c5f68..915e053704b2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -59,6 +59,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -683,7 +684,7 @@ bool LoopUnswitch::processCurrentLoop() {
for (auto &I : *BB) {
auto CS = CallSite(&I);
if (!CS) continue;
- if (CS.hasFnAttr(Attribute::Convergent))
+ if (CS.isConvergent())
return false;
if (auto *II = dyn_cast<InvokeInst>(&I))
if (!II->getUnwindDest()->canSplitPredecessors())
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 2ccb7cae3079..7b9af527d444 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -79,6 +79,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
index e076424d9042..ab7b85e89e7b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index d0fcf38b5a7b..21c6c32e8e02 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index d85f20b3f80c..53671c7bc3d1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 9489e01774d6..45f5929e3b90 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
@@ -60,7 +61,7 @@ static bool lowerGuardIntrinsic(Function &F) {
DeoptIntrinsic->setCallingConv(GuardDecl->getCallingConv());
for (auto *CI : ToLower) {
- makeGuardControlFlowExplicit(DeoptIntrinsic, CI);
+ makeGuardControlFlowExplicit(DeoptIntrinsic, CI, false);
CI->eraseFromParent();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
new file mode 100644
index 000000000000..0ff6ee8bcfcc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -0,0 +1,894 @@
+//===- LowerMatrixIntrinsics.cpp - Lower matrix intrinsics -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Lower matrix intrinsics to vector operations.
+//
+// TODO:
+// * Implement multiply & add fusion
+// * Add remark, summarizing the available matrix optimization opportunities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "lower-matrix-intrinsics"
+
+static cl::opt<bool> EnableShapePropagation("matrix-propagate-shape",
+ cl::init(true));
+
+static cl::opt<bool> AllowContractEnabled(
+ "matrix-allow-contract", cl::init(false), cl::Hidden,
+ cl::desc("Allow the use of FMAs if available and profitable. This may "
+             "produce different results, due to less rounding error."));
+
+namespace {
+
+// Given an element pointer \p BasePtr to the start of a (sub) matrix, compute
+// the start address of column \p Col with type (\p EltType x \p NumRows),
+// assuming \p Stride elements between the starts of two consecutive columns.
+// \p Stride must be >= \p NumRows.
+//
+// Consider a 4x4 matrix like below
+//
+// 0 1 2 3
+// 0 v_0_0 v_0_1 v_0_2 v_0_3
+// 1 v_1_0 v_1_1 v_1_2 v_1_3
+// 2 v_2_0 v_2_1 v_2_2 v_2_3
+// 3 v_3_0 v_3_1 v_3_2 v_3_3
+
+// To compute the column addresses for a 2x3 sub-matrix at row 1 and column 1,
+// we need a pointer to the first element of the submatrix as base pointer.
+// Then we can use computeColumnAddr to compute the addresses for the columns
+// of the sub-matrix.
+//
+// Column 0: computeColumnAddr(Base, 0 (column), 4 (stride), 2 (num rows), ..)
+// -> just returns Base
+// Column 1: computeColumnAddr(Base, 1 (column), 4 (stride), 2 (num rows), ..)
+// -> returns Base + (1 * 4)
+// Column 2: computeColumnAddr(Base, 2 (column), 4 (stride), 2 (num rows), ..)
+// -> returns Base + (2 * 4)
+//
+// The graphic below illustrates the number of elements in a column (marked
+// with |) and the number of skipped elements (marked with }).
+//
+// v_0_0 v_0_1 {v_0_2 {v_0_3
+// Base Col 1 Col 2
+// | | |
+// v_1_0 |v_1_1 |v_1_2 |v_1_3
+// v_2_0 |v_2_1 |v_2_2 |v_2_3
+// v_3_0 {v_3_1 {v_3_2 v_3_3
+//
+Value *computeColumnAddr(Value *BasePtr, Value *Col, Value *Stride,
+ unsigned NumRows, Type *EltType,
+ IRBuilder<> &Builder) {
+
+ assert((!isa<ConstantInt>(Stride) ||
+ cast<ConstantInt>(Stride)->getZExtValue() >= NumRows) &&
+ "Stride must be >= the number of rows.");
+ unsigned AS = cast<PointerType>(BasePtr->getType())->getAddressSpace();
+
+ // Compute the start of the column with index Col as Col * Stride.
+ Value *ColumnStart = Builder.CreateMul(Col, Stride, "col.start");
+
+  // Get a pointer to the start of the selected column. Skip GEP creation
+  // if we select column 0.
+ if (isa<ConstantInt>(ColumnStart) && cast<ConstantInt>(ColumnStart)->isZero())
+ ColumnStart = BasePtr;
+ else
+ ColumnStart = Builder.CreateGEP(EltType, BasePtr, ColumnStart, "col.gep");
+
+ // Cast elementwise column start pointer to a pointer to a column
+ // (EltType x NumRows)*.
+ Type *ColumnType = VectorType::get(EltType, NumRows);
+ Type *ColumnPtrType = PointerType::get(ColumnType, AS);
+ return Builder.CreatePointerCast(ColumnStart, ColumnPtrType, "col.cast");
+}
+
+/// LowerMatrixIntrinsics contains the methods used to lower matrix intrinsics.
+///
+/// Currently, the lowering for each matrix intrinsic is done as follows:
+/// 1. Propagate the shape information from intrinsics to connected
+/// instructions.
+/// 2. Lower instructions with shape information.
+/// 2.1. Get column vectors for each argument. If we already lowered the
+/// definition of an argument, use the produced column vectors directly.
+/// If not, split the operand vector containing an embedded matrix into
+///      a set of column vectors.
+/// 2.2. Lower the instruction in terms of columnwise operations, which yields
+///      a set of column vectors containing the result matrix. Note that we lower
+/// all instructions that have shape information. Besides the intrinsics,
+/// this includes stores for example.
+/// 2.3. Update uses of the lowered instruction. If we have shape information
+/// for a user, there is nothing to do, as we will look up the result
+/// column matrix when lowering the user. For other uses, we embed the
+/// result matrix in a flat vector and update the use.
+/// 2.4. Cache the result column matrix for the instruction we lowered
+/// 3. After we lowered all instructions in a function, remove the now
+/// obsolete instructions.
+///
+class LowerMatrixIntrinsics {
+ Function &Func;
+ const DataLayout &DL;
+ const TargetTransformInfo &TTI;
+
+ /// Wrapper class representing a matrix as a set of column vectors.
+ /// All column vectors must have the same vector type.
+ class ColumnMatrixTy {
+ SmallVector<Value *, 16> Columns;
+
+ public:
+ ColumnMatrixTy() : Columns() {}
+ ColumnMatrixTy(ArrayRef<Value *> Cols)
+ : Columns(Cols.begin(), Cols.end()) {}
+
+ Value *getColumn(unsigned i) const { return Columns[i]; }
+
+ void setColumn(unsigned i, Value *V) { Columns[i] = V; }
+
+ size_t getNumColumns() const { return Columns.size(); }
+ size_t getNumRows() const {
+ assert(Columns.size() > 0 && "Cannot call getNumRows without columns");
+ return cast<VectorType>(Columns[0]->getType())->getNumElements();
+ }
+
+ const SmallVectorImpl<Value *> &getColumnVectors() const { return Columns; }
+
+ SmallVectorImpl<Value *> &getColumnVectors() { return Columns; }
+
+ void addColumn(Value *V) { Columns.push_back(V); }
+
+ iterator_range<SmallVector<Value *, 8>::iterator> columns() {
+ return make_range(Columns.begin(), Columns.end());
+ }
+
+ /// Embed the columns of the matrix into a flat vector by concatenating
+ /// them.
+ Value *embedInVector(IRBuilder<> &Builder) const {
+ return Columns.size() == 1 ? Columns[0]
+ : concatenateVectors(Builder, Columns);
+ }
+ };
+
+ struct ShapeInfo {
+ unsigned NumRows;
+ unsigned NumColumns;
+
+ ShapeInfo(unsigned NumRows = 0, unsigned NumColumns = 0)
+ : NumRows(NumRows), NumColumns(NumColumns) {}
+
+ ShapeInfo(Value *NumRows, Value *NumColumns)
+ : NumRows(cast<ConstantInt>(NumRows)->getZExtValue()),
+ NumColumns(cast<ConstantInt>(NumColumns)->getZExtValue()) {}
+
+ bool operator==(const ShapeInfo &other) {
+ return NumRows == other.NumRows && NumColumns == other.NumColumns;
+ }
+ bool operator!=(const ShapeInfo &other) { return !(*this == other); }
+
+ /// Returns true if shape-information is defined, meaning both dimensions
+ /// are != 0.
+ operator bool() const {
+ assert(NumRows == 0 || NumColumns != 0);
+ return NumRows != 0;
+ }
+ };
+
+ /// Maps instructions to their shape information. The shape information
+ /// describes the shape to be used while lowering. This matches the shape of
+ /// the result value of the instruction, with the only exceptions being store
+ /// instructions and the matrix_columnwise_store intrinsics. For those, the
+  /// shape information describes the value being stored, and indicates that
+  /// those instructions should be lowered using that shape as well.
+ DenseMap<Value *, ShapeInfo> ShapeMap;
+
+  /// List of instructions to remove. While lowering, we do not replace all
+  /// users of a lowered instruction if shape information is available; those
+  /// users need to be removed after lowering has finished.
+ SmallVector<Instruction *, 16> ToRemove;
+
+ /// Map from instructions to their produced column matrix.
+ DenseMap<Value *, ColumnMatrixTy> Inst2ColumnMatrix;
+
+public:
+ LowerMatrixIntrinsics(Function &F, TargetTransformInfo &TTI)
+ : Func(F), DL(F.getParent()->getDataLayout()), TTI(TTI) {}
+
+ /// Return the set of column vectors that a matrix value is lowered to.
+ ///
+  /// If we lowered \p MatrixVal, just return the cached result column matrix.
+  /// Otherwise split the flat vector \p MatrixVal containing a matrix with
+ /// shape \p SI into column vectors.
+ ColumnMatrixTy getMatrix(Value *MatrixVal, const ShapeInfo &SI,
+ IRBuilder<> Builder) {
+ VectorType *VType = dyn_cast<VectorType>(MatrixVal->getType());
+ assert(VType && "MatrixVal must be a vector type");
+ assert(VType->getNumElements() == SI.NumRows * SI.NumColumns &&
+ "The vector size must match the number of matrix elements");
+
+ // Check if we lowered MatrixVal using shape information. In that case,
+ // return the existing column matrix, if it matches the requested shape
+    // information. If there is a mismatch, embed the result in a flat
+ // vector and split it later.
+ auto Found = Inst2ColumnMatrix.find(MatrixVal);
+ if (Found != Inst2ColumnMatrix.end()) {
+ ColumnMatrixTy &M = Found->second;
+ // Return the found matrix, if its shape matches the requested shape
+ // information
+ if (SI.NumRows == M.getNumRows() && SI.NumColumns == M.getNumColumns())
+ return M;
+
+ MatrixVal = M.embedInVector(Builder);
+ }
+
+ // Otherwise split MatrixVal.
+ SmallVector<Value *, 16> SplitVecs;
+ Value *Undef = UndefValue::get(VType);
+ for (unsigned MaskStart = 0; MaskStart < VType->getNumElements();
+ MaskStart += SI.NumRows) {
+ Constant *Mask = createSequentialMask(Builder, MaskStart, SI.NumRows, 0);
+ Value *V = Builder.CreateShuffleVector(MatrixVal, Undef, Mask, "split");
+ SplitVecs.push_back(V);
+ }
+
+ return {SplitVecs};
+ }
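A worked example of the split above (hand-written, illustrative types): a flat <6 x double> carrying a 2 x 3 matrix is cut into three 2-element columns using sequential shuffle masks starting at 0, 2 and 4:

    %col0 = shufflevector <6 x double> %flat, <6 x double> undef,
                          <2 x i32> <i32 0, i32 1>
    %col1 = shufflevector <6 x double> %flat, <6 x double> undef,
                          <2 x i32> <i32 2, i32 3>
    %col2 = shufflevector <6 x double> %flat, <6 x double> undef,
                          <2 x i32> <i32 4, i32 5>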
+
+  /// If \p V already has a known shape, return false. Otherwise set the shape
+ /// for instructions that support it.
+ bool setShapeInfo(Value *V, ShapeInfo Shape) {
+ assert(Shape && "Shape not set");
+ if (isa<UndefValue>(V) || !supportsShapeInfo(V))
+ return false;
+
+ auto SIter = ShapeMap.find(V);
+ if (SIter != ShapeMap.end()) {
+ LLVM_DEBUG(dbgs() << " not overriding existing shape: "
+ << SIter->second.NumRows << " "
+ << SIter->second.NumColumns << " for " << *V << "\n");
+ return false;
+ }
+
+ ShapeMap.insert({V, Shape});
+ LLVM_DEBUG(dbgs() << " " << Shape.NumRows << " x " << Shape.NumColumns
+ << " for " << *V << "\n");
+ return true;
+ }
+
+ bool isUniformShape(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return true;
+
+ switch (I->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul: // Scalar multiply.
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::Sub:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /// Returns true if shape information can be used for \p V. The supported
+ /// instructions must match the instructions that can be lowered by this pass.
+ bool supportsShapeInfo(Value *V) {
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst)
+ return false;
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
+ if (II)
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::matrix_multiply:
+ case Intrinsic::matrix_transpose:
+ case Intrinsic::matrix_columnwise_load:
+ case Intrinsic::matrix_columnwise_store:
+ return true;
+ default:
+ return false;
+ }
+ return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V);
+ }
+
+ /// Propagate the shape information of instructions to their users.
+ /// The work list contains instructions for which we can compute the shape,
+ /// either based on the information provided by matrix intrinsics or known
+ /// shapes of operands.
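+ ///
+ /// For example, a call to llvm.matrix.multiply with dimensions M, N and K
+ /// gets the result shape M x K, and its users are queued for another round
+ /// of propagation.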
+ SmallVector<Instruction *, 32>
+ propagateShapeForward(SmallVectorImpl<Instruction *> &WorkList) {
+ SmallVector<Instruction *, 32> NewWorkList;
+ // Pop an element for which we are guaranteed to have at least one of the
+ // operand shapes. Record the shape for it and then add its users to the
+ // work list.
+ LLVM_DEBUG(dbgs() << "Forward-propagate shapes:\n");
+ while (!WorkList.empty()) {
+ Instruction *Inst = WorkList.back();
+ WorkList.pop_back();
+
+ // New entry, set the value and insert operands
+ bool Propagate = false;
+
+ Value *MatrixA;
+ Value *MatrixB;
+ Value *M;
+ Value *N;
+ Value *K;
+ if (match(Inst, m_Intrinsic<Intrinsic::matrix_multiply>(
+ m_Value(MatrixA), m_Value(MatrixB), m_Value(M),
+ m_Value(N), m_Value(K)))) {
+ Propagate = setShapeInfo(Inst, {M, K});
+ } else if (match(Inst, m_Intrinsic<Intrinsic::matrix_transpose>(
+ m_Value(MatrixA), m_Value(M), m_Value(N)))) {
+ // Flip dimensions.
+ Propagate = setShapeInfo(Inst, {N, M});
+ } else if (match(Inst, m_Intrinsic<Intrinsic::matrix_columnwise_store>(
+ m_Value(MatrixA), m_Value(), m_Value(),
+ m_Value(M), m_Value(N)))) {
+ Propagate = setShapeInfo(Inst, {N, M});
+ } else if (match(Inst,
+ m_Intrinsic<Intrinsic::matrix_columnwise_load>(
+ m_Value(), m_Value(), m_Value(M), m_Value(N)))) {
+ Propagate = setShapeInfo(Inst, {M, N});
+ } else if (match(Inst, m_Store(m_Value(MatrixA), m_Value()))) {
+ auto OpShape = ShapeMap.find(MatrixA);
+ if (OpShape != ShapeMap.end())
+ setShapeInfo(Inst, OpShape->second);
+ continue;
+ } else if (isUniformShape(Inst)) {
+ // Find the first operand that has a known shape and use that.
+ for (auto &Op : Inst->operands()) {
+ auto OpShape = ShapeMap.find(Op.get());
+ if (OpShape != ShapeMap.end()) {
+ Propagate |= setShapeInfo(Inst, OpShape->second);
+ break;
+ }
+ }
+ }
+
+ if (Propagate) {
+ NewWorkList.push_back(Inst);
+ for (auto *User : Inst->users())
+ if (ShapeMap.count(User) == 0)
+ WorkList.push_back(cast<Instruction>(User));
+ }
+ }
+
+ return NewWorkList;
+ }
+
+ /// Propagate the shape to operands of instructions with shape information.
+ /// \p WorkList contains the instructions for which we already know the
+ /// shape.
+ SmallVector<Instruction *, 32>
+ propagateShapeBackward(SmallVectorImpl<Instruction *> &WorkList) {
+ SmallVector<Instruction *, 32> NewWorkList;
+
+ auto pushInstruction = [](Value *V,
+ SmallVectorImpl<Instruction *> &WorkList) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I)
+ WorkList.push_back(I);
+ };
+ // Pop an element with a known shape. Traverse its operands; if an
+ // operand's shape derives from the result shape and is still unknown, set
+ // it and add the operand to the worklist.
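+ // For example, if the result of llvm.matrix.multiply with dimensions M, N
+ // and K has a known shape, shape M x N is set for the first operand and
+ // N x K for the second one.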
+ LLVM_DEBUG(dbgs() << "Backward-propagate shapes:\n");
+ while (!WorkList.empty()) {
+ Value *V = WorkList.back();
+ WorkList.pop_back();
+
+ size_t BeforeProcessingV = WorkList.size();
+ if (!isa<Instruction>(V))
+ continue;
+
+ Value *MatrixA;
+ Value *MatrixB;
+ Value *M;
+ Value *N;
+ Value *K;
+ if (match(V, m_Intrinsic<Intrinsic::matrix_multiply>(
+ m_Value(MatrixA), m_Value(MatrixB), m_Value(M),
+ m_Value(N), m_Value(K)))) {
+ if (setShapeInfo(MatrixA, {M, N}))
+ pushInstruction(MatrixA, WorkList);
+
+ if (setShapeInfo(MatrixB, {N, K}))
+ pushInstruction(MatrixB, WorkList);
+
+ } else if (match(V, m_Intrinsic<Intrinsic::matrix_transpose>(
+ m_Value(MatrixA), m_Value(M), m_Value(N)))) {
+ // Flip dimensions.
+ if (setShapeInfo(MatrixA, {M, N}))
+ pushInstruction(MatrixA, WorkList);
+ } else if (match(V, m_Intrinsic<Intrinsic::matrix_columnwise_store>(
+ m_Value(MatrixA), m_Value(), m_Value(),
+ m_Value(M), m_Value(N)))) {
+ if (setShapeInfo(MatrixA, {M, N})) {
+ pushInstruction(MatrixA, WorkList);
+ }
+ } else if (isa<LoadInst>(V) ||
+ match(V, m_Intrinsic<Intrinsic::matrix_columnwise_load>())) {
+ // Nothing to do, no matrix input.
+ } else if (isa<StoreInst>(V)) {
+ // Nothing to do. We forward-propagated to this store, so backward
+ // propagation would only reach an instruction with an already known
+ // shape.
+ } else if (isUniformShape(V)) {
+ // Propagate to all operands.
+ ShapeInfo Shape = ShapeMap[V];
+ for (Use &U : cast<Instruction>(V)->operands()) {
+ if (setShapeInfo(U.get(), Shape))
+ pushInstruction(U.get(), WorkList);
+ }
+ }
+ // After we have discovered new shape info for instructions added to the
+ // worklist, we use their users as seeds for the next round of forward
+ // propagation.
+ for (size_t I = BeforeProcessingV; I != WorkList.size(); I++)
+ for (User *U : WorkList[I]->users())
+ if (isa<Instruction>(U) && V != U)
+ NewWorkList.push_back(cast<Instruction>(U));
+ }
+ return NewWorkList;
+ }
+
+ bool Visit() {
+ if (EnableShapePropagation) {
+ SmallVector<Instruction *, 32> WorkList;
+
+ // Initially only the shape of matrix intrinsics is known.
+ // Initialize the work list with ops carrying shape information.
+ for (BasicBlock &BB : Func)
+ for (Instruction &Inst : BB) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (!II)
+ continue;
+
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::matrix_multiply:
+ case Intrinsic::matrix_transpose:
+ case Intrinsic::matrix_columnwise_load:
+ case Intrinsic::matrix_columnwise_store:
+ WorkList.push_back(&Inst);
+ break;
+ default:
+ break;
+ }
+ }
+ // Propagate shapes until nothing changes any longer.
+ while (!WorkList.empty()) {
+ WorkList = propagateShapeForward(WorkList);
+ WorkList = propagateShapeBackward(WorkList);
+ }
+ }
+
+ ReversePostOrderTraversal<Function *> RPOT(&Func);
+ bool Changed = false;
+ for (auto *BB : RPOT) {
+ for (Instruction &Inst : make_early_inc_range(*BB)) {
+ IRBuilder<> Builder(&Inst);
+
+ if (CallInst *CInst = dyn_cast<CallInst>(&Inst))
+ Changed |= VisitCallInst(CInst);
+
+ Value *Op1;
+ Value *Op2;
+ if (auto *BinOp = dyn_cast<BinaryOperator>(&Inst))
+ Changed |= VisitBinaryOperator(BinOp);
+ if (match(&Inst, m_Load(m_Value(Op1))))
+ Changed |= VisitLoad(&Inst, Op1, Builder);
+ else if (match(&Inst, m_Store(m_Value(Op1), m_Value(Op2))))
+ Changed |= VisitStore(&Inst, Op1, Op2, Builder);
+ }
+ }
+
+ for (Instruction *Inst : reverse(ToRemove))
+ Inst->eraseFromParent();
+
+ return Changed;
+ }
+
+ LoadInst *createColumnLoad(Value *ColumnPtr, Type *EltType,
+ IRBuilder<> Builder) {
+ unsigned Align = DL.getABITypeAlignment(EltType);
+ return Builder.CreateAlignedLoad(ColumnPtr, Align, "col.load");
+ }
+
+ StoreInst *createColumnStore(Value *ColumnValue, Value *ColumnPtr,
+ Type *EltType, IRBuilder<> Builder) {
+ unsigned Align = DL.getABITypeAlignment(EltType);
+ return Builder.CreateAlignedStore(ColumnValue, ColumnPtr, Align);
+ }
+
+ /// Turns \p BasePtr into an elementwise pointer to \p EltType.
+ Value *createElementPtr(Value *BasePtr, Type *EltType, IRBuilder<> &Builder) {
+ unsigned AS = cast<PointerType>(BasePtr->getType())->getAddressSpace();
+ Type *EltPtrType = PointerType::get(EltType, AS);
+ return Builder.CreatePointerCast(BasePtr, EltPtrType);
+ }
+
+ /// Replace intrinsic calls
+ bool VisitCallInst(CallInst *Inst) {
+ if (!Inst->getCalledFunction() || !Inst->getCalledFunction()->isIntrinsic())
+ return false;
+
+ switch (Inst->getCalledFunction()->getIntrinsicID()) {
+ case Intrinsic::matrix_multiply:
+ LowerMultiply(Inst);
+ break;
+ case Intrinsic::matrix_transpose:
+ LowerTranspose(Inst);
+ break;
+ case Intrinsic::matrix_columnwise_load:
+ LowerColumnwiseLoad(Inst);
+ break;
+ case Intrinsic::matrix_columnwise_store:
+ LowerColumnwiseStore(Inst);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ void LowerLoad(Instruction *Inst, Value *Ptr, Value *Stride,
+ ShapeInfo Shape) {
+ IRBuilder<> Builder(Inst);
+ auto VType = cast<VectorType>(Inst->getType());
+ Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder);
+ ColumnMatrixTy Result;
+ // Stride is the distance between the start of one column and the start
+ // of the next.
+ for (unsigned C = 0, E = Shape.NumColumns; C < E; ++C) {
+ Value *GEP =
+ computeColumnAddr(EltPtr, Builder.getInt32(C), Stride, Shape.NumRows,
+ VType->getElementType(), Builder);
+ Value *Column = createColumnLoad(GEP, VType->getElementType(), Builder);
+ Result.addColumn(Column);
+ }
+
+ finalizeLowering(Inst, Result, Builder);
+ }
+
+ /// Lowers llvm.matrix.columnwise.load.
+ ///
+ /// The intrinsic loads a matrix from memory using a stride between columns.
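+ ///
+ /// For example, for a 2 x 2 matrix with stride 4, the second column starts
+ /// 4 elements after the first one in memory.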
+ void LowerColumnwiseLoad(CallInst *Inst) {
+ Value *Ptr = Inst->getArgOperand(0);
+ Value *Stride = Inst->getArgOperand(1);
+ LowerLoad(Inst, Ptr, Stride,
+ {Inst->getArgOperand(2), Inst->getArgOperand(3)});
+ }
+
+ void LowerStore(Instruction *Inst, Value *Matrix, Value *Ptr, Value *Stride,
+ ShapeInfo Shape) {
+ IRBuilder<> Builder(Inst);
+ auto VType = cast<VectorType>(Matrix->getType());
+ Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder);
+ auto LM = getMatrix(Matrix, Shape, Builder);
+ for (auto C : enumerate(LM.columns())) {
+ Value *GEP =
+ computeColumnAddr(EltPtr, Builder.getInt32(C.index()), Stride,
+ Shape.NumRows, VType->getElementType(), Builder);
+ createColumnStore(C.value(), GEP, VType->getElementType(), Builder);
+ }
+
+ ToRemove.push_back(Inst);
+ }
+
+ /// Lowers llvm.matrix.columnwise.store.
+ ///
+ /// The intrinsic stores a matrix back to memory using a stride between
+ /// columns.
+ void LowerColumnwiseStore(CallInst *Inst) {
+ Value *Matrix = Inst->getArgOperand(0);
+ Value *Ptr = Inst->getArgOperand(1);
+ Value *Stride = Inst->getArgOperand(2);
+ LowerStore(Inst, Matrix, Ptr, Stride,
+ {Inst->getArgOperand(3), Inst->getArgOperand(4)});
+ }
+
+ /// Extract a column vector of \p NumElts starting at index (\p I, \p J) from
+ /// the matrix \p LM represented as a vector of column vectors.
+ Value *extractVector(const ColumnMatrixTy &LM, unsigned I, unsigned J,
+ unsigned NumElts, IRBuilder<> Builder) {
+ Value *Col = LM.getColumn(J);
+ Value *Undef = UndefValue::get(Col->getType());
+ Constant *Mask = createSequentialMask(Builder, I, NumElts, 0);
+ return Builder.CreateShuffleVector(Col, Undef, Mask, "block");
+ }
+
+ // Set elements I..I+BlockNumElts-1 of Col to Block.
+ Value *insertVector(Value *Col, unsigned I, Value *Block,
+ IRBuilder<> Builder) {
+
+ // First, bring Block to the same size as Col
+ unsigned BlockNumElts =
+ cast<VectorType>(Block->getType())->getNumElements();
+ unsigned NumElts = cast<VectorType>(Col->getType())->getNumElements();
+ assert(NumElts >= BlockNumElts && "Too few elements for current block");
+
+ Value *ExtendMask =
+ createSequentialMask(Builder, 0, BlockNumElts, NumElts - BlockNumElts);
+ Value *Undef = UndefValue::get(Block->getType());
+ Block = Builder.CreateShuffleVector(Block, Undef, ExtendMask);
+
+ // If Col has 7 elements, I is 2, and BlockNumElts is 2, the mask is: 0, 1, 7,
+ // 8, 4, 5, 6
+ SmallVector<Constant *, 16> Mask;
+ unsigned i;
+ for (i = 0; i < I; i++)
+ Mask.push_back(Builder.getInt32(i));
+
+ unsigned VecNumElts = cast<VectorType>(Col->getType())->getNumElements();
+ for (; i < I + BlockNumElts; i++)
+ Mask.push_back(Builder.getInt32(i - I + VecNumElts));
+
+ for (; i < VecNumElts; i++)
+ Mask.push_back(Builder.getInt32(i));
+
+ Value *MaskVal = ConstantVector::get(Mask);
+
+ return Builder.CreateShuffleVector(Col, Block, MaskVal);
+ }
+
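+ /// Return \p A * \p B if \p Sum is nullptr, otherwise \p Sum + \p A * \p B,
+ /// using integer or FP operations depending on \p UseFPOp. May emit
+ /// llvm.fmuladd when \p AllowContraction is set.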
+ Value *createMulAdd(Value *Sum, Value *A, Value *B, bool UseFPOp,
+ IRBuilder<> &Builder, bool AllowContraction) {
+
+ if (!Sum)
+ return UseFPOp ? Builder.CreateFMul(A, B) : Builder.CreateMul(A, B);
+
+ if (UseFPOp) {
+ if (AllowContraction) {
+ // Use fmuladd for floating point operations and let the backend decide
+ // if that's profitable.
+ Value *FMulAdd = Intrinsic::getDeclaration(
+ Func.getParent(), Intrinsic::fmuladd, A->getType());
+ return Builder.CreateCall(FMulAdd, {A, B, Sum});
+ }
+ Value *Mul = Builder.CreateFMul(A, B);
+ return Builder.CreateFAdd(Sum, Mul);
+ }
+
+ Value *Mul = Builder.CreateMul(A, B);
+ return Builder.CreateAdd(Sum, Mul);
+ }
+
+ /// Cache \p Matrix as result of \p Inst and update the uses of \p Inst. For
+ /// users with shape information, there's nothing to do: they will use the
+ /// cached value when they are lowered. For other users, \p Matrix is
+ /// flattened and the uses are updated to use it. Also marks \p Inst for
+ /// deletion.
+ void finalizeLowering(Instruction *Inst, ColumnMatrixTy Matrix,
+ IRBuilder<> &Builder) {
+ Inst2ColumnMatrix.insert(std::make_pair(Inst, Matrix));
+
+ ToRemove.push_back(Inst);
+ Value *Flattened = nullptr;
+ for (auto I = Inst->use_begin(), E = Inst->use_end(); I != E;) {
+ Use &U = *I++;
+ if (ShapeMap.find(U.getUser()) == ShapeMap.end()) {
+ if (!Flattened)
+ Flattened = Matrix.embedInVector(Builder);
+ U.set(Flattened);
+ }
+ }
+ }
+
+ /// Lowers llvm.matrix.multiply.
+ void LowerMultiply(CallInst *MatMul) {
+ IRBuilder<> Builder(MatMul);
+ auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
+ ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
+ ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
+
+ const ColumnMatrixTy &Lhs =
+ getMatrix(MatMul->getArgOperand(0), LShape, Builder);
+ const ColumnMatrixTy &Rhs =
+ getMatrix(MatMul->getArgOperand(1), RShape, Builder);
+
+ const unsigned R = LShape.NumRows;
+ const unsigned M = LShape.NumColumns;
+ const unsigned C = RShape.NumColumns;
+ assert(M == RShape.NumRows);
+
+ // Initialize the output
+ ColumnMatrixTy Result;
+ for (unsigned J = 0; J < C; ++J)
+ Result.addColumn(UndefValue::get(VectorType::get(EltType, R)));
+
+ const unsigned VF = std::max(TTI.getRegisterBitWidth(true) /
+ EltType->getPrimitiveSizeInBits(),
+ uint64_t(1));
+
+ bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) &&
+ MatMul->hasAllowContract());
+ // Multiply columns from the first operand with scalars from the second
+ // operand. Then move along the K axis and accumulate the columns. With
+ // this, the adds can be vectorized without reassociation.
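+ // For example, for C = A * B with 2 x 2 operands, column J of the result
+ // is computed as A.col(0) * B[0][J] + A.col(1) * B[1][J], with each scalar
+ // B[K][J] splat into a vector before the multiply.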
+ for (unsigned J = 0; J < C; ++J) {
+ unsigned BlockSize = VF;
+ for (unsigned I = 0; I < R; I += BlockSize) {
+ // Gradually lower the vectorization factor to cover the remainder.
+ while (I + BlockSize > R)
+ BlockSize /= 2;
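+ // E.g. with R == 3 and VF == 4, rows 0-1 form one block of 2 and row 2
+ // a block of 1.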
+
+ Value *Sum = nullptr;
+ for (unsigned K = 0; K < M; ++K) {
+ Value *L = extractVector(Lhs, I, K, BlockSize, Builder);
+ Value *RH = Builder.CreateExtractElement(Rhs.getColumn(J), K);
+ Value *Splat = Builder.CreateVectorSplat(BlockSize, RH, "splat");
+ Sum = createMulAdd(Sum, L, Splat, EltType->isFloatingPointTy(),
+ Builder, AllowContract);
+ }
+ Result.setColumn(J, insertVector(Result.getColumn(J), I, Sum, Builder));
+ }
+ }
+ finalizeLowering(MatMul, Result, Builder);
+ }
+
+ /// Lowers llvm.matrix.transpose.
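+ ///
+ /// For example, for a 2 x 3 input, the first row of the input becomes the
+ /// first column of the 3 x 2 result.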
+ void LowerTranspose(CallInst *Inst) {
+ ColumnMatrixTy Result;
+ IRBuilder<> Builder(Inst);
+ Value *InputVal = Inst->getArgOperand(0);
+ VectorType *VectorTy = cast<VectorType>(InputVal->getType());
+ ShapeInfo ArgShape(Inst->getArgOperand(1), Inst->getArgOperand(2));
+ ColumnMatrixTy InputMatrix = getMatrix(InputVal, ArgShape, Builder);
+
+ for (unsigned Row = 0; Row < ArgShape.NumRows; ++Row) {
+ // Build a single column vector for this row. First initialize it.
+ Value *ResultColumn = UndefValue::get(
+ VectorType::get(VectorTy->getElementType(), ArgShape.NumColumns));
+
+ // Go through the elements of this row and insert them into the resulting
+ // column vector.
+ for (auto C : enumerate(InputMatrix.columns())) {
+ Value *Elt = Builder.CreateExtractElement(C.value(), Row);
+ // We insert at the column index since that is the row index after the
+ // transpose.
+ ResultColumn =
+ Builder.CreateInsertElement(ResultColumn, Elt, C.index());
+ }
+ Result.addColumn(ResultColumn);
+ }
+
+ finalizeLowering(Inst, Result, Builder);
+ }
+
+ /// Lower load instructions, if shape information is available.
+ bool VisitLoad(Instruction *Inst, Value *Ptr, IRBuilder<> &Builder) {
+ auto I = ShapeMap.find(Inst);
+ if (I == ShapeMap.end())
+ return false;
+
+ LowerLoad(Inst, Ptr, Builder.getInt32(I->second.NumRows), I->second);
+ return true;
+ }
+
+ bool VisitStore(Instruction *Inst, Value *StoredVal, Value *Ptr,
+ IRBuilder<> &Builder) {
+ auto I = ShapeMap.find(StoredVal);
+ if (I == ShapeMap.end())
+ return false;
+
+ LowerStore(Inst, StoredVal, Ptr, Builder.getInt32(I->second.NumRows),
+ I->second);
+ return true;
+ }
+
+ /// Lower binary operators, if shape information is available.
+ bool VisitBinaryOperator(BinaryOperator *Inst) {
+ auto I = ShapeMap.find(Inst);
+ if (I == ShapeMap.end())
+ return false;
+
+ Value *Lhs = Inst->getOperand(0);
+ Value *Rhs = Inst->getOperand(1);
+
+ IRBuilder<> Builder(Inst);
+ ShapeInfo &Shape = I->second;
+
+ ColumnMatrixTy LoweredLhs = getMatrix(Lhs, Shape, Builder);
+ ColumnMatrixTy LoweredRhs = getMatrix(Rhs, Shape, Builder);
+
+ // Apply the operator to each pair of columns and collect the result
+ // columns.
+ ColumnMatrixTy Result;
+ auto BuildColumnOp = [&Builder, Inst](Value *LHS, Value *RHS) {
+ switch (Inst->getOpcode()) {
+ case Instruction::Add:
+ return Builder.CreateAdd(LHS, RHS);
+ case Instruction::Mul:
+ return Builder.CreateMul(LHS, RHS);
+ case Instruction::Sub:
+ return Builder.CreateSub(LHS, RHS);
+ case Instruction::FAdd:
+ return Builder.CreateFAdd(LHS, RHS);
+ case Instruction::FMul:
+ return Builder.CreateFMul(LHS, RHS);
+ case Instruction::FSub:
+ return Builder.CreateFSub(LHS, RHS);
+ default:
+ llvm_unreachable("Unsupported binary operator for matrix");
+ }
+ };
+ for (unsigned C = 0; C < Shape.NumColumns; ++C)
+ Result.addColumn(
+ BuildColumnOp(LoweredLhs.getColumn(C), LoweredRhs.getColumn(C)));
+
+ finalizeLowering(Inst, Result, Builder);
+ return true;
+ }
+};
+} // namespace
+
+PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ LowerMatrixIntrinsics LMT(F, TTI);
+ if (LMT.Visit()) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
+}
+
+namespace {
+
+class LowerMatrixIntrinsicsLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ LowerMatrixIntrinsicsLegacyPass() : FunctionPass(ID) {
+ initializeLowerMatrixIntrinsicsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ LowerMatrixIntrinsics LMT(F, *TTI);
+ bool C = LMT.Visit();
+ return C;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+} // namespace
+
+static const char pass_name[] = "Lower the matrix intrinsics";
+char LowerMatrixIntrinsicsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LowerMatrixIntrinsicsLegacyPass, DEBUG_TYPE, pass_name,
+ false, false)
+INITIALIZE_PASS_END(LowerMatrixIntrinsicsLegacyPass, DEBUG_TYPE, pass_name,
+ false, false)
+
+Pass *llvm::createLowerMatrixIntrinsicsPass() {
+ return new LowerMatrixIntrinsicsLegacyPass();
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
index 5342f2ddcb6b..73b2cd06fa23 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
index 789232e0f5ce..5ffae128f5f0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
@@ -33,10 +33,11 @@
#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
@@ -56,23 +57,11 @@ struct MakeGuardsExplicitLegacyPass : public FunctionPass {
static void turnToExplicitForm(CallInst *Guard, Function *DeoptIntrinsic) {
// Replace the guard with an explicit branch (just like in GuardWidening).
- BasicBlock *BB = Guard->getParent();
- makeGuardControlFlowExplicit(DeoptIntrinsic, Guard);
- BranchInst *ExplicitGuard = cast<BranchInst>(BB->getTerminator());
- assert(ExplicitGuard->isConditional() && "Must be!");
+ BasicBlock *OriginalBB = Guard->getParent();
+ (void)OriginalBB;
+ makeGuardControlFlowExplicit(DeoptIntrinsic, Guard, true);
+ assert(isWidenableBranch(OriginalBB->getTerminator()) && "should hold");
- // We want the guard to be expressed as explicit control flow, but still be
- // widenable. For that, we add Widenable Condition intrinsic call to the
- // guard's condition.
- IRBuilder<> B(ExplicitGuard);
- auto *WidenableCondition =
- B.CreateIntrinsic(Intrinsic::experimental_widenable_condition,
- {}, {}, nullptr, "widenable_cond");
- WidenableCondition->setCallingConv(Guard->getCallingConv());
- auto *NewCond =
- B.CreateAnd(ExplicitGuard->getCondition(), WidenableCondition);
- NewCond->setName("exiplicit_guard_cond");
- ExplicitGuard->setCondition(NewCond);
Guard->eraseFromParent();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2364748efb05..c24fa40860eb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -24,7 +24,6 @@
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -49,12 +48,14 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -387,16 +388,12 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
StartPtr = Range.StartPtr;
// Determine alignment
- unsigned Alignment = Range.Alignment;
- if (Alignment == 0) {
- Type *EltType =
- cast<PointerType>(StartPtr->getType())->getElementType();
- Alignment = DL.getABITypeAlignment(EltType);
- }
-
- AMemSet =
- Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+ const Align Alignment = DL.getValueOrABITypeAlignment(
+ MaybeAlign(Range.Alignment),
+ cast<PointerType>(StartPtr->getType())->getElementType());
+ AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
+ Alignment);
LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
: Range.TheStores) dbgs()
<< *SI << '\n';
@@ -416,25 +413,21 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
return AMemSet;
}
-static unsigned findStoreAlignment(const DataLayout &DL, const StoreInst *SI) {
- unsigned StoreAlign = SI->getAlignment();
- if (!StoreAlign)
- StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
- return StoreAlign;
+static Align findStoreAlignment(const DataLayout &DL, const StoreInst *SI) {
+ return DL.getValueOrABITypeAlignment(MaybeAlign(SI->getAlignment()),
+ SI->getOperand(0)->getType());
}
-static unsigned findLoadAlignment(const DataLayout &DL, const LoadInst *LI) {
- unsigned LoadAlign = LI->getAlignment();
- if (!LoadAlign)
- LoadAlign = DL.getABITypeAlignment(LI->getType());
- return LoadAlign;
+static Align findLoadAlignment(const DataLayout &DL, const LoadInst *LI) {
+ return DL.getValueOrABITypeAlignment(MaybeAlign(LI->getAlignment()),
+ LI->getType());
}
-static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
- const LoadInst *LI) {
- unsigned StoreAlign = findStoreAlignment(DL, SI);
- unsigned LoadAlign = findLoadAlignment(DL, LI);
- return MinAlign(StoreAlign, LoadAlign);
+static Align findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
+ const LoadInst *LI) {
+ Align StoreAlign = findStoreAlignment(DL, SI);
+ Align LoadAlign = findLoadAlignment(DL, LI);
+ return commonAlignment(StoreAlign, LoadAlign);
}
// This method tries to lift a store instruction before position P.
@@ -649,7 +642,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LI, SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
DL.getTypeStoreSize(SI->getOperand(0)->getType()),
- findCommonAlignment(DL, SI, LI), C);
+ findCommonAlignment(DL, SI, LI).value(), C);
if (changed) {
MD->removeInstruction(SI);
SI->eraseFromParent();
@@ -682,12 +675,11 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
auto *T = V->getType();
if (T->isAggregateType()) {
uint64_t Size = DL.getTypeStoreSize(T);
- unsigned Align = SI->getAlignment();
- if (!Align)
- Align = DL.getABITypeAlignment(T);
+ const Align MA =
+ DL.getValueOrABITypeAlignment(MaybeAlign(SI->getAlignment()), T);
IRBuilder<> Builder(SI);
auto *M =
- Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, Align);
+ Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, MA);
LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
@@ -982,12 +974,12 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// example we could be moving from movaps -> movq on x86.
IRBuilder<> Builder(M);
if (UseMemMove)
- Builder.CreateMemMove(M->getRawDest(), M->getDestAlignment(),
- MDep->getRawSource(), MDep->getSourceAlignment(),
+ Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
+ MDep->getRawSource(), MDep->getSourceAlign(),
M->getLength(), M->isVolatile());
else
- Builder.CreateMemCpy(M->getRawDest(), M->getDestAlignment(),
- MDep->getRawSource(), MDep->getSourceAlignment(),
+ Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
+ MDep->getRawSource(), MDep->getSourceAlign(),
M->getLength(), M->isVolatile());
// Remove the instruction we're replacing.
@@ -1057,7 +1049,7 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Builder.CreateMemSet(
Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
SrcSize),
- MemSet->getOperand(1), MemsetLen, Align);
+ MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
MD->removeInstruction(MemSet);
MemSet->eraseFromParent();
@@ -1125,8 +1117,8 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
}
IRBuilder<> Builder(MemCpy);
- Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
- CopySize, MemCpy->getDestAlignment());
+ Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), CopySize,
+ MaybeAlign(MemCpy->getDestAlignment()));
return true;
}
@@ -1153,7 +1145,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
M->getModule()->getDataLayout())) {
IRBuilder<> Builder(M);
Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
- M->getDestAlignment(), false);
+ MaybeAlign(M->getDestAlignment()), false);
MD->removeInstruction(M);
M->eraseFromParent();
++NumCpyToSet;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index 98a45b391319..ce1e142101b8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -50,6 +50,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 9799ea7960ec..6b0d0202d9bb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -83,6 +83,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index 1260bd39cdee..bba9082e31b2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -82,7 +82,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -101,10 +100,12 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
index b213264de557..6a643480f312 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -76,7 +76,6 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -94,6 +93,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
@@ -106,6 +106,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PredicateInfo.h"
#include "llvm/Transforms/Utils/VNCoercion.h"
#include <algorithm>
@@ -489,11 +490,11 @@ namespace {
class NewGVN {
Function &F;
- DominatorTree *DT;
- const TargetLibraryInfo *TLI;
- AliasAnalysis *AA;
- MemorySSA *MSSA;
- MemorySSAWalker *MSSAWalker;
+ DominatorTree *DT = nullptr;
+ const TargetLibraryInfo *TLI = nullptr;
+ AliasAnalysis *AA = nullptr;
+ MemorySSA *MSSA = nullptr;
+ MemorySSAWalker *MSSAWalker = nullptr;
const DataLayout &DL;
std::unique_ptr<PredicateInfo> PredInfo;
@@ -505,7 +506,7 @@ class NewGVN {
const SimplifyQuery SQ;
// Number of function arguments, used by ranking
- unsigned int NumFuncArgs;
+ unsigned int NumFuncArgs = 0;
// RPOOrdering of basic blocks
DenseMap<const DomTreeNode *, unsigned> RPOOrdering;
@@ -516,9 +517,9 @@ class NewGVN {
// starts out in, and represents any value. Being an optimistic analysis,
// anything in the TOP class has the value TOP, which is indeterminate and
// equivalent to everything.
- CongruenceClass *TOPClass;
+ CongruenceClass *TOPClass = nullptr;
std::vector<CongruenceClass *> CongruenceClasses;
- unsigned NextCongruenceNum;
+ unsigned NextCongruenceNum = 0;
// Value Mappings.
DenseMap<Value *, CongruenceClass *> ValueToClass;
@@ -862,7 +863,7 @@ private:
// Debug counter info. When verifying, we have to reset the value numbering
// debug counter to the same state it started in to get the same results.
- int64_t StartingVNCounter;
+ int64_t StartingVNCounter = 0;
};
} // end anonymous namespace
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 68a0f5151ad5..58763ec72ece 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index beb299272ed8..5c4a89977c38 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -47,6 +47,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ADT/SetVector.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 124f625ef7b6..41940e980faa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -30,7 +30,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -50,12 +49,14 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <utility>
@@ -173,7 +174,7 @@ void ReassociatePass::BuildRankMap(Function &F,
<< "\n");
}
- // Traverse basic blocks in ReversePostOrder
+ // Traverse basic blocks in ReversePostOrder.
for (BasicBlock *BB : RPOT) {
unsigned BBRank = RankMap[BB] = ++Rank << 16;
@@ -1898,6 +1899,7 @@ void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
ValueRankMap.erase(I);
Insts.remove(I);
RedoInsts.remove(I);
+ llvm::salvageDebugInfoOrMarkUndef(*I);
I->eraseFromParent();
for (auto Op : Ops)
if (Instruction *OpInst = dyn_cast<Instruction>(Op))
@@ -1914,6 +1916,7 @@ void ReassociatePass::EraseInst(Instruction *I) {
// Erase the dead instruction.
ValueRankMap.erase(I);
RedoInsts.remove(I);
+ llvm::salvageDebugInfoOrMarkUndef(*I);
I->eraseFromParent();
// Optimize its operands.
SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 3296322e00d5..0716c1320982 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -16,16 +16,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 48bbdd8d1b33..b242f100faff 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -54,6 +54,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
index 10fbdc8aacd2..e696ea83a300 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -29,7 +29,6 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/IR/BasicBlock.h"
@@ -49,12 +48,14 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PredicateInfo.h"
#include <cassert>
#include <utility>
@@ -2196,7 +2197,7 @@ bool llvm::runIPSCCP(
findReturnsToZap(*F, ReturnsToZap, Solver);
}
- for (const auto &F : Solver.getMRVFunctionsTracked()) {
+ for (auto F : Solver.getMRVFunctionsTracked()) {
assert(F->getReturnType()->isStructTy() &&
"The return type should be a struct");
StructType *STy = cast<StructType>(F->getReturnType());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
index 74b8ff913050..89916e43fce2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -41,7 +41,6 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -71,6 +70,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -80,6 +80,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
@@ -361,7 +362,7 @@ private:
/// The beginning and ending offsets of the alloca for this
/// partition.
- uint64_t BeginOffset, EndOffset;
+ uint64_t BeginOffset = 0, EndOffset = 0;
/// The start and end iterators of this partition.
iterator SI, SJ;
@@ -1680,24 +1681,20 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
}
/// Compute the adjusted alignment for a load or store from an offset.
-static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset,
- const DataLayout &DL) {
- unsigned Alignment;
+static Align getAdjustedAlignment(Instruction *I, uint64_t Offset,
+ const DataLayout &DL) {
+ MaybeAlign Alignment;
Type *Ty;
if (auto *LI = dyn_cast<LoadInst>(I)) {
- Alignment = LI->getAlignment();
+ Alignment = MaybeAlign(LI->getAlignment());
Ty = LI->getType();
} else if (auto *SI = dyn_cast<StoreInst>(I)) {
- Alignment = SI->getAlignment();
+ Alignment = MaybeAlign(SI->getAlignment());
Ty = SI->getValueOperand()->getType();
} else {
llvm_unreachable("Only loads and stores are allowed!");
}
-
- if (!Alignment)
- Alignment = DL.getABITypeAlignment(Ty);
-
- return MinAlign(Alignment, Offset);
+ return commonAlignment(DL.getValueOrABITypeAlignment(Alignment, Ty), Offset);
}
/// Test whether we can convert a value from the old to the new type.
@@ -2300,9 +2297,9 @@ class llvm::sroa::AllocaSliceRewriter
// The new offsets of the slice currently being rewritten relative to the
// original alloca.
- uint64_t NewBeginOffset, NewEndOffset;
+ uint64_t NewBeginOffset = 0, NewEndOffset = 0;
- uint64_t SliceSize;
+ uint64_t SliceSize = 0;
bool IsSplittable = false;
bool IsSplit = false;
Use *OldUse = nullptr;
@@ -2432,13 +2429,14 @@ private:
///
/// You can optionally pass a type to this routine and if that type's ABI
/// alignment is itself suitable, this will return zero.
- unsigned getSliceAlign(Type *Ty = nullptr) {
- unsigned NewAIAlign = NewAI.getAlignment();
- if (!NewAIAlign)
- NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
- unsigned Align =
- MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
- return (Ty && Align == DL.getABITypeAlignment(Ty)) ? 0 : Align;
+ MaybeAlign getSliceAlign(Type *Ty = nullptr) {
+ const MaybeAlign NewAIAlign = DL.getValueOrABITypeAlignment(
+ MaybeAlign(NewAI.getAlignment()), NewAI.getAllocatedType());
+ const MaybeAlign Align =
+ commonAlignment(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
+ return (Ty && Align && Align->value() == DL.getABITypeAlignment(Ty))
+ ? None
+ : Align;
}
unsigned getIndex(uint64_t Offset) {
@@ -2800,7 +2798,7 @@ private:
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
CallInst *New = IRB.CreateMemSet(
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
- getSliceAlign(), II.isVolatile());
+ MaybeAlign(getSliceAlign()), II.isVolatile());
if (AATags)
New->setAAMetadata(AATags);
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
@@ -2886,7 +2884,7 @@ private:
assert((IsDest && II.getRawDest() == OldPtr) ||
(!IsDest && II.getRawSource() == OldPtr));
- unsigned SliceAlign = getSliceAlign();
+ MaybeAlign SliceAlign = getSliceAlign();
// For unsplit intrinsics, we simply modify the source and destination
// pointers in place. This isn't just an optimization, it is a matter of
@@ -2956,10 +2954,10 @@ private:
// Compute the relative offset for the other pointer within the transfer.
unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
- unsigned OtherAlign =
- IsDest ? II.getSourceAlignment() : II.getDestAlignment();
- OtherAlign = MinAlign(OtherAlign ? OtherAlign : 1,
- OtherOffset.zextOrTrunc(64).getZExtValue());
+ Align OtherAlign =
+ assumeAligned(IsDest ? II.getSourceAlignment() : II.getDestAlignment());
+ OtherAlign =
+ commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
if (EmitMemCpy) {
// Compute the other pointer, folding as much as possible to produce
@@ -2972,7 +2970,7 @@ private:
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
Value *DestPtr, *SrcPtr;
- unsigned DestAlign, SrcAlign;
+ MaybeAlign DestAlign, SrcAlign;
// Note: IsDest is true iff we're copying into the new alloca slice
if (IsDest) {
DestPtr = OurPtr;
@@ -3019,9 +3017,9 @@ private:
Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
- unsigned SrcAlign = OtherAlign;
+ MaybeAlign SrcAlign = OtherAlign;
Value *DstPtr = &NewAI;
- unsigned DstAlign = SliceAlign;
+ MaybeAlign DstAlign = SliceAlign;
if (!IsDest) {
std::swap(SrcPtr, DstPtr);
std::swap(SrcAlign, DstAlign);
@@ -3117,20 +3115,17 @@ private:
Instruction *I = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- unsigned LoadAlign = LI->getAlignment();
- if (!LoadAlign)
- LoadAlign = DL.getABITypeAlignment(LI->getType());
- LI->setAlignment(MaybeAlign(std::min(LoadAlign, getSliceAlign())));
+ MaybeAlign LoadAlign = DL.getValueOrABITypeAlignment(
+ MaybeAlign(LI->getAlignment()), LI->getType());
+ LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- unsigned StoreAlign = SI->getAlignment();
- if (!StoreAlign) {
Value *Op = SI->getOperand(0);
- StoreAlign = DL.getABITypeAlignment(Op->getType());
- }
- SI->setAlignment(MaybeAlign(std::min(StoreAlign, getSliceAlign())));
- continue;
+ MaybeAlign StoreAlign = DL.getValueOrABITypeAlignment(
+ MaybeAlign(SI->getAlignment()), Op->getType());
+ SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
+ continue;
}
assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
@@ -3222,7 +3217,7 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
/// The current pointer use being rewritten. This is used to dig up the used
/// value (as opposed to the user).
- Use *U;
+ Use *U = nullptr;
/// Used to calculate offsets, and hence alignment, of subobjects.
const DataLayout &DL;
@@ -3277,7 +3272,7 @@ private:
Type *BaseTy;
/// Known alignment of the base pointer.
- unsigned BaseAlign;
+ Align BaseAlign;
/// To calculate offset of each component so we can correctly deduce
/// alignments.
@@ -3286,7 +3281,7 @@ private:
/// Initialize the splitter with an insertion point, Ptr and start with a
/// single zero GEP index.
OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
- unsigned BaseAlign, const DataLayout &DL)
+ Align BaseAlign, const DataLayout &DL)
: IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr),
BaseTy(BaseTy), BaseAlign(BaseAlign), DL(DL) {}
@@ -3308,7 +3303,7 @@ private:
if (Ty->isSingleValueType()) {
unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
return static_cast<Derived *>(this)->emitFunc(
- Ty, Agg, MinAlign(BaseAlign, Offset), Name);
+ Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
}
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
@@ -3349,18 +3344,20 @@ private:
AAMDNodes AATags;
LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
- AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+ AAMDNodes AATags, Align BaseAlign, const DataLayout &DL)
: OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
- DL), AATags(AATags) {}
+ DL),
+ AATags(AATags) {}
/// Emit a leaf load of a single value. This is called at the leaves of the
/// recursive emission to actually load values.
- void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
+ void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
Value *GEP =
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
- LoadInst *Load = IRB.CreateAlignedLoad(Ty, GEP, Align, Name + ".load");
+ LoadInst *Load =
+ IRB.CreateAlignedLoad(Ty, GEP, Alignment.value(), Name + ".load");
if (AATags)
Load->setAAMetadata(AATags);
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
@@ -3388,14 +3385,14 @@ private:
struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
- AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+ AAMDNodes AATags, Align BaseAlign, const DataLayout &DL)
: OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
DL),
AATags(AATags) {}
AAMDNodes AATags;
/// Emit a leaf store of a single value. This is called at the leaves of the
/// recursive emission to actually produce stores.
- void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
+ void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
assert(Ty->isSingleValueType());
// Extract the single value and store it using the indices.
//
@@ -3406,7 +3403,7 @@ private:
Value *InBoundsGEP =
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
StoreInst *Store =
- IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align);
+ IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment.value());
if (AATags)
Store->setAAMetadata(AATags);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
@@ -3863,8 +3860,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
getAdjustedPtr(IRB, DL, BasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
- LI->getName());
+ getAdjustedAlignment(LI, PartOffset, DL).value(),
+ /*IsVolatile*/ false, LI->getName());
PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
@@ -3921,7 +3918,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL).value(),
+ /*IsVolatile*/ false);
PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
@@ -4005,8 +4003,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
LoadPartPtrTy, LoadBasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
- LI->getName());
+ getAdjustedAlignment(LI, PartOffset, DL).value(),
+ /*IsVolatile*/ false, LI->getName());
}
// And store this partition.
@@ -4017,7 +4015,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
StorePartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL).value(),
+ /*IsVolatile*/ false);
// Now build a new slice for the alloca.
NewSlices.push_back(
@@ -4152,20 +4151,19 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// FIXME: We might want to defer PHI speculation until after here.
// FIXME: return nullptr;
} else {
- unsigned Alignment = AI.getAlignment();
- if (!Alignment) {
- // The minimum alignment which users can rely on when the explicit
- // alignment is omitted or zero is that required by the ABI for this
- // type.
- Alignment = DL.getABITypeAlignment(AI.getAllocatedType());
- }
- Alignment = MinAlign(Alignment, P.beginOffset());
+ // If alignment is unspecified we fall back on the one required by the ABI
+ // for this type. We also make sure the alignment is compatible with
+ // P.beginOffset().
+ const Align Alignment = commonAlignment(
+ DL.getValueOrABITypeAlignment(MaybeAlign(AI.getAlignment()),
+ AI.getAllocatedType()),
+ P.beginOffset());
// If we will get at least this much alignment from the type alone, leave
// the alloca's alignment unconstrained.
- if (Alignment <= DL.getABITypeAlignment(SliceTy))
- Alignment = 0;
+ const bool IsUnconstrained = Alignment <= DL.getABITypeAlignment(SliceTy);
NewAI = new AllocaInst(
- SliceTy, AI.getType()->getAddressSpace(), nullptr, Alignment,
+ SliceTy, AI.getType()->getAddressSpace(), nullptr,
+ IsUnconstrained ? MaybeAlign() : Alignment,
AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
// Copy the old AI debug location over to the new one.
NewAI->setDebugLoc(AI.getDebugLoc());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
index 1d2e40bf62be..9d088547b436 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -82,6 +82,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerConstantIntrinsicsPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeLowerGuardIntrinsicLegacyPassPass(Registry);
+ initializeLowerMatrixIntrinsicsLegacyPassPass(Registry);
initializeLowerWidenableConditionLegacyPassPass(Registry);
initializeMemCpyOptLegacyPassPass(Registry);
initializeMergeICmpsLegacyPassPass(Registry);
@@ -89,6 +90,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeNaryReassociateLegacyPassPass(Registry);
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
initializeReassociateLegacyPassPass(Registry);
+ initializeRedundantDbgInstEliminationPass(Registry);
initializeRegToMemPass(Registry);
initializeRewriteStatepointsForGCLegacyPassPass(Registry);
initializeSCCPLegacyPassPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 2ee1a3a95f2a..c25c6c632b8f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
@@ -21,6 +22,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -33,12 +35,12 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Options.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/Scalarizer.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -173,8 +175,8 @@ struct VectorLayout {
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
- ScalarizerVisitor(unsigned ParallelLoopAccessMDKind)
- : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind) {
+ ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT)
+ : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT) {
}
bool visit(Function &F);
@@ -214,6 +216,8 @@ private:
GatherList Gathered;
unsigned ParallelLoopAccessMDKind;
+
+ DominatorTree *DT;
};
class ScalarizerLegacyPass : public FunctionPass {
@@ -225,6 +229,11 @@ public:
}
bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
};
} // end anonymous namespace
@@ -232,6 +241,7 @@ public:
char ScalarizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)
@@ -303,7 +313,8 @@ bool ScalarizerLegacyPass::runOnFunction(Function &F) {
Module &M = *F.getParent();
unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
- ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT);
return Impl.visit(F);
}
@@ -340,6 +351,15 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
return Scatterer(BB, BB->begin(), V, &Scattered[V]);
}
if (Instruction *VOp = dyn_cast<Instruction>(V)) {
+ // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
+ // nodes in predecessors. If those predecessors are unreachable from entry,
+ // then the IR in those blocks could have unexpected properties resulting in
+ // infinite loops in Scatterer::operator[]. By simply treating values
+ // originating from instructions in unreachable blocks as undef we do not
+ // need to analyse them further.
+ if (!DT->isReachableFromEntry(VOp->getParent()))
+ return Scatterer(Point->getParent(), Point->getIterator(),
+ UndefValue::get(V->getType()));
// Put the scattered form of an instruction directly after the
// instruction.
BasicBlock *BB = VOp->getParent();
@@ -856,7 +876,10 @@ PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM)
Module &M = *F.getParent();
unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
- ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
+ DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT);
bool Changed = Impl.visit(F);
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return Changed ? PA : PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 41554fccdf08..2a1a040bf83e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -164,7 +164,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -182,6 +181,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -189,6 +189,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index ac832b9b4567..d7a34acb4318 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -38,8 +38,10 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GenericDomTree.h"
@@ -263,7 +265,7 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
/// to an entirely separate nest.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
DominatorTree &DT, LoopInfo &LI,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
// If the loop is already at the top level, we can't hoist it anywhere.
Loop *OldParentL = L.getParentLoop();
if (!OldParentL)
@@ -317,7 +319,7 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
// Because we just hoisted a loop out of this one, we have essentially
// created new exit paths from it. That means we need to form LCSSA PHI
// nodes for values used in the no-longer-nested loop.
- formLCSSA(*OldContainingL, DT, &LI, nullptr);
+ formLCSSA(*OldContainingL, DT, &LI, SE);
// We shouldn't need to form dedicated exits because the exit introduced
// here is the (just split by unswitching) preheader. However, after trivial
@@ -329,6 +331,20 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
}
}
+// Return the top-most loop that contains ExitBB as an exiting block, or, if
+// no parent loop contains ExitBB as an exiting block, the loop that contains
+// ExitBB.
+static Loop *getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI) {
+ Loop *TopMost = LI.getLoopFor(ExitBB);
+ Loop *Current = TopMost;
+ while (Current) {
+ if (Current->isLoopExiting(ExitBB))
+ TopMost = Current;
+ Current = Current->getParentLoop();
+ }
+ return TopMost;
+}
+
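+// A hedged picture of what the helper above returns (labels invented): if the
+// exit block of the loop being unswitched also leaves an enclosing loop, the
+// enclosing loop is returned, so the SCEV invalidation below covers the whole
+// affected nest.
+//
+//   L1:                     ; outer loop
+//     L2:                   ; inner loop being unswitched
+//       ...
+//     mid:                  ; exit block of L2, still inside L1
+//       br i1 %d, label %exit, label %L1.header   ; also exits L1
+//
+//   LI.getLoopFor(mid) == L1 and L1->isLoopExiting(mid), so
+//   getTopMostExitingLoop(mid, LI) == L1.
+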
/// Unswitch a trivial branch if the condition is loop invariant.
///
/// This routine should only be called when loop code leading to the branch has
@@ -413,9 +429,10 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
});
// If we have scalar evolutions, we need to invalidate them including this
- // loop and the loop containing the exit block.
+ // loop, the loop containing the exit block and the topmost parent loop
+ // exiting via LoopExitBB.
if (SE) {
- if (Loop *ExitL = LI.getLoopFor(LoopExitBB))
+ if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
SE->forgetLoop(ExitL);
else
// Forget the entire nest as this exits the entire nest.
@@ -532,7 +549,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// If this was full unswitching, we may have changed the nesting relationship
// for this loop so hoist it to its correct parent if needed.
if (FullUnswitch)
- hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU);
+ hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -825,7 +842,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// We may have changed the nesting relationship for this loop so hoist it to
// its correct parent if needed.
- hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU);
+ hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -2260,7 +2277,7 @@ static void unswitchNontrivialInvariants(
// First build LCSSA for this loop so that we can preserve it when
// forming dedicated exits. We don't want to perturb some other loop's
// LCSSA while doing that CFG edit.
- formLCSSA(UpdateL, DT, &LI, nullptr);
+ formLCSSA(UpdateL, DT, &LI, SE);
// For loops reached by this loop's original exit blocks we may
// introduced new, non-dedicated exits. At least try to re-form dedicated
@@ -2426,7 +2443,7 @@ turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
if (MSSAU) {
MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
- MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::End);
+ MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 4544975a4887..623a8b711ed8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -35,10 +34,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <utility>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp
index 90f3a2aa46e1..677d86f8c7b4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
@@ -78,7 +79,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA,
if (auto *Call = dyn_cast<CallBase>(Inst)) {
// Convergent operations cannot be made control-dependent on additional
// values.
- if (Call->hasFnAttr(Attribute::Convergent))
+ if (Call->isConvergent())
return false;
for (Instruction *S : Stores)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index e6db11f47ead..cd7bfb2f20dc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -283,12 +283,12 @@ static bool isSafeAndProfitableToSpeculateAroundPHI(
int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
if (IID)
- FoldedCost += TTI.getIntImmCost(IID, Idx, IncomingC->getValue(),
- IncomingC->getType());
+ FoldedCost += TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(),
+ IncomingC->getType());
else
FoldedCost +=
- TTI.getIntImmCost(UserI->getOpcode(), Idx, IncomingC->getValue(),
- IncomingC->getType());
+ TTI.getIntImmCostInst(UserI->getOpcode(), Idx,
+ IncomingC->getValue(), IncomingC->getType());
// If we accumulate more folded cost for this incoming constant than
// materialized cost, then we'll regress any edge with this constant so
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index f9d027eb4a3b..c8d899bb4871 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -67,6 +67,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index a58c32cc5894..9f82b1263ebd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -60,7 +60,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -76,10 +75,12 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <limits>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 9791cf41f621..4ce4ce46f67a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -34,8 +34,10 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index b27a36b67d62..9f0ab9103d42 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -76,6 +76,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
index 707adf46d1f4..c8461fdc1608 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -12,6 +12,7 @@
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index ee0973002c47..0908b361a4d4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -63,6 +63,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -232,7 +233,7 @@ static bool addDiscriminators(Function &F) {
LocationSet CallLocations;
for (auto &I : B.getInstList()) {
// We bypass intrinsic calls for the following two reasons:
- // 1) We want to avoid a non-deterministic assigment of
+ // 1) We want to avoid a non-deterministic assignment of
// discriminators.
// 2) We want to minimize the number of base discriminators used.
if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index d85cc40c372a..c9eb4abfa21a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -247,7 +247,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
Instruction *STI = BB->getTerminator();
Instruction *Start = &*BB->begin();
// If there's nothing to move, mark the starting instruction as the last
- // instruction in the block.
+ // instruction in the block. Terminator instruction is handled separately.
if (Start == STI)
Start = PTI;
@@ -274,24 +274,20 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// Move terminator instruction.
PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
+ // Terminator may be a memory accessing instruction too.
+ if (MSSAU)
+ if (MemoryUseOrDef *MUD = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(PredBB->getTerminator())))
+ MSSAU->moveToPlace(MUD, PredBB, MemorySSA::End);
}
// Add unreachable to now empty BB.
new UnreachableInst(BB->getContext(), BB);
- // Eliminate duplicate dbg.values describing the entry PHI node post-splice.
- for (auto Incoming : IncomingValues) {
- if (isa<Instruction>(*Incoming)) {
- SmallVector<DbgValueInst *, 2> DbgValues;
- SmallDenseSet<std::pair<DILocalVariable *, DIExpression *>, 2>
- DbgValueSet;
- llvm::findDbgValues(DbgValues, Incoming);
- for (auto &DVI : DbgValues) {
- auto R = DbgValueSet.insert({DVI->getVariable(), DVI->getExpression()});
- if (!R.second)
- DVI->eraseFromParent();
- }
- }
- }
+ // Eliminate duplicate/redundant dbg.values. This seems to be a good place to
+ // do that since we might end up with redundant dbg.values describing the
+ // entry PHI node post-splice.
+ RemoveRedundantDbgInstrs(PredBB);
// Inherit predecessors name if it exists.
if (!PredBB->hasName())
@@ -318,6 +314,124 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
return true;
}
+/// Remove redundant instructions within sequences of consecutive dbg.value
+/// instructions. This is done using a backward scan to keep the last dbg.value
+/// describing a specific variable/fragment.
+///
+/// BackwardScan strategy:
+/// ----------------------
+/// Given a sequence of consecutive DbgValueInst like this
+///
+/// dbg.value ..., "x", FragmentX1 (*)
+/// dbg.value ..., "y", FragmentY1
+/// dbg.value ..., "x", FragmentX2
+/// dbg.value ..., "x", FragmentX1 (**)
+///
+/// then the instruction marked with (*) can be removed (it is guaranteed to be
+/// obsoleted by the instruction marked with (**) as the latter instruction is
+/// describing the same variable using the same fragment info).
+///
+/// Possible improvements:
+/// - Check fully overlapping fragments and not only identical fragments.
+///  - Support dbg.addr, dbg.declare, dbg.label, and possibly other meta
+/// instructions being part of the sequence of consecutive instructions.
+static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
+ SmallVector<DbgValueInst *, 8> ToBeRemoved;
+ SmallDenseSet<DebugVariable> VariableSet;
+ for (auto &I : reverse(*BB)) {
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I)) {
+ DebugVariable Key(DVI->getVariable(),
+ DVI->getExpression(),
+ DVI->getDebugLoc()->getInlinedAt());
+ auto R = VariableSet.insert(Key);
+      // If the same variable fragment is described more than once it is
+      // enough to keep the last one (i.e. the first one found, since we
+      // iterate in reverse).
+ if (!R.second)
+ ToBeRemoved.push_back(DVI);
+ continue;
+ }
+ // Sequence with consecutive dbg.value instrs ended. Clear the map to
+    // restart identifying redundant instructions in case we find another
+ // dbg.value sequence.
+ VariableSet.clear();
+ }
+
+ for (auto &Instr : ToBeRemoved)
+ Instr->eraseFromParent();
+
+ return !ToBeRemoved.empty();
+}
+
+/// Remove redundant dbg.value instructions using a forward scan. This can
+/// remove a dbg.value instruction that is redundant because it indicates
+/// that a variable has the same value as the one already indicated by an
+/// earlier dbg.value.
+///
+/// ForwardScan strategy:
+/// ---------------------
+/// Given two identical dbg.value instructions, separated by a block of
+/// instructions none of which describes the same variable, like this
+///
+/// dbg.value X1, "x", FragmentX1 (**)
+/// <block of instructions, none being "dbg.value ..., "x", ...">
+/// dbg.value X1, "x", FragmentX1 (*)
+///
+/// then the instruction marked with (*) can be removed. Variable "x" is already
+/// described as being mapped to the SSA value X1.
+///
+/// Possible improvements:
+/// - Keep track of non-overlapping fragments.
+static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
+ SmallVector<DbgValueInst *, 8> ToBeRemoved;
+ DenseMap<DebugVariable, std::pair<Value *, DIExpression *> > VariableMap;
+ for (auto &I : *BB) {
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I)) {
+ DebugVariable Key(DVI->getVariable(),
+ NoneType(),
+ DVI->getDebugLoc()->getInlinedAt());
+ auto VMI = VariableMap.find(Key);
+ // Update the map if we found a new value/expression describing the
+ // variable, or if the variable wasn't mapped already.
+ if (VMI == VariableMap.end() ||
+ VMI->second.first != DVI->getValue() ||
+ VMI->second.second != DVI->getExpression()) {
+ VariableMap[Key] = { DVI->getValue(), DVI->getExpression() };
+ continue;
+ }
+ // Found an identical mapping. Remember the instruction for later removal.
+ ToBeRemoved.push_back(DVI);
+ }
+ }
+
+ for (auto &Instr : ToBeRemoved)
+ Instr->eraseFromParent();
+
+ return !ToBeRemoved.empty();
+}
+
+bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) {
+ bool MadeChanges = false;
+ // By using the "backward scan" strategy before the "forward scan" strategy we
+ // can remove both dbg.value (2) and (3) in a situation like this:
+ //
+ // (1) dbg.value V1, "x", DIExpression()
+ // ...
+ // (2) dbg.value V2, "x", DIExpression()
+ // (3) dbg.value V1, "x", DIExpression()
+ //
+ // The backward scan will remove (2), it is made obsolete by (3). After
+  // getting (2) out of the way, the forward scan will remove (3) since "x"
+ // already is described as having the value V1 at (1).
+ MadeChanges |= removeRedundantDbgInstrsUsingBackwardScan(BB);
+ MadeChanges |= removeRedundantDbgInstrsUsingForwardScan(BB);
+
+ if (MadeChanges)
+ LLVM_DEBUG(dbgs() << "Removed redundant dbg instrs from: "
+ << BB->getName() << "\n");
+ return MadeChanges;
+}
+
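A minimal usage sketch of the new helper (illustrative, not part of the patch; F is an arbitrary llvm::Function):

    bool Cleaned = false;
    for (BasicBlock &BB : F)
      Cleaned |= llvm::RemoveRedundantDbgInstrs(&BB);
    // Cleaned is true iff at least one dbg.value was removed.
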
void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
BasicBlock::iterator &BI, Value *V) {
Instruction &I = *BI;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index f5e4b53f6d97..008cea333e6b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
index 3c7c8d872595..6b01c0c71d00 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -30,9 +30,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
-
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 0298ff9a395f..682af4a88d3e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -805,7 +805,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
dbgs() << ")\n";
});
- StructType *StructTy;
+ StructType *StructTy = nullptr;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
StructTy = StructType::get(M->getContext(), paramTy);
paramTy.clear();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
new file mode 100644
index 000000000000..93395ac761ab
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -0,0 +1,189 @@
+//===- CodeMoverUtils.cpp - CodeMover Utilities ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs movements on basic blocks and
+// instructions contained within a function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CodeMoverUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "codemover-utils"
+
+STATISTIC(HasDependences,
+          "Cannot move across instructions that have memory dependences");
+STATISTIC(MayThrowException, "Cannot move across instructions that may throw");
+STATISTIC(NotControlFlowEquivalent,
+ "Instructions are not control flow equivalent");
+STATISTIC(NotMovedPHINode, "Movement of PHINodes is not supported");
+STATISTIC(NotMovedTerminator, "Movement of Terminators is not supported");
+
+bool llvm::isControlFlowEquivalent(const Instruction &I0, const Instruction &I1,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT) {
+ return isControlFlowEquivalent(*I0.getParent(), *I1.getParent(), DT, PDT);
+}
+
+bool llvm::isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT) {
+ if (&BB0 == &BB1)
+ return true;
+
+ return ((DT.dominates(&BB0, &BB1) && PDT.dominates(&BB1, &BB0)) ||
+ (PDT.dominates(&BB0, &BB1) && DT.dominates(&BB1, &BB0)));
+}
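+
+// A hedged illustration of the equivalence test above (block names invented):
+// in a diamond CFG, entry and the join block execute together on every path
+// through the function, while neither arm does.
+//
+//   entry: br i1 %c, label %then, label %else
+//   then:  br label %join      ; does not postdominate entry
+//   else:  br label %join
+//   join:  ...                 ; postdominates entry
+//
+//   isControlFlowEquivalent(entry, join, DT, PDT) -> true
+//   isControlFlowEquivalent(entry, then, DT, PDT) -> false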
+
+static bool reportInvalidCandidate(const Instruction &I,
+ llvm::Statistic &Stat) {
+ ++Stat;
+ LLVM_DEBUG(dbgs() << "Unable to move instruction: " << I << ". "
+ << Stat.getDesc());
+ return false;
+}
+
+/// Collect all instructions in between \p StartInst and \p EndInst, and store
+/// them in \p InBetweenInsts.
+static void
+collectInstructionsInBetween(Instruction &StartInst, const Instruction &EndInst,
+ SmallPtrSetImpl<Instruction *> &InBetweenInsts) {
+ assert(InBetweenInsts.empty() && "Expecting InBetweenInsts to be empty");
+
+ /// Get the next instructions of \p I, and push them to \p WorkList.
+ auto getNextInsts = [](Instruction &I,
+ SmallPtrSetImpl<Instruction *> &WorkList) {
+ if (Instruction *NextInst = I.getNextNode())
+ WorkList.insert(NextInst);
+ else {
+ assert(I.isTerminator() && "Expecting a terminator instruction");
+ for (BasicBlock *Succ : successors(&I))
+ WorkList.insert(&Succ->front());
+ }
+ };
+
+ SmallPtrSet<Instruction *, 10> WorkList;
+ getNextInsts(StartInst, WorkList);
+ while (!WorkList.empty()) {
+ Instruction *CurInst = *WorkList.begin();
+ WorkList.erase(CurInst);
+
+ if (CurInst == &EndInst)
+ continue;
+
+ if (!InBetweenInsts.insert(CurInst).second)
+ continue;
+
+ getNextInsts(*CurInst, WorkList);
+ }
+}
+
+bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ DependenceInfo &DI) {
+ // Cannot move itself before itself.
+ if (&I == &InsertPoint)
+ return false;
+
+ // Not moved.
+ if (I.getNextNode() == &InsertPoint)
+ return true;
+
+ if (isa<PHINode>(I) || isa<PHINode>(InsertPoint))
+ return reportInvalidCandidate(I, NotMovedPHINode);
+
+ if (I.isTerminator())
+ return reportInvalidCandidate(I, NotMovedTerminator);
+
+ // TODO remove this limitation.
+ if (!isControlFlowEquivalent(I, InsertPoint, DT, PDT))
+ return reportInvalidCandidate(I, NotControlFlowEquivalent);
+
+ // As I and InsertPoint are control flow equivalent, if I dominates
+ // InsertPoint, then I comes before InsertPoint.
+ const bool MoveForward = DT.dominates(&I, &InsertPoint);
+ if (MoveForward) {
+    // When I is being moved forward, we need to make sure the InsertPoint
+    // dominates every user; otherwise, a user may be using an undefined I.
+ for (const Use &U : I.uses())
+ if (auto *UserInst = dyn_cast<Instruction>(U.getUser()))
+ if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U))
+ return false;
+ } else {
+    // When I is being moved backward, we need to make sure all its operands
+    // dominate the InsertPoint; otherwise, an operand may be undefined for I.
+ for (const Value *Op : I.operands())
+ if (auto *OpInst = dyn_cast<Instruction>(Op))
+ if (&InsertPoint == OpInst || !DT.dominates(OpInst, &InsertPoint))
+ return false;
+ }
+
+ Instruction &StartInst = (MoveForward ? I : InsertPoint);
+ Instruction &EndInst = (MoveForward ? InsertPoint : I);
+ SmallPtrSet<Instruction *, 10> InstsToCheck;
+ collectInstructionsInBetween(StartInst, EndInst, InstsToCheck);
+ if (!MoveForward)
+ InstsToCheck.insert(&InsertPoint);
+
+  // Check if there exist instructions which may throw, may synchronize, or
+  // may never return, from I to InsertPoint.
+ if (!isSafeToSpeculativelyExecute(&I))
+ if (std::any_of(InstsToCheck.begin(), InstsToCheck.end(),
+ [](Instruction *I) {
+ if (I->mayThrow())
+ return true;
+
+ const CallBase *CB = dyn_cast<CallBase>(I);
+ if (!CB)
+ return false;
+ if (!CB->hasFnAttr(Attribute::WillReturn))
+ return true;
+ if (!CB->hasFnAttr(Attribute::NoSync))
+ return true;
+
+ return false;
+ })) {
+ return reportInvalidCandidate(I, MayThrowException);
+ }
+
+ // Check if I has any output/flow/anti dependences with instructions from \p
+ // StartInst to \p EndInst.
+ if (std::any_of(InstsToCheck.begin(), InstsToCheck.end(),
+ [&DI, &I](Instruction *CurInst) {
+ auto DepResult = DI.depends(&I, CurInst, true);
+ if (DepResult &&
+ (DepResult->isOutput() || DepResult->isFlow() ||
+ DepResult->isAnti()))
+ return true;
+ return false;
+ }))
+ return reportInvalidCandidate(I, HasDependences);
+
+ return true;
+}
+
+void llvm::moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT, DependenceInfo &DI) {
+ for (auto It = ++FromBB.rbegin(); It != FromBB.rend();) {
+ Instruction *MovePos = ToBB.getFirstNonPHIOrDbg();
+ Instruction &I = *It;
+ // Increment the iterator before modifying FromBB.
+ ++It;
+
+ if (isSafeToMoveBefore(I, *MovePos, DT, PDT, DI))
+ I.moveBefore(MovePos);
+ }
+}
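A hedged sketch of how a legacy pass could drive these new utilities, assuming the usual analysis wrapper passes are required in getAnalysisUsage:

    // Inside some FunctionPass::runOnFunction(Function &F):
    auto &DT  = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
    auto &DI  = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
    // Move every provably safe instruction from FromBB up into ToBB.
    moveInstsBottomUp(FromBB, ToBB, DT, PDT, DI);
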
diff --git a/contrib/llvm-project/llvm/tools/opt/Debugify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
index 222cc702bc1f..b7b4bfa3734d 100644
--- a/contrib/llvm-project/llvm/tools/opt/Debugify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -11,24 +11,17 @@
///
//===----------------------------------------------------------------------===//
-#include "Debugify.h"
+#include "llvm/Transforms/Utils/Debugify.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -395,27 +388,6 @@ private:
} // end anonymous namespace
-void exportDebugifyStats(llvm::StringRef Path, const DebugifyStatsMap &Map) {
- std::error_code EC;
- raw_fd_ostream OS{Path, EC};
- if (EC) {
- errs() << "Could not open file: " << EC.message() << ", " << Path << '\n';
- return;
- }
-
- OS << "Pass Name" << ',' << "# of missing debug values" << ','
- << "# of missing locations" << ',' << "Missing/Expected value ratio" << ','
- << "Missing/Expected location ratio" << '\n';
- for (const auto &Entry : Map) {
- StringRef Pass = Entry.first;
- DebugifyStatistics Stats = Entry.second;
-
- OS << Pass << ',' << Stats.NumDbgValuesMissing << ','
- << Stats.NumDbgLocsMissing << ',' << Stats.getMissingValueRatio() << ','
- << Stats.getEmptyLocationRatio() << '\n';
- }
-}
-
ModulePass *createDebugifyModulePass() { return new DebugifyModulePass(); }
FunctionPass *createDebugifyFunctionPass() {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 57e2ff0251a9..651f776a4915 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 76b4635ad501..26d48ee0d23f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -12,13 +12,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/InstIterator.h"
using namespace llvm;
/// Checks if we should import SGV as a definition, otherwise import as a
/// declaration.
bool FunctionImportGlobalProcessing::doImportAsDefinition(
- const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) {
+ const GlobalValue *SGV) {
+ if (!isPerformingImport())
+ return false;
// Only import the globals requested for importing.
if (!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)))
@@ -31,16 +34,8 @@ bool FunctionImportGlobalProcessing::doImportAsDefinition(
return true;
}
-bool FunctionImportGlobalProcessing::doImportAsDefinition(
- const GlobalValue *SGV) {
- if (!isPerformingImport())
- return false;
- return FunctionImportGlobalProcessing::doImportAsDefinition(SGV,
- GlobalsToImport);
-}
-
bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
- const GlobalValue *SGV) {
+ const GlobalValue *SGV, ValueInfo VI) {
assert(SGV->hasLocalLinkage());
// Both the imported references and the original local variable must
// be promoted.
@@ -65,7 +60,7 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
// (so the source file name and resulting GUID is the same). Find the one
// in this module.
auto Summary = ImportIndex.findSummaryInModule(
- SGV->getGUID(), SGV->getParent()->getModuleIdentifier());
+ VI, SGV->getParent()->getModuleIdentifier());
assert(Summary && "Missing summary for global value when exporting");
auto Linkage = Summary->linkage();
if (!GlobalValue::isLocalLinkage(Linkage)) {
@@ -91,18 +86,15 @@ bool FunctionImportGlobalProcessing::isNonRenamableLocal(
}
#endif
-std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV,
- bool DoPromote) {
+std::string
+FunctionImportGlobalProcessing::getPromotedName(const GlobalValue *SGV) {
+ assert(SGV->hasLocalLinkage());
// For locals that must be promoted to global scope, ensure that
// the promoted name uniquely identifies the copy in the original module,
- // using the ID assigned during combined index creation. When importing,
- // we rename all locals (not just those that are promoted) in order to
- // avoid naming conflicts between locals imported from different modules.
- if (SGV->hasLocalLinkage() && (DoPromote || isPerformingImport()))
- return ModuleSummaryIndex::getGlobalNameForLocal(
- SGV->getName(),
- ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier()));
- return SGV->getName();
+ // using the ID assigned during combined index creation.
+ return ModuleSummaryIndex::getGlobalNameForLocal(
+ SGV->getName(),
+ ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier()));
}
GlobalValue::LinkageTypes
@@ -229,6 +221,11 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
}
}
+ // We should always have a ValueInfo (i.e. GV in index) for definitions when
+ // we are exporting, and also when importing that value.
+ assert(VI || GV.isDeclaration() ||
+ (isPerformingImport() && !doImportAsDefinition(&GV)));
+
// Mark read/write-only variables which can be imported with specific
// attribute. We can't internalize them now because IRMover will fail
// to link variable definitions to their external declarations during
@@ -238,27 +235,42 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
// If global value dead stripping is not enabled in summary then
// propagateConstants hasn't been run. We can't internalize GV
// in such case.
- if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) {
- const auto &SL = VI.getSummaryList();
- auto *GVS = SL.empty() ? nullptr : dyn_cast<GlobalVarSummary>(SL[0].get());
- // At this stage "maybe" is "definitely"
- if (GVS && (GVS->maybeReadOnly() || GVS->maybeWriteOnly()))
- cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize");
+ if (!GV.isDeclaration() && VI && ImportIndex.withAttributePropagation()) {
+ if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
+ // We can have more than one local with the same GUID, in the case of
+ // same-named locals in different but same-named source files that were
+ // compiled in their respective directories (so the source file name
+ // and resulting GUID is the same). Find the one in this module.
+ // Handle the case where there is no summary found in this module. That
+ // can happen in the distributed ThinLTO backend, because the index only
+ // contains summaries from the source modules if they are being imported.
+ // We might have a non-null VI and get here even in that case if the name
+ // matches one in this module (e.g. weak or appending linkage).
+ auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
+ ImportIndex.findSummaryInModule(VI, M.getModuleIdentifier()));
+ if (GVS &&
+ (ImportIndex.isReadOnly(GVS) || ImportIndex.isWriteOnly(GVS))) {
+ V->addAttribute("thinlto-internalize");
+          // Objects referenced by a writeonly GV initializer should not be
+          // promoted, because there is no read access to them of any kind on
+          // behalf of this writeonly GV. To avoid promotion we convert the
+          // GV initializer to 'zeroinitializer'. This effectively drops the
+          // references in the IR module (not in the combined index), so we
+          // can ignore them when computing the import. We do not export
+          // references of a writeonly object; see
+          // computeImportForReferencedGlobals.
+ if (ImportIndex.isWriteOnly(GVS))
+ V->setInitializer(Constant::getNullValue(V->getValueType()));
+ }
+ }
}
- bool DoPromote = false;
- if (GV.hasLocalLinkage() &&
- ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
+ if (GV.hasLocalLinkage() && shouldPromoteLocalToGlobal(&GV, VI)) {
// Save the original name string before we rename GV below.
auto Name = GV.getName().str();
- // Once we change the name or linkage it is difficult to determine
- // again whether we should promote since shouldPromoteLocalToGlobal needs
- // to locate the summary (based on GUID from name and linkage). Therefore,
- // use DoPromote result saved above.
- GV.setName(getName(&GV, DoPromote));
- GV.setLinkage(getLinkage(&GV, DoPromote));
- if (!GV.hasLocalLinkage())
- GV.setVisibility(GlobalValue::HiddenVisibility);
+ GV.setName(getPromotedName(&GV));
+ GV.setLinkage(getLinkage(&GV, /* DoPromote */ true));
+ assert(!GV.hasLocalLinkage());
+ GV.setVisibility(GlobalValue::HiddenVisibility);
// If we are renaming a COMDAT leader, ensure that we record the COMDAT
// for later renaming as well. This is required for COFF.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
index 34c32d9c0c98..4cfc9358499a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
@@ -10,13 +10,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/GuardUtils.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
static cl::opt<uint32_t> PredicatePassBranchWeight(
"guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20),
@@ -24,7 +28,7 @@ static cl::opt<uint32_t> PredicatePassBranchWeight(
"reciprocal of this value (default = 1 << 20)"));
void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
- CallInst *Guard) {
+ CallInst *Guard, bool UseWC) {
OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt));
SmallVector<Value *, 4> Args(std::next(Guard->arg_begin()), Guard->arg_end());
@@ -60,4 +64,63 @@ void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
DeoptCall->setCallingConv(Guard->getCallingConv());
DeoptBlockTerm->eraseFromParent();
+
+ if (UseWC) {
+ // We want the guard to be expressed as explicit control flow, but still be
+ // widenable. For that, we add Widenable Condition intrinsic call to the
+ // guard's condition.
+ IRBuilder<> B(CheckBI);
+ auto *WC = B.CreateIntrinsic(Intrinsic::experimental_widenable_condition,
+ {}, {}, nullptr, "widenable_cond");
+ CheckBI->setCondition(B.CreateAnd(CheckBI->getCondition(), WC,
+ "exiplicit_guard_cond"));
+ assert(isWidenableBranch(CheckBI) && "sanity check");
+ }
+}
+
+
+void llvm::widenWidenableBranch(BranchInst *WidenableBR, Value *NewCond) {
+ assert(isWidenableBranch(WidenableBR) && "precondition");
+
+  // The temptingly trivial option is to produce something like this:
+  // br (and oldcond, newcond) where oldcond is assumed to contain a widenable
+  // condition, but that doesn't match the pattern parseWidenableBranch
+  // expects, so we have to be more sophisticated.
+
+ Use *C, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ parseWidenableBranch(WidenableBR, C, WC, IfTrueBB, IfFalseBB);
+ if (!C) {
+ // br (wc()), ... form
+ IRBuilder<> B(WidenableBR);
+ WidenableBR->setCondition(B.CreateAnd(NewCond, WC->get()));
+ } else {
+ // br (wc & C), ... form
+ IRBuilder<> B(WidenableBR);
+ C->set(B.CreateAnd(NewCond, C->get()));
+ Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition());
+ // Condition is only guaranteed to dominate branch
+ WCAnd->moveBefore(WidenableBR);
+ }
+  assert(isWidenableBranch(WidenableBR) && "preserve widenability");
+}
+
+void llvm::setWidenableBranchCond(BranchInst *WidenableBR, Value *NewCond) {
+ assert(isWidenableBranch(WidenableBR) && "precondition");
+
+ Use *C, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ parseWidenableBranch(WidenableBR, C, WC, IfTrueBB, IfFalseBB);
+ if (!C) {
+ // br (wc()), ... form
+ IRBuilder<> B(WidenableBR);
+ WidenableBR->setCondition(B.CreateAnd(NewCond, WC->get()));
+ } else {
+ // br (wc & C), ... form
+ Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition());
+ // Condition is only guaranteed to dominate branch
+ WCAnd->moveBefore(WidenableBR);
+ C->set(NewCond);
+ }
+  assert(isWidenableBranch(WidenableBR) && "preserve widenability");
}
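For reference, a hedged sketch of the two widenable-branch shapes that parseWidenableBranch distinguishes in the helpers above (labels invented):

    ;   %wc = call i1 @llvm.experimental.widenable.condition()
    ;   br i1 %wc, label %guarded, label %deopt        ; "br (wc())" form
    ;
    ;   %and = and i1 %cond, %wc
    ;   br i1 %and, label %guarded, label %deopt       ; "br (wc & C)" form
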
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
new file mode 100644
index 000000000000..9192e74b9ace
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -0,0 +1,186 @@
+//===- InjectTLIMappings.cpp - TLI to VFABI attribute injection ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Populates the VFABI attribute with the scalar-to-vector mappings
+// from the TargetLibraryInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/InjectTLIMappings.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "inject-tli-mappings"
+
+STATISTIC(NumCallInjected,
+ "Number of calls in which the mappings have been injected.");
+
+STATISTIC(NumVFDeclAdded,
+ "Number of function declarations that have been added.");
+STATISTIC(NumCompUsedAdded,
+ "Number of `@llvm.compiler.used` operands that have been added.");
+
+/// Helper function to map the TLI name to a string that holds the
+/// scalar-to-vector mapping.
+///
+/// _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
+///
+/// where:
+///
+/// <isa> = "_LLVM_"
+/// <mask> = "N". Note: TLI does not support masked interfaces.
+/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
+/// field of the `VecDesc` struct.
+/// <vparams> = "v", one for each parameter of CI.
+/// <scalarname> = the name of the scalar function called by CI.
+/// <vectorname> = the name of the vector function mapped by the TLI.
+static std::string mangleTLIName(StringRef VectorName, const CallInst &CI,
+ unsigned VF) {
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ Out << "_ZGV" << VFABI::_LLVM_ << "N" << VF;
+ for (unsigned I = 0; I < CI.getNumArgOperands(); ++I)
+ Out << "v";
+ Out << "_" << CI.getCalledFunction()->getName() << "(" << VectorName << ")";
+ return Out.str();
+}
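+
+// A worked example of the mangling above, with an invented vector name: for a
+// one-argument call to sinf at VF = 4 and a TLI vector routine named vsinf4,
+//
+//   mangleTLIName("vsinf4", CI, 4) == "_ZGV_LLVM_N4v_sinf(vsinf4)"
+//   i.e. "_ZGV" + "_LLVM_" + "N" + VF + one "v" per argument
+//        + "_" + scalar name + "(" + vector name + ")".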
+
+/// A helper function for converting Scalar types to vector types.
+/// If the incoming type is void, we return void. If the VF is 1, we return
+/// the scalar type.
+static Type *ToVectorTy(Type *Scalar, unsigned VF, bool isScalable = false) {
+ if (Scalar->isVoidTy() || VF == 1)
+ return Scalar;
+ return VectorType::get(Scalar, {VF, isScalable});
+}
+
+/// A helper function that adds the vector function declaration that
+/// vectorizes the CallInst CI with a vectorization factor of VF
+/// lanes. The TLI assumes that all parameters and the return type of
+/// CI (other than void) need to be widened to a VectorType of VF
+/// lanes.
+static void addVariantDeclaration(CallInst &CI, const unsigned VF,
+ const StringRef VFName) {
+ Module *M = CI.getModule();
+
+ // Add function declaration.
+ Type *RetTy = ToVectorTy(CI.getType(), VF);
+ SmallVector<Type *, 4> Tys;
+ for (Value *ArgOperand : CI.arg_operands())
+ Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
+ assert(!CI.getFunctionType()->isVarArg() &&
+ "VarArg functions are not supported.");
+ FunctionType *FTy = FunctionType::get(RetTy, Tys, /*isVarArg=*/false);
+ Function *VectorF =
+ Function::Create(FTy, Function::ExternalLinkage, VFName, M);
+ VectorF->copyAttributesFrom(CI.getCalledFunction());
+ ++NumVFDeclAdded;
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added to the module: `" << VFName
+ << "` of type " << *(VectorF->getType()) << "\n");
+
+  // Make the function declaration (without a body) "sticky" in the IR by
+  // listing it in the @llvm.compiler.used global.
+ assert(!VectorF->size() && "VFABI attribute requires `@llvm.compiler.used` "
+ "only on declarations.");
+ appendToCompilerUsed(*M, {VectorF});
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << VFName
+ << "` to `@llvm.compiler.used`.\n");
+ ++NumCompUsedAdded;
+}
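+
+// Continuing the invented vsinf4 example, the declaration added here would
+// look roughly like this in the IR (the bitcast in @llvm.compiler.used is
+// abbreviated):
+//
+//   declare <4 x float> @vsinf4(<4 x float>)
+//   @llvm.compiler.used = appending global [1 x i8*]
+//       [i8* bitcast (... @vsinf4 ...)], section "llvm.metadata"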
+
+static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
+  // This is needed to make sure we don't query the TLI for calls through
+  // bitcasts of function pointers, like `%call = call i32 (i32*, ...)
+  // bitcast (i32 (...)* @goo to i32 (i32*, ...)*)(i32* nonnull %i)`,
+  // as such calls make `isFunctionVectorizable` raise an
+  // exception.
+ if (CI.isNoBuiltin() || !CI.getCalledFunction())
+ return;
+
+ const std::string ScalarName = CI.getCalledFunction()->getName();
+ // Nothing to be done if the TLI thinks the function is not
+ // vectorizable.
+ if (!TLI.isFunctionVectorizable(ScalarName))
+ return;
+ SmallVector<std::string, 8> Mappings;
+ VFABI::getVectorVariantNames(CI, Mappings);
+ Module *M = CI.getModule();
+ const SetVector<StringRef> OriginalSetOfMappings(Mappings.begin(),
+ Mappings.end());
+ // All VFs in the TLI are powers of 2.
+ for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); VF <= WidestVF;
+ VF *= 2) {
+ const std::string TLIName = TLI.getVectorizedFunction(ScalarName, VF);
+ if (!TLIName.empty()) {
+ std::string MangledName = mangleTLIName(TLIName, CI, VF);
+ if (!OriginalSetOfMappings.count(MangledName)) {
+ Mappings.push_back(MangledName);
+ ++NumCallInjected;
+ }
+ Function *VariantF = M->getFunction(TLIName);
+ if (!VariantF)
+ addVariantDeclaration(CI, VF, TLIName);
+ }
+ }
+
+ VFABI::setVectorVariantNames(&CI, Mappings);
+}
+
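+// Hedged illustration of the end result (variant names invented): after the
+// pass visits a call to sinf for which the TLI advertises 2- and 4-lane
+// versions, the call site carries the VFABI variant attribute, roughly:
+//
+//   "vector-function-abi-variant"=
+//       "_ZGV_LLVM_N2v_sinf(vsinf2),_ZGV_LLVM_N4v_sinf(vsinf4)"
+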
+static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
+ for (auto &I : instructions(F))
+ if (auto CI = dyn_cast<CallInst>(&I))
+ addMappingsFromTLI(TLI, *CI);
+ // Even if the pass adds IR attributes, the analyses are preserved.
+ return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// New pass manager implementation.
+////////////////////////////////////////////////////////////////////////////////
+PreservedAnalyses InjectTLIMappings::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ runImpl(TLI, F);
+ // Even if the pass adds IR attributes, the analyses are preserved.
+ return PreservedAnalyses::all();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy PM Implementation.
+////////////////////////////////////////////////////////////////////////////////
+bool InjectTLIMappingsLegacy::runOnFunction(Function &F) {
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ return runImpl(TLI, F);
+}
+
+void InjectTLIMappingsLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy Pass manager initialization
+////////////////////////////////////////////////////////////////////////////////
+char InjectTLIMappingsLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(InjectTLIMappingsLegacy, DEBUG_TYPE,
+ "Inject TLI Mappings", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(InjectTLIMappingsLegacy, DEBUG_TYPE, "Inject TLI Mappings",
+ false, false)
+
+FunctionPass *llvm::createInjectTLIMappingsLegacyPass() {
+ return new InjectTLIMappingsLegacy();
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
index a7f0f7ac5d61..6da612eb4e65 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1254,7 +1254,8 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
// better alignment.
- Builder.CreateMemCpy(Dst, /*DstAlign*/1, Src, /*SrcAlign*/1, Size);
+ Builder.CreateMemCpy(Dst, /*DstAlign*/ Align::None(), Src,
+ /*SrcAlign*/ Align::None(), Size);
}
/// When inlining a call site that has a byval argument,
@@ -1293,16 +1294,16 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
}
// Create the alloca. If we have DataLayout, use nice alignment.
- unsigned Align = DL.getPrefTypeAlignment(AggTy);
+ Align Alignment(DL.getPrefTypeAlignment(AggTy));
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
// pointer inside the callee).
- Align = std::max(Align, ByValAlignment);
+ Alignment = max(Alignment, MaybeAlign(ByValAlignment));
- Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(),
- nullptr, Align, Arg->getName(),
- &*Caller->begin()->begin());
+ Value *NewAlloca =
+ new AllocaInst(AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment,
+ Arg->getName(), &*Caller->begin()->begin());
IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
// Uses of the argument in the function should use our new alloca
@@ -1405,6 +1406,10 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
// other.
DenseMap<const MDNode *, MDNode *> IANodes;
+ // Check if we are not generating inline line tables and want to use
+ // the call site location instead.
+ bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables");
+
for (; FI != Fn->end(); ++FI) {
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
@@ -1416,20 +1421,22 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
BI->setMetadata(LLVMContext::MD_loop, NewLoopID);
}
- if (DebugLoc DL = BI->getDebugLoc()) {
- DebugLoc IDL =
- inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
- BI->setDebugLoc(IDL);
- continue;
- }
+ if (!NoInlineLineTables)
+ if (DebugLoc DL = BI->getDebugLoc()) {
+ DebugLoc IDL =
+ inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
+ BI->setDebugLoc(IDL);
+ continue;
+ }
- if (CalleeHasDebugInfo)
+ if (CalleeHasDebugInfo && !NoInlineLineTables)
continue;
- // If the inlined instruction has no line number, make it look as if it
- // originates from the call location. This is important for
- // ((__always_inline__, __nodebug__)) functions which must use caller
- // location for all instructions in their function body.
+ // If the inlined instruction has no line number, or if inline info
+ // is not being generated, make it look as if it originates from the call
+      // location. This is important for ((__always_inline__, __nodebug__))
+ // functions which must use caller location for all instructions in their
+ // function body.
// Don't update static allocas, as they may get moved later.
if (auto *AI = dyn_cast<AllocaInst>(BI))
@@ -1438,6 +1445,19 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
BI->setDebugLoc(TheCallDL);
}
+
+ // Remove debug info intrinsics if we're not keeping inline info.
+ if (NoInlineLineTables) {
+ BasicBlock::iterator BI = FI->begin();
+ while (BI != FI->end()) {
+ if (isa<DbgInfoIntrinsic>(BI)) {
+ BI = BI->eraseFromParent();
+ continue;
+ }
+ ++BI;
+ }
+ }
+
}
}
@@ -1453,7 +1473,7 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
BlockFrequencyInfo *CalleeBFI,
const BasicBlock &CalleeEntryBlock) {
SmallPtrSet<BasicBlock *, 16> ClonedBBs;
- for (auto const &Entry : VMap) {
+ for (auto Entry : VMap) {
if (!isa<BasicBlock>(Entry.first) || !Entry.second)
continue;
auto *OrigBB = cast<BasicBlock>(Entry.first);
@@ -1508,22 +1528,25 @@ void llvm::updateProfileCallee(
else
newEntryCount = priorEntryCount + entryDelta;
- Callee->setEntryCount(newEntryCount);
-
// During inlining ?
if (VMap) {
uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
- for (auto const &Entry : *VMap)
+ for (auto Entry : *VMap)
if (isa<CallInst>(Entry.first))
if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
CI->updateProfWeight(cloneEntryCount, priorEntryCount);
}
- for (BasicBlock &BB : *Callee)
- // No need to update the callsite if it is pruned during inlining.
- if (!VMap || VMap->count(&BB))
- for (Instruction &I : BB)
- if (CallInst *CI = dyn_cast<CallInst>(&I))
- CI->updateProfWeight(newEntryCount, priorEntryCount);
+
+ if (entryDelta) {
+ Callee->setEntryCount(newEntryCount);
+
+ for (BasicBlock &BB : *Callee)
+ // No need to update the callsite if it is pruned during inlining.
+ if (!VMap || VMap->count(&BB))
+ for (Instruction &I : BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ CI->updateProfWeight(newEntryCount, priorEntryCount);
+ }
}
/// This function inlines the called function into the basic block of the
@@ -1842,6 +1865,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Scan for the block of allocas that we can move over, and move them
// all at once.
while (isa<AllocaInst>(I) &&
+ !cast<AllocaInst>(I)->use_empty() &&
allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
++I;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 6c4fc1ceb991..aac0b55801c4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -15,6 +15,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
index 29e7c5260f46..5746d69260d5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -43,7 +43,9 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -74,7 +76,8 @@ static bool isExitBlock(BasicBlock *BB,
/// that are outside the current loop. If so, insert LCSSA PHI nodes and
/// rewrite the uses.
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
- DominatorTree &DT, LoopInfo &LI) {
+ DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution *SE) {
SmallVector<Use *, 16> UsesToRewrite;
SmallSetVector<PHINode *, 16> PHIsToRemove;
PredIteratorCache PredCache;
@@ -134,6 +137,11 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
SSAUpdater SSAUpdate(&InsertedPHIs);
SSAUpdate.Initialize(I->getType(), I->getName());
+ // Force re-computation of I, as some users now need to use the new PHI
+ // node.
+ if (SE)
+ SE->forgetValue(I);
+
// Insert the LCSSA phi's into all of the exit blocks dominated by the
// value, and add them to the Phi's map.
for (BasicBlock *ExitBB : ExitBlocks) {
@@ -192,9 +200,6 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
UserBB = PN->getIncomingBlock(*UseToRewrite);
if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
- // Tell the VHs that the uses changed. This updates SCEV's caches.
- if (UseToRewrite->get()->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
UseToRewrite->set(&UserBB->front());
continue;
}
@@ -202,10 +207,6 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// If we added a single PHI, it must dominate all uses and we can directly
// rename it.
if (AddedPHIs.size() == 1) {
- // Tell the VHs that the uses changed. This updates SCEV's caches.
- // We might call ValueIsRAUWd multiple times for the same value.
- if (UseToRewrite->get()->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(*UseToRewrite, AddedPHIs[0]);
UseToRewrite->set(AddedPHIs[0]);
continue;
}
@@ -368,7 +369,7 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
Worklist.push_back(&I);
}
}
- Changed = formLCSSAForInstructions(Worklist, DT, *LI);
+ Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE);
// If we modified the code, remove any caches about the loop from SCEV to
// avoid dangling entries.
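A usage sketch for the new ScalarEvolution parameter (caller shape
assumed, mirroring the formLCSSA call above): passing SE lets the
rewrite drop cached SCEVs for each instruction up front via
SE->forgetValue(I), instead of relying on the value-handle RAUW
notifications that the deleted blocks above used to provide.

    // Inside a pass that holds DT, LI and (possibly null) SE:
    SmallVector<Instruction *, 8> Worklist;
    // ... collect loop-defined instructions used outside the loop ...
    bool Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE);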
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index ed28fffc22b5..4c52fac6f7cb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index 5bcd05757ec1..b2d511c7c9a9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -1421,22 +1421,32 @@ bool llvm::LowerDbgDeclare(Function &F) {
}))
continue;
- for (auto &AIUse : AI->uses()) {
- User *U = AIUse.getUser();
- if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (AIUse.getOperandNo() == 1)
- ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
- } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
- // This is a call by-value or some other instruction that takes a
- // pointer to the variable. Insert a *value* intrinsic that describes
- // the variable by dereferencing the alloca.
- DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr);
- auto *DerefExpr =
- DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
- DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, NewLoc,
- CI);
+ SmallVector<const Value *, 8> WorkList;
+ WorkList.push_back(AI);
+ while (!WorkList.empty()) {
+ const Value *V = WorkList.pop_back_val();
+ for (auto &AIUse : V->uses()) {
+ User *U = AIUse.getUser();
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (AIUse.getOperandNo() == 1)
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+ } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // This is a call by-value or some other instruction that takes a
+ // pointer to the variable. Insert a *value* intrinsic that describes
+ // the variable by dereferencing the alloca.
+ if (!CI->isLifetimeStartOrEnd()) {
+ DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr);
+ auto *DerefExpr =
+ DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
+ DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr,
+ NewLoc, CI);
+ }
+ } else if (BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
+ if (BI->getType()->isPointerTy())
+ WorkList.push_back(BI);
+ }
}
}
DDI->eraseFromParent();
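The traversal added above is a plain depth-first worklist over
pointer-preserving bitcasts; a self-contained stand-in (invented Node
type) of the same shape:

    #include <vector>

    struct Node {
      std::vector<Node *> CastUsers; // users that merely re-type the pointer
    };

    // Visit Root and everything reachable through CastUsers, handling
    // "real" users at each step -- the same way the loop above treats
    // loads/stores/calls found through a pointer-typed BitCastInst.
    template <typename Visitor>
    void visitTransitiveUsers(Node *Root, Visitor Visit) {
      std::vector<Node *> WorkList{Root};
      while (!WorkList.empty()) {
        Node *N = WorkList.back();
        WorkList.pop_back();
        Visit(N);
        for (Node *C : N->CastUsers)
          WorkList.push_back(C);
      }
    }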
@@ -1611,6 +1621,11 @@ bool llvm::salvageDebugInfo(Instruction &I) {
return salvageDebugInfoForDbgValues(I, DbgUsers);
}
+void llvm::salvageDebugInfoOrMarkUndef(Instruction &I) {
+ if (!salvageDebugInfo(I))
+ replaceDbgUsesWithUndef(&I);
+}
+
bool llvm::salvageDebugInfoForDbgValues(
Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
auto &Ctx = I.getContext();
@@ -1661,9 +1676,8 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
};
// initializer-list helper for applying operators to the source DIExpression.
- auto applyOps =
- [&](std::initializer_list<uint64_t> Opcodes) -> DIExpression * {
- SmallVector<uint64_t, 8> Ops(Opcodes);
+ auto applyOps = [&](ArrayRef<uint64_t> Opcodes) -> DIExpression * {
+ SmallVector<uint64_t, 8> Ops(Opcodes.begin(), Opcodes.end());
return doSalvage(Ops);
};
@@ -1671,8 +1685,21 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
// No-op casts and zexts are irrelevant for debug info.
if (CI->isNoopCast(DL) || isa<ZExtInst>(&I))
return SrcDIExpr;
- return nullptr;
- } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+
+ Type *Type = CI->getType();
+ // Casts other than Trunc or SExt to scalar types cannot be salvaged.
+ if (Type->isVectorTy() || (!isa<TruncInst>(&I) && !isa<SExtInst>(&I)))
+ return nullptr;
+
+ Value *FromValue = CI->getOperand(0);
+ unsigned FromTypeBitSize = FromValue->getType()->getScalarSizeInBits();
+ unsigned ToTypeBitSize = Type->getScalarSizeInBits();
+
+ return applyOps(DIExpression::getExtOps(FromTypeBitSize, ToTypeBitSize,
+ isa<SExtInst>(&I)));
+ }
+
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
unsigned BitWidth =
M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace());
// Rewrite a constant GEP into a DIExpression.
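A hedged worked example for the new cast salvaging (IR values invented):

    // %t = sext i32 %x to i64
    //   -> applyOps(DIExpression::getExtOps(/*FromBits=*/32,
    //                                       /*ToBits=*/64, /*Signed=*/true))
    // i.e. the source expression is expected to gain a signed 32-bit ->
    // signed 64-bit DW_OP_LLVM_convert pair, mirroring the appendExt
    // call further down; vector casts and casts other than trunc/sext
    // still bail out with nullptr.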
@@ -1727,7 +1754,7 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
using DbgValReplacement = Optional<DIExpression *>;
/// Point debug users of \p From to \p To using exprs given by \p RewriteExpr,
-/// possibly moving/deleting users to prevent use-before-def. Returns true if
+/// possibly moving/undefing users to prevent use-before-def. Returns true if
/// changes are made.
static bool rewriteDebugUsers(
Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT,
@@ -1740,7 +1767,7 @@ static bool rewriteDebugUsers(
// Prevent use-before-def of To.
bool Changed = false;
- SmallPtrSet<DbgVariableIntrinsic *, 1> DeleteOrSalvage;
+ SmallPtrSet<DbgVariableIntrinsic *, 1> UndefOrSalvage;
if (isa<Instruction>(&To)) {
bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint;
@@ -1755,14 +1782,14 @@ static bool rewriteDebugUsers(
// Users which otherwise aren't dominated by the replacement value must
// be salvaged or deleted.
} else if (!DT.dominates(&DomPoint, DII)) {
- DeleteOrSalvage.insert(DII);
+ UndefOrSalvage.insert(DII);
}
}
}
// Update debug users without use-before-def risk.
for (auto *DII : Users) {
- if (DeleteOrSalvage.count(DII))
+ if (UndefOrSalvage.count(DII))
continue;
LLVMContext &Ctx = DII->getContext();
@@ -1776,18 +1803,10 @@ static bool rewriteDebugUsers(
Changed = true;
}
- if (!DeleteOrSalvage.empty()) {
+ if (!UndefOrSalvage.empty()) {
// Try to salvage the remaining debug users.
- Changed |= salvageDebugInfo(From);
-
- // Delete the debug users which weren't salvaged.
- for (auto *DII : DeleteOrSalvage) {
- if (DII->getVariableLocation() == &From) {
- LLVM_DEBUG(dbgs() << "Erased UseBeforeDef: " << *DII << '\n');
- DII->eraseFromParent();
- Changed = true;
- }
- }
+ salvageDebugInfoOrMarkUndef(From);
+ Changed = true;
}
return Changed;
@@ -1862,10 +1881,8 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
return None;
bool Signed = *Signedness == DIBasicType::Signedness::Signed;
- dwarf::TypeKind TK = Signed ? dwarf::DW_ATE_signed : dwarf::DW_ATE_unsigned;
- SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_LLVM_convert, ToBits, TK,
- dwarf::DW_OP_LLVM_convert, FromBits, TK});
- return DIExpression::appendToStack(DII.getExpression(), Ops);
+ return DIExpression::appendExt(DII.getExpression(), ToBits, FromBits,
+ Signed);
};
return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt);
}
@@ -2574,7 +2591,7 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
if (!NewTy->isPointerTy())
return;
- unsigned BitWidth = DL.getIndexTypeSizeInBits(NewTy);
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(NewTy);
if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
MDNode *NN = MDNode::get(OldLI.getContext(), None);
NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 889ea5ca9970..c065e0269c64 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -76,6 +76,13 @@ private:
};
} // end anonymous namespace
+/// Insert the (K, V) pair into the ValueToValueMap, and verify that the key
+/// did not previously exist in the map and that the value was inserted.
+static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V) {
+ bool Inserted = VM.insert({K, V}).second;
+ assert(Inserted);
+ (void)Inserted;
+}
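The same assert-only-insert idiom in isolation (std::map stands in for
the ValueToValueMap; the (void) cast silences -Wunused-variable in
NDEBUG builds where assert() compiles away):

    #include <cassert>
    #include <map>

    static void insertNew(std::map<int, int> &M, int K, int V) {
      bool Inserted = M.insert({K, V}).second;
      assert(Inserted && "key was already mapped");
      (void)Inserted; // only read by the assert above
    }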
/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
/// old header into the preheader. If there were uses of the values produced by
/// these instruction that were outside of the loop, we have to insert PHI nodes
@@ -300,7 +307,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// For PHI nodes, the value available in OldPreHeader is just the
// incoming value from OldPreHeader.
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
- ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
+ InsertNewValueIntoMap(ValueMap, PN,
+ PN->getIncomingValueForBlock(OrigPreheader));
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
@@ -358,13 +366,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
if (V && LI->replacementPreservesLCSSAForm(C, V)) {
// If so, then delete the temporary instruction and stick the folded value
// in the map.
- ValueMap[Inst] = V;
+ InsertNewValueIntoMap(ValueMap, Inst, V);
if (!C->mayHaveSideEffects()) {
C->deleteValue();
C = nullptr;
}
} else {
- ValueMap[Inst] = C;
+ InsertNewValueIntoMap(ValueMap, Inst, C);
}
if (C) {
// Otherwise, stick the new instruction into the new block!
@@ -376,7 +384,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
AC->registerAssumption(II);
// MemorySSA cares whether the cloned instruction was inserted or not, and
// not whether it can be remapped to a simplified value.
- ValueMapMSSA[Inst] = C;
+ if (MSSAU)
+ InsertNewValueIntoMap(ValueMapMSSA, Inst, C);
}
}
@@ -396,7 +405,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Update MemorySSA before the rewrite call below changes the 1:1
// instruction:cloned_instruction_or_value mapping.
if (MSSAU) {
- ValueMapMSSA[OrigHeader] = OrigPreheader;
+ InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader);
MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
ValueMapMSSA);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index d0f89dc54bfb..28f88f39a712 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -67,6 +67,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index a7590fc32545..4b94b371e70a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -22,17 +22,18 @@
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index bf2e87b0d49f..f1965934b2d7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -21,7 +21,6 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -177,6 +176,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
// When we enter here we should have already checked that it is safe
BasicBlock *Header = L->getHeader();
+ assert(Header && "No header.");
assert(L->getSubLoops().size() == 1);
Loop *SubLoop = *L->begin();
@@ -247,8 +247,9 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(Preheader && "No preheader");
+ assert(LatchBlock && "No latch block");
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
- assert(Preheader && LatchBlock && Header);
assert(BI && !BI->isUnconditional());
bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 58e42074f963..7a168ff6f32b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -212,14 +212,11 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
     // Avoid huge SCEV computations in the loop below; make sure we only
- // consider AddRecs of the loop we are trying to peel and avoid
- // non-monotonic predicates, as we will not be able to simplify the loop
- // body.
- // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can
- // simplify the loop, if we peel 1 additional iteration, if there
- // is no wrapping.
+ // consider AddRecs of the loop we are trying to peel.
+ if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
+ continue;
bool Increasing;
- if (!LeftAR->isAffine() || LeftAR->getLoop() != &L ||
+ if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
!SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
continue;
(void)Increasing;
@@ -238,18 +235,43 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
Pred = ICmpInst::getInversePredicate(Pred);
const SCEV *Step = LeftAR->getStepRecurrence(SE);
- while (NewPeelCount < MaxPeelCount &&
- SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
- IterVal = SE.getAddExpr(IterVal, Step);
+ const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
+ auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
+ &NewPeelCount]() {
+ IterVal = NextIterVal;
+ NextIterVal = SE.getAddExpr(IterVal, Step);
NewPeelCount++;
+ };
+
+ auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
+ return NewPeelCount < MaxPeelCount;
+ };
+
+ while (CanPeelOneMoreIteration() &&
+ SE.isKnownPredicate(Pred, IterVal, RightSCEV))
+ PeelOneMoreIteration();
+
+ // With *that* peel count, does the predicate !Pred become known in the
+ // first iteration of the loop body after peeling?
+ if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
+ RightSCEV))
+ continue; // If not, give up.
+
+ // However, for equality comparisons, that isn't always sufficient to
+    // eliminate the comparison in the loop body; we may need to peel one more
+ // iteration. See if that makes !Pred become unknown again.
+ if (ICmpInst::isEquality(Pred) &&
+ !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
+ RightSCEV)) {
+ assert(!SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
+ SE.isKnownPredicate(Pred, NextIterVal, RightSCEV) &&
+ "Expected Pred to go from known to unknown.");
+ if (!CanPeelOneMoreIteration())
+ continue; // Need to peel one more iteration, but can't. Give up.
+ PeelOneMoreIteration(); // Great!
}
- // Only peel the loop if the monotonic predicate !Pred becomes known in the
- // first iteration of the loop body after peeling.
- if (NewPeelCount > DesiredPeelCount &&
- SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
- RightSCEV))
- DesiredPeelCount = NewPeelCount;
+ DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
}
return DesiredPeelCount;
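A worked example of the refactored peel loop (loop shape invented): for
an AddRec {0,+,1} guarding `if (i < 2)`, with MaxPeelCount large enough,

    // Pred = ICMP_ULT, RightSCEV = 2, IterVal = 0, Step = 1
    //   ULT(0, 2) known -> peel, IterVal = 1, NewPeelCount = 1
    //   ULT(1, 2) known -> peel, IterVal = 2, NewPeelCount = 2
    //   ULT(2, 2) false -> stop
    // !Pred: UGE(2, 2) is known in the first remaining iteration, so
    // DesiredPeelCount = max(DesiredPeelCount, 2). For equality
    // predicates, the extra block above may peel one more iteration
    // before !Pred stays known.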
@@ -562,7 +584,7 @@ static void cloneLoopBlocks(
// LastValueMap is updated with the values for the current loop
// which are used the next time this function is called.
- for (const auto &KV : VMap)
+ for (auto KV : VMap)
LVMap[KV.first] = KV.second;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index d22fdb4d52dc..ddb7479924bd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
@@ -395,9 +396,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
}
}
if (CreateRemainderLoop) {
- Loop *NewLoop = NewLoops[L];
- MDNode *LoopID = NewLoop->getLoopID();
+ Loop *NewLoop = NewLoops[L];
assert(NewLoop && "L should have been cloned");
+ MDNode *LoopID = NewLoop->getLoopID();
// Only add loop metadata if the loop is not going to be completely
// unrolled.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
index b4d7f35d2d9a..c4c40189fda4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -24,7 +24,6 @@
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -35,6 +34,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
@@ -672,7 +672,19 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
LI->removeBlock(BB);
// The last step is to update LoopInfo now that we've eliminated this loop.
- LI->erase(L);
+  // Note: LoopInfo::erase removes the given loop and relinks its subloops to
+  // its parent, while removeLoop/removeChildLoop remove the given loop but
+  // do not relink its subloops, which is what we want.
+ if (Loop *ParentLoop = L->getParentLoop()) {
+ Loop::iterator I = find(ParentLoop->begin(), ParentLoop->end(), L);
+ assert(I != ParentLoop->end() && "Couldn't find loop");
+ ParentLoop->removeChildLoop(I);
+ } else {
+ Loop::iterator I = find(LI->begin(), LI->end(), L);
+ assert(I != LI->end() && "Couldn't find loop");
+ LI->removeLoop(I);
+ }
+ LI->destroy(L);
}
}
@@ -702,19 +714,19 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
// To estimate the number of times the loop body was executed, we want to
// know the number of times the backedge was taken, vs. the number of times
// we exited the loop.
- uint64_t TrueVal, FalseVal;
- if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
+ uint64_t BackedgeTakenWeight, LatchExitWeight;
+ if (!LatchBR->extractProfMetadata(BackedgeTakenWeight, LatchExitWeight))
return None;
- if (!TrueVal || !FalseVal)
+ if (LatchBR->getSuccessor(0) != L->getHeader())
+ std::swap(BackedgeTakenWeight, LatchExitWeight);
+
+ if (!BackedgeTakenWeight || !LatchExitWeight)
return 0;
// Divide the count of the backedge by the count of the edge exiting the loop,
// rounding to nearest.
- if (LatchBR->getSuccessor(0) == L->getHeader())
- return (TrueVal + (FalseVal / 2)) / FalseVal;
- else
- return (FalseVal + (TrueVal / 2)) / TrueVal;
+ return llvm::divideNearest(BackedgeTakenWeight, LatchExitWeight);
}
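A numeric check of the rewrite above (branch weights invented): with
backedge weight 1000 and exit weight 3,

    // old: (TrueVal + FalseVal / 2) / FalseVal = (1000 + 1) / 3 = 333
    // new: llvm::divideNearest(1000, 3)        = (1000 + 1) / 3 = 333
    //
    // Same estimate; the change only names the two weights and swaps
    // them up front when successor 0 is not the loop header, instead of
    // branching on the successor order at the division.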
bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 5d7759056c7d..50752bd78a65 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -18,6 +18,8 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index fe67e191dc62..1af0ce3d86cc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 8256e3b5f5af..4b9d0dadfc17 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Mem2Reg.cpp
index cd2c81b6abc8..5ad7aeb463ec 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 60bb2775a194..7f961dbaf4b4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/TypeFinder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp
index 26d3402bd279..a16ca1fb8efa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
#include <cstdint>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 1ef3757017a8..b94f57e4dc2c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -280,3 +281,31 @@ std::string llvm::getUniqueModuleId(Module *M) {
MD5::stringifyResult(R, Str);
return ("$" + Str).str();
}
+
+void VFABI::setVectorVariantNames(
+ CallInst *CI, const SmallVector<std::string, 8> &VariantMappings) {
+ if (VariantMappings.empty())
+ return;
+
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ for (const std::string &VariantMapping : VariantMappings)
+ Out << VariantMapping << ",";
+ // Get rid of the trailing ','.
+ assert(!Buffer.str().empty() && "Must have at least one char.");
+ Buffer.pop_back();
+
+ Module *M = CI->getModule();
+#ifndef NDEBUG
+ for (const std::string &VariantMapping : VariantMappings) {
+ Optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping);
+    assert(VI.hasValue() && "Cannot add an invalid VFABI name.");
+ assert(M->getNamedValue(VI.getValue().VectorName) &&
+ "Cannot add variant to attribute: "
+ "vector function declaration is missing.");
+ }
+#endif
+ CI->addAttribute(
+ AttributeList::FunctionIndex,
+ Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
+}
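A hedged illustration of the attribute string being built (mangled
names invented, following the VFABI scheme mangled_name(vector_name)):

    // VariantMappings = {"_ZGVnN2v_foo(foo_vec2)", "_ZGVnN4v_foo(foo_vec4)"}
    // Buffer after the loop:     "_ZGVnN2v_foo(foo_vec2),_ZGVnN4v_foo(foo_vec4),"
    // Buffer after pop_back():   "_ZGVnN2v_foo(foo_vec2),_ZGVnN4v_foo(foo_vec4)"
    // That comma-joined string becomes the value of the function
    // attribute named by MappingsAttrName on the call site; the
    // NDEBUG-only loop checks each entry demangles and that its vector
    // function is declared in the module.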
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
index ac8991e9d475..1c5c41abc682 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
-
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/MD5.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 44859eafb9c1..dda2867f44b2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 0530d3a987a5..0ea6e99e6f19 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -1403,10 +1404,16 @@ HoistTerminator:
// These values do not agree. Insert a select instruction before NT
// that determines the right value.
SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
- if (!SI)
+ if (!SI) {
+ // Propagate fast-math-flags from phi node to its replacement select.
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ if (isa<FPMathOperator>(PN))
+ Builder.setFastMathFlags(PN.getFastMathFlags());
+
SI = cast<SelectInst>(
Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
BB1V->getName() + "." + BB2V->getName(), BI));
+ }
// Make the PHI node use the select for all incoming values for BB1/BB2
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
@@ -2261,14 +2268,14 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
if (!BBI->use_empty())
TranslateMap[&*BBI] = N;
}
- // Insert the new instruction into its new home.
- if (N)
+ if (N) {
+ // Insert the new instruction into its new home.
EdgeBB->getInstList().insert(InsertPt, N);
- // Register the new instruction with the assumption cache if necessary.
- if (auto *II = dyn_cast_or_null<IntrinsicInst>(N))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
+ // Register the new instruction with the assumption cache if necessary.
+ if (AC && match(N, m_Intrinsic<Intrinsic::assume>()))
+ AC->registerAssumption(cast<IntrinsicInst>(N));
+ }
}
// Loop over all of the edges from PredBB to BB, changing them to branch
@@ -2417,7 +2424,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
if (IfBlock2)
hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock2);
+ // Propagate fast-math-flags from phi nodes to replacement selects.
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ if (isa<FPMathOperator>(PN))
+ Builder.setFastMathFlags(PN->getFastMathFlags());
+
// Change the PHI node into a select instruction.
Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
@@ -3211,6 +3223,47 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
return Changed;
}
+
+/// If the previous block ended with a widenable branch, determine if reusing
+/// the target block is profitable and legal. This will have the effect of
+/// "widening" PBI, but doesn't require us to reason about hoisting safety.
+static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+ // TODO: This can be generalized in two important ways:
+ // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
+ // values from the PBI edge.
+  //    2) We can sink side-effecting instructions into BI's fallthrough
+  //       successor provided they don't contribute to the computation of
+ // BI's condition.
+ Value *CondWB, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) ||
+ IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor())
+ return false;
+ if (!IfFalseBB->phis().empty())
+ return false; // TODO
+  // Use a lambda to lazily compute the expensive condition after cheap ones.
+ auto NoSideEffects = [](BasicBlock &BB) {
+ return !llvm::any_of(BB, [](const Instruction &I) {
+ return I.mayWriteToMemory() || I.mayHaveSideEffects();
+ });
+ };
+ if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
+ BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
+ NoSideEffects(*BI->getParent())) {
+ BI->getSuccessor(1)->removePredecessor(BI->getParent());
+ BI->setSuccessor(1, IfFalseBB);
+ return true;
+ }
+ if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
+ BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
+ NoSideEffects(*BI->getParent())) {
+ BI->getSuccessor(0)->removePredecessor(BI->getParent());
+ BI->setSuccessor(0, IfFalseBB);
+ return true;
+ }
+ return false;
+}
+
/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
@@ -3266,6 +3319,12 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
}
}
+ // If the previous block ended with a widenable branch, determine if reusing
+ // the target block is profitable and legal. This will have the effect of
+  // "widening" PBI, but doesn't require us to reason about hoisting safety.
+ if (tryWidenCondBranchToCondBranch(PBI, BI))
+ return true;
+
if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
if (CE->canTrap())
return false;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 0324993a8203..fa3a9d21f3df 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -288,8 +288,9 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(CpyDst, 1, Src, 1,
- ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
+ B.CreateMemCpy(
+ CpyDst, Align::None(), Src, Align::None(),
+ ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
return Dst;
}
@@ -364,8 +365,8 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
- return B.CreateGEP(B.getInt8Ty(), SrcStr, emitStrLen(SrcStr, B, DL, TLI),
- "strchr");
+ if (Value *StrLen = emitStrLen(SrcStr, B, DL, TLI))
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, StrLen, "strchr");
return nullptr;
}
@@ -561,7 +562,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
CallInst *NewCI =
- B.CreateMemCpy(Dst, 1, Src, 1,
+ B.CreateMemCpy(Dst, Align::None(), Src, Align::None(),
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
NewCI->setAttributes(CI->getAttributes());
return Dst;
@@ -589,7 +590,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, LenV);
+ CallInst *NewCI =
+ B.CreateMemCpy(Dst, Align::None(), Src, Align::None(), LenV);
NewCI->setAttributes(CI->getAttributes());
return DstEnd;
}
@@ -624,7 +626,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(align 1 x, '\0', y)
- CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, 1);
+ CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, Align::None());
AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0));
NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
CI->getContext(), 0, ArgAttrs));
@@ -637,7 +639,8 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
Type *PT = Callee->getFunctionType()->getParamType(0);
// strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
- CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
+ CallInst *NewCI = B.CreateMemCpy(Dst, Align::None(), Src, Align::None(),
+ ConstantInt::get(DL.getIntPtrType(PT), Len));
NewCI->setAttributes(CI->getAttributes());
return Dst;
}
@@ -1113,17 +1116,58 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
return nullptr;
// memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
- CallInst *NewCI =
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
+ CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align::None(),
+ CI->getArgOperand(1), Align::None(), Size);
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
+Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilder<> &B) {
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ ConstantInt *StopChar = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ ConstantInt *N = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+ StringRef SrcStr;
+ if (CI->use_empty() && Dst == Src)
+ return Dst;
+ // memccpy(d, s, c, 0) -> nullptr
+ if (N) {
+ if (N->isNullValue())
+ return Constant::getNullValue(CI->getType());
+ if (!getConstantStringInfo(Src, SrcStr, /*Offset=*/0,
+ /*TrimAtNul=*/false) ||
+ !StopChar)
+ return nullptr;
+ } else {
+ return nullptr;
+ }
+
+  // memccpy takes 'c' as an int; only its low byte (the char value) is used.
+ size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF);
+ if (Pos == StringRef::npos) {
+ if (N->getZExtValue() <= SrcStr.size()) {
+ B.CreateMemCpy(Dst, Align::None(), Src, Align::None(),
+ CI->getArgOperand(3));
+ return Constant::getNullValue(CI->getType());
+ }
+ return nullptr;
+ }
+
+ Value *NewN =
+ ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue()));
+ // memccpy -> llvm.memcpy
+ B.CreateMemCpy(Dst, Align::None(), Src, Align::None(), NewN);
+ return Pos + 1 <= N->getZExtValue()
+ ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN)
+ : Constant::getNullValue(CI->getType());
+}
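A worked example of the memccpy fold (constants invented):

    // memccpy(d, "ABCDE", 'C', 4):
    //   SrcStr = "ABCDE", Pos = 2 (index of 'C')
    //   NewN   = min(Pos + 1, N) = min(3, 4) = 3
    //   -> llvm.memcpy(d, src, 3), return d + 3 (one past the copied 'C')
    // memccpy(d, "ABCDE", 'X', 4): 'X' not found, and N does not exceed
    //   the source size
    //   -> llvm.memcpy(d, src, 4), return nullptr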
+
Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) {
Value *Dst = CI->getArgOperand(0);
Value *N = CI->getArgOperand(2);
// mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
- CallInst *NewCI = B.CreateMemCpy(Dst, 1, CI->getArgOperand(1), 1, N);
+ CallInst *NewCI = B.CreateMemCpy(Dst, Align::None(), CI->getArgOperand(1),
+ Align::None(), N);
NewCI->setAttributes(CI->getAttributes());
return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
}
@@ -1135,8 +1179,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
return nullptr;
// memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
- CallInst *NewCI =
- B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
+ CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align::None(),
+ CI->getArgOperand(1), Align::None(), Size);
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
@@ -1196,7 +1240,8 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
// memset(p, v, n) -> llvm.memset(align 1 p, v, n)
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, 1);
+ CallInst *NewCI =
+ B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align::None());
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
@@ -1599,6 +1644,11 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
(!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
return nullptr;
+ // Converting pow(X, -0.5) to 1/sqrt(X) may introduce an extra rounding step,
+ // so that requires fast-math-flags (afn or reassoc).
+ if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc()))
+ return nullptr;
+
Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI);
if (!Sqrt)
return nullptr;
@@ -1696,7 +1746,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
// TODO: This whole transformation should be backend specific (e.g. some
// backends might prefer libcalls or the limit for the exponent might
// be different) and it should also consider optimizing for size.
- APFloat LimF(ExpoF->getSemantics(), 33.0),
+ APFloat LimF(ExpoF->getSemantics(), 33),
ExpoA(abs(*ExpoF));
if (ExpoA.compare(LimF) == APFloat::cmpLessThan) {
// This transformation applies to integer or integer+0.5 exponents only.
@@ -2426,9 +2476,11 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
return nullptr; // we found a format specifier, bail out.
// sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()),
- FormatStr.size() + 1)); // Copy the null byte.
+ B.CreateMemCpy(
+ CI->getArgOperand(0), Align::None(), CI->getArgOperand(1),
+ Align::None(),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ FormatStr.size() + 1)); // Copy the null byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -2463,7 +2515,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
return nullptr;
Value *IncLen =
B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, IncLen);
+ B.CreateMemCpy(CI->getArgOperand(0), Align::None(), CI->getArgOperand(2),
+ Align::None(), IncLen);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -2534,7 +2587,8 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
// snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt,
// strlen(fmt)+1)
B.CreateMemCpy(
- CI->getArgOperand(0), 1, CI->getArgOperand(2), 1,
+ CI->getArgOperand(0), Align::None(), CI->getArgOperand(2),
+ Align::None(),
ConstantInt::get(DL.getIntPtrType(CI->getContext()),
FormatStr.size() + 1)); // Copy the null byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
@@ -2575,7 +2629,8 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
else if (N < Str.size() + 1)
return nullptr;
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(3), 1,
+ B.CreateMemCpy(CI->getArgOperand(0), Align::None(), CI->getArgOperand(3),
+ Align::None(),
ConstantInt::get(CI->getType(), Str.size() + 1));
// The snprintf result is the unincremented number of bytes in the string.
@@ -2716,7 +2771,8 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
// Don't rewrite fputs to fwrite when optimising for size because fwrite
// requires more arguments and thus extra MOVs are required.
bool OptForSize = CI->getFunction()->hasOptSize() ||
- llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+ PGSOQueryType::IRPass);
if (OptForSize)
return nullptr;
@@ -2792,7 +2848,8 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilder<> &B) {
// bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
- return B.CreateMemMove(CI->getArgOperand(1), 1, CI->getArgOperand(0), 1,
+ return B.CreateMemMove(CI->getArgOperand(1), Align::None(),
+ CI->getArgOperand(0), Align::None(),
CI->getArgOperand(2));
}
@@ -2864,6 +2921,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeMemCmp(CI, Builder);
case LibFunc_memcpy:
return optimizeMemCpy(CI, Builder);
+ case LibFunc_memccpy:
+ return optimizeMemCCpy(CI, Builder);
case LibFunc_mempcpy:
return optimizeMemPCpy(CI, Builder);
case LibFunc_memmove:
@@ -3223,8 +3282,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
IRBuilder<> &B) {
if (isFortifiedCallFoldable(CI, 3, 2)) {
- CallInst *NewCI = B.CreateMemCpy(
- CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2));
+ CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align::None(),
+ CI->getArgOperand(1), Align::None(),
+ CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
@@ -3234,8 +3294,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
IRBuilder<> &B) {
if (isFortifiedCallFoldable(CI, 3, 2)) {
- CallInst *NewCI = B.CreateMemMove(
- CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2));
+ CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align::None(),
+ CI->getArgOperand(1), Align::None(),
+ CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
@@ -3248,8 +3309,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
if (isFortifiedCallFoldable(CI, 3, 2)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- CallInst *NewCI =
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,
+ CI->getArgOperand(2), Align::None());
NewCI->setAttributes(CI->getAttributes());
return CI->getArgOperand(0);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
index 1519751197d2..d2a400027d4b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
@@ -10,28 +10,80 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
+
using namespace llvm;
-static cl::opt<bool> ProfileGuidedSizeOpt(
+cl::opt<bool> EnablePGSO(
"pgso", cl::Hidden, cl::init(true),
- cl::desc("Enable the profile guided size optimization. "));
-
-bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI) {
- assert(F);
- if (!PSI || !BFI || !PSI->hasProfileSummary())
- return false;
- return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI);
+    cl::desc("Enable the profile guided size optimizations."));
+
+cl::opt<bool> PGSOLargeWorkingSetSizeOnly(
+ "pgso-lwss-only", cl::Hidden, cl::init(true),
+ cl::desc("Apply the profile guided size optimizations only "
+             "if the working set size is large (except for cold code)."));
+
+cl::opt<bool> PGSOColdCodeOnly(
+ "pgso-cold-code-only", cl::Hidden, cl::init(true),
+ cl::desc("Apply the profile guided size optimizations only "
+ "to cold code."));
+
+cl::opt<bool> PGSOIRPassOrTestOnly(
+ "pgso-ir-pass-or-test-only", cl::Hidden, cl::init(false),
+    cl::desc("Apply the profile guided size optimizations only "
+ "to the IR passes or tests."));
+
+cl::opt<bool> ForcePGSO(
+ "force-pgso", cl::Hidden, cl::init(false),
+    cl::desc("Force the (profile-guided) size optimizations."));
+
+cl::opt<int> PgsoCutoffInstrProf(
+ "pgso-cutoff-instr-prof", cl::Hidden, cl::init(250000), cl::ZeroOrMore,
+ cl::desc("The profile guided size optimization profile summary cutoff "
+ "for instrumentation profile."));
+
+cl::opt<int> PgsoCutoffSampleProf(
+ "pgso-cutoff-sample-prof", cl::Hidden, cl::init(800000), cl::ZeroOrMore,
+ cl::desc("The profile guided size optimization profile summary cutoff "
+ "for sample profile."));
+
+namespace {
+struct BasicBlockBFIAdapter {
+ static bool isFunctionColdInCallGraph(const Function *F,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo &BFI) {
+ return PSI->isFunctionColdInCallGraph(F, BFI);
+ }
+ static bool isFunctionHotInCallGraphNthPercentile(int CutOff,
+ const Function *F,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo &BFI) {
+ return PSI->isFunctionHotInCallGraphNthPercentile(CutOff, F, BFI);
+ }
+ static bool isColdBlock(const BasicBlock *BB,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ return PSI->isColdBlock(BB, BFI);
+ }
+ static bool isHotBlockNthPercentile(int CutOff,
+ const BasicBlock *BB,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ return PSI->isHotBlockNthPercentile(CutOff, BB, BFI);
+ }
+};
+} // end anonymous namespace
+
+bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI,
+ PGSOQueryType QueryType) {
+ return shouldFuncOptimizeForSizeImpl<BasicBlockBFIAdapter>(F, PSI, BFI,
+ QueryType);
}
-bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI) {
- assert(BB);
- if (!PSI || !BFI || !PSI->hasProfileSummary())
- return false;
- return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI);
+bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI,
+ PGSOQueryType QueryType) {
+ return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI,
+ QueryType);
}
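A usage sketch with the extended signature (caller context assumed; this
mirrors the fputs change earlier in this diff):

    // From an IR-level transform holding F, PSI and BFI:
    bool OptForSize = F->hasOptSize() ||
                      llvm::shouldOptimizeForSize(F, PSI, BFI,
                                                  PGSOQueryType::IRPass);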
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
index 50844cf9d1c5..7880ea1c6c47 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
index 97a4533fabe5..21cbbfb140b6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/DebugInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index 5d380dcf231c..aacf81d83519 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -69,6 +69,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 7f7bdf8a3d6d..9af39d9a0dd1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -18,10 +18,16 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;
char UnifyFunctionExitNodes::ID = 0;
+
+UnifyFunctionExitNodes::UnifyFunctionExitNodes() : FunctionPass(ID) {
+ initializeUnifyFunctionExitNodesPass(*PassRegistry::getPassRegistry());
+}
+
INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
"Unify function exit nodes", false, false)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
index 5272ab6e95d5..7769c7493cda 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
@@ -39,6 +39,7 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeMetaRenamerPass(Registry);
initializeStripGCRelocatesPass(Registry);
initializePredicateInfoPrinterLegacyPassPass(Registry);
+ initializeInjectTLIMappingsLegacyPass(Registry);
}
/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index f44976c723ec..7478daa2a0a5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -38,6 +38,7 @@
// could use this pass (with some modifications), but currently it implements
// its own pass to do something similar to what we do here.
+#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/MapVector.h"
@@ -52,7 +53,6 @@
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
@@ -71,14 +71,15 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
-#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index f43842be5357..3f943f4c0688 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -72,7 +72,7 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
- Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
+ Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
ORE(ORE) {
// Populate values with existing loop metadata.
getHintsFromMetadata();
@@ -815,6 +815,18 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
}
+ // For first order recurrences, we use the previous value (incoming value from
+ // the latch) to check if it dominates all users of the recurrence. Bail out
+ // if we have to sink such an instruction for another recurrence, as the
+ // dominance requirement may not hold after sinking.
+ BasicBlock *LoopLatch = TheLoop->getLoopLatch();
+ if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
+ Instruction *V =
+ cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
+ return SinkAfter.find(V) != SinkAfter.end();
+ }))
+ return false;
+
// Now we know the widest induction type, check if our found induction
// is the same size. If it's not, unset it here and InnerLoopVectorizer
// will create another.
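
For orientation, a minimal sketch (mine, not part of the patch) of the kind of first-order recurrence the bail-out above guards:

// Hypothetical example: a first-order recurrence. The value carried across
// iterations enters the header phi from the loop latch; the new check bails
// out if honoring one recurrence's dominance requirement would mean sinking
// an instruction that another recurrence also depends on.
void firstOrderRecurrence(int *a, int *b, int n) {
  int prev = a[0];               // recurrence start value
  for (int i = 1; i < n; ++i) {
    b[i] = prev + a[i];          // user of the previous iteration's value
    prev = a[i];                 // incoming value from the latch
  }
}
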
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index a5e85f27fabf..c3ca43fcd492 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -201,6 +201,9 @@ class LoopVectorizationPlanner {
/// The profitability analysis.
LoopVectorizationCostModel &CM;
+ /// The interleaved access analysis.
+ InterleavedAccessInfo &IAI;
+
SmallVector<VPlanPtr, 4> VPlans;
/// This class is used to enable the VPlan to invoke a method of ILV. This is
@@ -211,6 +214,8 @@ class LoopVectorizationPlanner {
VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {}
Value *getOrCreateVectorValues(Value *V, unsigned Part) override;
+ Value *getOrCreateScalarValue(Value *V,
+ const VPIteration &Instance) override;
};
/// A builder used to construct the current plan.
@@ -223,8 +228,10 @@ public:
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
- LoopVectorizationCostModel &CM)
- : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}
+ LoopVectorizationCostModel &CM,
+ InterleavedAccessInfo &IAI)
+ : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
+ IAI(IAI) {}
/// Plan how to best vectorize, return the best VF and its cost, or None if
/// vectorization and interleaving should be avoided up front.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8f0bf70f873c..684a3098e564 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -58,8 +58,8 @@
#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanHCFGBuilder.h"
-#include "VPlanHCFGTransforms.h"
#include "VPlanPredicator.h"
+#include "VPlanTransforms.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -124,6 +124,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -149,7 +150,6 @@
#include <string>
#include <tuple>
#include <utility>
-#include <vector>
using namespace llvm;
@@ -200,9 +200,10 @@ static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
"enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
-/// We don't interleave loops with a known constant trip count below this
-/// number.
-static const unsigned TinyTripCountInterleaveThreshold = 128;
+static cl::opt<unsigned> TinyTripCountInterleaveThreshold(
+ "tiny-trip-count-interleave-threshold", cl::init(128), cl::Hidden,
+ cl::desc("We don't interleave loops with a estimated constant trip count "
+ "below this number"));
static cl::opt<unsigned> ForceTargetNumScalarRegs(
"force-target-num-scalar-regs", cl::init(0), cl::Hidden,
@@ -427,6 +428,11 @@ public:
/// new unrolled loop, where UF is the unroll factor.
using VectorParts = SmallVector<Value *, 2>;
+ /// Vectorize a single GetElementPtrInst based on information gathered and
+ /// decisions taken during planning.
+ void widenGEP(GetElementPtrInst *GEP, unsigned UF, unsigned VF,
+ bool IsPtrLoopInvariant, SmallBitVector &IsIndexLoopInvariant);
+
/// Vectorize a single PHINode in a block. This method handles the induction
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
/// arbitrary length vectors.
@@ -476,15 +482,20 @@ public:
/// Construct the vector value of a scalarized value \p V one lane at a time.
void packScalarIntoVectorValue(Value *V, const VPIteration &Instance);
- /// Try to vectorize the interleaved access group that \p Instr belongs to,
- /// optionally masking the vector operations if \p BlockInMask is non-null.
- void vectorizeInterleaveGroup(Instruction *Instr,
- VectorParts *BlockInMask = nullptr);
-
- /// Vectorize Load and Store instructions, optionally masking the vector
- /// operations if \p BlockInMask is non-null.
- void vectorizeMemoryInstruction(Instruction *Instr,
- VectorParts *BlockInMask = nullptr);
+ /// Try to vectorize the interleaved access group that \p Instr belongs to
+ /// with the base address given in \p Addr, optionally masking the vector
+ /// operations if \p BlockInMask is non-null. Use \p State to translate given
+ /// VPValues to IR values in the vectorized loop.
+ void vectorizeInterleaveGroup(Instruction *Instr, VPTransformState &State,
+ VPValue *Addr, VPValue *BlockInMask = nullptr);
+
+ /// Vectorize Load and Store instructions with the base address given in \p
+ /// Addr, optionally masking the vector operations if \p BlockInMask is
+ /// non-null. Use \p State to translate given VPValues to IR values in the
+ /// vectorized loop.
+ void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
+ VPValue *Addr,
+ VPValue *BlockInMask = nullptr);
/// Set the debug location in the builder using the debug location in
/// the instruction.
@@ -525,6 +536,9 @@ protected:
/// vectorizing this phi node.
void fixReduction(PHINode *Phi);
+ /// Clear NSW/NUW flags from reduction instructions if necessary.
+ void clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc);
+
/// The Loop exit block may have single value PHI nodes with some
/// incoming value. While vectorizing we only handled real values
/// that were defined inside the loop and we should have one value for
@@ -539,10 +553,6 @@ protected:
/// represented as.
void truncateToMinimalBitwidths();
- /// Insert the new loop to the loop hierarchy and pass manager
- /// and update the analysis passes.
- void updateAnalysis();
-
/// Create a broadcast instruction. This method generates a broadcast
/// instruction (shuffle) for loop invariant values and for the induction
/// value. If this is the induction variable then we extend it to N, N+1, ...
@@ -1204,14 +1214,14 @@ public:
/// Returns true if the target machine supports masked scatter operation
/// for the given \p DataType.
- bool isLegalMaskedScatter(Type *DataType) {
- return TTI.isLegalMaskedScatter(DataType);
+ bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
+ return TTI.isLegalMaskedScatter(DataType, Alignment);
}
/// Returns true if the target machine supports masked gather operation
/// for the given \p DataType.
- bool isLegalMaskedGather(Type *DataType) {
- return TTI.isLegalMaskedGather(DataType);
+ bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) {
+ return TTI.isLegalMaskedGather(DataType, Alignment);
}
/// Returns true if the target machine can represent \p V as a masked gather
@@ -1222,7 +1232,9 @@ public:
if (!LI && !SI)
return false;
auto *Ty = getMemInstValueType(V);
- return (LI && isLegalMaskedGather(Ty)) || (SI && isLegalMaskedScatter(Ty));
+ MaybeAlign Align = getLoadStoreAlignment(V);
+ return (LI && isLegalMaskedGather(Ty, Align)) ||
+ (SI && isLegalMaskedScatter(Ty, Align));
}
/// Returns true if \p I is an instruction that will be scalarized with
@@ -2155,7 +2167,9 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
- VectorParts *BlockInMask) {
+ VPTransformState &State,
+ VPValue *Addr,
+ VPValue *BlockInMask) {
const InterleaveGroup<Instruction> *Group =
Cost->getInterleavedAccessGroup(Instr);
assert(Group && "Fail to get an interleaved access group.");
@@ -2165,27 +2179,19 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
return;
const DataLayout &DL = Instr->getModule()->getDataLayout();
- Value *Ptr = getLoadStorePointerOperand(Instr);
// Prepare for the vector type of the interleaved load/store.
Type *ScalarTy = getMemInstValueType(Instr);
unsigned InterleaveFactor = Group->getFactor();
Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
- Type *PtrTy = VecTy->getPointerTo(getLoadStoreAddressSpace(Instr));
// Prepare for the new pointers.
- setDebugLocFromInst(Builder, Ptr);
- SmallVector<Value *, 2> NewPtrs;
+ SmallVector<Value *, 2> AddrParts;
unsigned Index = Group->getIndex(Instr);
- VectorParts Mask;
- bool IsMaskForCondRequired = BlockInMask;
- if (IsMaskForCondRequired) {
- Mask = *BlockInMask;
- // TODO: extend the masked interleaved-group support to reversed access.
- assert(!Group->isReverse() && "Reversed masked interleave-group "
- "not supported.");
- }
+ // TODO: extend the masked interleaved-group support to reversed access.
+ assert((!BlockInMask || !Group->isReverse()) &&
+ "Reversed masked interleave-group not supported.");
// If the group is reverse, adjust the index to refer to the last vector lane
// instead of the first. We adjust the index from the first vector lane,
@@ -2196,12 +2202,9 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
if (Group->isReverse())
Index += (VF - 1) * Group->getFactor();
- bool InBounds = false;
- if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
- InBounds = gep->isInBounds();
-
for (unsigned Part = 0; Part < UF; Part++) {
- Value *NewPtr = getOrCreateScalarValue(Ptr, {Part, 0});
+ Value *AddrPart = State.get(Addr, {Part, 0});
+ setDebugLocFromInst(Builder, AddrPart);
// Notice that the current instruction could be at any index. We need to
// adjust the address to the member of index 0.
@@ -2214,12 +2217,17 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
// A[i] = b; // Member of index 0
// A[i+2] = c; // Member of index 2 (Current instruction)
// The current pointer points to A[i+2]; adjust it to A[i].
- NewPtr = Builder.CreateGEP(ScalarTy, NewPtr, Builder.getInt32(-Index));
- if (InBounds)
- cast<GetElementPtrInst>(NewPtr)->setIsInBounds(true);
+
+ bool InBounds = false;
+ if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts()))
+ InBounds = gep->isInBounds();
+ AddrPart = Builder.CreateGEP(ScalarTy, AddrPart, Builder.getInt32(-Index));
+ cast<GetElementPtrInst>(AddrPart)->setIsInBounds(InBounds);
// Cast to the vector pointer type.
- NewPtrs.push_back(Builder.CreateBitCast(NewPtr, PtrTy));
+ unsigned AddressSpace = AddrPart->getType()->getPointerAddressSpace();
+ Type *PtrTy = VecTy->getPointerTo(AddressSpace);
+ AddrParts.push_back(Builder.CreateBitCast(AddrPart, PtrTy));
}
setDebugLocFromInst(Builder, Instr);
@@ -2237,26 +2245,27 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
SmallVector<Value *, 2> NewLoads;
for (unsigned Part = 0; Part < UF; Part++) {
Instruction *NewLoad;
- if (IsMaskForCondRequired || MaskForGaps) {
+ if (BlockInMask || MaskForGaps) {
assert(useMaskedInterleavedAccesses(*TTI) &&
"masked interleaved groups are not allowed.");
Value *GroupMask = MaskForGaps;
- if (IsMaskForCondRequired) {
- auto *Undefs = UndefValue::get(Mask[Part]->getType());
+ if (BlockInMask) {
+ Value *BlockInMaskPart = State.get(BlockInMask, Part);
+ auto *Undefs = UndefValue::get(BlockInMaskPart->getType());
auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
Value *ShuffledMask = Builder.CreateShuffleVector(
- Mask[Part], Undefs, RepMask, "interleaved.mask");
+ BlockInMaskPart, Undefs, RepMask, "interleaved.mask");
GroupMask = MaskForGaps
? Builder.CreateBinOp(Instruction::And, ShuffledMask,
MaskForGaps)
: ShuffledMask;
}
NewLoad =
- Builder.CreateMaskedLoad(NewPtrs[Part], Group->getAlignment(),
+ Builder.CreateMaskedLoad(AddrParts[Part], Group->getAlignment(),
GroupMask, UndefVec, "wide.masked.vec");
}
else
- NewLoad = Builder.CreateAlignedLoad(VecTy, NewPtrs[Part],
+ NewLoad = Builder.CreateAlignedLoad(VecTy, AddrParts[Part],
Group->getAlignment(), "wide.vec");
Group->addMetadata(NewLoad);
NewLoads.push_back(NewLoad);
@@ -2325,24 +2334,27 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
"interleaved.vec");
Instruction *NewStoreInstr;
- if (IsMaskForCondRequired) {
- auto *Undefs = UndefValue::get(Mask[Part]->getType());
+ if (BlockInMask) {
+ Value *BlockInMaskPart = State.get(BlockInMask, Part);
+ auto *Undefs = UndefValue::get(BlockInMaskPart->getType());
auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
Value *ShuffledMask = Builder.CreateShuffleVector(
- Mask[Part], Undefs, RepMask, "interleaved.mask");
+ BlockInMaskPart, Undefs, RepMask, "interleaved.mask");
NewStoreInstr = Builder.CreateMaskedStore(
- IVec, NewPtrs[Part], Group->getAlignment(), ShuffledMask);
+ IVec, AddrParts[Part], Group->getAlignment(), ShuffledMask);
}
else
- NewStoreInstr = Builder.CreateAlignedStore(IVec, NewPtrs[Part],
- Group->getAlignment());
+ NewStoreInstr = Builder.CreateAlignedStore(IVec, AddrParts[Part],
+ Group->getAlignment());
Group->addMetadata(NewStoreInstr);
}
}
void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
- VectorParts *BlockInMask) {
+ VPTransformState &State,
+ VPValue *Addr,
+ VPValue *BlockInMask) {
// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(Instr);
StoreInst *SI = dyn_cast<StoreInst>(Instr);
@@ -2354,17 +2366,15 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
"CM decision should be taken at this point");
if (Decision == LoopVectorizationCostModel::CM_Interleave)
- return vectorizeInterleaveGroup(Instr);
+ return vectorizeInterleaveGroup(Instr, State, Addr, BlockInMask);
Type *ScalarDataTy = getMemInstValueType(Instr);
Type *DataTy = VectorType::get(ScalarDataTy, VF);
- Value *Ptr = getLoadStorePointerOperand(Instr);
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
const DataLayout &DL = Instr->getModule()->getDataLayout();
const Align Alignment =
DL.getValueOrABITypeAlignment(getLoadStoreAlignment(Instr), ScalarDataTy);
- unsigned AddressSpace = getLoadStoreAddressSpace(Instr);
// Determine if the pointer operand of the access is either consecutive or
// reverse consecutive.
@@ -2378,25 +2388,22 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
// gather/scatter. Otherwise Decision should have been to Scalarize.
assert((ConsecutiveStride || CreateGatherScatter) &&
"The instruction should be scalarized");
+ (void)ConsecutiveStride;
- // Handle consecutive loads/stores.
- if (ConsecutiveStride)
- Ptr = getOrCreateScalarValue(Ptr, {0, 0});
-
- VectorParts Mask;
+ VectorParts BlockInMaskParts(UF);
bool isMaskRequired = BlockInMask;
if (isMaskRequired)
- Mask = *BlockInMask;
-
- bool InBounds = false;
- if (auto *gep = dyn_cast<GetElementPtrInst>(
- getLoadStorePointerOperand(Instr)->stripPointerCasts()))
- InBounds = gep->isInBounds();
+ for (unsigned Part = 0; Part < UF; ++Part)
+ BlockInMaskParts[Part] = State.get(BlockInMask, Part);
const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
// Calculate the pointer for the specific unroll-part.
GetElementPtrInst *PartPtr = nullptr;
+ bool InBounds = false;
+ if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+ InBounds = gep->isInBounds();
+
if (Reverse) {
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
@@ -2407,13 +2414,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Builder.CreateGEP(ScalarDataTy, PartPtr, Builder.getInt32(1 - VF)));
PartPtr->setIsInBounds(InBounds);
if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
- Mask[Part] = reverseVector(Mask[Part]);
+ BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]);
} else {
PartPtr = cast<GetElementPtrInst>(
Builder.CreateGEP(ScalarDataTy, Ptr, Builder.getInt32(Part * VF)));
PartPtr->setIsInBounds(InBounds);
}
+ unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
};
@@ -2425,8 +2433,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Instruction *NewSI = nullptr;
Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
if (CreateGatherScatter) {
- Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
- Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+ Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ Value *VectorGep = State.get(Addr, Part);
NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep,
Alignment.value(), MaskPart);
} else {
@@ -2437,10 +2445,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
- auto *VecPtr = CreateVecPtr(Part, Ptr);
+ auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
if (isMaskRequired)
- NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr,
- Alignment.value(), Mask[Part]);
+ NewSI = Builder.CreateMaskedStore(
+ StoredVal, VecPtr, Alignment.value(), BlockInMaskParts[Part]);
else
NewSI =
Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment.value());
@@ -2456,17 +2464,17 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
for (unsigned Part = 0; Part < UF; ++Part) {
Value *NewLI;
if (CreateGatherScatter) {
- Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
- Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+ Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ Value *VectorGep = State.get(Addr, Part);
NewLI = Builder.CreateMaskedGather(VectorGep, Alignment.value(), MaskPart,
nullptr, "wide.masked.gather");
addMetadata(NewLI, LI);
} else {
- auto *VecPtr = CreateVecPtr(Part, Ptr);
+ auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
if (isMaskRequired)
- NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment.value(), Mask[Part],
- UndefValue::get(DataTy),
- "wide.masked.load");
+ NewLI = Builder.CreateMaskedLoad(
+ VecPtr, Alignment.value(), BlockInMaskParts[Part],
+ UndefValue::get(DataTy), "wide.masked.load");
else
NewLI = Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment.value(),
"wide.load");
@@ -2676,8 +2684,10 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
BasicBlock *Bypass) {
Value *Count = getOrCreateTripCount(L);
- BasicBlock *BB = L->getLoopPreheader();
- IRBuilder<> Builder(BB->getTerminator());
+ // Reuse the existing vector loop preheader for the TC checks.
+ // Note that a new preheader block is generated for the vector loop.
+ BasicBlock *const TCCheckBlock = LoopVectorPreHeader;
+ IRBuilder<> Builder(TCCheckBlock->getTerminator());
// Generate code to check if the loop's trip count is less than VF * UF, or
// equal to it in case a scalar epilogue is required; this implies that the
@@ -2694,48 +2704,61 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
P, Count, ConstantInt::get(Count->getType(), VF * UF),
"min.iters.check");
- BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
- // Update dominator tree immediately if the generated block is a
- // LoopBypassBlock because SCEV expansions to generate loop bypass
- // checks may query it before the current function is finished.
- DT->addNewBlock(NewBB, BB);
- if (L->getParentLoop())
- L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
- ReplaceInstWithInst(BB->getTerminator(),
- BranchInst::Create(Bypass, NewBB, CheckMinIters));
- LoopBypassBlocks.push_back(BB);
+ // Create new preheader for vector loop.
+ LoopVectorPreHeader =
+ SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(), DT, LI, nullptr,
+ "vector.ph");
+
+ assert(DT->properlyDominates(DT->getNode(TCCheckBlock),
+ DT->getNode(Bypass)->getIDom()) &&
+ "TC check is expected to dominate Bypass");
+
+ // Update dominator for Bypass & LoopExit.
+ DT->changeImmediateDominator(Bypass, TCCheckBlock);
+ DT->changeImmediateDominator(LoopExitBlock, TCCheckBlock);
+
+ ReplaceInstWithInst(
+ TCCheckBlock->getTerminator(),
+ BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters));
+ LoopBypassBlocks.push_back(TCCheckBlock);
}
void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
- BasicBlock *BB = L->getLoopPreheader();
+ // Reuse the existing vector loop preheader for the SCEV checks.
+ // Note that a new preheader block is generated for the vector loop.
+ BasicBlock *const SCEVCheckBlock = LoopVectorPreHeader;
// Generate the code to check the SCEV assumptions that we made.
// We want the new basic block to start at the first instruction in a
// sequence of instructions that form a check.
SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(),
"scev.check");
- Value *SCEVCheck =
- Exp.expandCodeForPredicate(&PSE.getUnionPredicate(), BB->getTerminator());
+ Value *SCEVCheck = Exp.expandCodeForPredicate(
+ &PSE.getUnionPredicate(), SCEVCheckBlock->getTerminator());
if (auto *C = dyn_cast<ConstantInt>(SCEVCheck))
if (C->isZero())
return;
- assert(!BB->getParent()->hasOptSize() &&
+ assert(!SCEVCheckBlock->getParent()->hasOptSize() &&
"Cannot SCEV check stride or overflow when optimizing for size");
- // Create a new block containing the stride check.
- BB->setName("vector.scevcheck");
- auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
- // Update dominator tree immediately if the generated block is a
- // LoopBypassBlock because SCEV expansions to generate loop bypass
- // checks may query it before the current function is finished.
- DT->addNewBlock(NewBB, BB);
- if (L->getParentLoop())
- L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
- ReplaceInstWithInst(BB->getTerminator(),
- BranchInst::Create(Bypass, NewBB, SCEVCheck));
- LoopBypassBlocks.push_back(BB);
+ SCEVCheckBlock->setName("vector.scevcheck");
+ // Create new preheader for vector loop.
+ LoopVectorPreHeader =
+ SplitBlock(SCEVCheckBlock, SCEVCheckBlock->getTerminator(), DT, LI,
+ nullptr, "vector.ph");
+
+ // Update the dominator only if this is the first RT check.
+ if (LoopBypassBlocks.empty()) {
+ DT->changeImmediateDominator(Bypass, SCEVCheckBlock);
+ DT->changeImmediateDominator(LoopExitBlock, SCEVCheckBlock);
+ }
+
+ ReplaceInstWithInst(
+ SCEVCheckBlock->getTerminator(),
+ BranchInst::Create(Bypass, LoopVectorPreHeader, SCEVCheck));
+ LoopBypassBlocks.push_back(SCEVCheckBlock);
AddedSafetyChecks = true;
}
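
Both check emitters above now follow the same reuse-then-split pattern; roughly (block names from the hunks, diagram added for illustration):

//        before                       after SplitBlock + branch rewrite
//
//   LoopVectorPreHeader           CheckBlock (old preheader, keeps check)
//           |                          /              \
//      vector loop             vector.ph (new)    Bypass (scalar path)
//                                    |
//                               vector loop
//
// The check stays in the upper half; the lower half becomes the new
// LoopVectorPreHeader, so a later check can split it again.
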
@@ -2744,7 +2767,9 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
if (EnableVPlanNativePath)
return;
- BasicBlock *BB = L->getLoopPreheader();
+ // Reuse the existing vector loop preheader for the runtime memory checks.
+ // Note that a new preheader block is generated for the vector loop.
+ BasicBlock *const MemCheckBlock = L->getLoopPreheader();
// Generate the code that checks at runtime whether arrays overlap. We put the
// checks into a separate block to make the more common case of few elements
@@ -2752,11 +2777,11 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
Instruction *FirstCheckInst;
Instruction *MemRuntimeCheck;
std::tie(FirstCheckInst, MemRuntimeCheck) =
- Legal->getLAI()->addRuntimeChecks(BB->getTerminator());
+ Legal->getLAI()->addRuntimeChecks(MemCheckBlock->getTerminator());
if (!MemRuntimeCheck)
return;
- if (BB->getParent()->hasOptSize()) {
+ if (MemCheckBlock->getParent()->hasOptSize()) {
assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
"Cannot emit memory checks when optimizing for size, unless forced "
"to vectorize.");
@@ -2770,24 +2795,28 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
});
}
- // Create a new block containing the memory check.
- BB->setName("vector.memcheck");
- auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
- // Update dominator tree immediately if the generated block is a
- // LoopBypassBlock because SCEV expansions to generate loop bypass
- // checks may query it before the current function is finished.
- DT->addNewBlock(NewBB, BB);
- if (L->getParentLoop())
- L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
- ReplaceInstWithInst(BB->getTerminator(),
- BranchInst::Create(Bypass, NewBB, MemRuntimeCheck));
- LoopBypassBlocks.push_back(BB);
+ MemCheckBlock->setName("vector.memcheck");
+ // Create new preheader for vector loop.
+ LoopVectorPreHeader =
+ SplitBlock(MemCheckBlock, MemCheckBlock->getTerminator(), DT, LI, nullptr,
+ "vector.ph");
+
+ // Update the dominator only if this is the first RT check.
+ if (LoopBypassBlocks.empty()) {
+ DT->changeImmediateDominator(Bypass, MemCheckBlock);
+ DT->changeImmediateDominator(LoopExitBlock, MemCheckBlock);
+ }
+
+ ReplaceInstWithInst(
+ MemCheckBlock->getTerminator(),
+ BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheck));
+ LoopBypassBlocks.push_back(MemCheckBlock);
AddedSafetyChecks = true;
// We currently don't use LoopVersioning for the actual loop cloning but we
// still use it to add the noalias metadata.
LVer = std::make_unique<LoopVersioning>(*Legal->getLAI(), OrigLoop, LI, DT,
- PSE.getSE());
+ PSE.getSE());
LVer->prepareNoAliasMetadata();
}
@@ -2912,12 +2941,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
...
*/
- BasicBlock *OldBasicBlock = OrigLoop->getHeader();
- BasicBlock *VectorPH = OrigLoop->getLoopPreheader();
- BasicBlock *ExitBlock = OrigLoop->getExitBlock();
MDNode *OrigLoopID = OrigLoop->getLoopID();
- assert(VectorPH && "Invalid loop structure");
- assert(ExitBlock && "Must have an exit block");
// Some loops have a single integer induction variable, while other loops
// don't. One example is c++ iterators that often have multiple pointer
@@ -2934,12 +2958,27 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
Type *IdxTy = Legal->getWidestInductionType();
// Split the single block loop into the two loop structure described above.
- BasicBlock *VecBody =
- VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
- BasicBlock *MiddleBlock =
- VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
- BasicBlock *ScalarPH =
- MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
+ LoopScalarBody = OrigLoop->getHeader();
+ LoopVectorPreHeader = OrigLoop->getLoopPreheader();
+ LoopExitBlock = OrigLoop->getExitBlock();
+ assert(LoopExitBlock && "Must have an exit block");
+ assert(LoopVectorPreHeader && "Invalid loop structure");
+
+ LoopMiddleBlock =
+ SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
+ LI, nullptr, "middle.block");
+ LoopScalarPreHeader =
+ SplitBlock(LoopMiddleBlock, LoopMiddleBlock->getTerminator(), DT, LI,
+ nullptr, "scalar.ph");
+ // We intentionally don't let SplitBlock update LoopInfo, since
+ // LoopVectorBody should belong to a different loop than LoopVectorPreHeader.
+ // LoopVectorBody is explicitly added to the correct place a few lines later.
+ LoopVectorBody =
+ SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
+ nullptr, nullptr, "vector.body");
+
+ // Update dominator for loop exit.
+ DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
// Create and register the new vector loop.
Loop *Lp = LI->AllocateLoop();
@@ -2949,12 +2988,10 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// before calling any utilities such as SCEV that require valid LoopInfo.
if (ParentLoop) {
ParentLoop->addChildLoop(Lp);
- ParentLoop->addBasicBlockToLoop(ScalarPH, *LI);
- ParentLoop->addBasicBlockToLoop(MiddleBlock, *LI);
} else {
LI->addTopLevelLoop(Lp);
}
- Lp->addBasicBlockToLoop(VecBody, *LI);
+ Lp->addBasicBlockToLoop(LoopVectorBody, *LI);
// Find the loop boundaries.
Value *Count = getOrCreateTripCount(Lp);
@@ -2966,16 +3003,16 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// backedge-taken count is uint##_max: adding one to it will overflow leading
// to an incorrect trip count of zero. In this (rare) case we will also jump
// to the scalar loop.
- emitMinimumIterationCountCheck(Lp, ScalarPH);
+ emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader);
// Generate the code to check any assumptions that we've made for SCEV
// expressions.
- emitSCEVChecks(Lp, ScalarPH);
+ emitSCEVChecks(Lp, LoopScalarPreHeader);
// Generate the code that checks at runtime whether arrays overlap. We put the
// checks into a separate block to make the more common case of few elements
// faster.
- emitMemRuntimeChecks(Lp, ScalarPH);
+ emitMemRuntimeChecks(Lp, LoopScalarPreHeader);
// Generate the induction variable.
// The loop step is equal to the vectorization factor (num of SIMD elements)
@@ -3003,8 +3040,9 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
InductionDescriptor II = InductionEntry.second;
// Create phi nodes to merge from the backedge-taken check block.
- PHINode *BCResumeVal = PHINode::Create(
- OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator());
+ PHINode *BCResumeVal =
+ PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
+ LoopScalarPreHeader->getTerminator());
// Copy original phi DL over to the new one.
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
Value *&EndValue = IVEndValues[OrigPhi];
@@ -3015,23 +3053,23 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
IRBuilder<> B(Lp->getLoopPreheader()->getTerminator());
Type *StepType = II.getStep()->getType();
Instruction::CastOps CastOp =
- CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
+ CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd");
- const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
+ const DataLayout &DL = LoopScalarBody->getModule()->getDataLayout();
EndValue = emitTransformedIndex(B, CRD, PSE.getSE(), DL, II);
EndValue->setName("ind.end");
}
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- BCResumeVal->addIncoming(EndValue, MiddleBlock);
+ BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);
// Fix the scalar body counter (PHI node).
// The old induction's phi node in the scalar body needs the truncated
// value.
for (BasicBlock *BB : LoopBypassBlocks)
BCResumeVal->addIncoming(II.getStartValue(), BB);
- OrigPhi->setIncomingValueForBlock(ScalarPH, BCResumeVal);
+ OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
}
// We need the OrigLoop (scalar loop part) latch terminator to help
@@ -3049,9 +3087,9 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// If tail is to be folded, we know we don't need to run the remainder.
Value *CmpN = Builder.getTrue();
if (!Cost->foldTailByMasking()) {
- CmpN =
- CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
- CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
+ CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
+ CountRoundDown, "cmp.n",
+ LoopMiddleBlock->getTerminator());
// Here we use the same DebugLoc as the scalar loop latch branch instead
// of the corresponding compare because they may have ended up with
@@ -3060,20 +3098,15 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
cast<Instruction>(CmpN)->setDebugLoc(ScalarLatchBr->getDebugLoc());
}
- BranchInst *BrInst = BranchInst::Create(ExitBlock, ScalarPH, CmpN);
+ BranchInst *BrInst =
+ BranchInst::Create(LoopExitBlock, LoopScalarPreHeader, CmpN);
BrInst->setDebugLoc(ScalarLatchBr->getDebugLoc());
- ReplaceInstWithInst(MiddleBlock->getTerminator(), BrInst);
+ ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
// Get ready to start creating new instructions into the vectorized body.
- Builder.SetInsertPoint(&*VecBody->getFirstInsertionPt());
-
- // Save the state.
- LoopVectorPreHeader = Lp->getLoopPreheader();
- LoopScalarPreHeader = ScalarPH;
- LoopMiddleBlock = MiddleBlock;
- LoopExitBlock = ExitBlock;
- LoopVectorBody = VecBody;
- LoopScalarBody = OldBasicBlock;
+ assert(LoopVectorPreHeader == Lp->getLoopPreheader() &&
+ "Inconsistent vector loop preheader");
+ Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt());
Optional<MDNode *> VectorizedLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
@@ -3094,6 +3127,11 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
LoopVectorizeHints Hints(Lp, true, *ORE);
Hints.setAlreadyVectorized();
+#ifdef EXPENSIVE_CHECKS
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+ LI->verify(*DT);
+#endif
+
return LoopVectorPreHeader;
}
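
Taken together, the skeleton built above has roughly this shape (a reconstruction from the code, added for illustration; each of the three bypass checks may be absent):

//   min.iters.check  --fail--> scalar.ph
//        | pass
//   vector.scevcheck --fail--> scalar.ph
//        | pass
//   vector.memcheck  --fail--> scalar.ph
//        | pass
//   vector.ph --> vector.body (vector loop) --> middle.block
//   middle.block --cmp.n true--> LoopExitBlock
//               \--otherwise--> scalar.ph --> scalar loop --> LoopExitBlock
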
@@ -3429,15 +3467,8 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
// This is the second stage of vectorizing recurrences.
fixCrossIterationPHIs();
- // Update the dominator tree.
- //
- // FIXME: After creating the structure of the new loop, the dominator tree is
- // no longer up-to-date, and it remains that way until we update it
- // here. An out-of-date dominator tree is problematic for SCEV,
- // because SCEVExpander uses it to guide code generation. The
- // vectorizer use SCEVExpanders in several places. Instead, we should
- // keep the dominator tree up-to-date as we go.
- updateAnalysis();
+ // Forget the original basic block.
+ PSE.getSE()->forgetLoop(OrigLoop);
// Fix-up external users of the induction variables.
for (auto &Entry : *Legal->getInductionVars())
@@ -3550,17 +3581,27 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// among all unrolled iterations, due to the order of their construction.
Value *PreviousLastPart = getOrCreateVectorValue(Previous, UF - 1);
- // Set the insertion point after the previous value if it is an instruction.
+ // Find and set the insertion point after the previous value if it is an
+ // instruction.
+ BasicBlock::iterator InsertPt;
// Note that the previous value may have been constant-folded so it is not
- // guaranteed to be an instruction in the vector loop. Also, if the previous
- // value is a phi node, we should insert after all the phi nodes to avoid
- // breaking basic block verification.
- if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart) ||
- isa<PHINode>(PreviousLastPart))
- Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt());
- else
- Builder.SetInsertPoint(
- &*++BasicBlock::iterator(cast<Instruction>(PreviousLastPart)));
+ // guaranteed to be an instruction in the vector loop.
+ // FIXME: Loop invariant values do not form recurrences. We should deal with
+ // them earlier.
+ if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart))
+ InsertPt = LoopVectorBody->getFirstInsertionPt();
+ else {
+ Instruction *PreviousInst = cast<Instruction>(PreviousLastPart);
+ if (isa<PHINode>(PreviousLastPart))
+ // If the previous value is a phi node, we should insert after all the phi
+ // nodes in the block containing the PHI to avoid breaking basic block
+ // verification. Note that the basic block may be different to
+ // LoopVectorBody, in case we predicate the loop.
+ InsertPt = PreviousInst->getParent()->getFirstInsertionPt();
+ else
+ InsertPt = ++PreviousInst->getIterator();
+ }
+ Builder.SetInsertPoint(&*InsertPt);
// We will construct a vector for the recurrence by combining the values for
// the current and previous iterations. This is the required shuffle mask.
@@ -3693,16 +3734,20 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
}
}
+ // Wrap flags are in general invalid after vectorization, clear them.
+ clearReductionWrapFlags(RdxDesc);
+
// Fix the vector-loop phi.
// Reductions do not have to start at zero. They can start with
// any loop invariant values.
BasicBlock *Latch = OrigLoop->getLoopLatch();
Value *LoopVal = Phi->getIncomingValueForBlock(Latch);
+
for (unsigned Part = 0; Part < UF; ++Part) {
Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part);
Value *Val = getOrCreateVectorValue(LoopVal, Part);
- // Make sure to add the reduction stat value only to the
+ // Make sure to add the reduction start value only to the
// first unroll part.
Value *StartVal = (Part == 0) ? VectorStart : Identity;
cast<PHINode>(VecRdxPhi)->addIncoming(StartVal, LoopVectorPreHeader);
@@ -3839,6 +3884,37 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
}
+void InnerLoopVectorizer::clearReductionWrapFlags(
+ RecurrenceDescriptor &RdxDesc) {
+ RecurrenceDescriptor::RecurrenceKind RK = RdxDesc.getRecurrenceKind();
+ if (RK != RecurrenceDescriptor::RK_IntegerAdd &&
+ RK != RecurrenceDescriptor::RK_IntegerMult)
+ return;
+
+ Instruction *LoopExitInstr = RdxDesc.getLoopExitInstr();
+ assert(LoopExitInstr && "null loop exit instruction");
+ SmallVector<Instruction *, 8> Worklist;
+ SmallPtrSet<Instruction *, 8> Visited;
+ Worklist.push_back(LoopExitInstr);
+ Visited.insert(LoopExitInstr);
+
+ while (!Worklist.empty()) {
+ Instruction *Cur = Worklist.pop_back_val();
+ if (isa<OverflowingBinaryOperator>(Cur))
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *V = getOrCreateVectorValue(Cur, Part);
+ cast<Instruction>(V)->dropPoisonGeneratingFlags();
+ }
+
+ for (User *U : Cur->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if ((Cur != LoopExitInstr || OrigLoop->contains(UI->getParent())) &&
+ Visited.insert(UI).second)
+ Worklist.push_back(UI);
+ }
+ }
+}
+
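
A small illustration (added here, not from the patch) of why clearReductionWrapFlags is needed:

// The vectorizer reassociates 'sum' into UF*VF partial sums. An intermediate
// partial sum can overflow even though the scalar running sum never does, so
// an 'add nsw' kept on the widened adds could yield poison — hence
// dropPoisonGeneratingFlags() on every unrolled copy.
int sumReduction(const int *a, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i)
    sum += a[i];                 // 'add nsw i32' per scalar step in IR
  return sum;
}
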
void InnerLoopVectorizer::fixLCSSAPHIs() {
for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
if (LCSSAPhi.getNumIncomingValues() == 1) {
@@ -3960,6 +4036,75 @@ void InnerLoopVectorizer::fixNonInductionPHIs() {
}
}
+void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, unsigned UF,
+ unsigned VF, bool IsPtrLoopInvariant,
+ SmallBitVector &IsIndexLoopInvariant) {
+ // Construct a vector GEP by widening the operands of the scalar GEP as
+ // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
+ // results in a vector of pointers when at least one operand of the GEP
+ // is vector-typed. Thus, to keep the representation compact, we only use
+ // vector-typed operands for loop-varying values.
+
+ if (VF > 1 && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
+ // If we are vectorizing, but the GEP has only loop-invariant operands,
+ // the GEP we build (by only using vector-typed operands for
+ // loop-varying values) would be a scalar pointer. Thus, to ensure we
+ // produce a vector of pointers, we need to either arbitrarily pick an
+ // operand to broadcast, or broadcast a clone of the original GEP.
+ // Here, we broadcast a clone of the original.
+ //
+ // TODO: If at some point we decide to scalarize instructions having
+ // loop-invariant operands, this special case will no longer be
+ // required. We would add the scalarization decision to
+ // collectLoopScalars() and teach getVectorValue() to broadcast
+ // the lane-zero scalar value.
+ auto *Clone = Builder.Insert(GEP->clone());
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
+ VectorLoopValueMap.setVectorValue(GEP, Part, EntryPart);
+ addMetadata(EntryPart, GEP);
+ }
+ } else {
+ // If the GEP has at least one loop-varying operand, we are sure to
+ // produce a vector of pointers. But if we are only unrolling, we want
+ // to produce a scalar GEP for each unroll part. Thus, the GEP we
+ // produce with the code below will be scalar (if VF == 1) or vector
+ // (otherwise). Note that for the unroll-only case, we still maintain
+ // values in the vector mapping with initVector, as we do for other
+ // instructions.
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ // The pointer operand of the new GEP. If it's loop-invariant, we
+ // won't broadcast it.
+ auto *Ptr = IsPtrLoopInvariant
+ ? GEP->getPointerOperand()
+ : getOrCreateVectorValue(GEP->getPointerOperand(), Part);
+
+ // Collect all the indices for the new GEP. If any index is
+ // loop-invariant, we won't broadcast it.
+ SmallVector<Value *, 4> Indices;
+ for (auto Index : enumerate(GEP->indices())) {
+ Value *User = Index.value().get();
+ if (IsIndexLoopInvariant[Index.index()])
+ Indices.push_back(User);
+ else
+ Indices.push_back(getOrCreateVectorValue(User, Part));
+ }
+
+ // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
+ // but it should be a vector, otherwise.
+ auto *NewGEP =
+ GEP->isInBounds()
+ ? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
+ Indices)
+ : Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
+ assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
+ "NewGEP is not a pointer vector");
+ VectorLoopValueMap.setVectorValue(GEP, Part, NewGEP);
+ addMetadata(NewGEP, GEP);
+ }
+ }
+}
+
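
Informally, the two branches of widenGEP correspond to the following IR shapes (sketched here, not copied from the patch):

// All operands loop-invariant and VF > 1: clone the scalar GEP, then splat
// it so users see a (degenerate) vector of identical pointers:
//   %g  = getelementptr inbounds i32, i32* %base, i64 %inv
//   %gv = <VF x i32*> splat of %g
//
// At least one loop-varying operand: only the varying operands are widened,
// yielding a genuine vector-of-pointers GEP:
//   %gv = getelementptr inbounds i32, i32* %base, <VF x i64> %vec.iv
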
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
unsigned VF) {
PHINode *P = cast<PHINode>(PN);
@@ -4062,76 +4207,8 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
switch (I.getOpcode()) {
case Instruction::Br:
case Instruction::PHI:
+ case Instruction::GetElementPtr:
llvm_unreachable("This instruction is handled by a different recipe.");
- case Instruction::GetElementPtr: {
- // Construct a vector GEP by widening the operands of the scalar GEP as
- // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
- // results in a vector of pointers when at least one operand of the GEP
- // is vector-typed. Thus, to keep the representation compact, we only use
- // vector-typed operands for loop-varying values.
- auto *GEP = cast<GetElementPtrInst>(&I);
-
- if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) {
- // If we are vectorizing, but the GEP has only loop-invariant operands,
- // the GEP we build (by only using vector-typed operands for
- // loop-varying values) would be a scalar pointer. Thus, to ensure we
- // produce a vector of pointers, we need to either arbitrarily pick an
- // operand to broadcast, or broadcast a clone of the original GEP.
- // Here, we broadcast a clone of the original.
- //
- // TODO: If at some point we decide to scalarize instructions having
- // loop-invariant operands, this special case will no longer be
- // required. We would add the scalarization decision to
- // collectLoopScalars() and teach getVectorValue() to broadcast
- // the lane-zero scalar value.
- auto *Clone = Builder.Insert(GEP->clone());
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
- VectorLoopValueMap.setVectorValue(&I, Part, EntryPart);
- addMetadata(EntryPart, GEP);
- }
- } else {
- // If the GEP has at least one loop-varying operand, we are sure to
- // produce a vector of pointers. But if we are only unrolling, we want
- // to produce a scalar GEP for each unroll part. Thus, the GEP we
- // produce with the code below will be scalar (if VF == 1) or vector
- // (otherwise). Note that for the unroll-only case, we still maintain
- // values in the vector mapping with initVector, as we do for other
- // instructions.
- for (unsigned Part = 0; Part < UF; ++Part) {
- // The pointer operand of the new GEP. If it's loop-invariant, we
- // won't broadcast it.
- auto *Ptr =
- OrigLoop->isLoopInvariant(GEP->getPointerOperand())
- ? GEP->getPointerOperand()
- : getOrCreateVectorValue(GEP->getPointerOperand(), Part);
-
- // Collect all the indices for the new GEP. If any index is
- // loop-invariant, we won't broadcast it.
- SmallVector<Value *, 4> Indices;
- for (auto &U : make_range(GEP->idx_begin(), GEP->idx_end())) {
- if (OrigLoop->isLoopInvariant(U.get()))
- Indices.push_back(U.get());
- else
- Indices.push_back(getOrCreateVectorValue(U.get(), Part));
- }
-
- // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
- // but it should be a vector, otherwise.
- auto *NewGEP =
- GEP->isInBounds()
- ? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
- Indices)
- : Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
- assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
- "NewGEP is not a pointer vector");
- VectorLoopValueMap.setVectorValue(&I, Part, NewGEP);
- addMetadata(NewGEP, GEP);
- }
- }
-
- break;
- }
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
@@ -4335,26 +4412,6 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
} // end of switch.
}
-void InnerLoopVectorizer::updateAnalysis() {
- // Forget the original basic block.
- PSE.getSE()->forgetLoop(OrigLoop);
-
- // DT is not kept up-to-date for outer loop vectorization
- if (EnableVPlanNativePath)
- return;
-
- // Update the dominator tree information.
- assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
- "Entry does not dominate exit.");
-
- DT->addNewBlock(LoopMiddleBlock,
- LI->getLoopFor(LoopVectorBody)->getLoopLatch());
- DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]);
- DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
- DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]);
- assert(DT->verify(DominatorTree::VerificationLevel::Fast));
-}
-
void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
// We should not collect Scalars more than once per VF. Right now, this
// function is called from collectUniformsAndScalars(), which already does
@@ -4562,9 +4619,10 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
return WideningDecision == CM_Scalarize;
}
const MaybeAlign Alignment = getLoadStoreAlignment(I);
- return isa<LoadInst>(I) ?
- !(isLegalMaskedLoad(Ty, Ptr, Alignment) || isLegalMaskedGather(Ty))
- : !(isLegalMaskedStore(Ty, Ptr, Alignment) || isLegalMaskedScatter(Ty));
+ return isa<LoadInst>(I) ? !(isLegalMaskedLoad(Ty, Ptr, Alignment) ||
+ isLegalMaskedGather(Ty, Alignment))
+ : !(isLegalMaskedStore(Ty, Ptr, Alignment) ||
+ isLegalMaskedScatter(Ty, Alignment));
}
case Instruction::UDiv:
case Instruction::SDiv:
@@ -4667,14 +4725,26 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
SetVector<Instruction *> Worklist;
BasicBlock *Latch = TheLoop->getLoopLatch();
+ // Instructions that are scalar with predication must not be considered
+ // uniform after vectorization, because that would create an erroneous
+ // replicating region where only a single instance out of VF should be formed.
+ // TODO: optimize such seldom cases if found important, see PR40816.
+ auto addToWorklistIfAllowed = [&](Instruction *I) -> void {
+ if (isScalarWithPredication(I, VF)) {
+ LLVM_DEBUG(dbgs() << "LV: Found not uniform being ScalarWithPredication: "
+ << *I << "\n");
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *I << "\n");
+ Worklist.insert(I);
+ };
+
// Start with the conditional branch. If the branch condition is an
// instruction contained in the loop that is only used by the branch, it is
// uniform.
auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
- if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) {
- Worklist.insert(Cmp);
- LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n");
- }
+ if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
+ addToWorklistIfAllowed(Cmp);
// Holds consecutive and consecutive-like pointers. Consecutive-like pointers
// are pointers that are treated like consecutive pointers during
@@ -4733,10 +4803,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
// Add to the Worklist all consecutive and consecutive-like pointers that
// aren't also identified as possibly non-uniform.
for (auto *V : ConsecutiveLikePtrs)
- if (PossibleNonUniformPtrs.find(V) == PossibleNonUniformPtrs.end()) {
- LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n");
- Worklist.insert(V);
- }
+ if (PossibleNonUniformPtrs.find(V) == PossibleNonUniformPtrs.end())
+ addToWorklistIfAllowed(V);
// Expand Worklist in topological order: whenever a new instruction
// is added, its users should already be inside Worklist. It ensures
@@ -4762,10 +4830,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
return Worklist.count(J) ||
(OI == getLoadStorePointerOperand(J) &&
isUniformDecision(J, VF));
- })) {
- Worklist.insert(OI);
- LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n");
- }
+ }))
+ addToWorklistIfAllowed(OI);
}
}
@@ -4807,11 +4873,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
continue;
// The induction variable and its update instruction will remain uniform.
- Worklist.insert(Ind);
- Worklist.insert(IndUpdate);
- LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ind << "\n");
- LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate
- << "\n");
+ addToWorklistIfAllowed(Ind);
+ addToWorklistIfAllowed(IndUpdate);
}
Uniforms[VF].insert(Worklist.begin(), Worklist.end());
@@ -5143,9 +5206,10 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
if (Legal->getMaxSafeDepDistBytes() != -1U)
return 1;
- // Do not interleave loops with a relatively small trip count.
- unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
+ // Do not interleave loops with a relatively small known or estimated trip
+ // count.
+ auto BestKnownTC = getSmallBestKnownTC(*PSE.getSE(), TheLoop);
+ if (BestKnownTC && *BestKnownTC < TinyTripCountInterleaveThreshold)
return 1;
RegisterUsage R = calculateRegisterUsage({VF})[0];
@@ -5208,12 +5272,10 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor;
}
- // If the trip count is constant, limit the interleave count to be less than
- // the trip count divided by VF.
- if (TC > 0) {
- assert(TC >= VF && "VF exceeds trip count?");
- if ((TC / VF) < MaxInterleaveCount)
- MaxInterleaveCount = (TC / VF);
+ // If trip count is known or estimated compile time constant, limit the
+ // interleave count to be less than the trip count divided by VF.
+ if (BestKnownTC) {
+ MaxInterleaveCount = std::min(*BestKnownTC / VF, MaxInterleaveCount);
}
// If we did not calculate the cost for VF (because the user selected the VF)
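
A worked example of the new clamp, with invented numbers (assumes <algorithm> for std::min):

// Estimated trip count 100, VF = 8, target maximum 16:
unsigned BestTC = 100, VF = 8, MaxInterleaveCount = 16;
MaxInterleaveCount = std::min(BestTC / VF, MaxInterleaveCount); // 100/8 -> 12
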
@@ -5746,7 +5808,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// vectorized loop where the user of it is a vectorized instruction.
const MaybeAlign Alignment = getLoadStoreAlignment(I);
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
- Alignment ? Alignment->value() : 0, AS);
+ Alignment, AS);
// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
@@ -5783,8 +5845,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
Alignment ? Alignment->value() : 0, AS);
else
- Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
- Alignment ? Alignment->value() : 0, AS, I);
+ Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I);
bool Reverse = ConsecutiveStride < 0;
if (Reverse)
@@ -5800,16 +5861,14 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
unsigned AS = getLoadStoreAddressSpace(I);
if (isa<LoadInst>(I)) {
return TTI.getAddressComputationCost(ValTy) +
- TTI.getMemoryOpCost(Instruction::Load, ValTy,
- Alignment ? Alignment->value() : 0, AS) +
+ TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
}
StoreInst *SI = cast<StoreInst>(I);
bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
return TTI.getAddressComputationCost(ValTy) +
- TTI.getMemoryOpCost(Instruction::Store, ValTy,
- Alignment ? Alignment->value() : 0, AS) +
+ TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
(isLoopInvariantStoreValue
? 0
: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
@@ -5877,8 +5936,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
unsigned AS = getLoadStoreAddressSpace(I);
return TTI.getAddressComputationCost(ValTy) +
- TTI.getMemoryOpCost(I->getOpcode(), ValTy,
- Alignment ? Alignment->value() : 0, AS, I);
+ TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
}
return getWideningCost(I, VF);
}
@@ -6217,7 +6275,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
return N * TTI.getArithmeticInstrCost(
I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
- Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
+ Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
}
case Instruction::FNeg: {
unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
@@ -6225,7 +6283,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
TargetTransformInfo::OK_AnyValue,
TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
- I->getOperand(0));
+ I->getOperand(0), I);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
@@ -6714,37 +6772,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
return BlockMaskCache[BB] = BlockMask;
}
-VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
- VFRange &Range,
- VPlanPtr &Plan) {
- const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I);
- if (!IG)
- return nullptr;
-
- // Now check if IG is relevant for VF's in the given range.
- auto isIGMember = [&](Instruction *I) -> std::function<bool(unsigned)> {
- return [=](unsigned VF) -> bool {
- return (VF >= 2 && // Query is illegal for VF == 1
- CM.getWideningDecision(I, VF) ==
- LoopVectorizationCostModel::CM_Interleave);
- };
- };
- if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range))
- return nullptr;
-
- // I is a member of an InterleaveGroup for VF's in the (possibly trimmed)
- // range. If it's the primary member of the IG construct a VPInterleaveRecipe.
- // Otherwise, it's an adjunct member of the IG, do not construct any Recipe.
- assert(I == IG->getInsertPos() &&
- "Generating a recipe for an adjunct member of an interleave group");
-
- VPValue *Mask = nullptr;
- if (Legal->isMaskRequired(I))
- Mask = createBlockInMask(I->getParent(), Plan);
-
- return new VPInterleaveRecipe(IG, Mask);
-}
-
VPWidenMemoryInstructionRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
VPlanPtr &Plan) {
@@ -6754,15 +6781,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
auto willWiden = [&](unsigned VF) -> bool {
if (VF == 1)
return false;
- if (CM.isScalarAfterVectorization(I, VF) ||
- CM.isProfitableToScalarize(I, VF))
- return false;
LoopVectorizationCostModel::InstWidening Decision =
CM.getWideningDecision(I, VF);
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
"CM decision should be taken at this point.");
- assert(Decision != LoopVectorizationCostModel::CM_Interleave &&
- "Interleave memory opportunity should be caught earlier.");
+ if (Decision == LoopVectorizationCostModel::CM_Interleave)
+ return true;
+ if (CM.isScalarAfterVectorization(I, VF) ||
+ CM.isProfitableToScalarize(I, VF))
+ return false;
return Decision != LoopVectorizationCostModel::CM_Scalarize;
};
@@ -6773,7 +6800,8 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
if (Legal->isMaskRequired(I))
Mask = createBlockInMask(I->getParent(), Plan);
- return new VPWidenMemoryInstructionRecipe(*I, Mask);
+ VPValue *Addr = Plan->getOrAddVPValue(getLoadStorePointerOperand(I));
+ return new VPWidenMemoryInstructionRecipe(*I, Addr, Mask);
}
VPWidenIntOrFpInductionRecipe *
@@ -6861,7 +6889,6 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
case Instruction::FPTrunc:
case Instruction::FRem:
case Instruction::FSub:
- case Instruction::GetElementPtr:
case Instruction::ICmp:
case Instruction::IntToPtr:
case Instruction::Load:
@@ -6926,16 +6953,23 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
return false;
+ // If this ingredient's recipe is to be recorded, keep its recipe a singleton
+ // to avoid having to split recipes later.
+ bool IsSingleton = Ingredient2Recipe.count(I);
+
+ // Success: widen this instruction.
- // Success: widen this instruction. We optimize the common case where
+ // Use the default widening recipe. We optimize the common case where
// consecutive instructions can be represented by a single recipe.
- if (!VPBB->empty()) {
- VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
- if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
- return true;
- }
+ if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() &&
+ LastExtensibleRecipe->appendInstruction(I))
+ return true;
- VPBB->appendRecipe(new VPWidenRecipe(I));
+ VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I);
+ if (!IsSingleton)
+ LastExtensibleRecipe = WidenRecipe;
+ setRecipe(I, WidenRecipe);
+ VPBB->appendRecipe(WidenRecipe);
return true;
}
@@ -6951,6 +6985,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
[&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
+ setRecipe(I, Recipe);
// Find if I uses a predicated instruction. If so, it will use its scalar
// value. Avoid hoisting the insert-element which packs the scalar value into
@@ -7009,36 +7044,36 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
VPlanPtr &Plan, VPBasicBlock *VPBB) {
VPRecipeBase *Recipe = nullptr;
- // Check if Instr should belong to an interleave memory recipe, or already
- // does. In the latter case Instr is irrelevant.
- if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
- VPBB->appendRecipe(Recipe);
- return true;
- }
- // Check if Instr is a memory operation that should be widened.
- if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) {
+ // First, check for specific widening recipes that deal with memory
+ // operations, inductions and Phi nodes.
+ if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
+ (Recipe = tryToOptimizeInduction(Instr, Range)) ||
+ (Recipe = tryToBlend(Instr, Plan)) ||
+ (isa<PHINode>(Instr) &&
+ (Recipe = new VPWidenPHIRecipe(cast<PHINode>(Instr))))) {
+ setRecipe(Instr, Recipe);
VPBB->appendRecipe(Recipe);
return true;
}
- // Check if Instr should form some PHI recipe.
- if ((Recipe = tryToOptimizeInduction(Instr, Range))) {
- VPBB->appendRecipe(Recipe);
- return true;
- }
- if ((Recipe = tryToBlend(Instr, Plan))) {
+ // Handle GEP widening.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
+ auto Scalarize = [&](unsigned VF) {
+ return CM.isScalarWithPredication(Instr, VF) ||
+ CM.isScalarAfterVectorization(Instr, VF) ||
+ CM.isProfitableToScalarize(Instr, VF);
+ };
+ if (LoopVectorizationPlanner::getDecisionAndClampRange(Scalarize, Range))
+ return false;
+ VPWidenGEPRecipe *Recipe = new VPWidenGEPRecipe(GEP, OrigLoop);
+ setRecipe(Instr, Recipe);
VPBB->appendRecipe(Recipe);
return true;
}
- if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
- VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
- return true;
- }
// Check if Instr is to be widened by a general VPWidenRecipe, after
- // having first checked for specific widening recipes that deal with
- // Interleave Groups, Inductions and Phi nodes.
+ // having first checked for specific widening recipes.
if (tryToWiden(Instr, VPBB, Range))
return true;
@@ -7094,19 +7129,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
+
// Hold a mapping from predicated instructions to their recipes, in order to
// fix their AlsoPack behavior if a user is determined to replicate and use a
// scalar instead of vector value.
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
- DenseMap<Instruction *, Instruction *> SinkAfterInverse;
+
+ SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
+
+ VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
+
+ // ---------------------------------------------------------------------------
+ // Pre-construction: record ingredients whose recipes we'll need to further
+ // process after constructing the initial VPlan.
+ // ---------------------------------------------------------------------------
+
+ // Mark instructions we'll need to sink later and their targets as
+ // ingredients whose recipe we'll need to record.
+ for (auto &Entry : SinkAfter) {
+ RecipeBuilder.recordRecipeOf(Entry.first);
+ RecipeBuilder.recordRecipeOf(Entry.second);
+ }
+
+  // For each interleave group that is relevant for this (possibly trimmed)
+  // Range, add it to the set of groups to apply to the VPlan later, and record
+  // its members as ingredients whose recipes we will replace with a single
+  // VPInterleaveRecipe.
+ for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
+ auto applyIG = [IG, this](unsigned VF) -> bool {
+ return (VF >= 2 && // Query is illegal for VF == 1
+ CM.getWideningDecision(IG->getInsertPos(), VF) ==
+ LoopVectorizationCostModel::CM_Interleave);
+ };
+ if (!getDecisionAndClampRange(applyIG, Range))
+ continue;
+ InterleaveGroups.insert(IG);
+ for (unsigned i = 0; i < IG->getFactor(); i++)
+ if (Instruction *Member = IG->getMember(i))
+ RecipeBuilder.recordRecipeOf(Member);
+  }
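// Illustrative sketch (simplified, assumed semantics, not patch code): applyIG
// above relies on getDecisionAndClampRange taking the decision at Range.Start
// and clamping Range.End so the decision is uniform across the whole
// power-of-two range. VFRangeSketch and decideAndClamp are hypothetical
// stand-ins for the real VFRange and planner method.
#include <functional>
struct VFRangeSketch { unsigned Start, End; }; // VFs considered: [Start, End)
static bool decideAndClamp(const std::function<bool(unsigned)> &Pred,
                           VFRangeSketch &Range) {
  bool AtStart = Pred(Range.Start);
  for (unsigned VF = Range.Start * 2; VF < Range.End; VF *= 2)
    if (Pred(VF) != AtStart) {
      Range.End = VF; // First disagreeing VF ends the uniform sub-range.
      break;
    }
  return AtStart;
}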
+
+ // ---------------------------------------------------------------------------
+ // Build initial VPlan: Scan the body of the loop in a topological order to
+ // visit each basic block after having visited its predecessor basic blocks.
+ // ---------------------------------------------------------------------------
// Create a dummy pre-entry VPBasicBlock to start building the VPlan.
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
auto Plan = std::make_unique<VPlan>(VPBB);
- VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
// Represent values that will have defs inside VPlan.
for (Value *V : NeedDef)
Plan->addVPValue(V);
@@ -7125,10 +7198,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBB = FirstVPBBForBB;
Builder.setInsertPoint(VPBB);
- std::vector<Instruction *> Ingredients;
-
- // Organize the ingredients to vectorize from current basic block in the
- // right order.
+ // Introduce each ingredient into VPlan.
for (Instruction &I : BB->instructionsWithoutDebug()) {
Instruction *Instr = &I;
@@ -7138,43 +7208,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
DeadInstructions.find(Instr) != DeadInstructions.end())
continue;
- // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
- // member of the IG, do not construct any Recipe for it.
- const InterleaveGroup<Instruction> *IG =
- CM.getInterleavedAccessGroup(Instr);
- if (IG && Instr != IG->getInsertPos() &&
- Range.Start >= 2 && // Query is illegal for VF == 1
- CM.getWideningDecision(Instr, Range.Start) ==
- LoopVectorizationCostModel::CM_Interleave) {
- auto SinkCandidate = SinkAfterInverse.find(Instr);
- if (SinkCandidate != SinkAfterInverse.end())
- Ingredients.push_back(SinkCandidate->second);
- continue;
- }
-
- // Move instructions to handle first-order recurrences, step 1: avoid
- // handling this instruction until after we've handled the instruction it
- // should follow.
- auto SAIt = SinkAfter.find(Instr);
- if (SAIt != SinkAfter.end()) {
- LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after"
- << *SAIt->second
- << " to vectorize a 1st order recurrence.\n");
- SinkAfterInverse[SAIt->second] = Instr;
- continue;
- }
-
- Ingredients.push_back(Instr);
-
- // Move instructions to handle first-order recurrences, step 2: push the
- // instruction to be sunk at its insertion point.
- auto SAInvIt = SinkAfterInverse.find(Instr);
- if (SAInvIt != SinkAfterInverse.end())
- Ingredients.push_back(SAInvIt->second);
- }
-
- // Introduce each ingredient into VPlan.
- for (Instruction *Instr : Ingredients) {
if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB))
continue;
@@ -7199,6 +7232,33 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
delete PreEntry;
+ // ---------------------------------------------------------------------------
+ // Transform initial VPlan: Apply previously taken decisions, in order, to
+ // bring the VPlan to its final state.
+ // ---------------------------------------------------------------------------
+
+ // Apply Sink-After legal constraints.
+ for (auto &Entry : SinkAfter) {
+ VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
+ VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
+ Sink->moveAfter(Target);
+ }
+
+ // Interleave memory: for each Interleave Group we marked earlier as relevant
+ // for this VPlan, replace the Recipes widening its memory instructions with a
+ // single VPInterleaveRecipe at its insertion point.
+ for (auto IG : InterleaveGroups) {
+ auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
+ RecipeBuilder.getRecipe(IG->getInsertPos()));
+ (new VPInterleaveRecipe(IG, Recipe->getAddr(), Recipe->getMask()))
+ ->insertBefore(Recipe);
+
+ for (unsigned i = 0; i < IG->getFactor(); ++i)
+ if (Instruction *Member = IG->getMember(i)) {
+ RecipeBuilder.getRecipe(Member)->eraseFromParent();
+ }
+ }
+
// Finally, if tail is folded by masking, introduce selects between the phi
// and the live-out instruction of each reduction, at the end of the latch.
if (CM.foldTailByMasking()) {
@@ -7255,9 +7315,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
}
SmallPtrSet<Instruction *, 1> DeadInstructions;
- VPlanHCFGTransforms::VPInstructionsToVPRecipes(
- Plan, Legal->getInductionVars(), DeadInstructions);
-
+ VPlanTransforms::VPInstructionsToVPRecipes(
+ OrigLoop, Plan, Legal->getInductionVars(), DeadInstructions);
return Plan;
}
@@ -7266,13 +7325,21 @@ getOrCreateVectorValues(Value *V, unsigned Part) {
return ILV.getOrCreateVectorValue(V, Part);
}
+Value *LoopVectorizationPlanner::VPCallbackILV::getOrCreateScalarValue(
+ Value *V, const VPIteration &Instance) {
+ return ILV.getOrCreateScalarValue(V, Instance);
+}
+
void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const {
O << " +\n"
<< Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
IG->getInsertPos()->printAsOperand(O, false);
- if (User) {
+ O << ", ";
+ getAddr()->printAsOperand(O);
+ VPValue *Mask = getMask();
+ if (Mask) {
O << ", ";
- User->getOperand(0)->printAsOperand(O);
+ Mask->printAsOperand(O);
}
O << "\\l\"";
for (unsigned i = 0; i < IG->getFactor(); ++i)
@@ -7286,6 +7353,11 @@ void VPWidenRecipe::execute(VPTransformState &State) {
State.ILV->widenInstruction(Instr);
}
+void VPWidenGEPRecipe::execute(VPTransformState &State) {
+ State.ILV->widenGEP(GEP, State.UF, State.VF, IsPtrLoopInvariant,
+ IsIndexLoopInvariant);
+}
+
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Int or FP induction being replicated.");
State.ILV->widenIntOrFpInduction(IV, Trunc);
@@ -7336,15 +7408,8 @@ void VPBlendRecipe::execute(VPTransformState &State) {
void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Interleave group being replicated.");
- if (!User)
- return State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
-
- // Last (and currently only) operand is a mask.
- InnerLoopVectorizer::VectorParts MaskValues(State.UF);
- VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
- for (unsigned Part = 0; Part < State.UF; ++Part)
- MaskValues[Part] = State.get(Mask, Part);
- State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), &MaskValues);
+ State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), State, getAddr(),
+ getMask());
}
void VPReplicateRecipe::execute(VPTransformState &State) {
@@ -7431,29 +7496,46 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
}
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
- if (!User)
- return State.ILV->vectorizeMemoryInstruction(&Instr);
-
- // Last (and currently only) operand is a mask.
- InnerLoopVectorizer::VectorParts MaskValues(State.UF);
- VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
- for (unsigned Part = 0; Part < State.UF; ++Part)
- MaskValues[Part] = State.get(Mask, Part);
- State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
-}
-
-static ScalarEpilogueLowering
-getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
- ScalarEpilogueLowering SEL = CM_ScalarEpilogueAllowed;
- if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
- (F->hasOptSize() ||
- llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)))
- SEL = CM_ScalarEpilogueNotAllowedOptSize;
- else if (PreferPredicateOverEpilog || Hints.getPredicate())
- SEL = CM_ScalarEpilogueNotNeededUsePredicate;
-
- return SEL;
+ State.ILV->vectorizeMemoryInstruction(&Instr, State, getAddr(), getMask());
+}
+
+// Determine how to lower the scalar epilogue, which depends on 1) optimising
+// for minimum code-size, 2) command-line options that control predication,
+// 3) loop hints forcing predication, and 4) a TTI hook that analyses whether
+// the loop is suitable for predication.
+static ScalarEpilogueLowering getScalarEpilogueLowering(
+ Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
+ AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+ LoopVectorizationLegality &LVL) {
+ bool OptSize =
+ F->hasOptSize() || llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
+ PGSOQueryType::IRPass);
+ // 1) OptSize takes precedence over all other options, i.e. if this is set,
+ // don't look at hints or options, and don't request a scalar epilogue.
+ if (OptSize && Hints.getForce() != LoopVectorizeHints::FK_Enabled)
+ return CM_ScalarEpilogueNotAllowedOptSize;
+
+ bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() &&
+ !PreferPredicateOverEpilog;
+
+ // 2) Next, if disabling predication is requested on the command line, honour
+ // this and request a scalar epilogue. Also do this if we don't have a
+ // primary induction variable, which is required for predication.
+ if (PredicateOptDisabled || !LVL.getPrimaryInduction())
+ return CM_ScalarEpilogueAllowed;
+
+  // 3) and 4) Check whether predication is requested on the command line or
+  // with a loop hint, or whether the TTI hook indicates that it is
+  // profitable; if so, request predication.
+ if (PreferPredicateOverEpilog ||
+ Hints.getPredicate() == LoopVectorizeHints::FK_Enabled ||
+ (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT,
+ LVL.getLAI()) &&
+ Hints.getPredicate() != LoopVectorizeHints::FK_Disabled))
+ return CM_ScalarEpilogueNotNeededUsePredicate;
+
+ return CM_ScalarEpilogueAllowed;
}
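// Illustrative sketch (hypothetical, not patch code): the precedence the
// function above encodes, reduced to plain booleans. The parameters collapse
// the real hint, command-line, and TTI queries into single flags.
enum EpilogueChoice { NotAllowedOptSize, Allowed, UsePredication };
static EpilogueChoice chooseEpilogue(bool OptSize, bool ForceVectorize,
                                     bool PredicationDisabledOnCmdLine,
                                     bool HasPrimaryInduction,
                                     bool PredicationRequested) {
  // 1) Code size wins unless vectorization is explicitly forced.
  if (OptSize && !ForceVectorize)
    return NotAllowedOptSize;
  // 2) An explicit opt-out, or a missing primary induction (required for
  //    predication), keeps the classic scalar epilogue.
  if (PredicationDisabledOnCmdLine || !HasPrimaryInduction)
    return Allowed;
  // 3) and 4) Command-line options, loop hints, or the TTI hook can request
  //    tail folding by masking instead of a scalar epilogue.
  if (PredicationRequested)
    return UsePredication;
  return Allowed;
}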
// Process the loop in the VPlan-native vectorization path. This path builds
@@ -7470,14 +7552,16 @@ static bool processLoopInVPlanNativePath(
assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
Function *F = L->getHeader()->getParent();
InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI());
- ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI);
+
+ ScalarEpilogueLowering SEL = getScalarEpilogueLowering(
+ F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, *LVL);
LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F,
&Hints, IAI);
// Use the planner for outer loop vectorization.
// TODO: CM is not used at this point inside the planner. Turn CM into an
// optional argument if we don't need it in the future.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
+ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);
// Get user vectorization factor.
const unsigned UserVF = Hints.getWidth();
@@ -7562,7 +7646,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check the function attributes and profiles to find out if this function
// should be optimized for size.
- ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI);
+ ScalarEpilogueLowering SEL = getScalarEpilogueLowering(
+ F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, LVL);
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
@@ -7635,7 +7720,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
CM.collectValuesToIgnore();
// Use the planner for vectorization.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM);
+ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);
// Get user vectorization factor.
unsigned UserVF = Hints.getWidth();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 974eff9974d9..aabd974cd73e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -72,6 +73,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -127,6 +129,10 @@ static cl::opt<int>
MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
+static cl::opt<int>
+MaxStoreLookup("slp-max-store-lookup", cl::init(32), cl::Hidden,
+ cl::desc("Maximum depth of the lookup for consecutive stores."));
+
/// Limits the size of scheduling regions in a block.
/// It avoids long compile times for _very_ large blocks where vector
/// instructions are spread over a wide range.
@@ -147,6 +153,20 @@ static cl::opt<unsigned> MinTreeSize(
"slp-min-tree-size", cl::init(3), cl::Hidden,
cl::desc("Only vectorize small trees if they are fully vectorizable"));
+// The maximum depth that the look-ahead score heuristic will explore.
+// The higher this value, the higher the compilation time overhead.
+static cl::opt<int> LookAheadMaxDepth(
+ "slp-max-look-ahead-depth", cl::init(2), cl::Hidden,
+ cl::desc("The maximum look-ahead depth for operand reordering scores"));
+
+// The look-ahead heuristic goes through the users of the bundle to calculate
+// the users' cost in getExternalUsesCost(). To avoid a compile-time increase,
+// we limit the number of users visited to this value.
+static cl::opt<unsigned> LookAheadUsersBudget(
+ "slp-look-ahead-users-budget", cl::init(2), cl::Hidden,
+ cl::desc("The maximum number of users to visit while visiting the "
+ "predecessors. This prevents compilation time increase."));
+
static cl::opt<bool>
ViewSLPTree("view-slp-tree", cl::Hidden,
cl::desc("Display the SLP trees with Graphviz"));
@@ -547,7 +567,7 @@ public:
/// Construct a vectorizable tree that starts at \p Roots, ignoring users for
/// the purpose of scheduling and extraction in the \p UserIgnoreLst taking
- /// into account (anf updating it, if required) list of externally used
+  /// into account (and updating it, if required) the list of externally used
/// values stored in \p ExternallyUsedValues.
void buildTree(ArrayRef<Value *> Roots,
ExtraValueToDebugLocsMap &ExternallyUsedValues,
@@ -609,7 +629,10 @@ public:
return MinVecRegSize;
}
- /// Check if ArrayType or StructType is isomorphic to some VectorType.
+  /// Check if a homogeneous aggregate is isomorphic to some VectorType.
+  /// Accepts homogeneous multidimensional aggregates of scalars/vectors like
+ /// {[4 x i16], [4 x i16]}, { <2 x float>, <2 x float> },
+ /// {{{i16, i16}, {i16, i16}}, {{i16, i16}, {i16, i16}}} and so on.
///
/// \returns number of elements in vector if isomorphism exists, 0 otherwise.
unsigned canMapToVector(Type *T, const DataLayout &DL) const;
@@ -721,6 +744,7 @@ public:
const DataLayout &DL;
ScalarEvolution &SE;
+ const BoUpSLP &R;
/// \returns the operand data at \p OpIdx and \p Lane.
OperandData &getData(unsigned OpIdx, unsigned Lane) {
@@ -746,6 +770,227 @@ public:
std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
}
+ // The hard-coded scores listed here are not very important. When computing
+ // the scores of matching one sub-tree with another, we are basically
+ // counting the number of values that are matching. So even if all scores
+ // are set to 1, we would still get a decent matching result.
+ // However, sometimes we have to break ties. For example we may have to
+ // choose between matching loads vs matching opcodes. This is what these
+ // scores are helping us with: they provide the order of preference.
+
+ /// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]).
+ static const int ScoreConsecutiveLoads = 3;
+ /// ExtractElementInst from same vector and consecutive indexes.
+ static const int ScoreConsecutiveExtracts = 3;
+ /// Constants.
+ static const int ScoreConstants = 2;
+ /// Instructions with the same opcode.
+ static const int ScoreSameOpcode = 2;
+  /// Instructions with alt opcodes (e.g., add + sub).
+ static const int ScoreAltOpcodes = 1;
+ /// Identical instructions (a.k.a. splat or broadcast).
+ static const int ScoreSplat = 1;
+ /// Matching with an undef is preferable to failing.
+ static const int ScoreUndef = 1;
+ /// Score for failing to find a decent match.
+ static const int ScoreFail = 0;
+  /// User external to the vectorized code.
+ static const int ExternalUseCost = 1;
+ /// The user is internal but in a different lane.
+ static const int UserInDiffLaneCost = ExternalUseCost;
+
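// A minimal check (illustrative only) of the preference order the constants
// above establish: consecutive loads beat same-opcode matches, which beat
// alt-opcode and splat matches, which still beat an outright failure. The
// mirrored constants below are local stand-ins, not the class members.
constexpr int Loads = 3, SameOp = 2, AltOp = 1, Splat = 1, Fail = 0;
static_assert(Loads > SameOp && SameOp > AltOp && AltOp == Splat &&
                  Splat > Fail,
              "tie-break order assumed by the operand-reordering heuristic");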
+ /// \returns the score of placing \p V1 and \p V2 in consecutive lanes.
+ static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL,
+ ScalarEvolution &SE) {
+ auto *LI1 = dyn_cast<LoadInst>(V1);
+ auto *LI2 = dyn_cast<LoadInst>(V2);
+ if (LI1 && LI2)
+ return isConsecutiveAccess(LI1, LI2, DL, SE)
+ ? VLOperands::ScoreConsecutiveLoads
+ : VLOperands::ScoreFail;
+
+ auto *C1 = dyn_cast<Constant>(V1);
+ auto *C2 = dyn_cast<Constant>(V2);
+ if (C1 && C2)
+ return VLOperands::ScoreConstants;
+
+    // Extracts from consecutive indexes of the same vector score better, as
+    // the extracts could be optimized away.
+ auto *Ex1 = dyn_cast<ExtractElementInst>(V1);
+ auto *Ex2 = dyn_cast<ExtractElementInst>(V2);
+ if (Ex1 && Ex2 && Ex1->getVectorOperand() == Ex2->getVectorOperand() &&
+ cast<ConstantInt>(Ex1->getIndexOperand())->getZExtValue() + 1 ==
+ cast<ConstantInt>(Ex2->getIndexOperand())->getZExtValue()) {
+ return VLOperands::ScoreConsecutiveExtracts;
+ }
+
+ auto *I1 = dyn_cast<Instruction>(V1);
+ auto *I2 = dyn_cast<Instruction>(V2);
+ if (I1 && I2) {
+ if (I1 == I2)
+ return VLOperands::ScoreSplat;
+ InstructionsState S = getSameOpcode({I1, I2});
+ // Note: Only consider instructions with <= 2 operands to avoid
+ // complexity explosion.
+ if (S.getOpcode() && S.MainOp->getNumOperands() <= 2)
+ return S.isAltShuffle() ? VLOperands::ScoreAltOpcodes
+ : VLOperands::ScoreSameOpcode;
+ }
+
+ if (isa<UndefValue>(V2))
+ return VLOperands::ScoreUndef;
+
+ return VLOperands::ScoreFail;
+ }
+
+  /// Holds the values and their lanes that take part in the look-ahead
+ /// score calculation. This is used in the external uses cost calculation.
+ SmallDenseMap<Value *, int> InLookAheadValues;
+
+  /// \returns the additional cost due to uses of \p LHS and \p RHS that are
+ /// either external to the vectorized code, or require shuffling.
+ int getExternalUsesCost(const std::pair<Value *, int> &LHS,
+ const std::pair<Value *, int> &RHS) {
+ int Cost = 0;
+ SmallVector<std::pair<Value *, int>, 2> Values = {LHS, RHS};
+ for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) {
+ Value *V = Values[Idx].first;
+ // Calculate the absolute lane, using the minimum relative lane of LHS
+ // and RHS as base and Idx as the offset.
+ int Ln = std::min(LHS.second, RHS.second) + Idx;
+ assert(Ln >= 0 && "Bad lane calculation");
+ unsigned UsersBudget = LookAheadUsersBudget;
+ for (User *U : V->users()) {
+ if (const TreeEntry *UserTE = R.getTreeEntry(U)) {
+ // The user is in the VectorizableTree. Check if we need to insert.
+ auto It = llvm::find(UserTE->Scalars, U);
+ assert(It != UserTE->Scalars.end() && "U is in UserTE");
+ int UserLn = std::distance(UserTE->Scalars.begin(), It);
+ assert(UserLn >= 0 && "Bad lane");
+ if (UserLn != Ln)
+ Cost += UserInDiffLaneCost;
+ } else {
+ // Check if the user is in the look-ahead code.
+ auto It2 = InLookAheadValues.find(U);
+ if (It2 != InLookAheadValues.end()) {
+ // The user is in the look-ahead code. Check the lane.
+ if (It2->second != Ln)
+ Cost += UserInDiffLaneCost;
+ } else {
+            // The user is neither in the SLP tree nor in the look-ahead code.
+ Cost += ExternalUseCost;
+ }
+ }
+ // Limit the number of visited uses to cap compilation time.
+ if (--UsersBudget == 0)
+ break;
+ }
+ }
+ return Cost;
+ }
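// Worked example (illustrative): with LHS = {A, lane 0} and RHS = {B, lane 1},
// a user of A outside the vectorizable tree adds ExternalUseCost (1); a user
// of A that is in the tree but in a lane other than 0 adds UserInDiffLaneCost
// (1); and at most LookAheadUsersBudget users of each value are inspected.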
+
+ /// Go through the operands of \p LHS and \p RHS recursively until \p
+  /// MaxLevel, and return the cumulative score. For example:
+ /// \verbatim
+ /// A[0] B[0] A[1] B[1] C[0] D[0] B[1] A[1]
+ /// \ / \ / \ / \ /
+ /// + + + +
+ /// G1 G2 G3 G4
+ /// \endverbatim
+ /// The getScoreAtLevelRec(G1, G2) function will try to match the nodes at
+ /// each level recursively, accumulating the score. It starts from matching
+ /// the additions at level 0, then moves on to the loads (level 1). The
+ /// score of G1 and G2 is higher than G1 and G3, because {A[0],A[1]} and
+ /// {B[0],B[1]} match with VLOperands::ScoreConsecutiveLoads, while
+ /// {A[0],C[0]} has a score of VLOperands::ScoreFail.
+ /// Please note that the order of the operands does not matter, as we
+ /// evaluate the score of all profitable combinations of operands. In
+ /// other words the score of G1 and G4 is the same as G1 and G2. This
+ /// heuristic is based on ideas described in:
+ /// Look-ahead SLP: Auto-vectorization in the presence of commutative
+ /// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
+ /// Luís F. W. Góes
+ int getScoreAtLevelRec(const std::pair<Value *, int> &LHS,
+ const std::pair<Value *, int> &RHS, int CurrLevel,
+ int MaxLevel) {
+
+ Value *V1 = LHS.first;
+ Value *V2 = RHS.first;
+ // Get the shallow score of V1 and V2.
+ int ShallowScoreAtThisLevel =
+ std::max((int)ScoreFail, getShallowScore(V1, V2, DL, SE) -
+ getExternalUsesCost(LHS, RHS));
+ int Lane1 = LHS.second;
+ int Lane2 = RHS.second;
+
+    // If we reached MaxLevel,
+    // or if V1 and V2 are not instructions,
+    // or if they are SPLAT (the same instruction),
+    // or if their score is already final (e.g. consecutive loads that were
+    // fully scored above), return the current score early.
+ auto *I1 = dyn_cast<Instruction>(V1);
+ auto *I2 = dyn_cast<Instruction>(V2);
+ if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 ||
+ ShallowScoreAtThisLevel == VLOperands::ScoreFail ||
+ (isa<LoadInst>(I1) && isa<LoadInst>(I2) && ShallowScoreAtThisLevel))
+ return ShallowScoreAtThisLevel;
+ assert(I1 && I2 && "Should have early exited.");
+
+ // Keep track of in-tree values for determining the external-use cost.
+ InLookAheadValues[V1] = Lane1;
+ InLookAheadValues[V2] = Lane2;
+
+ // Contains the I2 operand indexes that got matched with I1 operands.
+ SmallSet<unsigned, 4> Op2Used;
+
+    // Recursion towards the operands of I1 and I2. We are trying all possible
+ // operand pairs, and keeping track of the best score.
+ for (unsigned OpIdx1 = 0, NumOperands1 = I1->getNumOperands();
+ OpIdx1 != NumOperands1; ++OpIdx1) {
+      // Try to pair operand OpIdx1 of I1 with the best operand of I2.
+ int MaxTmpScore = 0;
+ unsigned MaxOpIdx2 = 0;
+ bool FoundBest = false;
+ // If I2 is commutative try all combinations.
+ unsigned FromIdx = isCommutative(I2) ? 0 : OpIdx1;
+ unsigned ToIdx = isCommutative(I2)
+ ? I2->getNumOperands()
+ : std::min(I2->getNumOperands(), OpIdx1 + 1);
+ assert(FromIdx <= ToIdx && "Bad index");
+ for (unsigned OpIdx2 = FromIdx; OpIdx2 != ToIdx; ++OpIdx2) {
+ // Skip operands already paired with OpIdx1.
+ if (Op2Used.count(OpIdx2))
+ continue;
+ // Recursively calculate the cost at each level
+ int TmpScore = getScoreAtLevelRec({I1->getOperand(OpIdx1), Lane1},
+ {I2->getOperand(OpIdx2), Lane2},
+ CurrLevel + 1, MaxLevel);
+ // Look for the best score.
+ if (TmpScore > VLOperands::ScoreFail && TmpScore > MaxTmpScore) {
+ MaxTmpScore = TmpScore;
+ MaxOpIdx2 = OpIdx2;
+ FoundBest = true;
+ }
+ }
+ if (FoundBest) {
+ // Pair {OpIdx1, MaxOpIdx2} was found to be best. Never revisit it.
+ Op2Used.insert(MaxOpIdx2);
+ ShallowScoreAtThisLevel += MaxTmpScore;
+ }
+ }
+ return ShallowScoreAtThisLevel;
+ }
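// Standalone toy model (illustrative, heavily simplified) of the recursion
// above: nodes either match (score 1) or not (score 0), and each level adds
// the best greedy pairing of operands, mirroring the Op2Used bookkeeping.
// The real code uses getShallowScore and the external-use costs instead of
// this 0/1 model; ToyNode is a hypothetical stand-in.
#include <cstddef>
#include <vector>
struct ToyNode {
  int Kind;                   // stands in for opcode / value identity
  std::vector<ToyNode *> Ops; // operands
};
static int toyScoreRec(const ToyNode *A, const ToyNode *B, int Level,
                       int MaxLevel) {
  int Score = (A->Kind == B->Kind) ? 1 : 0; // "shallow" score at this level
  if (Score == 0 || Level == MaxLevel)
    return Score;
  std::vector<bool> Used(B->Ops.size(), false);
  for (const ToyNode *OpA : A->Ops) {
    int Best = 0, BestIdx = -1;
    for (std::size_t J = 0; J < B->Ops.size(); ++J) {
      if (Used[J])
        continue;
      int S = toyScoreRec(OpA, B->Ops[J], Level + 1, MaxLevel);
      if (S > Best) {
        Best = S;
        BestIdx = (int)J;
      }
    }
    if (BestIdx >= 0) {
      Used[BestIdx] = true; // never pair the same operand of B twice
      Score += Best;
    }
  }
  return Score;
}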
+
+  /// \returns the look-ahead score, which tells us how much the sub-trees
+  /// rooted at \p LHS and \p RHS match: the more they match, the higher the
+ /// score. This helps break ties in an informed way when we cannot decide on
+ /// the order of the operands by just considering the immediate
+ /// predecessors.
+ int getLookAheadScore(const std::pair<Value *, int> &LHS,
+ const std::pair<Value *, int> &RHS) {
+ InLookAheadValues.clear();
+ return getScoreAtLevelRec(LHS, RHS, 1, LookAheadMaxDepth);
+ }
+
+  // Search all operands in Ops[*][Lane] for the one that best matches
+  // Ops[OpIdx][LastLane] and return its operand index.
// If no good match can be found, return None.
@@ -763,9 +1008,6 @@ public:
// The linearized opcode of the operand at OpIdx, Lane.
bool OpIdxAPO = getData(OpIdx, Lane).APO;
- const unsigned BestScore = 2;
- const unsigned GoodScore = 1;
-
// The best operand index and its score.
// Sometimes we have more than one option (e.g., Opcode and Undefs), so we
// are using the score to differentiate between the two.
@@ -794,41 +1036,19 @@ public:
// Look for an operand that matches the current mode.
switch (RMode) {
case ReorderingMode::Load:
- if (isa<LoadInst>(Op)) {
- // Figure out which is left and right, so that we can check for
- // consecutive loads
- bool LeftToRight = Lane > LastLane;
- Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
- Value *OpRight = (LeftToRight) ? Op : OpLastLane;
- if (isConsecutiveAccess(cast<LoadInst>(OpLeft),
- cast<LoadInst>(OpRight), DL, SE))
- BestOp.Idx = Idx;
- }
- break;
- case ReorderingMode::Opcode:
- // We accept both Instructions and Undefs, but with different scores.
- if ((isa<Instruction>(Op) && isa<Instruction>(OpLastLane) &&
- cast<Instruction>(Op)->getOpcode() ==
- cast<Instruction>(OpLastLane)->getOpcode()) ||
- (isa<UndefValue>(OpLastLane) && isa<Instruction>(Op)) ||
- isa<UndefValue>(Op)) {
- // An instruction has a higher score than an undef.
- unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
- if (Score > BestOp.Score) {
- BestOp.Idx = Idx;
- BestOp.Score = Score;
- }
- }
- break;
case ReorderingMode::Constant:
- if (isa<Constant>(Op)) {
- unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
- if (Score > BestOp.Score) {
- BestOp.Idx = Idx;
- BestOp.Score = Score;
- }
+ case ReorderingMode::Opcode: {
+ bool LeftToRight = Lane > LastLane;
+ Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
+ Value *OpRight = (LeftToRight) ? Op : OpLastLane;
+ unsigned Score =
+ getLookAheadScore({OpLeft, LastLane}, {OpRight, Lane});
+ if (Score > BestOp.Score) {
+ BestOp.Idx = Idx;
+ BestOp.Score = Score;
}
break;
+ }
case ReorderingMode::Splat:
if (Op == OpLastLane)
BestOp.Idx = Idx;
@@ -959,8 +1179,8 @@ public:
public:
/// Initialize with all the operands of the instruction vector \p RootVL.
VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
- ScalarEvolution &SE)
- : DL(DL), SE(SE) {
+ ScalarEvolution &SE, const BoUpSLP &R)
+ : DL(DL), SE(SE), R(R) {
// Append all the operands of RootVL.
appendOperandsOfVL(RootVL);
}
@@ -1189,7 +1409,8 @@ private:
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right,
const DataLayout &DL,
- ScalarEvolution &SE);
+ ScalarEvolution &SE,
+ const BoUpSLP &R);
struct TreeEntry {
using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;
TreeEntry(VecTreeTy &Container) : Container(Container) {}
@@ -1211,7 +1432,8 @@ private:
Value *VectorizedValue = nullptr;
/// Do we need to gather this sequence ?
- bool NeedToGather = false;
+ enum EntryState { Vectorize, NeedToGather };
+ EntryState State;
/// Does this sequence require some shuffling?
SmallVector<unsigned, 4> ReuseShuffleIndices;
@@ -1353,15 +1575,30 @@ private:
dbgs() << "Scalars: \n";
for (Value *V : Scalars)
dbgs().indent(2) << *V << "\n";
- dbgs() << "NeedToGather: " << NeedToGather << "\n";
- dbgs() << "MainOp: " << *MainOp << "\n";
- dbgs() << "AltOp: " << *AltOp << "\n";
+ dbgs() << "State: ";
+ switch (State) {
+ case Vectorize:
+ dbgs() << "Vectorize\n";
+ break;
+ case NeedToGather:
+ dbgs() << "NeedToGather\n";
+ break;
+ }
+ dbgs() << "MainOp: ";
+ if (MainOp)
+ dbgs() << *MainOp << "\n";
+ else
+ dbgs() << "NULL\n";
+ dbgs() << "AltOp: ";
+ if (AltOp)
+ dbgs() << *AltOp << "\n";
+ else
+ dbgs() << "NULL\n";
dbgs() << "VectorizedValue: ";
if (VectorizedValue)
- dbgs() << *VectorizedValue;
+ dbgs() << *VectorizedValue << "\n";
else
- dbgs() << "NULL";
- dbgs() << "\n";
+ dbgs() << "NULL\n";
dbgs() << "ReuseShuffleIndices: ";
if (ReuseShuffleIndices.empty())
dbgs() << "Emtpy";
@@ -1392,7 +1629,7 @@ private:
TreeEntry *Last = VectorizableTree.back().get();
Last->Idx = VectorizableTree.size() - 1;
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
- Last->NeedToGather = !Vectorized;
+ Last->State = Vectorized ? TreeEntry::Vectorize : TreeEntry::NeedToGather;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
ReuseShuffleIndices.end());
Last->ReorderIndices = ReorderIndices;
@@ -1721,7 +1958,7 @@ private:
return nullptr;
}
- bool isInSchedulingRegion(ScheduleData *SD) {
+ bool isInSchedulingRegion(ScheduleData *SD) const {
return SD->SchedulingRegionID == SchedulingRegionID;
}
@@ -2063,7 +2300,7 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
static std::string getNodeAttributes(const TreeEntry *Entry,
const BoUpSLP *) {
- if (Entry->NeedToGather)
+ if (Entry->State == TreeEntry::NeedToGather)
return "color=red";
return "";
}
@@ -2115,7 +2352,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
TreeEntry *Entry = TEPtr.get();
// No need to handle users of gathered values.
- if (Entry->NeedToGather)
+ if (Entry->State == TreeEntry::NeedToGather)
continue;
// For each lane:
@@ -2152,7 +2389,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
!InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");
- assert(!UseEntry->NeedToGather && "Bad state");
+ assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state");
continue;
}
}
@@ -2448,7 +2685,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
uint64_t Size = DL->getTypeAllocSize(ScalarTy);
// Check that the sorted loads are consecutive.
- if (Diff && Diff->getAPInt().getZExtValue() == (VL.size() - 1) * Size) {
+ if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {
if (CurrentOrder.empty()) {
        // Original loads are consecutive and do not require reordering.
++NumOpsWantToKeepOriginalOrder;
@@ -2543,7 +2780,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
assert(P0 == SwapP0 && "Commutative Predicate mismatch");
- reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
} else {
// Collect operands - commute if it uses the swapped predicate.
for (Value *V : VL) {
@@ -2590,7 +2827,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// have the same opcode.
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
@@ -2637,9 +2874,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// We don't combine GEPs with non-constant indexes.
+ Type *Ty1 = VL0->getOperand(1)->getType();
for (Value *V : VL) {
auto Op = cast<Instruction>(V)->getOperand(1);
- if (!isa<ConstantInt>(Op)) {
+ if (!isa<ConstantInt>(Op) ||
+ (Op->getType() != Ty1 &&
+ Op->getType()->getScalarSizeInBits() >
+ DL->getIndexSizeInBits(
+ V->getType()->getPointerAddressSpace()))) {
LLVM_DEBUG(dbgs()
<< "SLP: not-vectorizable GEP (non-constant indexes).\n");
BS.cancelScheduling(VL, VL0);
@@ -2665,24 +2907,74 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
case Instruction::Store: {
// Check if the stores are consecutive or if we need to swizzle them.
- for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
- if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
+ llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType();
+ // Make sure all stores in the bundle are simple - we can't vectorize
+ // atomic or volatile stores.
+ SmallVector<Value *, 4> PointerOps(VL.size());
+ ValueList Operands(VL.size());
+ auto POIter = PointerOps.begin();
+ auto OIter = Operands.begin();
+ for (Value *V : VL) {
+ auto *SI = cast<StoreInst>(V);
+ if (!SI->isSimple()) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
+ LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple stores.\n");
return;
}
+ *POIter = SI->getPointerOperand();
+ *OIter = SI->getValueOperand();
+ ++POIter;
+ ++OIter;
+ }
- TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
+ OrdersType CurrentOrder;
+ // Check the order of pointer operands.
+ if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
+ Value *Ptr0;
+ Value *PtrN;
+ if (CurrentOrder.empty()) {
+ Ptr0 = PointerOps.front();
+ PtrN = PointerOps.back();
+ } else {
+ Ptr0 = PointerOps[CurrentOrder.front()];
+ PtrN = PointerOps[CurrentOrder.back()];
+ }
+ const SCEV *Scev0 = SE->getSCEV(Ptr0);
+ const SCEV *ScevN = SE->getSCEV(PtrN);
+ const auto *Diff =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
+ uint64_t Size = DL->getTypeAllocSize(ScalarTy);
+ // Check that the sorted pointer operands are consecutive.
+ if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {
+ if (CurrentOrder.empty()) {
+          // Original stores are consecutive and do not require reordering.
+ ++NumOpsWantToKeepOriginalOrder;
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
+ UserTreeIdx, ReuseShuffleIndicies);
+ TE->setOperandsInOrder();
+ buildTree_rec(Operands, Depth + 1, {TE, 0});
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
+ } else {
+ // Need to reorder.
+ auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
+ ++(I->getSecond());
+ TreeEntry *TE =
+ newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies, I->getFirst());
+ TE->setOperandsInOrder();
+ buildTree_rec(Operands, Depth + 1, {TE, 0});
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
+ }
+ return;
+ }
+ }
- ValueList Operands;
- for (Value *V : VL)
- Operands.push_back(cast<Instruction>(V)->getOperand(0));
- TE->setOperandsInOrder();
- buildTree_rec(Operands, Depth + 1, {TE, 0});
+ BS.cancelScheduling(VL, VL0);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
}
case Instruction::Call: {
@@ -2777,7 +3069,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
@@ -2806,27 +3098,29 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
- unsigned N;
- Type *EltTy;
- auto *ST = dyn_cast<StructType>(T);
- if (ST) {
- N = ST->getNumElements();
- EltTy = *ST->element_begin();
- } else {
- N = cast<ArrayType>(T)->getNumElements();
- EltTy = cast<ArrayType>(T)->getElementType();
+ unsigned N = 1;
+ Type *EltTy = T;
+
+ while (isa<CompositeType>(EltTy)) {
+ if (auto *ST = dyn_cast<StructType>(EltTy)) {
+ // Check that struct is homogeneous.
+ for (const auto *Ty : ST->elements())
+ if (Ty != *ST->element_begin())
+ return 0;
+ N *= ST->getNumElements();
+ EltTy = *ST->element_begin();
+ } else {
+ auto *SeqT = cast<SequentialType>(EltTy);
+ N *= SeqT->getNumElements();
+ EltTy = SeqT->getElementType();
+ }
}
+
if (!isValidElementType(EltTy))
return 0;
uint64_t VTSize = DL.getTypeStoreSizeInBits(VectorType::get(EltTy, N));
if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T))
return 0;
- if (ST) {
- // Check that struct is homogeneous.
- for (const auto *Ty : ST->elements())
- if (Ty != EltTy)
- return 0;
- }
return N;
}
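// Toy model (illustrative): the loop above flattens a nested homogeneous
// aggregate into a leaf count, e.g. {[4 x i16], [4 x i16]} -> 2 * 4 = 8
// leaves of i16, which is then size-checked against <8 x i16>. ToyType is a
// hypothetical stand-in for llvm::Type; homogeneity is assumed here, while
// the real code verifies it for structs.
struct ToyType {
  unsigned NumElements;       // 0 marks a scalar leaf
  const ToyType *ElementType; // element type when NumElements > 0
};
static unsigned flattenedLeafCount(const ToyType &T) {
  unsigned N = 1;
  for (const ToyType *Cur = &T; Cur->NumElements; Cur = Cur->ElementType)
    N *= Cur->NumElements; // multiply out each nesting level
  return N;
}
// ToyType I16{0, nullptr}; ToyType A4{4, &I16}; ToyType S2{2, &A4};
// flattenedLeafCount(S2) == 8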
@@ -2927,7 +3221,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
ReuseShuffleCost =
TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
}
- if (E->NeedToGather) {
+ if (E->State == TreeEntry::NeedToGather) {
if (allConstant(VL))
return 0;
if (isSplat(VL)) {
@@ -2995,7 +3289,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, Idx);
}
}
- if (!E->NeedToGather) {
+ if (E->State == TreeEntry::Vectorize) {
int DeadCost = ReuseShuffleCost;
if (!E->ReorderIndices.empty()) {
// TODO: Merge this shuffle with the ReuseShuffleCost.
@@ -3135,13 +3429,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
SmallVector<const Value *, 4> Operands(VL0->operand_values());
int ScalarEltCost = TTI->getArithmeticInstrCost(
- E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands);
+ E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
- int VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, Op1VK,
- Op2VK, Op1VP, Op2VP, Operands);
+ int VecCost = TTI->getArithmeticInstrCost(
+ E->getOpcode(), VecTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0);
return ReuseShuffleCost + VecCost - ScalarCost;
}
case Instruction::GetElementPtr: {
@@ -3162,7 +3456,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
- unsigned alignment = cast<LoadInst>(VL0)->getAlignment();
+ MaybeAlign alignment(cast<LoadInst>(VL0)->getAlignment());
int ScalarEltCost =
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
if (NeedToShuffleReuses) {
@@ -3180,15 +3474,22 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
- unsigned alignment = cast<StoreInst>(VL0)->getAlignment();
+ bool IsReorder = !E->ReorderIndices.empty();
+ auto *SI =
+ cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
+ MaybeAlign Alignment(SI->getAlignment());
int ScalarEltCost =
- TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
- if (NeedToShuffleReuses) {
- ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
- }
+ TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0, VL0);
+ if (NeedToShuffleReuses)
+ ReuseShuffleCost = -(ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
- int VecStCost =
- TTI->getMemoryOpCost(Instruction::Store, VecTy, alignment, 0, VL0);
+ int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
+ VecTy, Alignment, 0, VL0);
+ if (IsReorder) {
+ // TODO: Merge this shuffle with the ReuseShuffleCost.
+ VecStCost += TTI->getShuffleCost(
+ TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+ }
return ReuseShuffleCost + VecStCost - ScalarStCost;
}
case Instruction::Call: {
@@ -3274,20 +3575,22 @@ bool BoUpSLP::isFullyVectorizableTinyTree() const {
                    << VectorizableTree.size() << " is fully vectorizable.\n");
// We only handle trees of heights 1 and 2.
- if (VectorizableTree.size() == 1 && !VectorizableTree[0]->NeedToGather)
+ if (VectorizableTree.size() == 1 &&
+ VectorizableTree[0]->State == TreeEntry::Vectorize)
return true;
if (VectorizableTree.size() != 2)
return false;
// Handle splat and all-constants stores.
- if (!VectorizableTree[0]->NeedToGather &&
+ if (VectorizableTree[0]->State == TreeEntry::Vectorize &&
(allConstant(VectorizableTree[1]->Scalars) ||
isSplat(VectorizableTree[1]->Scalars)))
return true;
// Gathering cost would be too much for tiny trees.
- if (VectorizableTree[0]->NeedToGather || VectorizableTree[1]->NeedToGather)
+ if (VectorizableTree[0]->State == TreeEntry::NeedToGather ||
+ VectorizableTree[1]->State == TreeEntry::NeedToGather)
return false;
return true;
@@ -3397,7 +3700,7 @@ int BoUpSLP::getSpillCost() const {
continue;
}
- // Debug informations don't impact spill cost.
+ // Debug information does not impact spill cost.
if ((isa<CallInst>(&*PrevInstIt) &&
!isa<DbgInfoIntrinsic>(&*PrevInstIt)) &&
&*PrevInstIt != PrevInst)
@@ -3441,12 +3744,13 @@ int BoUpSLP::getTreeCost() {
// their uses. Since such an approach results in fewer total entries,
// existing heuristics based on tree size may yield different results.
//
- if (TE.NeedToGather &&
- std::any_of(
- std::next(VectorizableTree.begin(), I + 1), VectorizableTree.end(),
- [TE](const std::unique_ptr<TreeEntry> &EntryPtr) {
- return EntryPtr->NeedToGather && EntryPtr->isSame(TE.Scalars);
- }))
+ if (TE.State == TreeEntry::NeedToGather &&
+ std::any_of(std::next(VectorizableTree.begin(), I + 1),
+ VectorizableTree.end(),
+ [TE](const std::unique_ptr<TreeEntry> &EntryPtr) {
+ return EntryPtr->State == TreeEntry::NeedToGather &&
+ EntryPtr->isSame(TE.Scalars);
+ }))
continue;
int C = getEntryCost(&TE);
@@ -3538,13 +3842,15 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// Perform operand reordering on the instructions in VL and return the reordered
// operands in Left and Right.
-void BoUpSLP::reorderInputsAccordingToOpcode(
- ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right, const DataLayout &DL,
- ScalarEvolution &SE) {
+void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right,
+ const DataLayout &DL,
+ ScalarEvolution &SE,
+ const BoUpSLP &R) {
if (VL.empty())
return;
- VLOperands Ops(VL, DL, SE);
+ VLOperands Ops(VL, DL, SE, R);
// Reorder the operands in place.
Ops.reorder();
Left = Ops.getVL(0);
@@ -3735,7 +4041,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
- if (E->NeedToGather) {
+ if (E->State == TreeEntry::NeedToGather) {
setInsertPointAfterBundle(E);
auto *V = Gather(E->Scalars, VecTy);
if (NeedToShuffleReuses) {
@@ -3790,7 +4096,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::ExtractElement: {
- if (!E->NeedToGather) {
+ if (E->State == TreeEntry::Vectorize) {
Value *V = E->getSingleOperand(0);
if (!E->ReorderIndices.empty()) {
OrdersType Mask;
@@ -3823,7 +4129,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::ExtractValue: {
- if (!E->NeedToGather) {
+ if (E->State == TreeEntry::Vectorize) {
LoadInst *LI = cast<LoadInst>(E->getSingleOperand(0));
Builder.SetInsertPoint(LI);
PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
@@ -4050,15 +4356,25 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::Store: {
- StoreInst *SI = cast<StoreInst>(VL0);
+ bool IsReorder = !E->ReorderIndices.empty();
+ auto *SI = cast<StoreInst>(
+ IsReorder ? E->Scalars[E->ReorderIndices.front()] : VL0);
unsigned Alignment = SI->getAlignment();
unsigned AS = SI->getPointerAddressSpace();
setInsertPointAfterBundle(E);
Value *VecValue = vectorizeTree(E->getOperand(0));
+ if (IsReorder) {
+ OrdersType Mask;
+ inversePermutation(E->ReorderIndices, Mask);
+ VecValue = Builder.CreateShuffleVector(
+ VecValue, UndefValue::get(VecValue->getType()), E->ReorderIndices,
+ "reorder_shuffle");
+ }
Value *ScalarPtr = SI->getPointerOperand();
- Value *VecPtr = Builder.CreateBitCast(ScalarPtr, VecTy->getPointerTo(AS));
+ Value *VecPtr = Builder.CreateBitCast(
+ ScalarPtr, VecValue->getType()->getPointerTo(AS));
StoreInst *ST = Builder.CreateStore(VecValue, VecPtr);
// The pointer operand uses an in-tree scalar, so add the new BitCast to
@@ -4088,7 +4404,22 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
std::vector<Value *> OpVecs;
for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
++j) {
- Value *OpVec = vectorizeTree(E->getOperand(j));
+ ValueList &VL = E->getOperand(j);
+ // Need to cast all elements to the same type before vectorization to
+ // avoid crash.
+ Type *VL0Ty = VL0->getOperand(j)->getType();
+ Type *Ty = llvm::all_of(
+ VL, [VL0Ty](Value *V) { return VL0Ty == V->getType(); })
+ ? VL0Ty
+ : DL->getIndexType(cast<GetElementPtrInst>(VL0)
+ ->getPointerOperandType()
+ ->getScalarType());
+ for (Value *&V : VL) {
+ auto *CI = cast<ConstantInt>(V);
+ V = ConstantExpr::getIntegerCast(CI, Ty,
+ CI->getValue().isSignBitSet());
+ }
+ Value *OpVec = vectorizeTree(VL);
OpVecs.push_back(OpVec);
}
@@ -4284,7 +4615,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
continue;
TreeEntry *E = getTreeEntry(Scalar);
assert(E && "Invalid scalar");
- assert(!E->NeedToGather && "Extracting from a gather list");
+ assert(E->State == TreeEntry::Vectorize && "Extracting from a gather list");
Value *Vec = E->VectorizedValue;
assert(Vec && "Can't find vectorizable value");
@@ -4357,7 +4688,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
TreeEntry *Entry = TEPtr.get();
// No need to handle users of gathered values.
- if (Entry->NeedToGather)
+ if (Entry->State == TreeEntry::NeedToGather)
continue;
assert(Entry->VectorizedValue && "Can't find vectorizable value");
@@ -5332,125 +5663,140 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
}
bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
- unsigned VecRegSize) {
- const unsigned ChainLen = Chain.size();
- LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
+ unsigned Idx) {
+ LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << Chain.size()
<< "\n");
const unsigned Sz = R.getVectorElementSize(Chain[0]);
- const unsigned VF = VecRegSize / Sz;
+ const unsigned MinVF = R.getMinVecRegSize() / Sz;
+ unsigned VF = Chain.size();
- if (!isPowerOf2_32(Sz) || VF < 2)
+ if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF)
return false;
- bool Changed = false;
- // Look for profitable vectorizable trees at all offsets, starting at zero.
- for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) {
-
- ArrayRef<Value *> Operands = Chain.slice(i, VF);
- // Check that a previous iteration of this loop did not delete the Value.
- if (llvm::any_of(Operands, [&R](Value *V) {
- auto *I = dyn_cast<Instruction>(V);
- return I && R.isDeleted(I);
- }))
- continue;
-
- LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
- << "\n");
-
- R.buildTree(Operands);
- if (R.isTreeTinyAndNotFullyVectorizable())
- continue;
+ LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << Idx
+ << "\n");
- R.computeMinimumValueSizes();
+ R.buildTree(Chain);
+ Optional<ArrayRef<unsigned>> Order = R.bestOrder();
+  // TODO: Handle orders of size less than the number of elements in the
+  // vector.
+ if (Order && Order->size() == Chain.size()) {
+ // TODO: reorder tree nodes without tree rebuilding.
+ SmallVector<Value *, 4> ReorderedOps(Chain.rbegin(), Chain.rend());
+ llvm::transform(*Order, ReorderedOps.begin(),
+ [Chain](const unsigned Idx) { return Chain[Idx]; });
+ R.buildTree(ReorderedOps);
+ }
+ if (R.isTreeTinyAndNotFullyVectorizable())
+ return false;
- int Cost = R.getTreeCost();
+ R.computeMinimumValueSizes();
- LLVM_DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF
- << "\n");
- if (Cost < -SLPCostThreshold) {
- LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+ int Cost = R.getTreeCost();
- using namespace ore;
+ LLVM_DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
+ if (Cost < -SLPCostThreshold) {
+ LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
- R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized",
- cast<StoreInst>(Chain[i]))
- << "Stores SLP vectorized with cost " << NV("Cost", Cost)
- << " and with tree size "
- << NV("TreeSize", R.getTreeSize()));
+ using namespace ore;
- R.vectorizeTree();
+ R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized",
+ cast<StoreInst>(Chain[0]))
+ << "Stores SLP vectorized with cost " << NV("Cost", Cost)
+ << " and with tree size "
+ << NV("TreeSize", R.getTreeSize()));
- // Move to the next bundle.
- i += VF - 1;
- Changed = true;
- }
+ R.vectorizeTree();
+ return true;
}
- return Changed;
+ return false;
}
bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
BoUpSLP &R) {
- SetVector<StoreInst *> Heads;
- SmallDenseSet<StoreInst *> Tails;
- SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
-
// We may run into multiple chains that merge into a single chain. We mark the
// stores that we vectorized so that we don't visit the same store twice.
BoUpSLP::ValueSet VectorizedStores;
bool Changed = false;
- auto &&FindConsecutiveAccess =
- [this, &Stores, &Heads, &Tails, &ConsecutiveChain] (int K, int Idx) {
- if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE))
- return false;
-
- Tails.insert(Stores[Idx]);
- Heads.insert(Stores[K]);
- ConsecutiveChain[Stores[K]] = Stores[Idx];
- return true;
- };
+ int E = Stores.size();
+ SmallBitVector Tails(E, false);
+ SmallVector<int, 16> ConsecutiveChain(E, E + 1);
+ int MaxIter = MaxStoreLookup.getValue();
+ int IterCnt;
+ auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter,
+ &ConsecutiveChain](int K, int Idx) {
+ if (IterCnt >= MaxIter)
+ return true;
+ ++IterCnt;
+ if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE))
+ return false;
+ Tails.set(Idx);
+ ConsecutiveChain[K] = Idx;
+ return true;
+ };
// Do a quadratic search on all of the given stores in reverse order and find
// all of the pairs of stores that follow each other.
- int E = Stores.size();
for (int Idx = E - 1; Idx >= 0; --Idx) {
// If a store has multiple consecutive store candidates, search according
// to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
    // This is because pairing with an immediately succeeding or preceding
    // candidate usually creates the best chance to find an SLP vectorization
    // opportunity.
- for (int Offset = 1, F = std::max(E - Idx, Idx + 1); Offset < F; ++Offset)
+ const int MaxLookDepth = std::max(E - Idx, Idx + 1);
+ IterCnt = 0;
+ for (int Offset = 1, F = MaxLookDepth; Offset < F; ++Offset)
if ((Idx >= Offset && FindConsecutiveAccess(Idx - Offset, Idx)) ||
(Idx + Offset < E && FindConsecutiveAccess(Idx + Offset, Idx)))
break;
}
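// Illustration (hypothetical helper, not patch code) of the probe order used
// above for a given Idx: Idx-1, Idx+1, Idx-2, Idx+2, ... clipped to [0, E).
// The real loop stops at the first accepted pair or once MaxIter is reached.
#include <algorithm>
#include <cstdio>
static void printProbeOrder(int Idx, int E) {
  for (int Offset = 1, F = std::max(E - Idx, Idx + 1); Offset < F; ++Offset) {
    if (Idx >= Offset)
      std::printf("%d ", Idx - Offset);
    if (Idx + Offset < E)
      std::printf("%d ", Idx + Offset);
  }
  std::printf("\n"); // e.g. Idx=3, E=6 prints: 2 4 1 5 0
}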
// For stores that start but don't end a link in the chain:
- for (auto *SI : llvm::reverse(Heads)) {
- if (Tails.count(SI))
+ for (int Cnt = E; Cnt > 0; --Cnt) {
+ int I = Cnt - 1;
+ if (ConsecutiveChain[I] == E + 1 || Tails.test(I))
continue;
-
// We found a store instr that starts a chain. Now follow the chain and try
// to vectorize it.
BoUpSLP::ValueList Operands;
- StoreInst *I = SI;
// Collect the chain into a list.
- while ((Tails.count(I) || Heads.count(I)) && !VectorizedStores.count(I)) {
- Operands.push_back(I);
+ while (I != E + 1 && !VectorizedStores.count(Stores[I])) {
+ Operands.push_back(Stores[I]);
// Move to the next value in the chain.
I = ConsecutiveChain[I];
}
+    // If the register size is not a multiple of the element size, we are done.
+ unsigned MaxVecRegSize = R.getMaxVecRegSize();
+ unsigned EltSize = R.getVectorElementSize(Stores[0]);
+ if (MaxVecRegSize % EltSize != 0)
+ continue;
+
+ unsigned MaxElts = MaxVecRegSize / EltSize;
// FIXME: Is division-by-2 the correct step? Should we assert that the
// register size is a power-of-2?
- for (unsigned Size = R.getMaxVecRegSize(); Size >= R.getMinVecRegSize();
- Size /= 2) {
- if (vectorizeStoreChain(Operands, R, Size)) {
- // Mark the vectorized stores so that we don't vectorize them again.
- VectorizedStores.insert(Operands.begin(), Operands.end());
- Changed = true;
- break;
+ unsigned StartIdx = 0;
+ for (unsigned Size = llvm::PowerOf2Ceil(MaxElts); Size >= 2; Size /= 2) {
+ for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
+ ArrayRef<Value *> Slice = makeArrayRef(Operands).slice(Cnt, Size);
+ if (!VectorizedStores.count(Slice.front()) &&
+ !VectorizedStores.count(Slice.back()) &&
+ vectorizeStoreChain(Slice, R, Cnt)) {
+ // Mark the vectorized stores so that we don't vectorize them again.
+ VectorizedStores.insert(Slice.begin(), Slice.end());
+ Changed = true;
+          // If we vectorized the initial block, there is no need to try to
+          // vectorize it again.
+ if (Cnt == StartIdx)
+ StartIdx += Size;
+ Cnt += Size;
+ continue;
+ }
+ ++Cnt;
}
+ // Check if the whole array was vectorized already - exit.
+ if (StartIdx >= Operands.size())
+ break;
}
}
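
The loop nest above walks the chain with progressively smaller power-of-two windows, advancing StartIdx past already-vectorized prefixes. A standalone sketch of that schedule, with a fabricated success predicate standing in for vectorizeStoreChain:

    #include <cstdio>

    // Smallest power of two >= X (a sketch of llvm::PowerOf2Ceil).
    static unsigned PowerOf2Ceil(unsigned X) {
      unsigned P = 1;
      while (P < X)
        P <<= 1;
      return P;
    }

    int main() {
      unsigned NumStores = 11; // hypothetical chain length
      unsigned MaxElts = 8;    // hypothetical MaxVecRegSize / EltSize
      unsigned StartIdx = 0;
      for (unsigned Size = PowerOf2Ceil(MaxElts); Size >= 2; Size /= 2) {
        for (unsigned Cnt = StartIdx; Cnt + Size <= NumStores;) {
          printf("try slice [%u, %u)\n", Cnt, Cnt + Size);
          bool Vectorized = (Size <= 4); // pretend only width <= 4 succeeds
          if (Vectorized) {
            if (Cnt == StartIdx)
              StartIdx += Size; // never revisit a vectorized prefix
            Cnt += Size;
            continue;
          }
          ++Cnt; // slide the window by one and retry
        }
        if (StartIdx >= NumStores)
          break; // whole chain consumed
      }
      return 0;
    }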
@@ -5835,38 +6181,36 @@ class HorizontalReduction {
explicit operator bool() const { return Opcode; }
- /// Get the index of the first operand.
- unsigned getFirstOperandIndex() const {
- assert(!!*this && "The opcode is not set.");
+ /// Return true if this operation is any kind of minimum or maximum.
+ bool isMinMax() const {
switch (Kind) {
+ case RK_Arithmetic:
+ return false;
case RK_Min:
- case RK_UMin:
case RK_Max:
+ case RK_UMin:
case RK_UMax:
- return 1;
- case RK_Arithmetic:
+ return true;
case RK_None:
break;
}
- return 0;
+ llvm_unreachable("Reduction kind is not set");
+ }
+
+ /// Get the index of the first operand.
+ unsigned getFirstOperandIndex() const {
+ assert(!!*this && "The opcode is not set.");
+ // We allow calling this before 'Kind' is set, so handle that specially.
+ if (Kind == RK_None)
+ return 0;
+ return isMinMax() ? 1 : 0;
}
/// Total number of operands in the reduction operation.
unsigned getNumberOfOperands() const {
assert(Kind != RK_None && !!*this && LHS && RHS &&
"Expected reduction operation.");
- switch (Kind) {
- case RK_Arithmetic:
- return 2;
- case RK_Min:
- case RK_UMin:
- case RK_Max:
- case RK_UMax:
- return 3;
- case RK_None:
- break;
- }
- llvm_unreachable("Reduction kind is not set");
+ return isMinMax() ? 3 : 2;
}
/// Checks if the operation has the same parent as \p P.
@@ -5875,79 +6219,46 @@ class HorizontalReduction {
"Expected reduction operation.");
if (!IsRedOp)
return I->getParent() == P;
- switch (Kind) {
- case RK_Arithmetic:
- // Arithmetic reduction operation must be used once only.
- return I->getParent() == P;
- case RK_Min:
- case RK_UMin:
- case RK_Max:
- case RK_UMax: {
+ if (isMinMax()) {
// SelectInst must be used twice while the condition op must have single
// use only.
auto *Cmp = cast<Instruction>(cast<SelectInst>(I)->getCondition());
return I->getParent() == P && Cmp && Cmp->getParent() == P;
}
- case RK_None:
- break;
- }
- llvm_unreachable("Reduction kind is not set");
+ // Arithmetic reduction operation must be used once only.
+ return I->getParent() == P;
}
+
/// Expected number of uses for reduction operations/reduced values.
bool hasRequiredNumberOfUses(Instruction *I, bool IsReductionOp) const {
assert(Kind != RK_None && !!*this && LHS && RHS &&
"Expected reduction operation.");
- switch (Kind) {
- case RK_Arithmetic:
- return I->hasOneUse();
- case RK_Min:
- case RK_UMin:
- case RK_Max:
- case RK_UMax:
+ if (isMinMax())
return I->hasNUses(2) &&
(!IsReductionOp ||
cast<SelectInst>(I)->getCondition()->hasOneUse());
- case RK_None:
- break;
- }
- llvm_unreachable("Reduction kind is not set");
+ return I->hasOneUse();
}
/// Initializes the list of reduction operations.
void initReductionOps(ReductionOpsListType &ReductionOps) {
assert(Kind != RK_None && !!*this && LHS && RHS &&
"Expected reduction operation.");
- switch (Kind) {
- case RK_Arithmetic:
- ReductionOps.assign(1, ReductionOpsType());
- break;
- case RK_Min:
- case RK_UMin:
- case RK_Max:
- case RK_UMax:
+ if (isMinMax())
ReductionOps.assign(2, ReductionOpsType());
- break;
- case RK_None:
- llvm_unreachable("Reduction kind is not set");
- }
+ else
+ ReductionOps.assign(1, ReductionOpsType());
}
+
/// Add all reduction operations for the reduction instruction \p I.
void addReductionOps(Instruction *I, ReductionOpsListType &ReductionOps) {
assert(Kind != RK_None && !!*this && LHS && RHS &&
"Expected reduction operation.");
- switch (Kind) {
- case RK_Arithmetic:
- ReductionOps[0].emplace_back(I);
- break;
- case RK_Min:
- case RK_UMin:
- case RK_Max:
- case RK_UMax:
+ if (isMinMax()) {
ReductionOps[0].emplace_back(cast<SelectInst>(I)->getCondition());
ReductionOps[1].emplace_back(I);
- break;
- case RK_None:
- llvm_unreachable("Reduction kind is not set");
+ } else {
+ ReductionOps[0].emplace_back(I);
}
}
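
The isMinMax() consolidation works because a min/max reduction is a two-instruction pattern, a compare feeding a select, while an arithmetic reduction is a single binary operator; that is why the former gets two ReductionOps lists and three operands. The two scalar shapes, illustrated in plain C++:

    #include <cstdio>

    // Shapes HorizontalReduction distinguishes (illustrative only).
    static int addReduce(int A, int B) {
      return A + B; // RK_Arithmetic: one instruction, two operands
    }

    static int smaxReduce(int A, int B) {
      bool Cmp = A > B;   // condition: tracked in ReductionOps[0]
      return Cmp ? A : B; // select:    tracked in ReductionOps[1]
    }

    int main() {
      printf("%d %d\n", addReduce(3, 4), smaxReduce(3, 4));
      return 0;
    }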
@@ -5980,12 +6291,12 @@ class HorizontalReduction {
/// Checks if two operation data are both a reduction op or both a reduced
/// value.
- bool operator==(const OperationData &OD) {
+ bool operator==(const OperationData &OD) const {
assert(((Kind != OD.Kind) || ((!LHS == !OD.LHS) && (!RHS == !OD.RHS))) &&
"One of the comparing operations is incorrect.");
return this == &OD || (Kind == OD.Kind && Opcode == OD.Opcode);
}
- bool operator!=(const OperationData &OD) { return !(*this == OD); }
+ bool operator!=(const OperationData &OD) const { return !(*this == OD); }
void clear() {
Opcode = 0;
LHS = nullptr;
@@ -6005,18 +6316,7 @@ class HorizontalReduction {
Value *getLHS() const { return LHS; }
Value *getRHS() const { return RHS; }
Type *getConditionType() const {
- switch (Kind) {
- case RK_Arithmetic:
- return nullptr;
- case RK_Min:
- case RK_Max:
- case RK_UMin:
- case RK_UMax:
- return CmpInst::makeCmpResultType(LHS->getType());
- case RK_None:
- break;
- }
- llvm_unreachable("Reduction kind is not set");
+ return isMinMax() ? CmpInst::makeCmpResultType(LHS->getType()) : nullptr;
}
/// Creates reduction operation with the current opcode with the IR flags
@@ -6400,6 +6700,18 @@ public:
assert(Pair.first && "DebugLoc must be set.");
ExternallyUsedValues[Pair.second].push_back(Pair.first);
}
+
+ // The compare instruction of a min/max is the insertion point for new
+ // instructions and may be replaced with a new compare instruction.
+ auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
+ assert(isa<SelectInst>(RdxRootInst) &&
+ "Expected min/max reduction to have select root instruction");
+ Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition();
+ assert(isa<Instruction>(ScalarCond) &&
+ "Expected min/max reduction to have compare condition");
+ return cast<Instruction>(ScalarCond);
+ };
+
// The reduction root is used as the insertion point for new instructions,
// so set it as externally used to prevent it from being deleted.
ExternallyUsedValues[ReductionRoot];
@@ -6455,8 +6767,14 @@ public:
DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
- // Emit a reduction.
- Builder.SetInsertPoint(cast<Instruction>(ReductionRoot));
+ // Emit a reduction. For min/max, the root is a select, but the insertion
+ // point is the compare condition of that select.
+ Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
+ if (ReductionData.isMinMax())
+ Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst));
+ else
+ Builder.SetInsertPoint(RdxRootInst);
+
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
if (VectorizedTree) {
@@ -6492,8 +6810,20 @@ public:
VectorizedTree = VectReductionData.createOp(Builder, "op.extra", I);
}
}
- // Update users.
+
+ // Update users. For a min/max reduction that ends with a compare and
+ // select, we also have to RAUW for the compare instruction feeding the
+ // reduction root. That's because the original compare may have extra uses
+ // besides the final select of the reduction.
+ if (ReductionData.isMinMax()) {
+ if (auto *VecSelect = dyn_cast<SelectInst>(VectorizedTree)) {
+ Instruction *ScalarCmp =
+ getCmpForMinMaxReduction(cast<Instruction>(ReductionRoot));
+ ScalarCmp->replaceAllUsesWith(VecSelect->getCondition());
+ }
+ }
ReductionRoot->replaceAllUsesWith(VectorizedTree);
+
// Mark all scalar reduction ops for deletion, they are replaced by the
// vector reductions.
V.eraseInstructions(IgnoreList);
@@ -6619,45 +6949,54 @@ private:
/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
-/// starting from the last insertelement instruction.
+/// starting from the last insertelement or insertvalue instruction.
///
-/// Returns true if it matches
-static bool findBuildVector(InsertElementInst *LastInsertElem,
- TargetTransformInfo *TTI,
- SmallVectorImpl<Value *> &BuildVectorOpds,
- int &UserCost) {
- UserCost = 0;
- Value *V = nullptr;
- do {
- if (auto *CI = dyn_cast<ConstantInt>(LastInsertElem->getOperand(2))) {
- UserCost += TTI->getVectorInstrCost(Instruction::InsertElement,
- LastInsertElem->getType(),
- CI->getZExtValue());
- }
- BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
- V = LastInsertElem->getOperand(0);
- if (isa<UndefValue>(V))
- break;
- LastInsertElem = dyn_cast<InsertElementInst>(V);
- if (!LastInsertElem || !LastInsertElem->hasOneUse())
- return false;
- } while (true);
- std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
- return true;
-}
-
-/// Like findBuildVector, but looks for construction of aggregate.
+/// Also recognize aggregates like {<2 x float>, <2 x float>},
+/// {{float, float}, {float, float}}, [2 x {float, float}] and so on.
+/// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.
+///
+/// Assume LastInsertInst is of InsertElementInst or InsertValueInst type.
///
/// \return true if it matches.
-static bool findBuildAggregate(InsertValueInst *IV,
- SmallVectorImpl<Value *> &BuildVectorOpds) {
+static bool findBuildAggregate(Value *LastInsertInst, TargetTransformInfo *TTI,
+ SmallVectorImpl<Value *> &BuildVectorOpds,
+ int &UserCost) {
+ assert((isa<InsertElementInst>(LastInsertInst) ||
+ isa<InsertValueInst>(LastInsertInst)) &&
+ "Expected insertelement or insertvalue instruction!");
+ UserCost = 0;
do {
- BuildVectorOpds.push_back(IV->getInsertedValueOperand());
- Value *V = IV->getAggregateOperand();
- if (isa<UndefValue>(V))
+ Value *InsertedOperand;
+ if (auto *IE = dyn_cast<InsertElementInst>(LastInsertInst)) {
+ InsertedOperand = IE->getOperand(1);
+ LastInsertInst = IE->getOperand(0);
+ if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
+ UserCost += TTI->getVectorInstrCost(Instruction::InsertElement,
+ IE->getType(), CI->getZExtValue());
+ }
+ } else {
+ auto *IV = cast<InsertValueInst>(LastInsertInst);
+ InsertedOperand = IV->getInsertedValueOperand();
+ LastInsertInst = IV->getAggregateOperand();
+ }
+ if (isa<InsertElementInst>(InsertedOperand) ||
+ isa<InsertValueInst>(InsertedOperand)) {
+ int TmpUserCost;
+ SmallVector<Value *, 8> TmpBuildVectorOpds;
+ if (!findBuildAggregate(InsertedOperand, TTI, TmpBuildVectorOpds,
+ TmpUserCost))
+ return false;
+ BuildVectorOpds.append(TmpBuildVectorOpds.rbegin(),
+ TmpBuildVectorOpds.rend());
+ UserCost += TmpUserCost;
+ } else {
+ BuildVectorOpds.push_back(InsertedOperand);
+ }
+ if (isa<UndefValue>(LastInsertInst))
break;
- IV = dyn_cast<InsertValueInst>(V);
- if (!IV || !IV->hasOneUse())
+ if ((!isa<InsertValueInst>(LastInsertInst) &&
+ !isa<InsertElementInst>(LastInsertInst)) ||
+ !LastInsertInst->hasOneUse())
return false;
} while (true);
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
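
The rewritten findBuildAggregate recurses whenever the inserted operand is itself an insertelement/insertvalue chain, appending the nested operands in order. A toy version of the same flattening over an explicit nested structure (the Node type is invented for illustration):

    #include <cstdio>
    #include <vector>

    // Toy stand-in for "operand is itself an aggregate being built".
    struct Node {
      bool IsAggregate;
      int Leaf;                // valid when !IsAggregate
      std::vector<Node> Elems; // valid when IsAggregate
    };

    // Recursively append all scalar leaves, mirroring how nested
    // insertvalue/insertelement chains contribute TmpBuildVectorOpds.
    static void flatten(const Node &N, std::vector<int> &Opds) {
      if (!N.IsAggregate) {
        Opds.push_back(N.Leaf);
        return;
      }
      for (const Node &E : N.Elems)
        flatten(E, Opds);
    }

    int main() {
      // {{1, 2}, {3, 4}}, like the {<2 x float>, <2 x float>} case above.
      Node Root{true, 0,
                {Node{true, 0, {Node{false, 1, {}}, Node{false, 2, {}}}},
                 Node{true, 0, {Node{false, 3, {}}, Node{false, 4, {}}}}}};
      std::vector<int> Opds;
      flatten(Root, Opds);
      for (int V : Opds)
        printf("%d ", V); // prints: 1 2 3 4
      printf("\n");
      return 0;
    }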
@@ -6825,25 +7164,26 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
BasicBlock *BB, BoUpSLP &R) {
+ int UserCost = 0;
const DataLayout &DL = BB->getModule()->getDataLayout();
if (!R.canMapToVector(IVI->getType(), DL))
return false;
SmallVector<Value *, 16> BuildVectorOpds;
- if (!findBuildAggregate(IVI, BuildVectorOpds))
+ if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, UserCost))
return false;
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
// An aggregate value is unlikely to be processed in a vector register; we
// need to extract scalars into scalar registers, so NeedExtraction is set
// to true.
- return tryToVectorizeList(BuildVectorOpds, R);
+ return tryToVectorizeList(BuildVectorOpds, R, UserCost);
}
bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
BasicBlock *BB, BoUpSLP &R) {
int UserCost;
SmallVector<Value *, 16> BuildVectorOpds;
- if (!findBuildVector(IEI, TTI, BuildVectorOpds, UserCost) ||
+ if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, UserCost) ||
(llvm::all_of(BuildVectorOpds,
[](Value *V) { return isa<ExtractElementInst>(V); }) &&
isShuffle(BuildVectorOpds)))
@@ -7118,14 +7458,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
<< it->second.size() << ".\n");
- // Process the stores in chunks of 16.
- // TODO: The limit of 16 inhibits greater vectorization factors.
- // For example, AVX2 supports v32i8. Increasing this limit, however,
- // may cause a significant compile-time increase.
- for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI += 16) {
- unsigned Len = std::min<unsigned>(CE - CI, 16);
- Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len), R);
- }
+ Changed |= vectorizeStores(it->second, R);
}
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 0ca6a6b93cfd..598fb00e956e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -47,6 +47,24 @@ class VPRecipeBuilder {
EdgeMaskCacheTy EdgeMaskCache;
BlockMaskCacheTy BlockMaskCache;
+ // VPlan-VPlan transformations support: Hold a mapping from ingredients to
+ // their recipe. To save on memory, only do so for selected ingredients,
+ // marked by having a nullptr entry in this map. If those ingredients get a
+ // VPWidenRecipe, also avoid compressing other ingredients into it to avoid
+ // having to split such recipes later.
+ DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
+ VPWidenRecipe *LastExtensibleRecipe = nullptr;
+
+ /// Set the recipe created for the given ingredient. This operation is a
+ /// no-op for ingredients that were not marked using a nullptr entry in the
+ /// map.
+ void setRecipe(Instruction *I, VPRecipeBase *R) {
+ if (!Ingredient2Recipe.count(I))
+ return;
+ assert(Ingredient2Recipe[I] == nullptr &&
+ "Recipe already set for ingredient");
+ Ingredient2Recipe[I] = R;
+ }
+
public:
/// A helper function that computes the predicate of the block BB, assuming
/// that the header block of the loop is set to True. It returns the *entry*
@@ -57,16 +75,22 @@ public:
/// and DST.
VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan);
- /// Check if \I belongs to an Interleave Group within the given VF \p Range,
- /// \return true in the first returned value if so and false otherwise.
- /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG
- /// for \p Range.Start, and provide it as the second returned value.
- /// Note that if \I is an adjunct member of an IG for \p Range.Start, the
- /// \return value is <true, nullptr>, as it is handled by another recipe.
- /// \p Range.End may be decreased to ensure same decision from \p Range.Start
- /// to \p Range.End.
- VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range,
- VPlanPtr &Plan);
+ /// Mark the given ingredient for recording its recipe once one is created
+ /// for it.
+ void recordRecipeOf(Instruction *I) {
+ assert((!Ingredient2Recipe.count(I) || Ingredient2Recipe[I] == nullptr) &&
+ "Recipe already set for ingredient");
+ Ingredient2Recipe[I] = nullptr;
+ }
+
+ /// Return the recipe created for the given ingredient.
+ VPRecipeBase *getRecipe(Instruction *I) {
+ assert(Ingredient2Recipe.count(I) &&
+ "Recording this ingredients recipe was not requested");
+ assert(Ingredient2Recipe[I] != nullptr &&
+ "Ingredient doesn't have a recipe");
+ return Ingredient2Recipe[I];
+ }
/// Check if \p I is a memory instruction to be widened for \p Range.Start and
/// potentially masked. Such instructions are handled by a recipe that takes
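
Taken together, the three members above form a small protocol: recordRecipeOf marks an ingredient before recipes are built, setRecipe fills the slot (and is a no-op for unmarked ingredients), and getRecipe reads it back. A hypothetical usage sketch with toy types:

    #include <cassert>
    #include <cstdio>
    #include <map>

    struct Recipe { const char *Name; };

    // Toy model of Ingredient2Recipe: a nullptr entry means "please record
    // the recipe for this key once it exists".
    struct RecipeBuilder {
      std::map<int, Recipe *> Ingredient2Recipe;

      void recordRecipeOf(int I) { Ingredient2Recipe[I] = nullptr; }

      void setRecipe(int I, Recipe *R) {
        auto It = Ingredient2Recipe.find(I);
        if (It == Ingredient2Recipe.end())
          return; // recording was not requested: no-op
        assert(It->second == nullptr && "Recipe already set for ingredient");
        It->second = R;
      }

      Recipe *getRecipe(int I) {
        assert(Ingredient2Recipe.count(I) && Ingredient2Recipe[I] != nullptr);
        return Ingredient2Recipe[I];
      }
    };

    int main() {
      RecipeBuilder B;
      Recipe Widen{"widen"};
      B.recordRecipeOf(42);   // mark before recipe construction
      B.setRecipe(7, &Widen); // ignored: 7 was never marked
      B.setRecipe(42, &Widen);
      printf("%s\n", B.getRecipe(42)->Name); // prints: widen
      return 0;
    }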
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4b80d1fb20aa..f1c708720ccf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GenericDomTreeConstruction.h"
@@ -275,18 +276,35 @@ void VPRegionBlock::execute(VPTransformState *State) {
}
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
+ assert(!Parent && "Recipe already in some VPBasicBlock");
+ assert(InsertPos->getParent() &&
+ "Insertion position not in any VPBasicBlock");
Parent = InsertPos->getParent();
Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}
+void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
+ assert(!Parent && "Recipe already in some VPBasicBlock");
+ assert(InsertPos->getParent() &&
+ "Insertion position not in any VPBasicBlock");
+ Parent = InsertPos->getParent();
+ Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
+}
+
+void VPRecipeBase::removeFromParent() {
+ assert(getParent() && "Recipe not in any VPBasicBlock");
+ getParent()->getRecipeList().remove(getIterator());
+ Parent = nullptr;
+}
+
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
+ assert(getParent() && "Recipe not in any VPBasicBlock");
return getParent()->getRecipeList().erase(getIterator());
}
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
- InsertPos->getParent()->getRecipeList().splice(
- std::next(InsertPos->getIterator()), getParent()->getRecipeList(),
- getIterator());
+ removeFromParent();
+ insertAfter(InsertPos);
}
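
moveAfter is now unlink-then-relink rather than a single splice, so each half can maintain and assert the Parent invariant separately. For comparison, the closest std::list analogue performs both halves in one splice call:

    #include <cstdio>
    #include <list>

    int main() {
      std::list<int> BB1 = {1, 2, 3};
      std::list<int> BB2 = {10, 20};
      // "removeFromParent + insertAfter": move element 2 after 10 in BB2.
      auto It = std::next(BB1.begin());  // points at 2
      auto Pos = std::next(BB2.begin()); // insert before 20 == after 10
      BB2.splice(Pos, BB1, It);          // unlink from BB1, link into BB2
      for (int V : BB2)
        printf("%d ", V); // prints: 10 2 20
      printf("\n");
      return 0;
    }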
void VPInstruction::generateInstruction(VPTransformState &State,
@@ -447,14 +465,20 @@ void VPlan::execute(VPTransformState *State) {
// We do not attempt to preserve DT for outer loop vectorization currently.
if (!EnableVPlanNativePath)
- updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
+ updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB,
+ L->getExitBlock());
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+void VPlan::dump() const { dbgs() << *this << '\n'; }
+#endif
+
void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
- BasicBlock *LoopLatchBB) {
+ BasicBlock *LoopLatchBB,
+ BasicBlock *LoopExitBB) {
BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
- DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB);
// The vector body may be more than a single basic-block by this point.
// Update the dominator tree information inside the vector body by propagating
// it from header to latch, expecting only triangular control-flow, if any.
@@ -485,6 +509,9 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
DT->addNewBlock(InterimSucc, BB);
DT->addNewBlock(PostDomSucc, BB);
}
+ // Latch block is a new dominator for the loop exit.
+ DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast));
}
const Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
@@ -509,8 +536,7 @@ void VPlanPrinter::dump() {
if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) {
OS << ", where:";
if (Plan.BackedgeTakenCount)
- OS << "\\n"
- << *Plan.getOrCreateBackedgeTakenCount() << " := BackedgeTakenCount";
+ OS << "\\n" << *Plan.BackedgeTakenCount << " := BackedgeTakenCount";
for (auto Entry : Plan.Value2VPValue) {
OS << "\\n" << *Entry.second;
OS << DOT::EscapeString(" := ");
@@ -522,7 +548,7 @@ void VPlanPrinter::dump() {
OS << "edge [fontname=Courier, fontsize=30]\n";
OS << "compound=true\n";
- for (VPBlockBase *Block : depth_first(Plan.getEntry()))
+ for (const VPBlockBase *Block : depth_first(Plan.getEntry()))
dumpBlock(Block);
OS << "}\n";
@@ -661,6 +687,16 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O,
O << " " << VPlanIngredient(IV) << "\\l\"";
}
+void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent) const {
+ O << " +\n" << Indent << "\"WIDEN-GEP ";
+ O << (IsPtrLoopInvariant ? "Inv" : "Var");
+ size_t IndicesNumber = IsIndexLoopInvariant.size();
+ for (size_t I = 0; I < IndicesNumber; ++I)
+ O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
+ O << "\\l\"";
+ O << " +\n" << Indent << "\" " << VPlanIngredient(GEP) << "\\l\"";
+}
+
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent) const {
O << " +\n" << Indent << "\"WIDEN-PHI " << VPlanIngredient(Phi) << "\\l\"";
}
@@ -703,9 +739,12 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent) const {
void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
const Twine &Indent) const {
O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr);
- if (User) {
+ O << ", ";
+ getAddr()->printAsOperand(O);
+ VPValue *Mask = getMask();
+ if (Mask) {
O << ", ";
- User->getOperand(0)->printAsOperand(O);
+ Mask->printAsOperand(O);
}
O << "\\l\"";
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
index 44d8a198f27e..c65abc3639d7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -31,6 +31,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -226,6 +227,8 @@ public:
struct VPCallback {
virtual ~VPCallback() {}
virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
+ virtual Value *getOrCreateScalarValue(Value *V,
+ const VPIteration &Instance) = 0;
};
/// VPTransformState holds information passed down when "executing" a VPlan,
@@ -268,6 +271,13 @@ struct VPTransformState {
return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
}
+ /// Get the generated Value for a given VPValue and given Part and Lane. Note
+ /// that as per-lane Defs are still created by ILV and managed in its ValueMap
+ /// this method currently just delegates the call to ILV.
+ Value *get(VPValue *Def, const VPIteration &Instance) {
+ return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance);
+ }
+
/// Set the generated Value for a given VPValue and a given Part.
void set(VPValue *Def, Value *V, unsigned Part) {
if (!Data.PerPartOutput.count(Def)) {
@@ -567,6 +577,7 @@ public:
/// instructions.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
friend VPBasicBlock;
+ friend class VPBlockUtils;
private:
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
@@ -586,6 +597,7 @@ public:
VPInterleaveSC,
VPPredInstPHISC,
VPReplicateSC,
+ VPWidenGEPSC,
VPWidenIntOrFpInductionSC,
VPWidenMemoryInstructionSC,
VPWidenPHISC,
@@ -615,10 +627,18 @@ public:
/// the specified recipe.
void insertBefore(VPRecipeBase *InsertPos);
+ /// Insert an unlinked Recipe into a basic block immediately after
+ /// the specified Recipe.
+ void insertAfter(VPRecipeBase *InsertPos);
+
/// Unlink this recipe from its current VPBasicBlock and insert it into
/// the VPBasicBlock that MovePos lives in, right after MovePos.
void moveAfter(VPRecipeBase *MovePos);
+ /// This method unlinks 'this' from the containing basic block, but does not
+ /// delete it.
+ void removeFromParent();
+
/// This method unlinks 'this' from the containing basic block and deletes it.
///
/// \returns an iterator pointing to the element after the erased one
@@ -630,7 +650,6 @@ public:
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPUser, public VPRecipeBase {
- friend class VPlanHCFGTransforms;
friend class VPlanSlp;
public:
@@ -740,6 +759,36 @@ public:
void print(raw_ostream &O, const Twine &Indent) const override;
};
+/// A recipe for handling GEP instructions.
+class VPWidenGEPRecipe : public VPRecipeBase {
+private:
+ GetElementPtrInst *GEP;
+ bool IsPtrLoopInvariant;
+ SmallBitVector IsIndexLoopInvariant;
+
+public:
+ VPWidenGEPRecipe(GetElementPtrInst *GEP, Loop *OrigLoop)
+ : VPRecipeBase(VPWidenGEPSC), GEP(GEP),
+ IsIndexLoopInvariant(GEP->getNumIndices(), false) {
+ IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
+ for (auto Index : enumerate(GEP->indices()))
+ IsIndexLoopInvariant[Index.index()] =
+ OrigLoop->isLoopInvariant(Index.value().get());
+ }
+ ~VPWidenGEPRecipe() override = default;
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPRecipeBase *V) {
+ return V->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC;
+ }
+
+ /// Generate the gep nodes.
+ void execute(VPTransformState &State) override;
+
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent) const override;
+};
+
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector and scalar values.
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {
@@ -822,13 +871,14 @@ public:
class VPInterleaveRecipe : public VPRecipeBase {
private:
const InterleaveGroup<Instruction> *IG;
- std::unique_ptr<VPUser> User;
+ VPUser User;
public:
- VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Mask)
- : VPRecipeBase(VPInterleaveSC), IG(IG) {
- if (Mask) // Create a VPInstruction to register as a user of the mask.
- User.reset(new VPUser({Mask}));
+ VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
+ VPValue *Mask)
+ : VPRecipeBase(VPInterleaveSC), IG(IG), User({Addr}) {
+ if (Mask)
+ User.addOperand(Mask);
}
~VPInterleaveRecipe() override = default;
@@ -837,6 +887,18 @@ public:
return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC;
}
+ /// Return the address accessed by this recipe.
+ VPValue *getAddr() const {
+ return User.getOperand(0); // Address is the 1st, mandatory operand.
+ }
+
+ /// Return the mask used by this recipe. Note that a full mask is represented
+ /// by a nullptr.
+ VPValue *getMask() const {
+ // Mask is optional and therefore the last, currently 2nd operand.
+ return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
+ }
+
/// Generate the wide load or store, and shuffles.
void execute(VPTransformState &State) override;
@@ -959,13 +1021,14 @@ public:
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
private:
Instruction &Instr;
- std::unique_ptr<VPUser> User;
+ VPUser User;
public:
- VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Mask)
- : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr) {
- if (Mask) // Create a VPInstruction to register as a user of the mask.
- User.reset(new VPUser({Mask}));
+ VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Addr,
+ VPValue *Mask)
+ : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr), User({Addr}) {
+ if (Mask)
+ User.addOperand(Mask);
}
/// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -973,6 +1036,18 @@ public:
return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
}
+ /// Return the address accessed by this recipe.
+ VPValue *getAddr() const {
+ return User.getOperand(0); // Address is the 1st, mandatory operand.
+ }
+
+ /// Return the mask used by this recipe. Note that a full mask is represented
+ /// by a nullptr.
+ VPValue *getMask() const {
+ // Mask is optional and therefore the last, currently 2nd operand.
+ return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
+ }
+
/// Generate the wide load/store.
void execute(VPTransformState &State) override;
@@ -1143,6 +1218,128 @@ public:
void execute(struct VPTransformState *State) override;
};
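
Both VPInterleaveRecipe and VPWidenMemoryInstructionRecipe above now store their operands as [Addr, optional Mask], which is exactly what getAddr()/getMask() decode. A toy model of that packing convention (the types are illustrative):

    #include <cstdio>
    #include <vector>

    struct VPValueToy { const char *Name; };

    // Operand 0 is the mandatory address; an optional mask is the last
    // (second) operand, mirroring getAddr()/getMask().
    struct MemRecipeToy {
      std::vector<VPValueToy *> Operands;
      MemRecipeToy(VPValueToy *Addr, VPValueToy *Mask) : Operands{Addr} {
        if (Mask)
          Operands.push_back(Mask);
      }
      VPValueToy *getAddr() const { return Operands[0]; }
      VPValueToy *getMask() const {
        // nullptr represents a full mask.
        return Operands.size() == 2 ? Operands[1] : nullptr;
      }
    };

    int main() {
      VPValueToy Addr{"%addr"}, Mask{"%mask"};
      MemRecipeToy Masked(&Addr, &Mask), Unmasked(&Addr, nullptr);
      printf("%s %s\n", Masked.getAddr()->Name, Masked.getMask()->Name);
      printf("%s %s\n", Unmasked.getAddr()->Name,
             Unmasked.getMask() ? Unmasked.getMask()->Name : "(full)");
      return 0;
    }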
+//===----------------------------------------------------------------------===//
+// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs //
+//===----------------------------------------------------------------------===//
+
+// The following set of template specializations implement GraphTraits to treat
+// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
+// that VPBlockBase traits don't recurse into VPRegionBlocks, i.e., if the
+// VPBlockBase is a VPRegionBlock, this specialization provides access to its
+// successors/predecessors but not to the blocks inside the region.
+
+template <> struct GraphTraits<VPBlockBase *> {
+ using NodeRef = VPBlockBase *;
+ using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+
+ static inline ChildIteratorType child_begin(NodeRef N) {
+ return N->getSuccessors().begin();
+ }
+
+ static inline ChildIteratorType child_end(NodeRef N) {
+ return N->getSuccessors().end();
+ }
+};
+
+template <> struct GraphTraits<const VPBlockBase *> {
+ using NodeRef = const VPBlockBase *;
+ using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+
+ static inline ChildIteratorType child_begin(NodeRef N) {
+ return N->getSuccessors().begin();
+ }
+
+ static inline ChildIteratorType child_end(NodeRef N) {
+ return N->getSuccessors().end();
+ }
+};
+
+// Inverse order specialization for VPBasicBlocks. Predecessors are used instead
+// of successors for the inverse traversal.
+template <> struct GraphTraits<Inverse<VPBlockBase *>> {
+ using NodeRef = VPBlockBase *;
+ using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
+
+ static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
+
+ static inline ChildIteratorType child_begin(NodeRef N) {
+ return N->getPredecessors().begin();
+ }
+
+ static inline ChildIteratorType child_end(NodeRef N) {
+ return N->getPredecessors().end();
+ }
+};
+
+// The following set of template specializations implement GraphTraits to
+// treat VPRegionBlock as a graph and recurse inside its nodes. It's important
+// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
+// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
+// there won't be automatic recursion into other VPBlockBases that turn out
+// to be VPRegionBlocks.
+
+template <>
+struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
+ using GraphRef = VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getEntry());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
+template <>
+struct GraphTraits<const VPRegionBlock *>
+ : public GraphTraits<const VPBlockBase *> {
+ using GraphRef = const VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getEntry());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
+template <>
+struct GraphTraits<Inverse<VPRegionBlock *>>
+ : public GraphTraits<Inverse<VPBlockBase *>> {
+ using GraphRef = VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(Inverse<GraphRef> N) {
+ return N.Graph->getExit();
+ }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getExit());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
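
With these GraphTraits specializations in place, generic algorithms such as depth_first iterate VPlan blocks directly. The sketch below spells out, on a toy successor-list graph, what that traversal does, including how a back-edge is not revisited:

    #include <cstdio>
    #include <set>
    #include <vector>

    // Toy block with a successor list, standing in for VPBlockBase.
    struct Block {
      const char *Name;
      std::vector<Block *> Succs;
    };

    // What llvm::depth_first(Plan.getEntry()) does, spelled out by hand:
    // visit a node, then recurse into unvisited successors.
    static void depthFirst(Block *B, std::set<Block *> &Seen) {
      if (!Seen.insert(B).second)
        return;
      printf("%s\n", B->Name);
      for (Block *S : B->Succs)
        depthFirst(S, Seen);
    }

    int main() {
      Block Exit{"exit", {}}, Latch{"latch", {&Exit}}, Body{"body", {&Latch}};
      Block Entry{"entry", {&Body}};
      Latch.Succs.push_back(&Body); // back-edge; Seen prevents revisiting
      std::set<Block *> Seen;
      depthFirst(&Entry, Seen); // prints: entry body latch exit
      return 0;
    }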
/// VPlan models a candidate for vectorization, encoding various decisions taken
/// to produce efficient output IR, including which branches, basic-blocks and
/// output IR instructions to generate, and their cost. VPlan holds a
@@ -1245,35 +1442,45 @@ public:
return Value2VPValue[V];
}
+ VPValue *getOrAddVPValue(Value *V) {
+ assert(V && "Trying to get or add the VPValue of a null Value");
+ if (!Value2VPValue.count(V))
+ addVPValue(V);
+ return getVPValue(V);
+ }
+
/// Return the VPLoopInfo analysis for this VPlan.
VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
+ /// Dump the plan to stderr (for debugging).
+ void dump() const;
+
private:
/// Add to the given dominator tree the header block and every new basic block
/// that was created between it and the latch block, inclusive.
- static void updateDominatorTree(DominatorTree *DT,
+ static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
BasicBlock *LoopPreHeaderBB,
- BasicBlock *LoopLatchBB);
+ BasicBlock *LoopExitBB);
};
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
- friend inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan);
+ friend inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan);
friend inline raw_ostream &operator<<(raw_ostream &OS,
const struct VPlanIngredient &I);
private:
raw_ostream &OS;
- VPlan &Plan;
- unsigned Depth;
+ const VPlan &Plan;
+ unsigned Depth = 0;
unsigned TabWidth = 2;
std::string Indent;
unsigned BID = 0;
SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
- VPlanPrinter(raw_ostream &O, VPlan &P) : OS(O), Plan(P) {}
+ VPlanPrinter(raw_ostream &O, const VPlan &P) : OS(O), Plan(P) {}
/// Handle indentation.
void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
@@ -1320,135 +1527,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
return OS;
}
-inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) {
+inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
VPlanPrinter Printer(OS, Plan);
Printer.dump();
return OS;
}
//===----------------------------------------------------------------------===//
-// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs //
-//===----------------------------------------------------------------------===//
-
-// The following set of template specializations implement GraphTraits to treat
-// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
-// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the
-// VPBlockBase is a VPRegionBlock, this specialization provides access to its
-// successors/predecessors but not to the blocks inside the region.
-
-template <> struct GraphTraits<VPBlockBase *> {
- using NodeRef = VPBlockBase *;
- using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
-
- static NodeRef getEntryNode(NodeRef N) { return N; }
-
- static inline ChildIteratorType child_begin(NodeRef N) {
- return N->getSuccessors().begin();
- }
-
- static inline ChildIteratorType child_end(NodeRef N) {
- return N->getSuccessors().end();
- }
-};
-
-template <> struct GraphTraits<const VPBlockBase *> {
- using NodeRef = const VPBlockBase *;
- using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator;
-
- static NodeRef getEntryNode(NodeRef N) { return N; }
-
- static inline ChildIteratorType child_begin(NodeRef N) {
- return N->getSuccessors().begin();
- }
-
- static inline ChildIteratorType child_end(NodeRef N) {
- return N->getSuccessors().end();
- }
-};
-
-// Inverse order specialization for VPBasicBlocks. Predecessors are used instead
-// of successors for the inverse traversal.
-template <> struct GraphTraits<Inverse<VPBlockBase *>> {
- using NodeRef = VPBlockBase *;
- using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
-
- static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
-
- static inline ChildIteratorType child_begin(NodeRef N) {
- return N->getPredecessors().begin();
- }
-
- static inline ChildIteratorType child_end(NodeRef N) {
- return N->getPredecessors().end();
- }
-};
-
-// The following set of template specializations implement GraphTraits to
-// treat VPRegionBlock as a graph and recurse inside its nodes. It's important
-// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
-// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
-// there won't be automatic recursion into other VPBlockBases that turn to be
-// VPRegionBlocks.
-
-template <>
-struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
- using GraphRef = VPRegionBlock *;
- using nodes_iterator = df_iterator<NodeRef>;
-
- static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
-
- static nodes_iterator nodes_begin(GraphRef N) {
- return nodes_iterator::begin(N->getEntry());
- }
-
- static nodes_iterator nodes_end(GraphRef N) {
- // df_iterator::end() returns an empty iterator so the node used doesn't
- // matter.
- return nodes_iterator::end(N);
- }
-};
-
-template <>
-struct GraphTraits<const VPRegionBlock *>
- : public GraphTraits<const VPBlockBase *> {
- using GraphRef = const VPRegionBlock *;
- using nodes_iterator = df_iterator<NodeRef>;
-
- static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
-
- static nodes_iterator nodes_begin(GraphRef N) {
- return nodes_iterator::begin(N->getEntry());
- }
-
- static nodes_iterator nodes_end(GraphRef N) {
- // df_iterator::end() returns an empty iterator so the node used doesn't
- // matter.
- return nodes_iterator::end(N);
- }
-};
-
-template <>
-struct GraphTraits<Inverse<VPRegionBlock *>>
- : public GraphTraits<Inverse<VPBlockBase *>> {
- using GraphRef = VPRegionBlock *;
- using nodes_iterator = df_iterator<NodeRef>;
-
- static NodeRef getEntryNode(Inverse<GraphRef> N) {
- return N.Graph->getExit();
- }
-
- static nodes_iterator nodes_begin(GraphRef N) {
- return nodes_iterator::begin(N->getExit());
- }
-
- static nodes_iterator nodes_end(GraphRef N) {
- // df_iterator::end() returns an empty iterator so the node used doesn't
- // matter.
- return nodes_iterator::end(N);
- }
-};
-
-//===----------------------------------------------------------------------===//
// VPlan Utilities
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b22d3190d654..3f6a2efd55cc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1,4 +1,4 @@
-//===-- VPlanHCFGTransforms.cpp - Utility VPlan to VPlan transforms -------===//
+//===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,13 +11,13 @@
///
//===----------------------------------------------------------------------===//
-#include "VPlanHCFGTransforms.h"
+#include "VPlanTransforms.h"
#include "llvm/ADT/PostOrderIterator.h"
using namespace llvm;
-void VPlanHCFGTransforms::VPInstructionsToVPRecipes(
- VPlanPtr &Plan,
+void VPlanTransforms::VPInstructionsToVPRecipes(
+ Loop *OrigLoop, VPlanPtr &Plan,
LoopVectorizationLegality::InductionList *Inductions,
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
@@ -56,7 +56,9 @@ void VPlanHCFGTransforms::VPInstructionsToVPRecipes(
VPRecipeBase *NewRecipe = nullptr;
// Create VPWidenMemoryInstructionRecipe for loads and stores.
if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
- NewRecipe = new VPWidenMemoryInstructionRecipe(*Inst, nullptr /*Mask*/);
+ NewRecipe = new VPWidenMemoryInstructionRecipe(
+ *Inst, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
+ nullptr /*Mask*/);
else if (PHINode *Phi = dyn_cast<PHINode>(Inst)) {
InductionDescriptor II = Inductions->lookup(Phi);
if (II.getKind() == InductionDescriptor::IK_IntInduction ||
@@ -64,6 +66,8 @@ void VPlanHCFGTransforms::VPInstructionsToVPRecipes(
NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi);
} else
NewRecipe = new VPWidenPHIRecipe(Phi);
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ NewRecipe = new VPWidenGEPRecipe(GEP, OrigLoop);
} else {
// If the last recipe is a VPWidenRecipe, add Inst to it instead of
// creating a new recipe.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 79a23c33184f..0d3bd7da09a7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -1,4 +1,4 @@
-//===- VPlanHCFGTransforms.h - Utility VPlan to VPlan transforms ----------===//
+//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,8 +10,8 @@
/// This file provides utility VPlan to VPlan transformations.
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANHCFGTRANSFORMS_H
-#define LLVM_TRANSFORMS_VECTORIZE_VPLANHCFGTRANSFORMS_H
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
#include "VPlan.h"
#include "llvm/IR/Instruction.h"
@@ -19,17 +19,17 @@
namespace llvm {
-class VPlanHCFGTransforms {
+class VPlanTransforms {
public:
/// Replaces the VPInstructions in \p Plan with corresponding
/// widen recipes.
static void VPInstructionsToVPRecipes(
- VPlanPtr &Plan,
+ Loop *OrigLoop, VPlanPtr &Plan,
LoopVectorizationLegality::InductionList *Inductions,
SmallPtrSetImpl<Instruction *> &DeadInstructions);
};
} // namespace llvm
-#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANHCFGTRANSFORMS_H
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 7b6c228c229e..464498c29d89 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -37,7 +37,7 @@ class VPUser;
// and live-outs which the VPlan will need to fix accordingly.
class VPValue {
friend class VPBuilder;
- friend class VPlanHCFGTransforms;
+ friend class VPlanTransforms;
friend class VPBasicBlock;
friend class VPInterleavedAccessInfo;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 394b1b93113b..ab3e7e2282e7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -14,6 +14,7 @@
#include "VPlanVerifier.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/CommandLine.h"
#define DEBUG_TYPE "loop-vectorize"
diff --git a/contrib/llvm-project/llvm/lib/XRay/InstrumentationMap.cpp b/contrib/llvm-project/llvm/lib/XRay/InstrumentationMap.cpp
index 7453613c7038..1e9b69a5f9dc 100644
--- a/contrib/llvm-project/llvm/lib/XRay/InstrumentationMap.cpp
+++ b/contrib/llvm-project/llvm/lib/XRay/InstrumentationMap.cpp
@@ -20,6 +20,7 @@
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/RelocationResolver.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
@@ -59,7 +60,8 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
// Find the section named "xray_instr_map".
if ((!ObjFile.getBinary()->isELF() && !ObjFile.getBinary()->isMachO()) ||
!(ObjFile.getBinary()->getArch() == Triple::x86_64 ||
- ObjFile.getBinary()->getArch() == Triple::ppc64le))
+ ObjFile.getBinary()->getArch() == Triple::ppc64le ||
+ ObjFile.getBinary()->getArch() == Triple::aarch64))
return make_error<StringError>(
"File format not supported (only does ELF and Mach-O little endian 64-bit).",
std::make_error_code(std::errc::not_supported));
@@ -99,12 +101,22 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
return static_cast<uint32_t>(0);
}(ObjFile.getBinary());
+ bool (*SupportsRelocation)(uint64_t);
+ object::RelocationResolver Resolver;
+ std::tie(SupportsRelocation, Resolver) =
+ object::getRelocationResolver(*ObjFile.getBinary());
+
for (const object::SectionRef &Section : Sections) {
for (const object::RelocationRef &Reloc : Section.relocations()) {
- if (Reloc.getType() != RelativeRelocation)
- continue;
- if (auto AddendOrErr = object::ELFRelocationRef(Reloc).getAddend())
- Relocs.insert({Reloc.getOffset(), *AddendOrErr});
+ if (SupportsRelocation && SupportsRelocation(Reloc.getType())) {
+ auto AddendOrErr = object::ELFRelocationRef(Reloc).getAddend();
+ auto A = AddendOrErr ? *AddendOrErr : 0;
+ uint64_t resolved = Resolver(Reloc, Reloc.getSymbol()->getValue(), A);
+ Relocs.insert({Reloc.getOffset(), resolved});
+ } else if (Reloc.getType() == RelativeRelocation) {
+ if (auto AddendOrErr = object::ELFRelocationRef(Reloc).getAddend())
+ Relocs.insert({Reloc.getOffset(), *AddendOrErr});
+ }
}
}
}
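
The new XRay path first asks the generic resolver whether it understands a relocation type and only falls back to the old RelativeRelocation handling otherwise. A toy version of that dispatch, with invented relocation kinds:

    #include <cstdint>
    #include <cstdio>
    #include <map>

    // Invented relocation kinds, standing in for target-specific values.
    enum RelocKind : uint32_t { R_ABS64 = 1, R_RELATIVE = 8, R_UNKNOWN = 99 };

    static bool supportsRelocation(uint32_t Type) {
      return Type == R_ABS64; // what the generic resolver understands
    }

    static uint64_t resolve(uint64_t SymValue, uint64_t Addend) {
      return SymValue + Addend; // trivial S + A resolution
    }

    int main() {
      struct Reloc { uint32_t Type; uint64_t Offset, Sym, Addend; };
      Reloc Relocs[] = {{R_ABS64, 0x10, 0x1000, 8},
                        {R_RELATIVE, 0x18, 0, 0x2000},
                        {R_UNKNOWN, 0x20, 0, 0}};
      std::map<uint64_t, uint64_t> Resolved;
      for (const Reloc &R : Relocs) {
        if (supportsRelocation(R.Type))
          Resolved[R.Offset] = resolve(R.Sym, R.Addend); // generic path
        else if (R.Type == R_RELATIVE)
          Resolved[R.Offset] = R.Addend;                 // legacy fallback
        // anything else is ignored
      }
      for (auto &KV : Resolved)
        printf("0x%llx -> 0x%llx\n", (unsigned long long)KV.first,
               (unsigned long long)KV.second);
      return 0;
    }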
diff --git a/contrib/llvm-project/llvm/tools/bugpoint/CrashDebugger.cpp b/contrib/llvm-project/llvm/tools/bugpoint/CrashDebugger.cpp
index aab9debf9b59..aa88a06a6df0 100644
--- a/contrib/llvm-project/llvm/tools/bugpoint/CrashDebugger.cpp
+++ b/contrib/llvm-project/llvm/tools/bugpoint/CrashDebugger.cpp
@@ -16,11 +16,11 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
@@ -32,6 +32,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <set>
using namespace llvm;
@@ -43,6 +44,10 @@ cl::opt<bool> NoGlobalRM("disable-global-remove",
cl::desc("Do not remove global variables"),
cl::init(false));
+cl::opt<bool> NoAttributeRM("disable-attribute-remove",
+ cl::desc("Do not remove function attributes"),
+ cl::init(false));
+
cl::opt<bool> ReplaceFuncsWithNull(
"replace-funcs-with-null",
cl::desc("When stubbing functions, replace all uses will null"),
@@ -359,6 +364,11 @@ bool ReduceCrashingFunctionAttributes::TestFuncAttrs(
// Set this new list of attributes on the function.
F->setAttributes(NewAttrs);
+ // If the attribute list includes "optnone" we need to make sure it also
+ // includes "noinline" otherwise we will get a verifier failure.
+ if (F->hasFnAttribute(Attribute::OptimizeNone))
+ F->addFnAttr(Attribute::NoInline);
+
// Try running on the hacked up program...
if (TestFn(BD, M.get())) {
BD.setNewProgram(std::move(M)); // It crashed, keep the trimmed version...
@@ -806,6 +816,78 @@ bool ReduceCrashingInstructions::TestInsts(
}
namespace {
+/// ReduceCrashingMetadata reducer - This works by removing all metadata from
+/// the specified instructions.
+///
+class ReduceCrashingMetadata : public ListReducer<Instruction *> {
+ BugDriver &BD;
+ BugTester TestFn;
+
+public:
+ ReduceCrashingMetadata(BugDriver &bd, BugTester testFn)
+ : BD(bd), TestFn(testFn) {}
+
+ Expected<TestResult> doTest(std::vector<Instruction *> &Prefix,
+ std::vector<Instruction *> &Kept) override {
+ if (!Kept.empty() && TestInsts(Kept))
+ return KeepSuffix;
+ if (!Prefix.empty() && TestInsts(Prefix))
+ return KeepPrefix;
+ return NoFailure;
+ }
+
+ bool TestInsts(std::vector<Instruction *> &Prefix);
+};
+} // namespace
+
+bool ReduceCrashingMetadata::TestInsts(std::vector<Instruction *> &Insts) {
+ // Clone the program to try hacking it apart...
+ ValueToValueMapTy VMap;
+ std::unique_ptr<Module> M = CloneModule(BD.getProgram(), VMap);
+
+ // Convert list to set for fast lookup...
+ SmallPtrSet<Instruction *, 32> Instructions;
+ for (Instruction *I : Insts)
+ Instructions.insert(cast<Instruction>(VMap[I]));
+
+ outs() << "Checking for crash with metadata retained from "
+ << Instructions.size();
+ if (Instructions.size() == 1)
+ outs() << " instruction: ";
+ else
+ outs() << " instructions: ";
+
+ // Try to drop instruction metadata from all instructions, except the ones
+ // selected in Instructions.
+ for (Function &F : *M)
+ for (Instruction &Inst : instructions(F)) {
+ if (Instructions.find(&Inst) == Instructions.end()) {
+ Inst.dropUnknownNonDebugMetadata();
+ Inst.setDebugLoc({});
+ }
+ }
+
+ // Verify that this is still valid.
+ legacy::PassManager Passes;
+ Passes.add(createVerifierPass(/*FatalErrors=*/false));
+ Passes.run(*M);
+
+ // Try running on the hacked up program...
+ if (TestFn(BD, M.get())) {
+ BD.setNewProgram(std::move(M)); // It crashed, keep the trimmed version...
+
+ // Make sure to use instruction pointers that point into the now-current
+ // module, and that they don't include any deleted blocks.
+ Insts.clear();
+ for (Instruction *I : Instructions)
+ Insts.push_back(I);
+ return true;
+ }
+ // It didn't crash, try something else.
+ return false;
+}
+
+namespace {
// Reduce the list of Named Metadata nodes. We keep this as a list of
// names to avoid having to convert back and forth every time.
class ReduceCrashingNamedMD : public ListReducer<std::string> {
@@ -1081,6 +1163,21 @@ static Error ReduceInsts(BugDriver &BD, BugTester TestFn) {
}
} while (Simplification);
+
+ // Attempt to drop metadata from instructions that does not contribute to the
+ // crash.
+ if (!BugpointIsInterrupted) {
+ std::vector<Instruction *> Insts;
+ for (Function &F : BD.getProgram())
+ for (Instruction &I : instructions(F))
+ Insts.push_back(&I);
+
+ Expected<bool> Result =
+ ReduceCrashingMetadata(BD, TestFn).reduceList(Insts);
+ if (Error E = Result.takeError())
+ return E;
+ }
+
BD.EmitProgressBitcode(BD.getProgram(), "reduced-instructions");
return Error::success();
}
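
ReduceCrashingMetadata feeds the instruction list to bugpoint's generic ListReducer, which is delta reduction: keep dropping candidates while the crash predicate still holds. A greedy one-at-a-time toy version (ListReducer is smarter, bisecting prefixes and suffixes, and the crash predicate here is fabricated):

    #include <cstdio>
    #include <vector>

    // Fabricated "still crashes" predicate: the bug needs metadata kept on
    // instruction 3; everything else can be dropped.
    static bool stillCrashes(const std::vector<int> &Kept) {
      for (int I : Kept)
        if (I == 3)
          return true;
      return false;
    }

    int main() {
      std::vector<int> Insts = {0, 1, 2, 3, 4, 5, 6, 7};
      for (size_t I = 0; I < Insts.size();) {
        std::vector<int> Trial = Insts;
        Trial.erase(Trial.begin() + I);
        if (stillCrashes(Trial))
          Insts = Trial; // still crashes without it: drop for good
        else
          ++I;           // needed for the crash: keep it
      }
      for (int I : Insts)
        printf("%d ", I); // prints: 3
      printf("\n");
      return 0;
    }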
@@ -1115,36 +1212,38 @@ static Error DebugACrash(BugDriver &BD, BugTester TestFn) {
BD.EmitProgressBitcode(BD.getProgram(), "reduced-function");
}
- // For each remaining function, try to reduce that function's attributes.
- std::vector<std::string> FunctionNames;
- for (Function &F : BD.getProgram())
- FunctionNames.push_back(F.getName());
+ if (!NoAttributeRM) {
+ // For each remaining function, try to reduce that function's attributes.
+ std::vector<std::string> FunctionNames;
+ for (Function &F : BD.getProgram())
+ FunctionNames.push_back(F.getName());
- if (!FunctionNames.empty() && !BugpointIsInterrupted) {
- outs() << "\n*** Attempting to reduce the number of function attributes in "
- "the testcase\n";
+ if (!FunctionNames.empty() && !BugpointIsInterrupted) {
+ outs() << "\n*** Attempting to reduce the number of function attributes"
+ " in the testcase\n";
- unsigned OldSize = 0;
- unsigned NewSize = 0;
- for (std::string &Name : FunctionNames) {
- Function *Fn = BD.getProgram().getFunction(Name);
- assert(Fn && "Could not find funcion?");
+ unsigned OldSize = 0;
+ unsigned NewSize = 0;
+ for (std::string &Name : FunctionNames) {
+ Function *Fn = BD.getProgram().getFunction(Name);
+ assert(Fn && "Could not find funcion?");
- std::vector<Attribute> Attrs;
- for (Attribute A : Fn->getAttributes().getFnAttributes())
- Attrs.push_back(A);
+ std::vector<Attribute> Attrs;
+ for (Attribute A : Fn->getAttributes().getFnAttributes())
+ Attrs.push_back(A);
- OldSize += Attrs.size();
- Expected<bool> Result =
+ OldSize += Attrs.size();
+ Expected<bool> Result =
ReduceCrashingFunctionAttributes(BD, Name, TestFn).reduceList(Attrs);
- if (Error E = Result.takeError())
- return E;
+ if (Error E = Result.takeError())
+ return E;
- NewSize += Attrs.size();
- }
+ NewSize += Attrs.size();
+ }
- if (OldSize < NewSize)
- BD.EmitProgressBitcode(BD.getProgram(), "reduced-function-attributes");
+ if (OldSize < NewSize)
+ BD.EmitProgressBitcode(BD.getProgram(), "reduced-function-attributes");
+ }
}
// Attempt to change conditional branches into unconditional branches to
@@ -1289,7 +1388,21 @@ Error BugDriver::debugOptimizerCrash(const std::string &ID) {
EmitProgressBitcode(*Program, ID);
- return DebugACrash(*this, TestForOptimizerCrash);
+ auto Res = DebugACrash(*this, TestForOptimizerCrash);
+ if (Res || DontReducePassList)
+ return Res;
+ // Try to reduce the pass list again. This covers additional cases
+ // we failed to reduce earlier, because of more complex pass dependencies
+ // triggering the crash.
+ auto SecondRes = ReducePassList(*this).reduceList(PassesToRun);
+ if (Error E = SecondRes.takeError())
+ return E;
+ outs() << "\n*** Found crashing pass"
+ << (PassesToRun.size() == 1 ? ": " : "es: ")
+ << getPassesString(PassesToRun) << '\n';
+
+ EmitProgressBitcode(getProgram(), "reduced-simplified");
+ return Res;
}
static bool TestForCodeGenCrash(const BugDriver &BD, Module *M) {
diff --git a/contrib/llvm-project/llvm/tools/bugpoint/bugpoint.cpp b/contrib/llvm-project/llvm/tools/bugpoint/bugpoint.cpp
index c7644e75ae4b..d29a79ee3e13 100644
--- a/contrib/llvm-project/llvm/tools/bugpoint/bugpoint.cpp
+++ b/contrib/llvm-project/llvm/tools/bugpoint/bugpoint.cpp
@@ -18,8 +18,10 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/LegacyPassNameParser.h"
+#include "llvm/InitializePasses.h"
#include "llvm/LinkAllIR.h"
#include "llvm/LinkAllPasses.h"
+#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/ManagedStatic.h"
@@ -133,11 +135,9 @@ static void AddOptimizationPasses(legacy::FunctionPassManager &FPM,
Builder.populateModulePassManager(FPM);
}
-#ifdef LINK_POLLY_INTO_TOOLS
-namespace polly {
-void initializePollyPasses(llvm::PassRegistry &Registry);
-}
-#endif
+#define HANDLE_EXTENSION(Ext) \
+ llvm::PassPluginLibraryInfo get##Ext##PluginInfo();
+#include "llvm/Support/Extension.def"
int main(int argc, char **argv) {
#ifndef DEBUG_BUGPOINT
@@ -158,10 +158,6 @@ int main(int argc, char **argv) {
initializeInstrumentation(Registry);
initializeTarget(Registry);
-#ifdef LINK_POLLY_INTO_TOOLS
- polly::initializePollyPasses(Registry);
-#endif
-
if (std::getenv("bar") == (char*) -1) {
InitializeAllTargets();
InitializeAllTargetMCs();
@@ -233,6 +229,13 @@ int main(int argc, char **argv) {
sys::Process::PreventCoreFiles();
#endif
+// Needed to pull in symbols from statically linked extensions, including static
+// registration. It is unused otherwise because bugpoint has no support for
+// NewPM.
+#define HANDLE_EXTENSION(Ext) \
+ (void)get##Ext##PluginInfo();
+#include "llvm/Support/Extension.def"
+
if (Error E = D.run()) {
errs() << toString(std::move(E));
return 1;
diff --git a/contrib/llvm-project/llvm/tools/llc/llc.cpp b/contrib/llvm-project/llvm/tools/llc/llc.cpp
index 574b15b399c3..b35f8e853c30 100644
--- a/contrib/llvm-project/llvm/tools/llc/llc.cpp
+++ b/contrib/llvm-project/llvm/tools/llc/llc.cpp
@@ -34,6 +34,7 @@
#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -202,7 +203,7 @@ static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName,
OutputFilename = IFN;
switch (FileType) {
- case TargetMachine::CGFT_AssemblyFile:
+ case CGFT_AssemblyFile:
if (TargetName[0] == 'c') {
if (TargetName[1] == 0)
OutputFilename += ".cbe.c";
@@ -213,13 +214,13 @@ static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName,
} else
OutputFilename += ".s";
break;
- case TargetMachine::CGFT_ObjectFile:
+ case CGFT_ObjectFile:
if (OS == Triple::Win32)
OutputFilename += ".obj";
else
OutputFilename += ".o";
break;
- case TargetMachine::CGFT_Null:
+ case CGFT_Null:
OutputFilename += ".null";
break;
}
@@ -229,10 +230,10 @@ static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName,
// Decide if we need "binary" output.
bool Binary = false;
switch (FileType) {
- case TargetMachine::CGFT_AssemblyFile:
+ case CGFT_AssemblyFile:
break;
- case TargetMachine::CGFT_ObjectFile:
- case TargetMachine::CGFT_Null:
+ case CGFT_ObjectFile:
+ case CGFT_Null:
Binary = true;
break;
}
@@ -394,6 +395,12 @@ static int compileModule(char **argv, LLVMContext &Context) {
std::unique_ptr<Module> M;
std::unique_ptr<MIRParser> MIR;
Triple TheTriple;
+ std::string CPUStr = getCPUStr(), FeaturesStr = getFeaturesStr();
+
+ // Set attributes on functions as loaded from MIR from command line arguments.
+ auto setMIRFunctionAttributes = [&CPUStr, &FeaturesStr](Function &F) {
+ setFunctionAttributes(CPUStr, FeaturesStr, F);
+ };
bool SkipModule = MCPU == "help" ||
(!MAttrs.empty() && MAttrs.front() == "help");
@@ -402,7 +409,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
if (!SkipModule) {
if (InputLanguage == "mir" ||
(InputLanguage == "" && StringRef(InputFilename).endswith(".mir"))) {
- MIR = createMIRParserFromFile(InputFilename, Err, Context);
+ MIR = createMIRParserFromFile(InputFilename, Err, Context,
+ setMIRFunctionAttributes);
if (MIR)
M = MIR->parseIRModule();
} else
@@ -432,8 +440,6 @@ static int compileModule(char **argv, LLVMContext &Context) {
return 1;
}
- std::string CPUStr = getCPUStr(), FeaturesStr = getFeaturesStr();
-
CodeGenOpt::Level OLvl = CodeGenOpt::Default;
switch (OptLevel) {
default:
@@ -519,7 +525,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
setFunctionAttributes(CPUStr, FeaturesStr, *M);
if (RelaxAll.getNumOccurrences() > 0 &&
- FileType != TargetMachine::CGFT_ObjectFile)
+ FileType != CGFT_ObjectFile)
WithColor::warning(errs(), argv[0])
<< ": warning: ignoring -mc-relax-all because filetype != obj";
@@ -530,7 +536,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
// so we can memcmp the contents in CompileTwice mode
SmallVector<char, 0> Buffer;
std::unique_ptr<raw_svector_ostream> BOS;
- if ((FileType != TargetMachine::CGFT_AssemblyFile &&
+ if ((FileType != CGFT_AssemblyFile &&
!Out->os().supportsSeeking()) ||
CompileTwice) {
BOS = std::make_unique<raw_svector_ostream>(Buffer);
diff --git a/contrib/llvm-project/llvm/tools/lli/RemoteJITUtils.h b/contrib/llvm-project/llvm/tools/lli/RemoteJITUtils.h
index 8e80e73c8082..cc93294af0cf 100644
--- a/contrib/llvm-project/llvm/tools/lli/RemoteJITUtils.h
+++ b/contrib/llvm-project/llvm/tools/lli/RemoteJITUtils.h
@@ -13,7 +13,7 @@
#ifndef LLVM_TOOLS_LLI_REMOTEJITUTILS_H
#define LLVM_TOOLS_LLI_REMOTEJITUTILS_H
-#include "llvm/ExecutionEngine/Orc/RawByteChannel.h"
+#include "llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include <mutex>
diff --git a/contrib/llvm-project/llvm/tools/lli/lli.cpp b/contrib/llvm-project/llvm/tools/lli/lli.cpp
index ccad06721414..bfe7e8f04303 100644
--- a/contrib/llvm-project/llvm/tools/lli/lli.cpp
+++ b/contrib/llvm-project/llvm/tools/lli/lli.cpp
@@ -792,11 +792,15 @@ int runOrcLazyJIT(const char *ProgName) {
});
return TSM;
});
+
+ orc::MangleAndInterner Mangle(J->getExecutionSession(), J->getDataLayout());
J->getMainJITDylib().addGenerator(
ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
- J->getDataLayout().getGlobalPrefix())));
+ J->getDataLayout().getGlobalPrefix(),
+ [MainName = Mangle("main")](const orc::SymbolStringPtr &Name) {
+ return Name != MainName;
+ })));
- orc::MangleAndInterner Mangle(J->getExecutionSession(), J->getDataLayout());
orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
ExitOnErr(CXXRuntimeOverrides.enable(J->getMainJITDylib(), Mangle));
@@ -850,12 +854,6 @@ int runOrcLazyJIT(const char *ProgName) {
ExitOnErr(J->addObjectFile(std::move(Obj)));
}
- // Generate a argument string.
- std::vector<std::string> Args;
- Args.push_back(InputFile);
- for (auto &Arg : InputArgv)
- Args.push_back(Arg);
-
// Run any static constructors.
ExitOnErr(J->runConstructors());
@@ -871,16 +869,11 @@ int runOrcLazyJIT(const char *ProgName) {
// Run main.
auto MainSym = ExitOnErr(J->lookup("main"));
- typedef int (*MainFnPtr)(int, const char *[]);
- std::vector<const char *> ArgV;
- for (auto &Arg : Args)
- ArgV.push_back(Arg.c_str());
- ArgV.push_back(nullptr);
-
- int ArgC = ArgV.size() - 1;
- auto Main =
- reinterpret_cast<MainFnPtr>(static_cast<uintptr_t>(MainSym.getAddress()));
- auto Result = Main(ArgC, (const char **)ArgV.data());
+
+ typedef int (*MainFnPtr)(int, char *[]);
+ auto Result = orc::runAsMain(
+ jitTargetAddressToFunction<MainFnPtr>(MainSym.getAddress()), InputArgv,
+ StringRef(InputFile));
// Wait for -entry-point threads.
for (auto &AltEntryThread : AltEntryThreads)
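
The rewrite above replaces the hand-rolled argv marshalling that the deleted lines performed with orc::runAsMain. A standalone sketch (not the LLVM implementation) of what such a helper has to do, using a hypothetical runAsMainSketch name:

#include <string>
#include <vector>

using MainFnPtr = int (*)(int, char *[]);

// Build a null-terminated argv block from the program name plus the input
// arguments, then invoke the JIT'd main through a typed function pointer.
int runAsMainSketch(MainFnPtr Main, const std::vector<std::string> &Args,
                    const std::string &ProgName) {
  std::vector<std::string> Storage;       // owns the argument strings
  Storage.push_back(ProgName);            // argv[0] is the program name
  Storage.insert(Storage.end(), Args.begin(), Args.end());

  std::vector<char *> ArgV;
  for (auto &S : Storage)
    ArgV.push_back(&S[0]);
  ArgV.push_back(nullptr);                // conventional trailing null

  return Main(static_cast<int>(ArgV.size()) - 1, ArgV.data());
}

static int fakeMain(int argc, char **) { return argc; }

int main() {
  // "prog" plus two arguments should reach fakeMain as argc == 3.
  return runAsMainSketch(&fakeMain, {"a", "b"}, "prog") == 3 ? 0 : 1;
}
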
diff --git a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp
index c9cf217f7688..c339dfe1f33e 100644
--- a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/LLVMContext.h"
@@ -56,20 +57,20 @@ static StringRef ToolName;
// The basename of this program.
static StringRef Stem;
-const char RanlibHelp[] = R"(
-OVERVIEW: LLVM Ranlib (llvm-ranlib)
+const char RanlibHelp[] = R"(OVERVIEW: LLVM Ranlib (llvm-ranlib)
This program generates an index to speed access to archives
USAGE: llvm-ranlib <archive-file>
OPTIONS:
- -help - Display available options
- -version - Display the version of this program
+ -h --help - Display available options
+ -v --version - Display the version of this program
+ -D - Use zero for timestamps and uids/gids (default)
+ -U - Use actual timestamps and uids/gids
)";
-const char ArHelp[] = R"(
-OVERVIEW: LLVM Archiver
+const char ArHelp[] = R"(OVERVIEW: LLVM Archiver
USAGE: llvm-ar [options] [-]<operation>[modifiers] [relpos] [count] <archive> [files]
llvm-ar -M [<mri-script]
@@ -127,6 +128,13 @@ void printHelpMessage() {
static unsigned MRILineNumber;
static bool ParsingMRIScript;
+// Show the error plus the usage message, and exit.
+LLVM_ATTRIBUTE_NORETURN static void badUsage(Twine Error) {
+ WithColor::error(errs(), ToolName) << Error << "\n";
+ printHelpMessage();
+ exit(1);
+}
+
// Show the error message and exit.
LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) {
if (ParsingMRIScript) {
@@ -134,9 +142,7 @@ LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) {
<< "script line " << MRILineNumber << ": " << Error << "\n";
} else {
WithColor::error(errs(), ToolName) << Error << "\n";
- printHelpMessage();
}
-
exit(1);
}
@@ -239,19 +245,19 @@ static void getRelPos() {
// associated with the N modifier
static void getCountParam() {
if (PositionalArgs.empty())
- fail("expected [count] for 'N' modifier");
+ badUsage("expected [count] for 'N' modifier");
auto CountParamArg = StringRef(PositionalArgs[0]);
if (CountParamArg.getAsInteger(10, CountParam))
- fail("value for [count] must be numeric, got: " + CountParamArg);
+ badUsage("value for [count] must be numeric, got: " + CountParamArg);
if (CountParam < 1)
- fail("value for [count] must be positive, got: " + CountParamArg);
+ badUsage("value for [count] must be positive, got: " + CountParamArg);
PositionalArgs.erase(PositionalArgs.begin());
}
// Get the archive file name from the command line
static void getArchive() {
if (PositionalArgs.empty())
- fail("an archive name must be specified");
+ badUsage("an archive name must be specified");
ArchiveName = PositionalArgs[0];
PositionalArgs.erase(PositionalArgs.begin());
}
@@ -276,7 +282,7 @@ static void runMRIScript();
static ArchiveOperation parseCommandLine() {
if (MRI) {
if (!PositionalArgs.empty() || !Options.empty())
- fail("cannot mix -M and other options");
+ badUsage("cannot mix -M and other options");
runMRIScript();
}
@@ -389,7 +395,7 @@ static ArchiveOperation parseCommandLine() {
printHelpMessage();
exit(0);
default:
- fail(std::string("unknown option ") + Options[i]);
+ badUsage(std::string("unknown option ") + Options[i]);
}
}
@@ -404,31 +410,31 @@ static ArchiveOperation parseCommandLine() {
NumOperations = 1;
Operation = CreateSymTab;
if (!Members.empty())
- fail("the 's' operation takes only an archive as argument");
+ badUsage("the 's' operation takes only an archive as argument");
}
// Perform various checks on the operation/modifier specification
// to make sure we are dealing with a legal request.
if (NumOperations == 0)
- fail("you must specify at least one of the operations");
+ badUsage("you must specify at least one of the operations");
if (NumOperations > 1)
- fail("only one operation may be specified");
+ badUsage("only one operation may be specified");
if (NumPositional > 1)
- fail("you may only specify one of 'a', 'b', and 'i' modifiers");
+ badUsage("you may only specify one of 'a', 'b', and 'i' modifiers");
if (AddAfter || AddBefore)
if (Operation != Move && Operation != ReplaceOrInsert)
- fail("the 'a', 'b' and 'i' modifiers can only be specified with "
- "the 'm' or 'r' operations");
+ badUsage("the 'a', 'b' and 'i' modifiers can only be specified with "
+ "the 'm' or 'r' operations");
if (CountParam)
if (Operation != Extract && Operation != Delete)
- fail("the 'N' modifier can only be specified with the 'x' or 'd' "
- "operations");
+ badUsage("the 'N' modifier can only be specified with the 'x' or 'd' "
+ "operations");
if (OriginalDates && Operation != Extract)
- fail("the 'o' modifier is only applicable to the 'x' operation");
+ badUsage("the 'o' modifier is only applicable to the 'x' operation");
if (OnlyUpdate && Operation != ReplaceOrInsert)
- fail("the 'u' modifier is only applicable to the 'r' operation");
+ badUsage("the 'u' modifier is only applicable to the 'r' operation");
if (AddLibrary && Operation != QuickAppend)
- fail("the 'L' modifier is only applicable to the 'q' operation");
+ badUsage("the 'L' modifier is only applicable to the 'q' operation");
// Return the parsed operation to the caller
return Operation;
@@ -530,8 +536,12 @@ static void doExtract(StringRef Name, const object::Archive::Child &C) {
failIfError(ModeOrErr.takeError());
sys::fs::perms Mode = ModeOrErr.get();
+ llvm::StringRef outputFilePath = sys::path::filename(Name);
+ if (Verbose)
+ outs() << "x - " << outputFilePath << '\n';
+
int FD;
- failIfError(sys::fs::openFileForWrite(sys::path::filename(Name), FD,
+ failIfError(sys::fs::openFileForWrite(outputFilePath, FD,
sys::fs::CD_CreateAlways,
sys::fs::OF_None, Mode),
Name);
@@ -1075,7 +1085,7 @@ static void runMRIScript() {
}
static bool handleGenericOption(StringRef arg) {
- if (arg == "-help" || arg == "--help") {
+ if (arg == "-help" || arg == "--help" || arg == "-h") {
printHelpMessage();
return true;
}
@@ -1092,7 +1102,7 @@ static int ar_main(int argc, char **argv) {
cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
for (size_t i = 1; i < Argv.size(); ++i) {
StringRef Arg = Argv[i];
- const char *match;
+ const char *match = nullptr;
auto MatchFlagWithArg = [&](const char *expected) {
size_t len = strlen(expected);
if (Arg == expected) {
@@ -1148,15 +1158,38 @@ static int ar_main(int argc, char **argv) {
static int ranlib_main(int argc, char **argv) {
bool ArchiveSpecified = false;
for (int i = 1; i < argc; ++i) {
- if (handleGenericOption(argv[i])) {
+ StringRef arg(argv[i]);
+ if (handleGenericOption(arg)) {
return 0;
+ } else if (arg.consume_front("-")) {
+ // Handle the -D/-U flag
+ while (!arg.empty()) {
+ if (arg.front() == 'D') {
+ Deterministic = true;
+ } else if (arg.front() == 'U') {
+ Deterministic = false;
+ } else if (arg.front() == 'h') {
+ printHelpMessage();
+ return 0;
+ } else if (arg.front() == 'v') {
+ cl::PrintVersionMessage();
+ return 0;
+ } else {
+ // TODO: GNU ranlib also supports a -t flag
+ fail("Invalid option: '-" + arg + "'");
+ }
+ arg = arg.drop_front(1);
+ }
} else {
if (ArchiveSpecified)
fail("exactly one archive should be specified");
ArchiveSpecified = true;
- ArchiveName = argv[i];
+ ArchiveName = arg.str();
}
}
+ if (!ArchiveSpecified) {
+ badUsage("an archive name must be specified");
+ }
return performOperation(CreateSymTab, nullptr);
}
@@ -1169,16 +1202,25 @@ int main(int argc, char **argv) {
llvm::InitializeAllAsmParsers();
Stem = sys::path::stem(ToolName);
- if (Stem.contains_lower("dlltool"))
+ auto Is = [](StringRef Tool) {
+ // We need to recognize the following filenames.
+ //
+ // Lib.exe -> lib (see D44808, MSBuild runs Lib.exe)
+ // dlltool.exe -> dlltool
+ // arm-pokymllib32-linux-gnueabi-llvm-ar-10 -> ar
+ auto I = Stem.rfind_lower(Tool);
+ return I != StringRef::npos &&
+ (I + Tool.size() == Stem.size() || !isAlnum(Stem[I + Tool.size()]));
+ };
+
+ if (Is("dlltool"))
return dlltoolDriverMain(makeArrayRef(argv, argc));
-
- if (Stem.contains_lower("ranlib"))
+ if (Is("ranlib"))
return ranlib_main(argc, argv);
-
- if (Stem.contains_lower("lib"))
+ if (Is("lib"))
return libDriverMain(makeArrayRef(argv, argc));
-
- if (Stem.contains_lower("ar"))
+ if (Is("ar"))
return ar_main(argc, argv);
+
fail("not ranlib, ar, lib or dlltool");
}
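
The Is lambda above matches a tool name anywhere in the stem provided the character that follows it, if any, is not alphanumeric, so arm-pokymllib32-linux-gnueabi-llvm-ar-10 matches ar while a hypothetical llvm-arm-tool does not. A standalone sketch of the same rule, with std::string standing in for StringRef and tolower for rfind_lower:

#include <cassert>
#include <cctype>
#include <string>

// True when Tool occurs in Stem (case-insensitively) and is not followed by
// an alphanumeric character, i.e. it ends the stem or sits at a boundary.
bool isTool(std::string Stem, std::string Tool) {
  for (char &C : Stem) C = (char)std::tolower((unsigned char)C);
  for (char &C : Tool) C = (char)std::tolower((unsigned char)C);
  size_t I = Stem.rfind(Tool);
  return I != std::string::npos &&
         (I + Tool.size() == Stem.size() ||
          !std::isalnum((unsigned char)Stem[I + Tool.size()]));
}

int main() {
  assert(isTool("Lib", "lib"));                                     // Lib.exe stem
  assert(isTool("arm-pokymllib32-linux-gnueabi-llvm-ar-10", "ar")); // versioned ar
  assert(!isTool("llvm-arm-tool", "ar")); // "ar" is followed by 'm'
  return 0;
}
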
diff --git a/contrib/llvm-project/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/contrib/llvm-project/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 01cba1f6e3c9..639a6d1ec02c 100644
--- a/contrib/llvm-project/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -102,8 +102,9 @@ int main(int argc, char **argv) {
O.Symbolic = !NonSymbolic;
O.ShowBinaryBlobs = ShowBinaryBlobs;
- ExitOnErr(
- BA.analyze(O, CheckHash.empty() ? None : Optional<StringRef>(CheckHash)));
+ ExitOnErr(BA.analyze(
+ Dump ? Optional<BCDumpOptions>(O) : Optional<BCDumpOptions>(None),
+ CheckHash.empty() ? None : Optional<StringRef>(CheckHash)));
if (Dump)
outs() << "\n\n";
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp
index 7151cfb032f3..5f1e23f20d77 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include <functional>
#include <map>
@@ -704,8 +705,8 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
// Read in -name-whitelist files.
if (!NameFilterFiles.empty()) {
std::string SpecialCaseListErr;
- NameWhitelist =
- SpecialCaseList::create(NameFilterFiles, SpecialCaseListErr);
+ NameWhitelist = SpecialCaseList::create(
+ NameFilterFiles, *vfs::getRealFileSystem(), SpecialCaseListErr);
if (!NameWhitelist)
error(SpecialCaseListErr);
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterJson.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterJson.cpp
index 181d428ed9d8..216b5e3fd226 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterJson.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterJson.cpp
@@ -48,6 +48,7 @@
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
+#include <limits>
#include <mutex>
#include <utility>
@@ -61,14 +62,23 @@ using namespace llvm;
namespace {
+// The JSON library accepts int64_t, but profiling counts are stored as uint64_t.
+// Therefore we need to explicitly convert from unsigned to signed, since a naive
+// cast is implementation-defined behavior when the unsigned value cannot be
+// represented as a signed value. We choose to clamp the values to preserve the
+// invariant that counts are always >= 0.
+int64_t clamp_uint64_to_int64(uint64_t u) {
+ return std::min(u, static_cast<uint64_t>(std::numeric_limits<int64_t>::max()));
+}
+
json::Array renderSegment(const coverage::CoverageSegment &Segment) {
- return json::Array({Segment.Line, Segment.Col, int64_t(Segment.Count),
+ return json::Array({Segment.Line, Segment.Col, clamp_uint64_to_int64(Segment.Count),
Segment.HasCount, Segment.IsRegionEntry});
}
json::Array renderRegion(const coverage::CountedRegion &Region) {
return json::Array({Region.LineStart, Region.ColumnStart, Region.LineEnd,
- Region.ColumnEnd, int64_t(Region.ExecutionCount),
+ Region.ColumnEnd, clamp_uint64_to_int64(Region.ExecutionCount),
Region.FileID, Region.ExpandedFileID,
int64_t(Region.Kind)});
}
@@ -182,7 +192,7 @@ json::Array renderFunctions(
for (const auto &F : Functions)
FunctionArray.push_back(
json::Object({{"name", F.Name},
- {"count", int64_t(F.ExecutionCount)},
+ {"count", clamp_uint64_to_int64(F.ExecutionCount)},
{"regions", renderRegions(F.CountedRegions)},
{"filenames", json::Array(F.Filenames)}}));
return FunctionArray;
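
A standalone sketch of the clamp_uint64_to_int64 conversion above with worked checks: counts up to INT64_MAX pass through unchanged, while anything larger is pinned to INT64_MAX instead of turning into a negative JSON count:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>

int64_t clamp_uint64_to_int64(uint64_t u) {
  return std::min(u, (uint64_t)std::numeric_limits<int64_t>::max());
}

int main() {
  // Ordinary counts survive the conversion unchanged.
  assert(clamp_uint64_to_int64(42) == 42);
  // A count above INT64_MAX clamps instead of becoming negative.
  assert(clamp_uint64_to_int64(UINT64_MAX) ==
         std::numeric_limits<int64_t>::max());
  return 0;
}
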
diff --git a/contrib/llvm-project/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/contrib/llvm-project/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
index 9ac8bcf0ff01..6de512fc18dc 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
@@ -7,8 +7,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
@@ -41,6 +43,13 @@ static cl::opt<bool> StripUnderscore("strip-underscore",
static cl::alias StripUnderscoreShort("_",
cl::desc("alias for --strip-underscore"),
cl::aliasopt(StripUnderscore));
+static cl::opt<bool>
+ NoStripUnderscore("no-strip-underscore",
+ cl::desc("do not strip the leading underscore"),
+ cl::init(false));
+static cl::alias
+ NoStripUnderscoreShort("n", cl::desc("alias for --no-strip-underscore"),
+ cl::aliasopt(NoStripUnderscore));
static cl::opt<bool>
Types("types",
@@ -55,11 +64,22 @@ Decorated(cl::Positional, cl::desc("<mangled>"), cl::ZeroOrMore);
static cl::extrahelp
HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
-static std::string demangle(llvm::raw_ostream &OS, const std::string &Mangled) {
+static bool shouldStripUnderscore() {
+ if (StripUnderscore)
+ return true;
+ if (NoStripUnderscore)
+ return false;
+ // If neither is set, use the default for the platform: Mach-O prefixes
+ // symbols with "_", so strip by default there.
+ return Triple(sys::getProcessTriple()).isOSBinFormatMachO();
+}
+
+static std::string demangle(const std::string &Mangled) {
int Status;
+ std::string Prefix;
const char *DecoratedStr = Mangled.c_str();
- if (StripUnderscore)
+ if (shouldStripUnderscore())
if (DecoratedStr[0] == '_')
++DecoratedStr;
size_t DecoratedLength = strlen(DecoratedStr);
@@ -73,11 +93,11 @@ static std::string demangle(llvm::raw_ostream &OS, const std::string &Mangled) {
if (!Undecorated &&
(DecoratedLength > 6 && strncmp(DecoratedStr, "__imp_", 6) == 0)) {
- OS << "import thunk for ";
+ Prefix = "import thunk for ";
Undecorated = itaniumDemangle(DecoratedStr + 6, nullptr, nullptr, &Status);
}
- std::string Result(Undecorated ? Undecorated : Mangled);
+ std::string Result(Undecorated ? Prefix + Undecorated : Mangled);
free(Undecorated);
return Result;
}
@@ -125,9 +145,9 @@ static void demangleLine(llvm::raw_ostream &OS, StringRef Mangled, bool Split) {
SmallVector<std::pair<StringRef, StringRef>, 16> Words;
SplitStringDelims(Mangled, Words, IsLegalItaniumChar);
for (const auto &Word : Words)
- Result += demangle(OS, Word.first) + Word.second.str();
+ Result += ::demangle(Word.first) + Word.second.str();
} else
- Result = demangle(OS, Mangled);
+ Result = ::demangle(Mangled);
OS << Result << '\n';
OS.flush();
}
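
shouldStripUnderscore above resolves the two flags in priority order and falls back to the platform default only when neither was given. A minimal standalone sketch of that resolution order, with the Mach-O triple test reduced to a boolean for illustration:

#include <cassert>

// Explicit flags win; otherwise the platform decides (Mach-O symbols carry
// a leading '_', so they are stripped by default there).
bool shouldStripUnderscore(bool StripFlag, bool NoStripFlag, bool IsMachO) {
  if (StripFlag)
    return true;
  if (NoStripFlag)
    return false;
  return IsMachO;
}

int main() {
  assert(shouldStripUnderscore(true, false, false)); // --strip-underscore wins
  assert(!shouldStripUnderscore(false, true, true)); // --no-strip-underscore wins
  assert(shouldStripUnderscore(false, false, true)); // Mach-O default
  return 0;
}
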
diff --git a/contrib/llvm-project/llvm/tools/llvm-diff/DifferenceEngine.cpp b/contrib/llvm-project/llvm/tools/llvm-diff/DifferenceEngine.cpp
index bc93ece86490..564ce7870592 100644
--- a/contrib/llvm-project/llvm/tools/llvm-diff/DifferenceEngine.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-diff/DifferenceEngine.cpp
@@ -732,5 +732,14 @@ void DifferenceEngine::diff(Module *L, Module *R) {
bool DifferenceEngine::equivalentAsOperands(GlobalValue *L, GlobalValue *R) {
if (globalValueOracle) return (*globalValueOracle)(L, R);
+
+ if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) {
+ GlobalVariable *GVL = cast<GlobalVariable>(L);
+ GlobalVariable *GVR = cast<GlobalVariable>(R);
+ if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() &&
+ GVR->hasLocalLinkage() && GVR->hasUniqueInitializer())
+ return GVL->getInitializer() == GVR->getInitializer();
+ }
+
return L->getName() == R->getName();
}
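
The new case above compares two local-linkage globals by their initializers rather than by name, so, for example, identical private string constants that were renamed between the two modules still compare equal; it relies on hasUniqueInitializer() meaning the initializer is the sole definition, and on LLVM constants being uniqued so pointer equality is meaningful. A standalone analogue of the decision logic:

#include <cassert>
#include <string>

struct GlobalVar {
  std::string Name;
  bool LocalLinkage;
  bool UniqueInit;
  const void *Init; // constants are uniqued, so pointer equality is enough
};

bool equivalentAsOperands(const GlobalVar &L, const GlobalVar &R) {
  if (L.LocalLinkage && L.UniqueInit && R.LocalLinkage && R.UniqueInit)
    return L.Init == R.Init; // compare by initializer, ignoring the names
  return L.Name == R.Name;   // fallback: name equality
}

int main() {
  int C = 7; // stands in for a uniqued llvm::Constant
  GlobalVar A{"a.str", true, true, &C};
  GlobalVar B{"b.str", true, true, &C};
  assert(equivalentAsOperands(A, B)); // same initializer, different names
  return 0;
}
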
diff --git a/contrib/llvm-project/llvm/tools/llvm-dis/llvm-dis.cpp b/contrib/llvm-project/llvm/tools/llvm-dis/llvm-dis.cpp
index d66299cbf767..5d609468a380 100644
--- a/contrib/llvm-project/llvm/tools/llvm-dis/llvm-dis.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-dis/llvm-dis.cpp
@@ -158,53 +158,71 @@ int main(int argc, char **argv) {
std::unique_ptr<MemoryBuffer> MB =
ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename)));
- std::unique_ptr<Module> M = ExitOnErr(getLazyBitcodeModule(
- *MB, Context, /*ShouldLazyLoadMetadata=*/true, SetImporting));
- if (MaterializeMetadata)
- ExitOnErr(M->materializeMetadata());
- else
- ExitOnErr(M->materializeAll());
-
- BitcodeLTOInfo LTOInfo = ExitOnErr(getBitcodeLTOInfo(*MB));
- std::unique_ptr<ModuleSummaryIndex> Index;
- if (LTOInfo.HasSummary)
- Index = ExitOnErr(getModuleSummaryIndex(*MB));
-
- // Just use stdout. We won't actually print anything on it.
- if (DontPrint)
- OutputFilename = "-";
-
- if (OutputFilename.empty()) { // Unspecified output, infer it.
- if (InputFilename == "-") {
- OutputFilename = "-";
+
+ BitcodeFileContents IF = ExitOnErr(llvm::getBitcodeFileContents(*MB));
+
+ const size_t N = IF.Mods.size();
+
+ if (OutputFilename == "-" && N > 1)
+ errs() << "only single module bitcode files can be written to stdout\n";
+
+ for (size_t i = 0; i < N; ++i) {
+ BitcodeModule MB = IF.Mods[i];
+ std::unique_ptr<Module> M = ExitOnErr(MB.getLazyModule(Context, MaterializeMetadata,
+ SetImporting));
+ if (MaterializeMetadata)
+ ExitOnErr(M->materializeMetadata());
+ else
+ ExitOnErr(M->materializeAll());
+
+ BitcodeLTOInfo LTOInfo = ExitOnErr(MB.getLTOInfo());
+ std::unique_ptr<ModuleSummaryIndex> Index;
+ if (LTOInfo.HasSummary)
+ Index = ExitOnErr(MB.getSummary());
+
+ std::string FinalFilename(OutputFilename);
+
+ // Just use stdout. We won't actually print anything on it.
+ if (DontPrint)
+ FinalFilename = "-";
+
+ if (FinalFilename.empty()) { // Unspecified output, infer it.
+ if (InputFilename == "-") {
+ FinalFilename = "-";
+ } else {
+ StringRef IFN = InputFilename;
+ FinalFilename = (IFN.endswith(".bc") ? IFN.drop_back(3) : IFN).str();
+ if (N > 1)
+ FinalFilename += std::string(".") + std::to_string(i);
+ FinalFilename += ".ll";
+ }
} else {
- StringRef IFN = InputFilename;
- OutputFilename = (IFN.endswith(".bc") ? IFN.drop_back(3) : IFN).str();
- OutputFilename += ".ll";
+ if (N > 1)
+ FinalFilename += std::string(".") + std::to_string(i);
}
- }
- std::error_code EC;
- std::unique_ptr<ToolOutputFile> Out(
- new ToolOutputFile(OutputFilename, EC, sys::fs::OF_Text));
- if (EC) {
- errs() << EC.message() << '\n';
- return 1;
- }
+ std::error_code EC;
+ std::unique_ptr<ToolOutputFile> Out(
+ new ToolOutputFile(FinalFilename, EC, sys::fs::OF_Text));
+ if (EC) {
+ errs() << EC.message() << '\n';
+ return 1;
+ }
- std::unique_ptr<AssemblyAnnotationWriter> Annotator;
- if (ShowAnnotations)
- Annotator.reset(new CommentWriter());
+ std::unique_ptr<AssemblyAnnotationWriter> Annotator;
+ if (ShowAnnotations)
+ Annotator.reset(new CommentWriter());
- // All that llvm-dis does is write the assembly to a file.
- if (!DontPrint) {
- M->print(Out->os(), Annotator.get(), PreserveAssemblyUseListOrder);
- if (Index)
- Index->print(Out->os());
- }
+ // All that llvm-dis does is write the assembly to a file.
+ if (!DontPrint) {
+ M->print(Out->os(), Annotator.get(), PreserveAssemblyUseListOrder);
+ if (Index)
+ Index->print(Out->os());
+ }
- // Declare success.
- Out->keep();
+ // Declare success.
+ Out->keep();
+ }
return 0;
}
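
For multi-module bitcode inputs, the loop above derives one output name per module by inserting the module index before the .ll suffix. A standalone sketch of the naming rule, using a hypothetical outputName helper:

#include <cassert>
#include <string>

// Derive the .ll output name for module Index out of NumModules.
std::string outputName(std::string In, size_t Index, size_t NumModules) {
  if (In.size() >= 3 && In.compare(In.size() - 3, 3, ".bc") == 0)
    In.resize(In.size() - 3);          // drop a trailing ".bc"
  if (NumModules > 1)
    In += "." + std::to_string(Index); // disambiguate per module
  return In + ".ll";
}

int main() {
  assert(outputName("foo.bc", 0, 1) == "foo.ll");   // single module
  assert(outputName("foo.bc", 2, 3) == "foo.2.ll"); // third of three modules
  return 0;
}
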
diff --git a/contrib/llvm-project/llvm/tools/llvm-dwarfdump/Statistics.cpp b/contrib/llvm-project/llvm/tools/llvm-dwarfdump/Statistics.cpp
index c29ad783a9e6..5bef4d5148ca 100644
--- a/contrib/llvm-project/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -60,28 +60,26 @@ struct PerFunctionStats {
struct GlobalStats {
/// Total number of PC range bytes covered by DW_AT_locations.
unsigned ScopeBytesCovered = 0;
- /// Total number of PC range bytes in each variable's enclosing scope,
- /// starting from the first definition of the variable.
- unsigned ScopeBytesFromFirstDefinition = 0;
+ /// Total number of PC range bytes in each variable's enclosing scope.
+ unsigned ScopeBytes = 0;
/// Total number of PC range bytes covered by DW_AT_locations with
/// the debug entry values (DW_OP_entry_value).
unsigned ScopeEntryValueBytesCovered = 0;
/// Total number of PC range bytes covered by DW_AT_locations of
/// formal parameters.
unsigned ParamScopeBytesCovered = 0;
- /// Total number of PC range bytes in each variable's enclosing scope,
- /// starting from the first definition of the variable (only for parameters).
- unsigned ParamScopeBytesFromFirstDefinition = 0;
+ /// Total number of PC range bytes in each variable's enclosing scope
+ /// (only for parameters).
+ unsigned ParamScopeBytes = 0;
/// Total number of PC range bytes covered by DW_AT_locations with
/// the debug entry values (DW_OP_entry_value) (only for parameters).
unsigned ParamScopeEntryValueBytesCovered = 0;
/// Total number of PC range bytes covered by DW_AT_locations (only for local
/// variables).
unsigned VarScopeBytesCovered = 0;
- /// Total number of PC range bytes in each variable's enclosing scope,
- /// starting from the first definition of the variable (only for local
- /// variables).
- unsigned VarScopeBytesFromFirstDefinition = 0;
+ /// Total number of PC range bytes in each variable's enclosing scope
+ /// (only for local variables).
+ unsigned VarScopeBytes = 0;
/// Total number of PC range bytes covered by DW_AT_locations with
/// the debug entry values (DW_OP_entry_value) (only for local variables).
unsigned VarScopeEntryValueBytesCovered = 0;
@@ -133,19 +131,6 @@ struct LocationStats {
unsigned NumVar = 0;
};
-/// Extract the low pc from a Die.
-static uint64_t getLowPC(DWARFDie Die) {
- auto RangesOrError = Die.getAddressRanges();
- DWARFAddressRangesVector Ranges;
- if (RangesOrError)
- Ranges = RangesOrError.get();
- else
- llvm::consumeError(RangesOrError.takeError());
- if (Ranges.size())
- return Ranges[0].LowPC;
- return dwarf::toAddress(Die.find(dwarf::DW_AT_low_pc), 0);
-}
-
/// Collect debug location statistics for one DIE.
static void collectLocStats(uint64_t BytesCovered, uint64_t BytesInScope,
std::vector<unsigned> &VarParamLocStats,
@@ -153,18 +138,16 @@ static void collectLocStats(uint64_t BytesCovered, uint64_t BytesInScope,
std::vector<unsigned> &VarLocStats, bool IsParam,
bool IsLocalVar) {
auto getCoverageBucket = [BytesCovered, BytesInScope]() -> unsigned {
- unsigned LocBucket = 100 * (double)BytesCovered / BytesInScope;
- if (LocBucket == 0) {
- // No debug location at all for the variable.
+ // No debug location at all for the variable.
+ if (BytesCovered == 0)
return 0;
- } else if (LocBucket == 100 || BytesCovered > BytesInScope) {
- // Fully covered variable within its scope.
+ // Fully covered variable within its scope.
+ if (BytesCovered >= BytesInScope)
return NumOfCoverageCategories - 1;
- } else {
- // Get covered range (e.g. 20%-29%).
- LocBucket /= 10;
- return LocBucket + 1;
- }
+ // Get covered range (e.g. 20%-29%).
+ unsigned LocBucket = 100 * (double)BytesCovered / BytesInScope;
+ LocBucket /= 10;
+ return LocBucket + 1;
};
unsigned CoverageBucket = getCoverageBucket();
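
The rewritten lambda above tests the empty and fully-covered cases before computing a decile, so the ratio is never taken when BytesInScope could be zero or when coverage exceeds the scope. A standalone sketch with worked values, assuming the 12 coverage categories (0%, (0%,10%), nine deciles, 100%) that llvm-dwarfdump appears to use:

#include <cassert>
#include <cstdint>

constexpr unsigned NumOfCoverageCategories = 12; // assumed category count

unsigned coverageBucket(uint64_t Covered, uint64_t InScope) {
  if (Covered == 0)
    return 0;                           // no debug location at all
  if (Covered >= InScope)
    return NumOfCoverageCategories - 1; // fully (or over-) covered
  unsigned Bucket = 100 * (double)Covered / InScope;
  return Bucket / 10 + 1;               // e.g. 23% lands in [20%,30%)
}

int main() {
  assert(coverageBucket(0, 100) == 0);  // 0% bucket
  assert(coverageBucket(23, 100) == 3); // [20%,30%) bucket
  assert(coverageBucket(100, 100) == NumOfCoverageCategories - 1); // 100%
  return 0;
}
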
@@ -176,9 +159,9 @@ static void collectLocStats(uint64_t BytesCovered, uint64_t BytesInScope,
}
/// Collect debug info quality metrics for one DIE.
-static void collectStatsForDie(DWARFDie Die, uint64_t UnitLowPC, std::string FnPrefix,
- std::string VarPrefix, uint64_t ScopeLowPC,
- uint64_t BytesInScope, uint32_t InlineDepth,
+static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
+ std::string VarPrefix, uint64_t BytesInScope,
+ uint32_t InlineDepth,
StringMap<PerFunctionStats> &FnStatMap,
GlobalStats &GlobalStats,
LocationStats &LocStats) {
@@ -188,7 +171,6 @@ static void collectStatsForDie(DWARFDie Die, uint64_t UnitLowPC, std::string FnP
bool IsArtificial = false;
uint64_t BytesCovered = 0;
uint64_t BytesEntryValuesCovered = 0;
- uint64_t OffsetToFirstDefinition = 0;
auto &FnStats = FnStatMap[FnPrefix];
bool IsParam = Die.getTag() == dwarf::DW_TAG_formal_parameter;
bool IsLocalVar = Die.getTag() == dwarf::DW_TAG_variable;
@@ -243,34 +225,25 @@ static void collectStatsForDie(DWARFDie Die, uint64_t UnitLowPC, std::string FnP
return;
}
// Handle variables and function arguments.
- auto FormValue = Die.find(dwarf::DW_AT_location);
- HasLoc = FormValue.hasValue();
- if (HasLoc) {
+ Expected<std::vector<DWARFLocationExpression>> Loc =
+ Die.getLocations(dwarf::DW_AT_location);
+ if (!Loc) {
+ consumeError(Loc.takeError());
+ } else {
+ HasLoc = true;
// Get PC coverage.
- if (auto DebugLocOffset = FormValue->getAsSectionOffset()) {
- auto *DebugLoc = Die.getDwarfUnit()->getContext().getDebugLoc();
- if (auto List = DebugLoc->getLocationListAtOffset(*DebugLocOffset)) {
- for (auto Entry : List->Entries) {
- uint64_t BytesEntryCovered = Entry.End - Entry.Begin;
- BytesCovered += BytesEntryCovered;
- if (IsEntryValue(Entry.Loc))
- BytesEntryValuesCovered += BytesEntryCovered;
- }
- if (List->Entries.size()) {
- uint64_t FirstDef = List->Entries[0].Begin;
- uint64_t UnitOfs = UnitLowPC;
- // Ranges sometimes start before the lexical scope.
- if (UnitOfs + FirstDef >= ScopeLowPC)
- OffsetToFirstDefinition = UnitOfs + FirstDef - ScopeLowPC;
- // Or even after it. Count that as a failure.
- if (OffsetToFirstDefinition > BytesInScope)
- OffsetToFirstDefinition = 0;
- }
- }
- assert(BytesInScope);
- } else {
+ auto Default = find_if(
+ *Loc, [](const DWARFLocationExpression &L) { return !L.Range; });
+ if (Default != Loc->end()) {
// Assume the entire range is covered by a single location.
BytesCovered = BytesInScope;
+ } else {
+ for (auto Entry : *Loc) {
+ uint64_t BytesEntryCovered = Entry.Range->HighPC - Entry.Range->LowPC;
+ BytesCovered += BytesEntryCovered;
+ if (IsEntryValue(Entry.Expr))
+ BytesEntryValuesCovered += BytesEntryCovered;
+ }
}
}
}
@@ -303,25 +276,21 @@ static void collectStatsForDie(DWARFDie Die, uint64_t UnitLowPC, std::string FnP
FnStats.VarsInFunction.insert(VarPrefix + VarName);
if (BytesInScope) {
FnStats.TotalVarWithLoc += (unsigned)HasLoc;
- // Adjust for the fact the variables often start their lifetime in the
- // middle of the scope.
- BytesInScope -= OffsetToFirstDefinition;
// Turns out we have a lot of ranges that extend past the lexical scope.
GlobalStats.ScopeBytesCovered += std::min(BytesInScope, BytesCovered);
- GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope;
+ GlobalStats.ScopeBytes += BytesInScope;
GlobalStats.ScopeEntryValueBytesCovered += BytesEntryValuesCovered;
if (IsParam) {
GlobalStats.ParamScopeBytesCovered +=
std::min(BytesInScope, BytesCovered);
- GlobalStats.ParamScopeBytesFromFirstDefinition += BytesInScope;
+ GlobalStats.ParamScopeBytes += BytesInScope;
GlobalStats.ParamScopeEntryValueBytesCovered += BytesEntryValuesCovered;
} else if (IsLocalVar) {
GlobalStats.VarScopeBytesCovered += std::min(BytesInScope, BytesCovered);
- GlobalStats.VarScopeBytesFromFirstDefinition += BytesInScope;
+ GlobalStats.VarScopeBytes += BytesInScope;
GlobalStats.VarScopeEntryValueBytesCovered += BytesEntryValuesCovered;
}
- assert(GlobalStats.ScopeBytesCovered <=
- GlobalStats.ScopeBytesFromFirstDefinition);
+ assert(GlobalStats.ScopeBytesCovered <= GlobalStats.ScopeBytes);
} else if (Die.getTag() == dwarf::DW_TAG_member) {
FnStats.ConstantMembers++;
} else {
@@ -349,9 +318,9 @@ static void collectStatsForDie(DWARFDie Die, uint64_t UnitLowPC, std::string FnP
}
/// Recursively collect debug info quality metrics.
-static void collectStatsRecursive(DWARFDie Die, uint64_t UnitLowPC, std::string FnPrefix,
- std::string VarPrefix, uint64_t ScopeLowPC,
- uint64_t BytesInScope, uint32_t InlineDepth,
+static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
+ std::string VarPrefix, uint64_t BytesInScope,
+ uint32_t InlineDepth,
StringMap<PerFunctionStats> &FnStatMap,
GlobalStats &GlobalStats,
LocationStats &LocStats) {
@@ -386,7 +355,6 @@ static void collectStatsRecursive(DWARFDie Die, uint64_t UnitLowPC, std::string
uint64_t BytesInThisScope = 0;
for (auto Range : Ranges)
BytesInThisScope += Range.HighPC - Range.LowPC;
- ScopeLowPC = getLowPC(Die);
// Count the function.
if (!IsBlock) {
@@ -422,8 +390,8 @@ static void collectStatsRecursive(DWARFDie Die, uint64_t UnitLowPC, std::string
}
} else {
// Not a scope, visit the Die itself. It could be a variable.
- collectStatsForDie(Die, UnitLowPC, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope,
- InlineDepth, FnStatMap, GlobalStats, LocStats);
+ collectStatsForDie(Die, FnPrefix, VarPrefix, BytesInScope, InlineDepth,
+ FnStatMap, GlobalStats, LocStats);
}
// Set InlineDepth correctly for child recursion
@@ -440,9 +408,8 @@ static void collectStatsRecursive(DWARFDie Die, uint64_t UnitLowPC, std::string
if (Child.getTag() == dwarf::DW_TAG_lexical_block)
ChildVarPrefix += toHex(LexicalBlockIndex++) + '.';
- collectStatsRecursive(Child, UnitLowPC, FnPrefix, ChildVarPrefix, ScopeLowPC,
- BytesInScope, InlineDepth, FnStatMap, GlobalStats,
- LocStats);
+ collectStatsRecursive(Child, FnPrefix, ChildVarPrefix, BytesInScope,
+ InlineDepth, FnStatMap, GlobalStats, LocStats);
Child = Child.getSibling();
}
}
@@ -461,16 +428,16 @@ static void printLocationStats(raw_ostream &OS,
<< LocationStats[0];
LLVM_DEBUG(llvm::dbgs() << Key << " with 0% of its scope covered: "
<< LocationStats[0] << '\n');
- OS << ",\"" << Key << " with 1-9% of its scope covered\":"
+ OS << ",\"" << Key << " with (0%,10%) of its scope covered\":"
<< LocationStats[1];
- LLVM_DEBUG(llvm::dbgs() << Key << " with 1-9% of its scope covered: "
+ LLVM_DEBUG(llvm::dbgs() << Key << " with (0%,10%) of its scope covered: "
<< LocationStats[1] << '\n');
for (unsigned i = 2; i < NumOfCoverageCategories - 1; ++i) {
- OS << ",\"" << Key << " with " << (i - 1) * 10 << "-" << i * 10 - 1
- << "% of its scope covered\":" << LocationStats[i];
+ OS << ",\"" << Key << " with [" << (i - 1) * 10 << "%," << i * 10
+ << "%) of its scope covered\":" << LocationStats[i];
LLVM_DEBUG(llvm::dbgs()
- << Key << " with " << (i - 1) * 10 << "-" << i * 10 - 1
- << "% of its scope covered: " << LocationStats[i]);
+ << Key << " with [" << (i - 1) * 10 << "%," << i * 10
+ << "%) of its scope covered: " << LocationStats[i]);
}
OS << ",\"" << Key << " with 100% of its scope covered\":"
<< LocationStats[NumOfCoverageCategories - 1];
@@ -495,13 +462,13 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
StringMap<PerFunctionStats> Statistics;
for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units())
if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false))
- collectStatsRecursive(CUDie, getLowPC(CUDie), "/", "g", 0, 0, 0,
- Statistics, GlobalStats, LocStats);
+ collectStatsRecursive(CUDie, "/", "g", 0, 0, Statistics, GlobalStats,
+ LocStats);
/// The version number should be increased every time the algorithm is changed
/// (including bug fixes). New metrics may be added without increasing the
/// version.
- unsigned Version = 3;
+ unsigned Version = 4;
unsigned VarParamTotal = 0;
unsigned VarParamUnique = 0;
unsigned VarParamWithLoc = 0;
@@ -561,19 +528,17 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
printDatum(OS, "call site entries", GlobalStats.CallSiteEntries);
printDatum(OS, "call site DIEs", GlobalStats.CallSiteDIEs);
printDatum(OS, "call site parameter DIEs", GlobalStats.CallSiteParamDIEs);
- printDatum(OS, "scope bytes total",
- GlobalStats.ScopeBytesFromFirstDefinition);
+ printDatum(OS, "scope bytes total", GlobalStats.ScopeBytes);
printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered);
printDatum(OS, "entry value scope bytes covered",
GlobalStats.ScopeEntryValueBytesCovered);
printDatum(OS, "formal params scope bytes total",
- GlobalStats.ParamScopeBytesFromFirstDefinition);
+ GlobalStats.ParamScopeBytes);
printDatum(OS, "formal params scope bytes covered",
GlobalStats.ParamScopeBytesCovered);
printDatum(OS, "formal params entry value scope bytes covered",
GlobalStats.ParamScopeEntryValueBytesCovered);
- printDatum(OS, "vars scope bytes total",
- GlobalStats.VarScopeBytesFromFirstDefinition);
+ printDatum(OS, "vars scope bytes total", GlobalStats.VarScopeBytes);
printDatum(OS, "vars scope bytes covered", GlobalStats.VarScopeBytesCovered);
printDatum(OS, "vars entry value scope bytes covered",
GlobalStats.VarScopeEntryValueBytesCovered);
@@ -608,7 +573,7 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
<< "%\n";
llvm::dbgs() << "PC Ranges covered: "
<< (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) /
- GlobalStats.ScopeBytesFromFirstDefinition)
+ GlobalStats.ScopeBytes)
<< "%\n");
return true;
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index e20f6041f98d..374bdd482a8d 100644
--- a/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -271,7 +271,7 @@ static bool filterArch(ObjectFile &Obj) {
return true;
// Match as name.
- if (MachO->getArchTriple().getArch() == Triple(Arch).getArch())
+ if (MachO->getArchTriple().getArchName() == Triple(Arch).getArchName())
return true;
}
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-lto2/llvm-lto2.cpp b/contrib/llvm-project/llvm/tools/llvm-lto2/llvm-lto2.cpp
index 5e3b3dcb6c31..67a677dd45fb 100644
--- a/contrib/llvm-project/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -270,6 +270,8 @@ static int run(int argc, char **argv) {
Conf.OverrideTriple = OverrideTriple;
Conf.DefaultTriple = DefaultTriple;
Conf.StatsFile = StatsFile;
+ Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
+ Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
ThinBackend Backend;
if (ThinLTODistributedIndexes)
diff --git a/contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.cpp b/contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.cpp
index 1ddbddfa1846..e286c0fff6e1 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.cpp
@@ -46,8 +46,7 @@ static bool PrintInsts(const MCDisassembler &DisAsm,
MCInst Inst;
MCDisassembler::DecodeStatus S;
- S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index,
- /*REMOVE*/ nulls(), nulls());
+ S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls());
switch (S) {
case MCDisassembler::Fail:
SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
@@ -133,7 +132,8 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
int Disassembler::disassemble(const Target &T, const std::string &Triple,
MCSubtargetInfo &STI, MCStreamer &Streamer,
MemoryBuffer &Buffer, SourceMgr &SM,
- MCContext &Ctx, raw_ostream &Out) {
+ MCContext &Ctx, raw_ostream &Out,
+ const MCTargetOptions &MCOptions) {
std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
if (!MRI) {
@@ -141,7 +141,8 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
return -1;
}
- std::unique_ptr<const MCAsmInfo> MAI(T.createMCAsmInfo(*MRI, Triple));
+ std::unique_ptr<const MCAsmInfo> MAI(
+ T.createMCAsmInfo(*MRI, Triple, MCOptions));
if (!MAI) {
errs() << "error: no assembly info for target " << Triple << "\n";
return -1;
diff --git a/contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.h b/contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.h
index dcd8c279c91a..a1603e584980 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.h
+++ b/contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.h
@@ -25,13 +25,14 @@ class SourceMgr;
class MCContext;
class MCSubtargetInfo;
class MCStreamer;
+class MCTargetOptions;
class Disassembler {
public:
static int disassemble(const Target &T, const std::string &Triple,
MCSubtargetInfo &STI, MCStreamer &Streamer,
MemoryBuffer &Buffer, SourceMgr &SM, MCContext &Ctx,
- raw_ostream &Out);
+ raw_ostream &Out, const MCTargetOptions &MCOptions);
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp b/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp
index c23740a3094d..6aa347d98be2 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -351,7 +351,8 @@ int main(int argc, char **argv) {
std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
assert(MRI && "Unable to create target register info!");
- std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
+ std::unique_ptr<MCAsmInfo> MAI(
+ TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
assert(MAI && "Unable to create target asm info!");
MAI->setRelaxELFRelocations(RelaxELFRel);
@@ -518,7 +519,7 @@ int main(int argc, char **argv) {
}
if (disassemble)
Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, *Buffer,
- SrcMgr, Ctx, Out->os());
+ SrcMgr, Ctx, Out->os(), MCOptions);
// Keep output if no errors.
if (Res == 0) {
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index feff0cd6d524..99deed6eae97 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -293,7 +293,7 @@ static void printInstruction(formatted_raw_ostream &FOS,
FOS.PadToColumn(14);
- MCIP.printInst(&MCI, InstrStream, "", STI);
+ MCIP.printInst(&MCI, 0, "", STI, InstrStream);
InstrStream.flush();
if (UseDifferentColor)
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
index 557b8ba17b17..a1c0cf208d35 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
@@ -37,7 +37,8 @@ void DispatchStatistics::printDispatchHistogram(raw_ostream &OS) const {
TempStream << "\n\nDispatch Logic - "
<< "number of cycles where we saw N micro opcodes dispatched:\n";
TempStream << "[# dispatched], [# cycles]\n";
- for (const std::pair<unsigned, unsigned> &Entry : DispatchGroupSizePerCycle) {
+ for (const std::pair<const unsigned, unsigned> &Entry :
+ DispatchGroupSizePerCycle) {
double Percentage = ((double)Entry.second / NumCycles) * 100.0;
TempStream << " " << Entry.first << ", " << Entry.second
<< " (" << format("%.1f", floor((Percentage * 10) + 0.5) / 10)
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
index a6f9153b4945..fbe9d9021554 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -95,7 +95,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
FOS.flush();
}
- MCIP.printInst(&Inst, InstrStream, "", STI);
+ MCIP.printInst(&Inst, 0, "", STI, InstrStream);
InstrStream.flush();
// Consume any tabs or spaces at the beginning of the string.
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
index 38a2478cf4fe..bdb9dc21247b 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
@@ -163,7 +163,7 @@ void ResourcePressureView::printResourcePressurePerInst(raw_ostream &OS) const {
printResourcePressure(FOS, Usage / Executions, (J + 1) * 7);
}
- MCIP.printInst(&MCI, InstrStream, "", STI);
+ MCIP.printInst(&MCI, 0, "", STI, InstrStream);
InstrStream.flush();
StringRef Str(Instruction);
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
index cb4fbae78039..61c115b27be1 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
@@ -59,7 +59,7 @@ void RetireControlUnitStatistics::printView(raw_ostream &OS) const {
<< "number of cycles where we saw N instructions retired:\n";
TempStream << "[# retired], [# cycles]\n";
- for (const std::pair<unsigned, unsigned> &Entry : RetiredPerCycle) {
+ for (const std::pair<const unsigned, unsigned> &Entry : RetiredPerCycle) {
TempStream << " " << Entry.first;
if (Entry.first < 10)
TempStream << ", ";
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
index bd0ba350ab68..7a341d4c2079 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
@@ -107,7 +107,7 @@ void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const {
bool HasColors = OS.has_colors();
const auto It =
std::max_element(IssueWidthPerCycle.begin(), IssueWidthPerCycle.end());
- for (const std::pair<unsigned, unsigned> &Entry : IssueWidthPerCycle) {
+ for (const std::pair<const unsigned, unsigned> &Entry : IssueWidthPerCycle) {
unsigned NumIssued = Entry.first;
if (NumIssued == It->first && HasColors)
OS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp
index ef5550048f4c..f0e75f7b13ae 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp
@@ -54,7 +54,7 @@ void SummaryView::onEvent(const HWInstructionEvent &Event) {
const Instruction &Inst = *Event.IR.getInstruction();
const InstrDesc &Desc = Inst.getDesc();
NumMicroOps += Desc.NumMicroOps;
- for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
+ for (const std::pair<uint64_t, ResourceUsage> &RU : Desc.Resources) {
if (RU.second.size()) {
unsigned ProcResID = ResIdx2ProcResID[getResourceStateIndex(RU.first)];
ProcResourceUsage[ProcResID] += RU.second.size();
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp
index 1e7caa297ac6..cf5b48e811b8 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -192,7 +192,7 @@ void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
for (const MCInst &Inst : Source) {
printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions);
// Append the instruction info at the end of the line.
- MCIP.printInst(&Inst, InstrStream, "", STI);
+ MCIP.printInst(&Inst, 0, "", STI, InstrStream);
InstrStream.flush();
// Consume any tabs or spaces at the beginning of the string.
@@ -307,7 +307,7 @@ void TimelineView::printTimeline(raw_ostream &OS) const {
unsigned SourceIndex = IID % Source.size();
printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
// Append the instruction info at the end of the line.
- MCIP.printInst(&Inst, InstrStream, "", STI);
+ MCIP.printInst(&Inst, 0, "", STI, InstrStream);
InstrStream.flush();
// Consume any tabs or spaces at the beginning of the string.
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp
index 99c45eebdd88..fff5906bb59b 100644
--- a/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -353,7 +353,9 @@ int main(int argc, char **argv) {
std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
assert(MRI && "Unable to create target register info!");
- std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
+ MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
+ std::unique_ptr<MCAsmInfo> MAI(
+ TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
assert(MAI && "Unable to create target asm info!");
MCObjectFileInfo MOFI;
@@ -472,7 +474,7 @@ int main(int argc, char **argv) {
std::string InstructionStr;
raw_string_ostream SS(InstructionStr);
WithColor::error() << IE.Message << '\n';
- IP->printInst(&IE.Inst, SS, "", *STI);
+ IP->printInst(&IE.Inst, 0, "", *STI, SS);
SS.flush();
WithColor::note()
<< "instruction: " << InstructionStr << '\n';
diff --git a/contrib/llvm-project/llvm/tools/llvm-nm/llvm-nm.cpp b/contrib/llvm-project/llvm/tools/llvm-nm/llvm-nm.cpp
index ee55722dc139..107d62b1f2b9 100644
--- a/contrib/llvm-project/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -1133,15 +1133,18 @@ static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I,
Ret = getSymbolNMTypeChar(*MachO, I);
else if (WasmObjectFile *Wasm = dyn_cast<WasmObjectFile>(&Obj))
Ret = getSymbolNMTypeChar(*Wasm, I);
- else
- Ret = getSymbolNMTypeChar(cast<ELFObjectFileBase>(Obj), I);
+ else if (ELFObjectFileBase *ELF = dyn_cast<ELFObjectFileBase>(&Obj)) {
+ if (ELFSymbolRef(*I).getELFType() == ELF::STT_GNU_IFUNC)
+ return 'i';
+ Ret = getSymbolNMTypeChar(*ELF, I);
+ if (ELFSymbolRef(*I).getBinding() == ELF::STB_GNU_UNIQUE)
+ return Ret;
+ } else
+ llvm_unreachable("unknown binary format");
if (!(Symflags & object::SymbolRef::SF_Global))
return Ret;
- if (Obj.isELF() && ELFSymbolRef(*I).getBinding() == ELF::STB_GNU_UNIQUE)
- return Ret;
-
return toupper(Ret);
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
index 2a8d816e6f3c..b172fae527eb 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -131,6 +131,12 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
if (Error E = Obj.markSymbols())
return E;
+ for (Symbol &Sym : Obj.getMutableSymbols()) {
+ auto I = Config.SymbolsToRename.find(Sym.Name);
+ if (I != Config.SymbolsToRename.end())
+ Sym.Name = I->getValue();
+ }
+
// Actually do removals of symbols.
Obj.removeSymbols([&](const Symbol &Sym) {
// For StripAll, all relocations have been stripped and we remove all
@@ -200,10 +206,9 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
!Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() ||
!Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() ||
!Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() ||
- !Config.SymbolsToRename.empty() || Config.ExtractDWO ||
- Config.KeepFileSymbols || Config.LocalizeHidden || Config.PreserveDates ||
- Config.StripDWO || Config.StripNonAlloc || Config.StripSections ||
- Config.Weaken || Config.DecompressDebugSections ||
+ Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden ||
+ Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc ||
+ Config.StripSections || Config.Weaken || Config.DecompressDebugSections ||
Config.DiscardMode == DiscardType::Locals ||
!Config.SymbolsToAdd.empty() || Config.EntryExpr) {
return createStringError(llvm::errc::invalid_argument,
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.h
index 21475b068629..78f8da00b8cd 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Object.h
@@ -25,11 +25,11 @@ namespace objcopy {
namespace coff {
struct Relocation {
- Relocation() {}
+ Relocation() = default;
Relocation(const object::coff_relocation& R) : Reloc(R) {}
object::coff_relocation Reloc;
- size_t Target;
+ size_t Target = 0;
StringRef TargetName; // Used for diagnostics only
};
@@ -124,7 +124,7 @@ struct Object {
ArrayRef<Section> getSections() const { return Sections; }
// This allows mutating individual Sections, but not mutating the list
- // of symbols itself.
+ // of sections itself.
iterator_range<std::vector<Section>::iterator> getMutableSections() {
return make_range(Sections.begin(), Sections.end());
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Reader.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Reader.cpp
index 2fcec0057c03..7be9cce2be3d 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Reader.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Reader.cpp
@@ -63,6 +63,7 @@ Error COFFReader::readSections(Object &Obj) const {
Sections.push_back(Section());
Section &S = Sections.back();
S.Header = *Sec;
+ S.Header.Characteristics &= ~COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
ArrayRef<uint8_t> Contents;
if (Error E = COFFObj.getSectionContents(Sec, Contents))
return E;
@@ -74,9 +75,6 @@ Error COFFReader::readSections(Object &Obj) const {
S.Name = *NameOrErr;
else
return NameOrErr.takeError();
- if (Sec->hasExtendedRelocations())
- return createStringError(object_error::parse_failed,
- "extended relocations not supported yet");
}
Obj.addSections(Sections);
return Error::success();
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.cpp
index 6db37435fd96..e35e0474a36d 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.cpp
@@ -97,9 +97,16 @@ void COFFWriter::layoutSections() {
S.Header.PointerToRawData = FileSize;
FileSize += S.Header.SizeOfRawData; // For executables, this is already
// aligned to FileAlignment.
- S.Header.NumberOfRelocations = S.Relocs.size();
- S.Header.PointerToRelocations =
- S.Header.NumberOfRelocations > 0 ? FileSize : 0;
+ if (S.Relocs.size() >= 0xffff) {
+ S.Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
+ S.Header.NumberOfRelocations = 0xffff;
+ S.Header.PointerToRelocations = FileSize;
+ FileSize += sizeof(coff_relocation);
+ } else {
+ S.Header.NumberOfRelocations = S.Relocs.size();
+ S.Header.PointerToRelocations = S.Relocs.size() ? FileSize : 0;
+ }
+
FileSize += S.Relocs.size() * sizeof(coff_relocation);
FileSize = alignTo(FileSize, FileAlignment);
@@ -307,6 +314,15 @@ void COFFWriter::writeSections() {
S.Header.SizeOfRawData - Contents.size());
Ptr += S.Header.SizeOfRawData;
+
+ if (S.Relocs.size() >= 0xffff) {
+ object::coff_relocation R;
+ R.VirtualAddress = S.Relocs.size() + 1;
+ R.SymbolTableIndex = 0;
+ R.Type = 0;
+ memcpy(Ptr, &R, sizeof(R));
+ Ptr += sizeof(R);
+ }
for (const auto &R : S.Relocs) {
memcpy(Ptr, &R.Reloc, sizeof(R.Reloc));
Ptr += sizeof(R.Reloc);
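
The writer changes above implement COFF's relocation-overflow encoding: the 16-bit NumberOfRelocations field cannot represent counts of 0xffff or more, so the IMAGE_SCN_LNK_NRELOC_OVFL characteristic is set, the field saturates at 0xffff, and an extra leading relocation record carries the real count, including itself, in its VirtualAddress. A standalone sketch of that encoding:

#include <cassert>
#include <cstdint>
#include <vector>

struct Reloc {
  uint32_t VirtualAddress = 0;
  uint32_t SymbolTableIndex = 0;
  uint16_t Type = 0;
};

// Encode a section's relocations, saturating the 16-bit count field and
// prepending the real count as an extra record when it does not fit.
std::vector<Reloc> encodeRelocs(const std::vector<Reloc> &Relocs,
                                uint16_t &NumberOfRelocations,
                                bool &OverflowFlag) {
  std::vector<Reloc> Out;
  OverflowFlag = Relocs.size() >= 0xffff;
  if (OverflowFlag) {
    NumberOfRelocations = 0xffff;             // saturated on-disk count
    Reloc Count;
    Count.VirtualAddress = Relocs.size() + 1; // true count, self included
    Out.push_back(Count);
  } else {
    NumberOfRelocations = (uint16_t)Relocs.size();
  }
  Out.insert(Out.end(), Relocs.begin(), Relocs.end());
  return Out;
}

int main() {
  uint16_t N;
  bool Ovfl;
  std::vector<Reloc> Small(10), Big(0x10000);
  assert(encodeRelocs(Small, N, Ovfl).size() == 10 && N == 10 && !Ovfl);
  auto Enc = encodeRelocs(Big, N, Ovfl);
  assert(Ovfl && N == 0xffff && Enc[0].VirtualAddress == 0x10001);
  return 0;
}
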
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/CommonOpts.td b/contrib/llvm-project/llvm/tools/llvm-objcopy/CommonOpts.td
index e8c092b44431..6481d1d1df05 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/CommonOpts.td
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/CommonOpts.td
@@ -40,7 +40,8 @@ def p : Flag<["-"], "p">,
def strip_all : Flag<["--"], "strip-all">,
HelpText<"Remove non-allocated sections outside segments. "
- ".gnu.warning* sections are not removed">;
+ ".gnu.warning* and .ARM.attribute sections are not "
+ "removed">;
def strip_all_gnu
: Flag<["--"], "strip-all-gnu">,
@@ -85,8 +86,9 @@ def keep_file_symbols : Flag<["--"], "keep-file-symbols">,
def only_keep_debug
: Flag<["--"], "only-keep-debug">,
- HelpText<"Clear sections that would not be stripped by --strip-debug. "
- "Currently only implemented for COFF.">;
+ HelpText<
+ "Produce a debug file as the output that only preserves contents of "
+ "sections useful for debugging purposes">;
def discard_locals : Flag<["--"], "discard-locals">,
HelpText<"Remove compiler-generated local symbols, (e.g. "
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.cpp
index d707bec20c49..73ed00b5cb2a 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.cpp
@@ -63,6 +63,44 @@ public:
ObjcopyOptTable() : OptTable(ObjcopyInfoTable) {}
};
+enum InstallNameToolID {
+ INSTALL_NAME_TOOL_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ INSTALL_NAME_TOOL_##ID,
+#include "InstallNameToolOpts.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) \
+ const char *const INSTALL_NAME_TOOL_##NAME[] = VALUE;
+#include "InstallNameToolOpts.inc"
+#undef PREFIX
+
+static const opt::OptTable::Info InstallNameToolInfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ {INSTALL_NAME_TOOL_##PREFIX, \
+ NAME, \
+ HELPTEXT, \
+ METAVAR, \
+ INSTALL_NAME_TOOL_##ID, \
+ opt::Option::KIND##Class, \
+ PARAM, \
+ FLAGS, \
+ INSTALL_NAME_TOOL_##GROUP, \
+ INSTALL_NAME_TOOL_##ALIAS, \
+ ALIASARGS, \
+ VALUES},
+#include "InstallNameToolOpts.inc"
+#undef OPTION
+};
+
+class InstallNameToolOptTable : public opt::OptTable {
+public:
+ InstallNameToolOptTable() : OptTable(InstallNameToolInfoTable) {}
+};
+
enum StripID {
STRIP_INVALID = 0, // This is not an option ID.
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
@@ -752,6 +790,57 @@ parseObjcopyOptions(ArrayRef<const char *> ArgsArr,
return std::move(DC);
}
+// ParseInstallNameToolOptions returns the config and sets the input arguments.
+// If a help flag is set then ParseInstallNameToolOptions will print the help
+// message and exit.
+Expected<DriverConfig>
+parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
+ DriverConfig DC;
+ CopyConfig Config;
+ InstallNameToolOptTable T;
+ unsigned MissingArgumentIndex, MissingArgumentCount;
+ llvm::opt::InputArgList InputArgs =
+ T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
+
+ if (InputArgs.size() == 0) {
+ printHelp(T, errs(), "llvm-install-name-tool");
+ exit(1);
+ }
+
+ if (InputArgs.hasArg(INSTALL_NAME_TOOL_help)) {
+ printHelp(T, outs(), "llvm-install-name-tool");
+ exit(0);
+ }
+
+ if (InputArgs.hasArg(INSTALL_NAME_TOOL_version)) {
+ outs() << "llvm-install-name-tool, compatible with cctools "
+ "install_name_tool\n";
+ cl::PrintVersionMessage();
+ exit(0);
+ }
+
+ for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_add_rpath))
+ Config.RPathToAdd.push_back(Arg->getValue());
+
+ SmallVector<StringRef, 2> Positional;
+ for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_UNKNOWN))
+ return createStringError(errc::invalid_argument, "unknown argument '%s'",
+ Arg->getAsString(InputArgs).c_str());
+ for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_INPUT))
+ Positional.push_back(Arg->getValue());
+ if (Positional.empty())
+ return createStringError(errc::invalid_argument, "no input file specified");
+ if (Positional.size() > 1)
+ return createStringError(
+ errc::invalid_argument,
+ "llvm-install-name-tool expects a single input file");
+ Config.InputFilename = Positional[0];
+ Config.OutputFilename = Positional[0];
+
+ DC.CopyConfigs.push_back(std::move(Config));
+ return std::move(DC);
+}
+
// ParseStripOptions returns the config and sets the input arguments. If a
// help flag is set then ParseStripOptions will print the help message and
// exit.
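
The llvm-install-name-tool tables added above follow LLVM's usual OptTable X-macro idiom: the TableGen-generated InstallNameToolOpts.inc is included several times under different definitions of OPTION/PREFIX to stamp out an ID enum and the option-info array. A minimal sketch of the idiom with a hand-written stand-in for the generated include (MY_OPTS and its option names are illustrative, not the real generated columns):

#include <cstdio>

// Hand-written stand-in for a TableGen-generated .inc file.
#define MY_OPTS     \
  OPTION(help)      \
  OPTION(add_rpath) \
  OPTION(version)

// First expansion: build the ID enum.
enum MyOptID {
  OPT_INVALID = 0, // not an option ID
#define OPTION(NAME) OPT_##NAME,
  MY_OPTS
#undef OPTION
};

// Second expansion: build a parallel table indexed by the same IDs.
static const char *const MyOptNames[] = {
    "<invalid>",
#define OPTION(NAME) #NAME,
    MY_OPTS
#undef OPTION
};

int main() {
  std::printf("%d options, e.g. --%s\n", int(OPT_version), MyOptNames[OPT_help]);
}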
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.h
index 55a55d3a2bc2..c262934b4a41 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/CopyConfig.h
@@ -150,9 +150,9 @@ struct CopyConfig {
// Main input/output options
StringRef InputFilename;
- FileFormat InputFormat;
+ FileFormat InputFormat = FileFormat::Unspecified;
StringRef OutputFilename;
- FileFormat OutputFormat;
+ FileFormat OutputFormat = FileFormat::Unspecified;
// Only applicable when --output-format!=binary (e.g. elf64-x86-64).
Optional<MachineInfo> OutputArch;
@@ -175,6 +175,7 @@ struct CopyConfig {
std::vector<StringRef> AddSection;
std::vector<StringRef> DumpSection;
std::vector<StringRef> SymbolsToAdd;
+ std::vector<StringRef> RPathToAdd;
// Section matchers
NameMatcher KeepSection;
@@ -251,6 +252,12 @@ Expected<DriverConfig>
parseObjcopyOptions(ArrayRef<const char *> ArgsArr,
llvm::function_ref<Error(Error)> ErrorCallback);
+// ParseInstallNameToolOptions returns the config and sets the input arguments.
+// If a help flag is set then ParseInstallNameToolOptions will print the help
+// message and exit.
+Expected<DriverConfig>
+parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr);
+
// ParseStripOptions returns the config and sets the input arguments. If a
// help flag is set then ParseStripOptions will print the help message and
// exit. ErrorCallback is used to handle recoverable errors. An Error returned
@@ -258,7 +265,6 @@ parseObjcopyOptions(ArrayRef<const char *> ArgsArr,
Expected<DriverConfig>
parseStripOptions(ArrayRef<const char *> ArgsArr,
llvm::function_ref<Error(Error)> ErrorCallback);
-
} // namespace objcopy
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
index 1bfabda4a1d7..a0cfd9a5ff86 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -136,17 +136,17 @@ static std::unique_ptr<Writer> createELFWriter(const CopyConfig &Config,
// Depending on the initial ELFT and OutputFormat we need a different Writer.
switch (OutputElfType) {
case ELFT_ELF32LE:
- return std::make_unique<ELFWriter<ELF32LE>>(Obj, Buf,
- !Config.StripSections);
+ return std::make_unique<ELFWriter<ELF32LE>>(Obj, Buf, !Config.StripSections,
+ Config.OnlyKeepDebug);
case ELFT_ELF64LE:
- return std::make_unique<ELFWriter<ELF64LE>>(Obj, Buf,
- !Config.StripSections);
+ return std::make_unique<ELFWriter<ELF64LE>>(Obj, Buf, !Config.StripSections,
+ Config.OnlyKeepDebug);
case ELFT_ELF32BE:
- return std::make_unique<ELFWriter<ELF32BE>>(Obj, Buf,
- !Config.StripSections);
+ return std::make_unique<ELFWriter<ELF32BE>>(Obj, Buf, !Config.StripSections,
+ Config.OnlyKeepDebug);
case ELFT_ELF64BE:
- return std::make_unique<ELFWriter<ELF64BE>>(Obj, Buf,
- !Config.StripSections);
+ return std::make_unique<ELFWriter<ELF64BE>>(Obj, Buf, !Config.StripSections,
+ Config.OnlyKeepDebug);
}
llvm_unreachable("Invalid output format");
}
@@ -175,7 +175,7 @@ findBuildID(const CopyConfig &Config, const object::ELFFile<ELFT> &In) {
if (Phdr.p_type != PT_NOTE)
continue;
Error Err = Error::success();
- for (const auto &Note : In.notes(Phdr, Err))
+ for (auto Note : In.notes(Phdr, Err))
if (Note.getType() == NT_GNU_BUILD_ID && Note.getName() == ELF_NOTE_GNU)
return Note.getDesc();
if (Err)
@@ -694,6 +694,11 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj,
}
}
+ if (Config.OnlyKeepDebug)
+ for (auto &Sec : Obj.sections())
+ if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE)
+ Sec.Type = SHT_NOBITS;
+
for (const auto &Flag : Config.AddSection) {
std::pair<StringRef, StringRef> SecPair = Flag.split("=");
StringRef SecName = SecPair.first;
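
The --only-keep-debug hunk in handleArgs above keeps section headers but drops the contents of allocatable, non-note sections by retyping them SHT_NOBITS; debug sections are non-SHF_ALLOC, so they survive intact. A minimal sketch of that filter, with the ELF spec constants inlined and a stand-in Section type:

#include <cstdint>

constexpr uint64_t SHF_ALLOC = 0x2;
constexpr uint32_t SHT_NOTE = 7, SHT_NOBITS = 8;

struct Section {
  uint64_t Flags;
  uint32_t Type;
};

// Allocatable non-note sections lose their contents; their headers (and
// everything non-SHF_ALLOC, e.g. .debug_*) are preserved.
static void onlyKeepDebug(Section &Sec) {
  if ((Sec.Flags & SHF_ALLOC) && Sec.Type != SHT_NOTE)
    Sec.Type = SHT_NOBITS;
}

int main() {
  Section Text{SHF_ALLOC, 1 /* SHT_PROGBITS */}, Debug{0, 1};
  onlyKeepDebug(Text);  // becomes SHT_NOBITS
  onlyKeepDebug(Debug); // unchanged
}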
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp
index 74145dad6e6b..ad53c75663ec 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -815,7 +815,8 @@ Error RelocationSection::removeSectionReferences(
}
for (const Relocation &R : Relocations) {
- if (!R.RelocSymbol->DefinedIn || !ToRemove(R.RelocSymbol->DefinedIn))
+ if (!R.RelocSymbol || !R.RelocSymbol->DefinedIn ||
+ !ToRemove(R.RelocSymbol->DefinedIn))
continue;
return createStringError(llvm::errc::invalid_argument,
"section '%s' cannot be removed: (%s+0x%" PRIx64
@@ -868,7 +869,8 @@ static void writeRel(const RelRange &Relocations, T *Buf) {
for (const auto &Reloc : Relocations) {
Buf->r_offset = Reloc.Offset;
setAddend(*Buf, Reloc.Addend);
- Buf->setSymbolAndType(Reloc.RelocSymbol->Index, Reloc.Type, false);
+ Buf->setSymbolAndType(Reloc.RelocSymbol ? Reloc.RelocSymbol->Index : 0,
+ Reloc.Type, false);
++Buf;
}
}
@@ -893,7 +895,7 @@ void RelocationSection::accept(MutableSectionVisitor &Visitor) {
Error RelocationSection::removeSymbols(
function_ref<bool(const Symbol &)> ToRemove) {
for (const Relocation &Reloc : Relocations)
- if (ToRemove(*Reloc.RelocSymbol))
+ if (Reloc.RelocSymbol && ToRemove(*Reloc.RelocSymbol))
return createStringError(
llvm::errc::invalid_argument,
"not stripping symbol '%s' because it is named in a relocation",
@@ -903,7 +905,8 @@ Error RelocationSection::removeSymbols(
void RelocationSection::markSymbols() {
for (const Relocation &Reloc : Relocations)
- Reloc.RelocSymbol->Referenced = true;
+ if (Reloc.RelocSymbol)
+ Reloc.RelocSymbol->Referenced = true;
}
void RelocationSection::replaceSectionReferences(
@@ -1006,7 +1009,7 @@ void GnuDebugLinkSection::init(StringRef File) {
Size = alignTo(FileName.size() + 1, 4) + 4;
// The CRC32 will only be aligned if we align the whole section.
Align = 4;
- Type = ELF::SHT_PROGBITS;
+ Type = OriginalType = ELF::SHT_PROGBITS;
Name = ".gnu_debuglink";
// For sections not found in segments, OriginalOffset is only used to
// establish the order that sections should go in. By using the maximum
@@ -1418,7 +1421,15 @@ static void initRelocations(RelocationSection *Relocs,
ToAdd.Offset = Rel.r_offset;
getAddend(ToAdd.Addend, Rel);
ToAdd.Type = Rel.getType(false);
- ToAdd.RelocSymbol = SymbolTable->getSymbolByIndex(Rel.getSymbol(false));
+
+ if (uint32_t Sym = Rel.getSymbol(false)) {
+ if (!SymbolTable)
+ error("'" + Relocs->Name +
+ "': relocation references symbol with index " + Twine(Sym) +
+ ", but there is no symbol table");
+ ToAdd.RelocSymbol = SymbolTable->getSymbolByIndex(Sym);
+ }
+
Relocs->addRelocation(ToAdd);
}
}
@@ -1510,8 +1521,8 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
}
auto &Sec = makeSection(Shdr);
Sec.Name = unwrapOrError(ElfFile.getSectionName(&Shdr));
- Sec.Type = Shdr.sh_type;
- Sec.Flags = Shdr.sh_flags;
+ Sec.Type = Sec.OriginalType = Shdr.sh_type;
+ Sec.Flags = Sec.OriginalFlags = Shdr.sh_flags;
Sec.Addr = Shdr.sh_addr;
Sec.Offset = Shdr.sh_offset;
Sec.OriginalOffset = Shdr.sh_offset;
@@ -1528,6 +1539,21 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
}
template <class ELFT> void ELFBuilder<ELFT>::readSections(bool EnsureSymtab) {
+ uint32_t ShstrIndex = ElfFile.getHeader()->e_shstrndx;
+ if (ShstrIndex == SHN_XINDEX)
+ ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link;
+
+ if (ShstrIndex == SHN_UNDEF)
+ Obj.HadShdrs = false;
+ else
+ Obj.SectionNames =
+ Obj.sections().template getSectionOfType<StringTableSection>(
+ ShstrIndex,
+ "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
+ " is invalid",
+ "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
+ " does not reference a string table");
+
// If a section index table exists we'll need to initialize it before we
// initialize the symbol table because the symbol table might need to
// reference it.
@@ -1541,13 +1567,14 @@ template <class ELFT> void ELFBuilder<ELFT>::readSections(bool EnsureSymtab) {
Obj.SymbolTable->initialize(Obj.sections());
initSymbolTable(Obj.SymbolTable);
} else if (EnsureSymtab) {
- // Reuse the existing SHT_STRTAB section if exists.
+ // Reuse an existing SHT_STRTAB section if it exists.
StringTableSection *StrTab = nullptr;
for (auto &Sec : Obj.sections()) {
if (Sec.Type == ELF::SHT_STRTAB && !(Sec.Flags & SHF_ALLOC)) {
StrTab = static_cast<StringTableSection *>(&Sec);
- // Prefer .strtab to .shstrtab.
+ // Prefer a string table that is not the section header string table, if
+ // such a table exists.
if (Obj.SectionNames != &Sec)
break;
}
@@ -1582,21 +1609,6 @@ template <class ELFT> void ELFBuilder<ELFT>::readSections(bool EnsureSymtab) {
initGroupSection(GroupSec);
}
}
-
- uint32_t ShstrIndex = ElfFile.getHeader()->e_shstrndx;
- if (ShstrIndex == SHN_XINDEX)
- ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link;
-
- if (ShstrIndex == SHN_UNDEF)
- Obj.HadShdrs = false;
- else
- Obj.SectionNames =
- Obj.sections().template getSectionOfType<StringTableSection>(
- ShstrIndex,
- "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
- " is invalid",
- "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
- " is not a string table");
}
template <class ELFT> void ELFBuilder<ELFT>::build(bool EnsureSymtab) {
@@ -1785,10 +1797,9 @@ template <class ELFT> void ELFWriter<ELFT>::writeSectionData() {
template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() {
for (Segment &Seg : Obj.segments()) {
- uint8_t *B = Buf.getBufferStart() + Seg.Offset;
- assert(Seg.FileSize == Seg.getContents().size() &&
- "Segment size must match contents size");
- std::memcpy(B, Seg.getContents().data(), Seg.FileSize);
+ size_t Size = std::min<size_t>(Seg.FileSize, Seg.getContents().size());
+ std::memcpy(Buf.getBufferStart() + Seg.Offset, Seg.getContents().data(),
+ Size);
}
// Iterate over removed sections and overwrite their old data with zeroes.
@@ -1803,8 +1814,10 @@ template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() {
}
template <class ELFT>
-ELFWriter<ELFT>::ELFWriter(Object &Obj, Buffer &Buf, bool WSH)
- : Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs) {}
+ELFWriter<ELFT>::ELFWriter(Object &Obj, Buffer &Buf, bool WSH,
+ bool OnlyKeepDebug)
+ : Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs),
+ OnlyKeepDebug(OnlyKeepDebug) {}
Error Object::removeSections(bool AllowBrokenLinks,
std::function<bool(const SectionBase &)> ToRemove) {
@@ -1945,6 +1958,78 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
return Offset;
}
+// Rewrite sh_offset after some sections are changed to SHT_NOBITS and thus
+// occupy no space in the file.
+static uint64_t layoutSectionsForOnlyKeepDebug(Object &Obj, uint64_t Off) {
+ uint32_t Index = 1;
+ for (auto &Sec : Obj.sections()) {
+ Sec.Index = Index++;
+
+ auto *FirstSec = Sec.ParentSegment && Sec.ParentSegment->Type == PT_LOAD
+ ? Sec.ParentSegment->firstSection()
+ : nullptr;
+
+ // The first section in a PT_LOAD has to have congruent offset and address
+ // modulo the alignment, which usually equals the maximum page size.
+ if (FirstSec && FirstSec == &Sec)
+ Off = alignTo(Off, Sec.ParentSegment->Align, Sec.Addr);
+
+ // sh_offset is not significant for SHT_NOBITS sections, but the congruence
+ // rule must be followed if it is the first section in a PT_LOAD. Do not
+ // advance Off.
+ if (Sec.Type == SHT_NOBITS) {
+ Sec.Offset = Off;
+ continue;
+ }
+
+ if (!FirstSec) {
+ // FirstSec being nullptr generally means that Sec does not have the
+ // SHF_ALLOC flag.
+ Off = Sec.Align ? alignTo(Off, Sec.Align) : Off;
+ } else if (FirstSec != &Sec) {
+ // The offset is relative to the first section in the PT_LOAD segment. Use
+ // sh_offset for non-SHF_ALLOC sections.
+ Off = Sec.OriginalOffset - FirstSec->OriginalOffset + FirstSec->Offset;
+ }
+ Sec.Offset = Off;
+ Off += Sec.Size;
+ }
+ return Off;
+}
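+
+// Aside: the congruence rule above relies on llvm::alignTo's three-argument
+// (skewed) form: pick the smallest Off' >= Off with Off' congruent to Addr
+// modulo Align. A small worked sketch of that computation, restricted to
+// power-of-two alignments for brevity (llvm::alignTo handles the general
+// case):
+//
+//   #include <cassert>
+//   #include <cstdint>
+//   #include <cstdio>
+//
+//   // Smallest value >= Off that is congruent to Addr modulo Align.
+//   static uint64_t alignToSkew(uint64_t Off, uint64_t Align, uint64_t Addr) {
+//     assert(Align && (Align & (Align - 1)) == 0 && "power-of-two alignment");
+//     uint64_t Skew = Addr & (Align - 1);
+//     return ((Off - Skew + Align - 1) & ~(Align - 1)) + Skew;
+//   }
+//
+//   int main() {
+//     // A PT_LOAD with p_align = 0x1000 whose first section lives at address
+//     // 0x400234 must start at a file offset ending in 0x234.
+//     std::printf("0x%llx\n",
+//                 (unsigned long long)alignToSkew(0x1000, 0x1000, 0x400234));
+//     // prints 0x1234
+//   }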
+
+// Rewrite p_offset and p_filesz of non-empty non-PT_PHDR segments after
+// sh_offset values have been updated.
+static uint64_t layoutSegmentsForOnlyKeepDebug(std::vector<Segment *> &Segments,
+ uint64_t HdrEnd) {
+ uint64_t MaxOffset = 0;
+ for (Segment *Seg : Segments) {
+ const SectionBase *FirstSec = Seg->firstSection();
+ if (Seg->Type == PT_PHDR || !FirstSec)
+ continue;
+
+ uint64_t Offset = FirstSec->Offset;
+ uint64_t FileSize = 0;
+ for (const SectionBase *Sec : Seg->Sections) {
+ uint64_t Size = Sec->Type == SHT_NOBITS ? 0 : Sec->Size;
+ if (Sec->Offset + Size > Offset)
+ FileSize = std::max(FileSize, Sec->Offset + Size - Offset);
+ }
+
+ // If the segment includes EHDR and program headers, don't make it smaller
+ // than the headers.
+ if (Seg->Offset < HdrEnd && HdrEnd <= Seg->Offset + Seg->FileSize) {
+ FileSize += Offset - Seg->Offset;
+ Offset = Seg->Offset;
+ FileSize = std::max(FileSize, HdrEnd - Offset);
+ }
+
+ Seg->Offset = Offset;
+ Seg->FileSize = FileSize;
+ MaxOffset = std::max(MaxOffset, Offset + FileSize);
+ }
+ return MaxOffset;
+}
+
template <class ELFT> void ELFWriter<ELFT>::initEhdrSegment() {
Segment &ElfHdr = Obj.ElfHdrSegment;
ElfHdr.Type = PT_PHDR;
@@ -1965,12 +2050,24 @@ template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
OrderedSegments.push_back(&Obj.ElfHdrSegment);
OrderedSegments.push_back(&Obj.ProgramHdrSegment);
orderSegments(OrderedSegments);
- // Offset is used as the start offset of the first segment to be laid out.
- // Since the ELF Header (ElfHdrSegment) must be at the start of the file,
- // we start at offset 0.
- uint64_t Offset = 0;
- Offset = layoutSegments(OrderedSegments, Offset);
- Offset = layoutSections(Obj.sections(), Offset);
+
+ uint64_t Offset;
+ if (OnlyKeepDebug) {
+ // For --only-keep-debug, the sections that did not preserve contents were
+ // changed to SHT_NOBITS. We now rewrite sh_offset fields of sections, and
+ // then rewrite p_offset/p_filesz of program headers.
+ uint64_t HdrEnd =
+ sizeof(Elf_Ehdr) + llvm::size(Obj.segments()) * sizeof(Elf_Phdr);
+ Offset = layoutSectionsForOnlyKeepDebug(Obj, HdrEnd);
+ Offset = std::max(Offset,
+ layoutSegmentsForOnlyKeepDebug(OrderedSegments, HdrEnd));
+ } else {
+ // Offset is used as the start offset of the first segment to be laid out.
+ // Since the ELF Header (ElfHdrSegment) must be at the start of the file,
+ // we start at offset 0.
+ Offset = layoutSegments(OrderedSegments, 0);
+ Offset = layoutSections(Obj.sections(), Offset);
+ }
// If we need to write the section header table out then we need to align the
// Offset so that SHOffset is valid.
if (WriteSectionHeaders)
@@ -2156,38 +2253,28 @@ Error BinaryWriter::finalize() {
std::unique(std::begin(OrderedSegments), std::end(OrderedSegments));
OrderedSegments.erase(End, std::end(OrderedSegments));
- uint64_t Offset = 0;
-
- // Modify the first segment so that there is no gap at the start. This allows
- // our layout algorithm to proceed as expected while not writing out the gap
- // at the start.
- if (!OrderedSegments.empty()) {
- Segment *Seg = OrderedSegments[0];
- const SectionBase *Sec = Seg->firstSection();
- auto Diff = Sec->OriginalOffset - Seg->OriginalOffset;
- Seg->OriginalOffset += Diff;
- // The size needs to be shrunk as well.
- Seg->FileSize -= Diff;
- // The PAddr needs to be increased to remove the gap before the first
- // section.
- Seg->PAddr += Diff;
- uint64_t LowestPAddr = Seg->PAddr;
- for (Segment *Segment : OrderedSegments) {
- Segment->Offset = Segment->PAddr - LowestPAddr;
- Offset = std::max(Offset, Segment->Offset + Segment->FileSize);
- }
+ // Compute the section LMA based on its sh_offset and the containing segment's
+ // p_offset and p_paddr. Also compute the minimum LMA of all sections as
+ // MinAddr. In the output, the contents between address 0 and MinAddr will be
+ // skipped.
+ uint64_t MinAddr = UINT64_MAX;
+ for (SectionBase &Sec : Obj.allocSections()) {
+ if (Sec.ParentSegment != nullptr)
+ Sec.Addr =
+ Sec.Offset - Sec.ParentSegment->Offset + Sec.ParentSegment->PAddr;
+ MinAddr = std::min(MinAddr, Sec.Addr);
}
- layoutSections(Obj.allocSections(), Offset);
-
// Now that every section has been laid out we just need to compute the total
// file size. This might not be the same as the offset returned by
// layoutSections, because we want to truncate the last segment to the end of
// its last section, to match GNU objcopy's behaviour.
TotalSize = 0;
- for (const SectionBase &Sec : Obj.allocSections())
+ for (SectionBase &Sec : Obj.allocSections()) {
+ Sec.Offset = Sec.Addr - MinAddr;
if (Sec.Type != SHT_NOBITS)
TotalSize = std::max(TotalSize, Sec.Offset + Sec.Size);
+ }
if (Error E = Buf.allocate(TotalSize))
return E;
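
The rewritten BinaryWriter::finalize() above derives each section's load address (LMA) from its parent segment and writes contents at LMA - MinAddr, replacing the old gap-trimming segment walk. A tiny worked sketch of that math, with hypothetical Seg/Sec stand-in types:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <initializer_list>

struct Seg { uint64_t Offset, PAddr; };
struct Sec { uint64_t Offset, Addr; const Seg *Parent; };

int main() {
  Seg Load{0x1000, 0x8000};
  Sec Text{0x1100, 0, &Load}, Data{0x1400, 0, &Load};

  // LMA = offset within the parent segment + the segment's physical address;
  // MinAddr anchors the start of the binary output.
  uint64_t MinAddr = UINT64_MAX;
  for (Sec *S : {&Text, &Data}) {
    S->Addr = S->Offset - S->Parent->Offset + S->Parent->PAddr;
    MinAddr = std::min(MinAddr, S->Addr);
  }
  std::printf("text@0x%llx data@0x%llx\n", // text@0x0 data@0x300
              (unsigned long long)(Text.Addr - MinAddr),
              (unsigned long long)(Data.Addr - MinAddr));
}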
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.h
index eeacb014e4dc..97702a66bc47 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.h
@@ -342,16 +342,20 @@ public:
virtual ~ELFWriter() {}
bool WriteSectionHeaders;
+ // For --only-keep-debug, select an alternative section/segment layout
+ // algorithm.
+ bool OnlyKeepDebug;
+
Error finalize() override;
Error write() override;
- ELFWriter(Object &Obj, Buffer &Buf, bool WSH);
+ ELFWriter(Object &Obj, Buffer &Buf, bool WSH, bool OnlyKeepDebug);
};
class BinaryWriter : public Writer {
private:
std::unique_ptr<BinarySectionWriter> SecWriter;
- uint64_t TotalSize;
+ uint64_t TotalSize = 0;
public:
~BinaryWriter() {}
@@ -366,7 +370,7 @@ class IHexWriter : public Writer {
};
std::set<const SectionBase *, SectionCompare> Sections;
- size_t TotalSize;
+ size_t TotalSize = 0;
Error checkSection(const SectionBase &Sec);
uint64_t writeEntryPointRecord(uint8_t *Buf);
@@ -383,11 +387,14 @@ class SectionBase {
public:
std::string Name;
Segment *ParentSegment = nullptr;
- uint64_t HeaderOffset;
- uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();
- uint32_t Index;
+ uint64_t HeaderOffset = 0;
+ uint32_t Index = 0;
bool HasSymbol = false;
+ uint64_t OriginalFlags = 0;
+ uint64_t OriginalType = ELF::SHT_NULL;
+ uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();
+
uint64_t Addr = 0;
uint64_t Align = 1;
uint32_t EntrySize = 0;
@@ -432,25 +439,24 @@ private:
}
};
- std::set<const SectionBase *, SectionCompare> Sections;
-
public:
- uint32_t Type;
- uint32_t Flags;
- uint64_t Offset;
- uint64_t VAddr;
- uint64_t PAddr;
- uint64_t FileSize;
- uint64_t MemSize;
- uint64_t Align;
-
- uint32_t Index;
- uint64_t OriginalOffset;
+ uint32_t Type = 0;
+ uint32_t Flags = 0;
+ uint64_t Offset = 0;
+ uint64_t VAddr = 0;
+ uint64_t PAddr = 0;
+ uint64_t FileSize = 0;
+ uint64_t MemSize = 0;
+ uint64_t Align = 0;
+
+ uint32_t Index = 0;
+ uint64_t OriginalOffset = 0;
Segment *ParentSegment = nullptr;
ArrayRef<uint8_t> Contents;
+ std::set<const SectionBase *, SectionCompare> Sections;
explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {}
- Segment() {}
+ Segment() = default;
const SectionBase *firstSection() const {
if (!Sections.empty())
@@ -490,7 +496,7 @@ public:
OwnedDataSection(StringRef SecName, ArrayRef<uint8_t> Data)
: Data(std::begin(Data), std::end(Data)) {
Name = SecName.str();
- Type = ELF::SHT_PROGBITS;
+ Type = OriginalType = ELF::SHT_PROGBITS;
Size = Data.size();
OriginalOffset = std::numeric_limits<uint64_t>::max();
}
@@ -498,9 +504,9 @@ public:
OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags,
uint64_t SecOff) {
Name = SecName.str();
- Type = ELF::SHT_PROGBITS;
+ Type = OriginalType = ELF::SHT_PROGBITS;
Addr = SecAddr;
- Flags = SecFlags;
+ Flags = OriginalFlags = SecFlags;
OriginalOffset = SecOff;
}
@@ -530,7 +536,7 @@ public:
void accept(MutableSectionVisitor &Visitor) override;
static bool classof(const SectionBase *S) {
- return (S->Flags & ELF::SHF_COMPRESSED) ||
+ return (S->OriginalFlags & ELF::SHF_COMPRESSED) ||
(StringRef(S->Name).startswith(".zdebug"));
}
};
@@ -543,7 +549,7 @@ public:
: SectionBase(Sec) {
Size = Sec.getDecompressedSize();
Align = Sec.getDecompressedAlign();
- Flags = (Flags & ~ELF::SHF_COMPRESSED);
+ Flags = OriginalFlags = (Flags & ~ELF::SHF_COMPRESSED);
if (StringRef(Name).startswith(".zdebug"))
Name = "." + Name.substr(2);
}
@@ -567,7 +573,7 @@ class StringTableSection : public SectionBase {
public:
StringTableSection() : StrTabBuilder(StringTableBuilder::ELF) {
- Type = ELF::SHT_STRTAB;
+ Type = OriginalType = ELF::SHT_STRTAB;
}
void addString(StringRef Name);
@@ -577,9 +583,9 @@ public:
void accept(MutableSectionVisitor &Visitor) override;
static bool classof(const SectionBase *S) {
- if (S->Flags & ELF::SHF_ALLOC)
+ if (S->OriginalFlags & ELF::SHF_ALLOC)
return false;
- return S->Type == ELF::SHT_STRTAB;
+ return S->OriginalType == ELF::SHT_STRTAB;
}
};
@@ -648,7 +654,7 @@ public:
Name = ".symtab_shndx";
Align = 4;
EntrySize = 4;
- Type = ELF::SHT_SYMTAB_SHNDX;
+ Type = OriginalType = ELF::SHT_SYMTAB_SHNDX;
}
};
@@ -666,7 +672,7 @@ protected:
using SymPtr = std::unique_ptr<Symbol>;
public:
- SymbolTableSection() { Type = ELF::SHT_SYMTAB; }
+ SymbolTableSection() { Type = OriginalType = ELF::SHT_SYMTAB; }
void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn,
uint64_t Value, uint8_t Visibility, uint16_t Shndx,
@@ -695,7 +701,7 @@ public:
const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
static bool classof(const SectionBase *S) {
- return S->Type == ELF::SHT_SYMTAB;
+ return S->OriginalType == ELF::SHT_SYMTAB;
}
};
@@ -724,7 +730,7 @@ public:
void setSection(SectionBase *Sec) { SecToApplyRel = Sec; }
static bool classof(const SectionBase *S) {
- return S->Type == ELF::SHT_REL || S->Type == ELF::SHT_RELA;
+ return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
}
};
@@ -762,9 +768,9 @@ public:
const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
static bool classof(const SectionBase *S) {
- if (S->Flags & ELF::SHF_ALLOC)
+ if (S->OriginalFlags & ELF::SHF_ALLOC)
return false;
- return S->Type == ELF::SHT_REL || S->Type == ELF::SHT_RELA;
+ return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
}
};
@@ -799,7 +805,7 @@ public:
const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
static bool classof(const SectionBase *S) {
- return S->Type == ELF::SHT_GROUP;
+ return S->OriginalType == ELF::SHT_GROUP;
}
};
@@ -808,7 +814,7 @@ public:
explicit DynamicSymbolTableSection(ArrayRef<uint8_t> Data) : Section(Data) {}
static bool classof(const SectionBase *S) {
- return S->Type == ELF::SHT_DYNSYM;
+ return S->OriginalType == ELF::SHT_DYNSYM;
}
};
@@ -817,7 +823,7 @@ public:
explicit DynamicSection(ArrayRef<uint8_t> Data) : Section(Data) {}
static bool classof(const SectionBase *S) {
- return S->Type == ELF::SHT_DYNAMIC;
+ return S->OriginalType == ELF::SHT_DYNAMIC;
}
};
@@ -838,9 +844,9 @@ public:
function_ref<bool(const SectionBase *)> ToRemove) override;
static bool classof(const SectionBase *S) {
- if (!(S->Flags & ELF::SHF_ALLOC))
+ if (!(S->OriginalFlags & ELF::SHF_ALLOC))
return false;
- return S->Type == ELF::SHT_REL || S->Type == ELF::SHT_RELA;
+ return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
}
};
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/InstallNameToolOpts.td b/contrib/llvm-project/llvm/tools/llvm-objcopy/InstallNameToolOpts.td
new file mode 100644
index 000000000000..35047a57994c
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/InstallNameToolOpts.td
@@ -0,0 +1,22 @@
+//===-- InstallNameToolOpts.td - llvm-install-name-tool options --------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the command line options of llvm-install-name-tool.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Option/OptParser.td"
+
+def help : Flag<["--"], "help">;
+def h : Flag<["-"], "h">, Alias<help>;
+
+def add_rpath : Option<["-", "--"], "add_rpath", KIND_SEPARATE>,
+ HelpText<"Add new rpath">;
+
+def version : Flag<["--"], "version">,
+ HelpText<"Print the version and exit.">;
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
index f621f3aa09cf..380f2e989fe4 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
@@ -64,9 +64,11 @@ void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
assert(std::is_sorted(O.SymTable.Symbols.begin(), O.SymTable.Symbols.end(),
[](const std::unique_ptr<SymbolEntry> &A,
const std::unique_ptr<SymbolEntry> &B) {
- return (A->isLocalSymbol() && !B->isLocalSymbol()) ||
- (!A->isUndefinedSymbol() &&
- B->isUndefinedSymbol());
+ bool AL = A->isLocalSymbol(), BL = B->isLocalSymbol();
+ if (AL != BL)
+ return AL;
+ return !AL && !A->isUndefinedSymbol() &&
+ B->isUndefinedSymbol();
}) &&
"Symbols are not sorted by their types.");
@@ -318,6 +320,9 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
case MachO::LC_SEGMENT:
case MachO::LC_SEGMENT_64:
case MachO::LC_VERSION_MIN_MACOSX:
+ case MachO::LC_VERSION_MIN_IPHONEOS:
+ case MachO::LC_VERSION_MIN_TVOS:
+ case MachO::LC_VERSION_MIN_WATCHOS:
case MachO::LC_BUILD_VERSION:
case MachO::LC_ID_DYLIB:
case MachO::LC_LOAD_DYLIB:
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 6d586e7d73f1..4578d0bb75d4 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -18,31 +18,209 @@ namespace objcopy {
namespace macho {
using namespace object;
+using SectionPred = std::function<bool(const Section &Sec)>;
+
+static void removeSections(const CopyConfig &Config, Object &Obj) {
+ SectionPred RemovePred = [](const Section &) { return false; };
+
+ if (!Config.ToRemove.empty()) {
+ RemovePred = [&Config, RemovePred](const Section &Sec) {
+ return Config.ToRemove.matches(Sec.CanonicalName);
+ };
+ }
+
+ if (Config.StripAll || Config.StripDebug) {
+ // Remove all debug sections.
+ RemovePred = [RemovePred](const Section &Sec) {
+ if (Sec.Segname == "__DWARF")
+ return true;
+
+ return RemovePred(Sec);
+ };
+ }
+
+ if (!Config.OnlySection.empty()) {
+ // Overwrite RemovePred because --only-section takes priority.
+ RemovePred = [&Config](const Section &Sec) {
+ return !Config.OnlySection.matches(Sec.CanonicalName);
+ };
+ }
+
+ return Obj.removeSections(RemovePred);
+}
+
+static void markSymbols(const CopyConfig &Config, Object &Obj) {
+ // Symbols referenced from the indirect symbol table must not be removed.
+ for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols)
+ if (ISE.Symbol)
+ (*ISE.Symbol)->Referenced = true;
+}
+
+static void updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) {
+ for (SymbolEntry &Sym : Obj.SymTable) {
+ auto I = Config.SymbolsToRename.find(Sym.Name);
+ if (I != Config.SymbolsToRename.end())
+ Sym.Name = I->getValue();
+ }
+
+ auto RemovePred = [Config](const std::unique_ptr<SymbolEntry> &N) {
+ if (N->Referenced)
+ return false;
+ return Config.StripAll;
+ };
+
+ Obj.SymTable.removeSymbols(RemovePred);
+}
+
+static LoadCommand buildRPathLoadCommand(StringRef Path) {
+ LoadCommand LC;
+ MachO::rpath_command RPathLC;
+ RPathLC.cmd = MachO::LC_RPATH;
+ RPathLC.path = sizeof(MachO::rpath_command);
+ RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size(), 8);
+ LC.MachOLoadCommand.rpath_command_data = RPathLC;
+ LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0);
+ std::copy(Path.begin(), Path.end(), LC.Payload.begin());
+ return LC;
+}
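+
+// Aside: LC_RPATH stores the path string inline after the fixed
+// rpath_command header, and cmdsize must be 8-byte aligned, so the payload
+// is the path plus zero padding. A worked sketch of the size math (the
+// 12-byte header mirrors sizeof(MachO::rpath_command): cmd, cmdsize, and
+// the path offset, 4 bytes each):
+//
+//   #include <cstdint>
+//   #include <cstdio>
+//   #include <cstring>
+//
+//   int main() {
+//     const char *Path = "@executable_path/../lib"; // 23 bytes
+//     const uint32_t HdrSize = 12; // sizeof(MachO::rpath_command)
+//     uint32_t PathLen = (uint32_t)std::strlen(Path);
+//     uint32_t CmdSize = (HdrSize + PathLen + 7) & ~7u; // alignTo(.., 8)
+//     std::printf("cmdsize = %u, zero padding = %u\n", CmdSize,
+//                 CmdSize - HdrSize - PathLen); // cmdsize = 40, padding = 5
+//   }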
+
+static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
+ Object &Obj) {
+ for (LoadCommand &LC : Obj.LoadCommands)
+ for (Section &Sec : LC.Sections) {
+ if (Sec.CanonicalName == SecName) {
+ Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+ FileOutputBuffer::create(Filename, Sec.Content.size());
+ if (!BufferOrErr)
+ return BufferOrErr.takeError();
+ std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+ llvm::copy(Sec.Content, Buf->getBufferStart());
+
+ if (Error E = Buf->commit())
+ return E;
+ return Error::success();
+ }
+ }
+
+ return createStringError(object_error::parse_failed, "section '%s' not found",
+ SecName.str().c_str());
+}
+
+static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+ MemoryBuffer::getFile(Filename);
+ if (!BufOrErr)
+ return createFileError(Filename, errorCodeToError(BufOrErr.getError()));
+ std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
+
+ std::pair<StringRef, StringRef> Pair = SecName.split(',');
+ StringRef TargetSegName = Pair.first;
+ Section Sec(TargetSegName, Pair.second);
+ Sec.Content = Obj.NewSectionsContents.save(Buf->getBuffer());
+
+ // Add the section to an existing segment.
+ for (LoadCommand &LC : Obj.LoadCommands) {
+ Optional<StringRef> SegName = LC.getSegmentName();
+ if (SegName && SegName == TargetSegName) {
+ LC.Sections.push_back(Sec);
+ return Error::success();
+ }
+ }
+
+ // There's no segment named TargetSegName. Create a new load command and
+ // insert a new section into it.
+ LoadCommand &NewSegment = Obj.addSegment(TargetSegName);
+ NewSegment.Sections.push_back(Sec);
+ return Error::success();
+}
+
+// isValidMachOCannonicalName returns success if Name is a MachO canonical name
+// ("<segment>,<section>") and lengths of both segment and section names are
+// valid.
+Error isValidMachOCannonicalName(StringRef Name) {
+ if (Name.count(',') != 1)
+ return createStringError(errc::invalid_argument,
+ "invalid section name '%s' (should be formatted "
+ "as '<segment name>,<section name>')",
+ Name.str().c_str());
+
+ std::pair<StringRef, StringRef> Pair = Name.split(',');
+ if (Pair.first.size() > 16)
+ return createStringError(errc::invalid_argument,
+ "too long segment name: '%s'",
+ Pair.first.str().c_str());
+ if (Pair.second.size() > 16)
+ return createStringError(errc::invalid_argument,
+ "too long section name: '%s'",
+ Pair.second.str().c_str());
+ return Error::success();
+}
static Error handleArgs(const CopyConfig &Config, Object &Obj) {
if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() ||
Config.BuildIdLinkInput || Config.BuildIdLinkOutput ||
!Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() ||
- !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() ||
- !Config.DumpSection.empty() || !Config.KeepSection.empty() ||
- Config.NewSymbolVisibility || !Config.OnlySection.empty() ||
- !Config.SymbolsToGlobalize.empty() || !Config.SymbolsToKeep.empty() ||
- !Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() ||
- !Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() ||
- !Config.SymbolsToRename.empty() ||
+ !Config.AllocSectionsPrefix.empty() || !Config.KeepSection.empty() ||
+ Config.NewSymbolVisibility || !Config.SymbolsToGlobalize.empty() ||
+ !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() ||
+ !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() ||
+ !Config.SectionsToRename.empty() ||
!Config.UnneededSymbolsToRemove.empty() ||
!Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() ||
- !Config.ToRemove.empty() || Config.ExtractDWO || Config.KeepFileSymbols ||
- Config.LocalizeHidden || Config.PreserveDates || Config.StripDWO ||
+ Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden ||
+ Config.PreserveDates || Config.StripAllGNU || Config.StripDWO ||
Config.StripNonAlloc || Config.StripSections || Config.Weaken ||
- Config.DecompressDebugSections || Config.StripDebug ||
- Config.StripNonAlloc || Config.StripSections || Config.StripUnneeded ||
+ Config.DecompressDebugSections || Config.StripNonAlloc ||
+ Config.StripSections || Config.StripUnneeded ||
Config.DiscardMode != DiscardType::None || !Config.SymbolsToAdd.empty() ||
Config.EntryExpr) {
return createStringError(llvm::errc::invalid_argument,
"option not supported by llvm-objcopy for MachO");
}
+ removeSections(Config, Obj);
+
+ // Mark symbols to determine which symbols are still needed.
+ if (Config.StripAll)
+ markSymbols(Config, Obj);
+
+ updateAndRemoveSymbols(Config, Obj);
+
+ if (Config.StripAll)
+ for (LoadCommand &LC : Obj.LoadCommands)
+ for (Section &Sec : LC.Sections)
+ Sec.Relocations.clear();
+
+ for (const StringRef &Flag : Config.DumpSection) {
+ std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+ StringRef SecName = SecPair.first;
+ StringRef File = SecPair.second;
+ if (Error E = dumpSectionToFile(SecName, File, Obj))
+ return E;
+ }
+ for (const auto &Flag : Config.AddSection) {
+ std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+ StringRef SecName = SecPair.first;
+ StringRef File = SecPair.second;
+ if (Error E = isValidMachOCannonicalName(SecName))
+ return E;
+ if (Error E = addSection(SecName, File, Obj))
+ return E;
+ }
+
+ for (StringRef RPath : Config.RPathToAdd) {
+ for (LoadCommand &LC : Obj.LoadCommands) {
+ if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH &&
+ RPath == StringRef(reinterpret_cast<char *>(LC.Payload.data()),
+ LC.Payload.size())
+ .trim(0)) {
+ return createStringError(errc::invalid_argument,
+ "rpath " + RPath +
+ " would create a duplicate load command");
+ }
+ }
+ Obj.addLoadCommand(buildRPathLoadCommand(RPath));
+ }
return Error::success();
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
index b48a0d8952d0..46bb11727322 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -29,12 +29,9 @@ void MachOReader::readHeader(Object &O) const {
template <typename SectionType>
Section constructSectionCommon(SectionType Sec) {
- Section S;
- S.Sectname =
- StringRef(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)))
- .str();
- S.Segname =
- StringRef(Sec.segname, strnlen(Sec.segname, sizeof(Sec.sectname))).str();
+ StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
+ StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
+ Section S(SegName, SectName);
S.Addr = Sec.addr;
S.Size = Sec.size;
S.Offset = Sec.offset;
@@ -149,10 +146,11 @@ void MachOReader::readLoadCommands(Object &O) const {
sizeof(MachO::LCStruct)); \
if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \
MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \
- LC.Payload = ArrayRef<uint8_t>( \
- reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \
- sizeof(MachO::LCStruct), \
- LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \
+ if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \
+ LC.Payload = ArrayRef<uint8_t>( \
+ reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \
+ sizeof(MachO::LCStruct), \
+ LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \
break;
switch (LoadCmd.C.cmd) {
@@ -161,10 +159,11 @@ void MachOReader::readLoadCommands(Object &O) const {
sizeof(MachO::load_command));
if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
- LC.Payload = ArrayRef<uint8_t>(
- reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
- sizeof(MachO::load_command),
- LoadCmd.C.cmdsize - sizeof(MachO::load_command));
+ if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
+ LC.Payload = ArrayRef<uint8_t>(
+ reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
+ sizeof(MachO::load_command),
+ LoadCmd.C.cmdsize - sizeof(MachO::load_command));
break;
#include "llvm/BinaryFormat/MachO.def"
}
@@ -255,9 +254,16 @@ void MachOReader::readFunctionStartsData(Object &O) const {
void MachOReader::readIndirectSymbolTable(Object &O) const {
MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
- for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i)
- O.IndirectSymTable.Symbols.push_back(
- MachOObj.getIndirectSymbolTableEntry(DySymTab, i));
+ constexpr uint32_t AbsOrLocalMask =
+ MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
+ for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
+ uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);
+ if ((Index & AbsOrLocalMask) != 0)
+ O.IndirectSymTable.Symbols.emplace_back(Index, None);
+ else
+ O.IndirectSymTable.Symbols.emplace_back(
+ Index, O.SymTable.getSymbolByIndex(Index));
+ }
}
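
Context for the readIndirectSymbolTable change above: indirect symbol table entries are raw uint32_t values, and entries flagged INDIRECT_SYMBOL_LOCAL or INDIRECT_SYMBOL_ABS are not symbol-table indices, which is why such entries keep their original value (Symbol == None). A minimal sketch of the decoding (mask values from <mach-o/loader.h>):

#include <cstdint>
#include <cstdio>
#include <initializer_list>

constexpr uint32_t INDIRECT_SYMBOL_LOCAL = 0x80000000;
constexpr uint32_t INDIRECT_SYMBOL_ABS = 0x40000000;

// Entries carrying either flag bit do not reference the symbol table and
// must be preserved verbatim, as the reader above now does.
static bool referencesSymbol(uint32_t Entry) {
  return (Entry & (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) == 0;
}

int main() {
  for (uint32_t E : {7u, INDIRECT_SYMBOL_LOCAL,
                     INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS})
    std::printf("0x%08x -> %s\n", E,
                referencesSymbol(E) ? "symbol index" : "raw value");
}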
std::unique_ptr<Object> MachOReader::create() const {
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
index 4ec91cc9eb7a..0d9590612eca 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
@@ -180,7 +180,8 @@ void MachOWriter::writeLoadCommands() {
MachO::swapStruct(MLC.LCStruct##_data); \
memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \
Begin += sizeof(MachO::LCStruct); \
- memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \
+ if (!LC.Payload.empty()) \
+ memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \
Begin += LC.Payload.size(); \
break;
@@ -193,7 +194,8 @@ void MachOWriter::writeLoadCommands() {
MachO::swapStruct(MLC.load_command_data);
memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command));
Begin += sizeof(MachO::load_command);
- memcpy(Begin, LC.Payload.data(), LC.Payload.size());
+ if (!LC.Payload.empty())
+ memcpy(Begin, LC.Payload.data(), LC.Payload.size());
Begin += LC.Payload.size();
break;
#include "llvm/BinaryFormat/MachO.def"
@@ -369,11 +371,14 @@ void MachOWriter::writeIndirectSymbolTable() {
O.LoadCommands[*O.DySymTabCommandIndex]
.MachOLoadCommand.dysymtab_command_data;
- char *Out = (char *)B.getBufferStart() + DySymTabCommand.indirectsymoff;
- assert((DySymTabCommand.nindirectsyms == O.IndirectSymTable.Symbols.size()) &&
- "Incorrect indirect symbol table size");
- memcpy(Out, O.IndirectSymTable.Symbols.data(),
- sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
+ uint32_t *Out =
+ (uint32_t *)(B.getBufferStart() + DySymTabCommand.indirectsymoff);
+ for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) {
+ uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex;
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ sys::swapByteOrder(Entry);
+ *Out++ = Entry;
+ }
}
void MachOWriter::writeDataInCodeData() {
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.cpp
index 264f39c28ed2..d3b4fdc2f633 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.cpp
@@ -10,6 +10,70 @@ const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
return Symbols[Index].get();
}
+SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
+ return const_cast<SymbolEntry *>(
+ static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
+}
+
+void SymbolTable::removeSymbols(
+ function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
+ Symbols.erase(
+ std::remove_if(std::begin(Symbols), std::end(Symbols), ToRemove),
+ std::end(Symbols));
+}
+
+void Object::removeSections(function_ref<bool(const Section &)> ToRemove) {
+ for (LoadCommand &LC : LoadCommands)
+ LC.Sections.erase(std::remove_if(std::begin(LC.Sections),
+ std::end(LC.Sections), ToRemove),
+ std::end(LC.Sections));
+}
+
+void Object::addLoadCommand(LoadCommand LC) {
+ LoadCommands.push_back(std::move(LC));
+}
+
+template <typename SegmentType>
+static void constructSegment(SegmentType &Seg,
+ llvm::MachO::LoadCommandType CmdType,
+ StringRef SegName) {
+ assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
+ memset(&Seg, 0, sizeof(SegmentType));
+ Seg.cmd = CmdType;
+ strncpy(Seg.segname, SegName.data(), SegName.size());
+}
+
+LoadCommand &Object::addSegment(StringRef SegName) {
+ LoadCommand LC;
+ if (is64Bit())
+ constructSegment(LC.MachOLoadCommand.segment_command_64_data,
+ MachO::LC_SEGMENT_64, SegName);
+ else
+ constructSegment(LC.MachOLoadCommand.segment_command_data,
+ MachO::LC_SEGMENT, SegName);
+
+ LoadCommands.push_back(LC);
+ return LoadCommands.back();
+}
+
+/// Extracts a segment name from a string which is possibly non-null-terminated.
+static StringRef extractSegmentName(const char *SegName) {
+ return StringRef(SegName,
+ strnlen(SegName, sizeof(MachO::segment_command::segname)));
+}
+
+Optional<StringRef> LoadCommand::getSegmentName() const {
+ const MachO::macho_load_command &MLC = MachOLoadCommand;
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ return extractSegmentName(MLC.segment_command_data.segname);
+ case MachO::LC_SEGMENT_64:
+ return extractSegmentName(MLC.segment_command_64_data.segname);
+ default:
+ return None;
+ }
+}
+
} // end namespace macho
} // end namespace objcopy
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.h
index 1cebf8253d19..dc2606eefa4a 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/MachO/Object.h
@@ -14,6 +14,7 @@
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/ObjectYAML/DWARFYAML.h"
+#include "llvm/Support/StringSaver.h"
#include "llvm/Support/YAMLTraits.h"
#include <cstdint>
#include <string>
@@ -36,22 +37,32 @@ struct MachHeader {
struct RelocationInfo;
struct Section {
- std::string Sectname;
std::string Segname;
- uint64_t Addr;
- uint64_t Size;
- uint32_t Offset;
- uint32_t Align;
- uint32_t RelOff;
- uint32_t NReloc;
- uint32_t Flags;
- uint32_t Reserved1;
- uint32_t Reserved2;
- uint32_t Reserved3;
-
+ std::string Sectname;
+ // CanonicalName is a string formatted as "<Segname>,<Sectname>".
+ std::string CanonicalName;
+ uint64_t Addr = 0;
+ uint64_t Size = 0;
+ uint32_t Offset = 0;
+ uint32_t Align = 0;
+ uint32_t RelOff = 0;
+ uint32_t NReloc = 0;
+ uint32_t Flags = 0;
+ uint32_t Reserved1 = 0;
+ uint32_t Reserved2 = 0;
+ uint32_t Reserved3 = 0;
StringRef Content;
std::vector<RelocationInfo> Relocations;
+ Section(StringRef SegName, StringRef SectName)
+ : Segname(SegName), Sectname(SectName),
+ CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
+
+ Section(StringRef SegName, StringRef SectName, StringRef Content)
+ : Segname(SegName), Sectname(SectName),
+ CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
+ Content(Content) {}
+
MachO::SectionType getType() const {
return static_cast<MachO::SectionType>(Flags & MachO::SECTION_TYPE);
}
@@ -72,19 +83,23 @@ struct LoadCommand {
// The raw content of the payload of the load command (located right after the
// corresponding struct). In some cases it is either empty or can be
// copied-over without digging into its structure.
- ArrayRef<uint8_t> Payload;
+ std::vector<uint8_t> Payload;
// Some load commands can contain (inside the payload) an array of sections,
// though the contents of the sections are stored separately. The struct
// Section describes only sections' metadata and where to find the
// corresponding content inside the binary.
std::vector<Section> Sections;
+
+ // Returns the segment name if the load command is a segment command.
+ Optional<StringRef> getSegmentName() const;
};
// Symbol information. Fields that start with "n_" have the same meaning as in the nlist structure.
// nlist.
struct SymbolEntry {
std::string Name;
+ bool Referenced = false;
uint32_t Index;
uint8_t n_type;
uint8_t n_sect;
@@ -107,11 +122,32 @@ struct SymbolEntry {
struct SymbolTable {
std::vector<std::unique_ptr<SymbolEntry>> Symbols;
+ using iterator = pointee_iterator<
+ std::vector<std::unique_ptr<SymbolEntry>>::const_iterator>;
+
+ iterator begin() const { return iterator(Symbols.begin()); }
+ iterator end() const { return iterator(Symbols.end()); }
+
const SymbolEntry *getSymbolByIndex(uint32_t Index) const;
+ SymbolEntry *getSymbolByIndex(uint32_t Index);
+ void removeSymbols(
+ function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove);
+};
+
+struct IndirectSymbolEntry {
+ // The original value in an indirect symbol table. Higher bits encode extra
+ // information (INDIRECT_SYMBOL_LOCAL and INDIRECT_SYMBOL_ABS).
+ uint32_t OriginalIndex;
+ /// The Symbol referenced by this entry. It's None if the index is
+ /// INDIRECT_SYMBOL_LOCAL or INDIRECT_SYMBOL_ABS.
+ Optional<SymbolEntry *> Symbol;
+
+ IndirectSymbolEntry(uint32_t OriginalIndex, Optional<SymbolEntry *> Symbol)
+ : OriginalIndex(OriginalIndex), Symbol(Symbol) {}
};
struct IndirectSymbolTable {
- std::vector<uint32_t> Symbols;
+ std::vector<IndirectSymbolEntry> Symbols;
};
/// The location of the string table inside the binary is described by LC_SYMTAB
@@ -250,6 +286,24 @@ struct Object {
Optional<size_t> DataInCodeCommandIndex;
/// The index of the LC_FUNCTION_STARTS load command if present.
Optional<size_t> FunctionStartsCommandIndex;
+
+ BumpPtrAllocator Alloc;
+ StringSaver NewSectionsContents;
+
+ Object() : NewSectionsContents(Alloc) {}
+
+ void removeSections(function_ref<bool(const Section &)> ToRemove);
+ void addLoadCommand(LoadCommand LC);
+
+ /// Creates a new segment load command in the object and returns a reference
+ /// to the newly created load command. The caller should verify that SegName
+ /// is not too long (SegName.size() should be less than or equal to 16).
+ LoadCommand &addSegment(StringRef SegName);
+
+ bool is64Bit() const {
+ return Header.Magic == MachO::MH_MAGIC_64 ||
+ Header.Magic == MachO::MH_CIGAM_64;
+ }
};
} // end namespace macho
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
index a68210f3fdd3..e662f35f4b08 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -313,11 +313,20 @@ static Error executeObjcopy(CopyConfig &Config) {
return Error::success();
}
+namespace {
+
+enum class ToolType { Objcopy, Strip, InstallNameTool };
+
+} // anonymous namespace
+
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
ToolName = argv[0];
- bool IsStrip = sys::path::stem(ToolName).contains("strip");
-
+ ToolType Tool = StringSwitch<ToolType>(sys::path::stem(ToolName))
+ .EndsWith("strip", ToolType::Strip)
+ .EndsWith("install-name-tool", ToolType::InstallNameTool)
+ .EndsWith("install_name_tool", ToolType::InstallNameTool)
+ .Default(ToolType::Objcopy);
// Expand response files.
// TODO: Move these lines, which are copied from lib/Support/CommandLine.cpp,
// into a separate function in the CommandLine library and call that function
@@ -332,10 +341,11 @@ int main(int argc, char **argv) {
NewArgv);
auto Args = makeArrayRef(NewArgv).drop_front();
-
Expected<DriverConfig> DriverConfig =
- IsStrip ? parseStripOptions(Args, reportWarning)
- : parseObjcopyOptions(Args, reportWarning);
+ (Tool == ToolType::Strip) ? parseStripOptions(Args, reportWarning)
+ : ((Tool == ToolType::InstallNameTool)
+ ? parseInstallNameToolOptions(Args)
+ : parseObjcopyOptions(Args, reportWarning));
if (!DriverConfig) {
logAllUnhandledErrors(DriverConfig.takeError(),
WithColor::error(errs(), ToolName));
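
The StringSwitch above gives llvm-objcopy a busybox-style personality: the same binary behaves as objcopy, strip, or install_name_tool depending on the stem of the executable name. A hedged stand-alone sketch of that dispatch (plain std::string in place of StringRef/StringSwitch):

#include <cstdio>
#include <string>

enum class Tool { Objcopy, Strip, InstallNameTool };

static bool endsWith(const std::string &S, const std::string &Suffix) {
  return S.size() >= Suffix.size() &&
         S.compare(S.size() - Suffix.size(), Suffix.size(), Suffix) == 0;
}

// Mirrors the StringSwitch in main(): the first matching suffix of the
// executable stem wins, defaulting to objcopy.
static Tool pickTool(const std::string &Stem) {
  if (endsWith(Stem, "strip"))
    return Tool::Strip;
  if (endsWith(Stem, "install-name-tool") || endsWith(Stem, "install_name_tool"))
    return Tool::InstallNameTool;
  return Tool::Objcopy;
}

int main() {
  std::printf("%d %d\n", (int)pickTool("llvm-strip"),
              (int)pickTool("llvm-install-name-tool")); // 1 2
}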
diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp b/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp
index 93d070eee16c..abfe08346bbd 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp
@@ -105,9 +105,12 @@ static Error getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
} else {
Fmt << "*ABS*";
}
-
- if (Addend != 0)
- Fmt << (Addend < 0 ? "" : "+") << Addend;
+ if (Addend != 0) {
+ Fmt << (Addend < 0
+ ? "-"
+ : "+") << format("0x%" PRIx64,
+ (Addend < 0 ? -(uint64_t)Addend : (uint64_t)Addend));
+ }
Fmt.flush();
Result.append(FmtBuf.begin(), FmtBuf.end());
return Error::success();
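
The ELFDump.cpp hunk above switches the relocation addend to signed hexadecimal; negating through uint64_t sidesteps signed-overflow UB for INT64_MIN. A small sketch with the expected outputs:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

// Print a nonzero addend as +0x.. / -0x..; the cast to uint64_t before
// negation keeps INT64_MIN well-defined.
static void printAddend(int64_t Addend) {
  if (Addend != 0)
    std::printf("%s0x%" PRIx64 "\n", Addend < 0 ? "-" : "+",
                Addend < 0 ? -(uint64_t)Addend : (uint64_t)Addend);
}

int main() {
  printAddend(16);        // +0x10
  printAddend(-8);        // -0x8
  printAddend(INT64_MIN); // -0x8000000000000000
}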
@@ -201,6 +204,9 @@ template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
case ELF::PT_GNU_RELRO:
outs() << " RELRO ";
break;
+ case ELF::PT_GNU_PROPERTY:
+ outs() << " PROPERTY ";
+ break;
case ELF::PT_GNU_STACK:
outs() << " STACK ";
break;
diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/MachODump.cpp b/contrib/llvm-project/llvm/tools/llvm-objdump/MachODump.cpp
index e4684d0f1601..87c7a92933f1 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objdump/MachODump.cpp
@@ -29,6 +29,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Support/Casting.h"
@@ -7208,11 +7209,12 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
FeaturesStr = Features.getString();
}
+ MCTargetOptions MCOptions;
// Set up disassembler.
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
std::unique_ptr<const MCAsmInfo> AsmInfo(
- TheTarget->createMCAsmInfo(*MRI, TripleName));
+ TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MachOMCPU, FeaturesStr));
MCContext Ctx(AsmInfo.get(), MRI.get(), nullptr);
@@ -7262,7 +7264,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
if (ThumbTarget) {
ThumbMRI.reset(ThumbTarget->createMCRegInfo(ThumbTripleName));
ThumbAsmInfo.reset(
- ThumbTarget->createMCAsmInfo(*ThumbMRI, ThumbTripleName));
+ ThumbTarget->createMCAsmInfo(*ThumbMRI, ThumbTripleName, MCOptions));
ThumbSTI.reset(
ThumbTarget->createMCSubtargetInfo(ThumbTripleName, MachOMCPU,
FeaturesStr));
@@ -7324,12 +7326,6 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
}
array_pod_sort(Dices.begin(), Dices.end());
-#ifndef NDEBUG
- raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
-#else
- raw_ostream &DebugOut = nulls();
-#endif
-
// Try to find debug info and set up the DIContext for it.
std::unique_ptr<DIContext> diContext;
std::unique_ptr<Binary> DSYMBinary;
@@ -7405,7 +7401,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
reportError(MachDSYM.takeError(), DSYMPath);
return;
}
-
+
// We need to keep the Binary alive with the buffer
DbgObj = &*MachDSYM.get();
DSYMBinary = std::move(*MachDSYM);
@@ -7620,10 +7616,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
bool gotInst;
if (UseThumbTarget)
gotInst = ThumbDisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
- PC, DebugOut, Annotations);
+ PC, Annotations);
else
gotInst = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), PC,
- DebugOut, Annotations);
+ Annotations);
if (gotInst) {
if (!NoShowRawInsn || Arch == Triple::arm) {
dumpBytes(makeArrayRef(Bytes.data() + Index, Size), outs());
@@ -7631,9 +7627,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
formatted_raw_ostream FormattedOS(outs());
StringRef AnnotationsStr = Annotations.str();
if (UseThumbTarget)
- ThumbIP->printInst(&Inst, FormattedOS, AnnotationsStr, *ThumbSTI);
+ ThumbIP->printInst(&Inst, PC, AnnotationsStr, *ThumbSTI,
+ FormattedOS);
else
- IP->printInst(&Inst, FormattedOS, AnnotationsStr, *STI);
+ IP->printInst(&Inst, PC, AnnotationsStr, *STI, FormattedOS);
emitComments(CommentStream, CommentsToEmit, FormattedOS, *AsmInfo);
// Print debug info.
@@ -7647,8 +7644,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
}
outs() << "\n";
} else {
- unsigned int Arch = MachOOF->getArch();
- if (Arch == Triple::x86_64 || Arch == Triple::x86) {
+ if (MachOOF->getArchTriple().isX86()) {
outs() << format("\t.byte 0x%02x #bad opcode\n",
*(Bytes.data() + Index) & 0xff);
Size = 1; // skip exactly one illegible byte and move on.
@@ -7694,7 +7690,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
SmallVector<char, 64> AnnotationsBytes;
raw_svector_ostream Annotations(AnnotationsBytes);
if (DisAsm->getInstruction(Inst, InstSize, Bytes.slice(Index), PC,
- DebugOut, Annotations)) {
+ Annotations)) {
if (!NoLeadingAddr) {
if (FullLeadingAddr) {
if (MachOOF->is64Bit())
@@ -7710,11 +7706,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
dumpBytes(makeArrayRef(Bytes.data() + Index, InstSize), outs());
}
StringRef AnnotationsStr = Annotations.str();
- IP->printInst(&Inst, outs(), AnnotationsStr, *STI);
+ IP->printInst(&Inst, PC, AnnotationsStr, *STI, outs());
outs() << "\n";
} else {
- unsigned int Arch = MachOOF->getArch();
- if (Arch == Triple::x86_64 || Arch == Triple::x86) {
+ if (MachOOF->getArchTriple().isX86()) {
outs() << format("\t.byte 0x%02x #bad opcode\n",
*(Bytes.data() + Index) & 0xff);
InstSize = 1; // skip exactly one illegible byte and move on.
@@ -7728,7 +7723,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
}
}
// The TripleName's need to be reset if we are called again for a different
- // archtecture.
+ // architecture.
TripleName = "";
ThumbTripleName = "";
@@ -7827,7 +7822,7 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
auto Sym = Symbols.upper_bound(Addr);
if (Sym == Symbols.begin()) {
// The first symbol in the object is after this reference, the best we can
- // do is section-relative notation.
+ // do is section-relative notation.
if (Expected<StringRef> NameOrErr = RelocSection.getName())
Name = *NameOrErr;
else
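
Two MC-layer API changes drive all of the mechanical edits in this file: Target::createMCAsmInfo grew an MCTargetOptions parameter, and the disassemble/print calls now carry the instruction address while the old DebugOut stream is gone. A minimal sketch of the updated sequence, reusing the names from the hunks above (TheTarget, DisAsm, IP, Inst, Bytes, Index, PC and friends are assumed to be set up as in the tool):

  // Sketch: post-merge MC API shapes used throughout this patch.
  MCTargetOptions MCOptions;                       // new third argument
  std::unique_ptr<const MCRegisterInfo> MRI(
      TheTarget->createMCRegInfo(TripleName));
  std::unique_ptr<const MCAsmInfo> AsmInfo(
      TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));

  // getInstruction() lost its raw_ostream &DebugOut parameter; the comment
  // stream is now the last argument.
  bool Got = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), PC,
                                    CommentStream);

  // printInst() now takes the instruction address, so the printer can render
  // PC-relative targets, and the output stream moved to the end.
  if (Got)
    IP->printInst(&Inst, PC, AnnotationsStr, *STI, outs());
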
diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 34a44b3b7fa9..6bd37a1fb86c 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -37,6 +37,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
@@ -707,7 +708,7 @@ public:
OS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8);
if (MI)
- IP.printInst(MI, OS, "", STI);
+ IP.printInst(MI, Address.Address, "", STI, OS);
else
OS << "\t<unknown>";
}
@@ -743,7 +744,7 @@ public:
std::string Buffer;
{
raw_string_ostream TempStream(Buffer);
- IP.printInst(MI, TempStream, "", STI);
+ IP.printInst(MI, Address.Address, "", STI, TempStream);
}
StringRef Contents(Buffer);
// Split off bundle attributes
@@ -810,7 +811,7 @@ public:
SmallString<40> InstStr;
raw_svector_ostream IS(InstStr);
- IP.printInst(MI, IS, "", STI);
+ IP.printInst(MI, Address.Address, "", STI, IS);
OS << left_justify(IS.str(), 60);
} else {
@@ -864,7 +865,7 @@ public:
dumpBytes(Bytes, OS);
}
if (MI)
- IP.printInst(MI, OS, "", STI);
+ IP.printInst(MI, Address.Address, "", STI, OS);
else
OS << "\t<unknown>";
}
@@ -1133,6 +1134,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
std::map<SectionRef, SectionSymbolsTy> AllSymbols;
SectionSymbolsTy AbsoluteSymbols;
const StringRef FileName = Obj->getFileName();
+ const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj);
for (const SymbolRef &Symbol : Obj->symbols()) {
uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName);
@@ -1147,6 +1149,18 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
continue;
}
+ // Don't ask a Mach-O STAB symbol for its section unless you know that
+ // STAB symbol's section field refers to a valid section index. Otherwise
+ // the lookup may fail while trying to load a section that does not exist.
+ if (MachO) {
+ DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
+ uint8_t NType = (MachO->is64Bit() ?
+ MachO->getSymbol64TableEntry(SymDRI).n_type:
+ MachO->getSymbolTableEntry(SymDRI).n_type);
+ if (NType & MachO::N_STAB)
+ continue;
+ }
+
section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
if (SecI != Obj->section_end())
AllSymbols[*SecI].emplace_back(Address, Name, SymbolType);
@@ -1243,7 +1257,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
}
StringRef SegmentName = "";
- if (const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj)) {
+ if (MachO) {
DataRefImpl DR = Section.getRawDataRefImpl();
SegmentName = MachO->getSectionFinalSegmentName(DR);
}
@@ -1339,16 +1353,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
continue;
}
-#ifndef NDEBUG
- raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
-#else
- raw_ostream &DebugOut = nulls();
-#endif
-
// Some targets (like WebAssembly) have a special prelude at the start
// of each symbol.
DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start),
- SectionAddr + Start, DebugOut, CommentStream);
+ SectionAddr + Start, CommentStream);
Start += Size;
Index = Start;
@@ -1412,8 +1420,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
// provided
MCInst Inst;
bool Disassembled = DisAsm->getInstruction(
- Inst, Size, Bytes.slice(Index), SectionAddr + Index, DebugOut,
- CommentStream);
+ Inst, Size, Bytes.slice(Index), SectionAddr + Index, CommentStream);
if (Size == 0)
Size = 1;
@@ -1539,8 +1546,9 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
"no register info for target " + TripleName);
// Set up disassembler.
+ MCTargetOptions MCOptions;
std::unique_ptr<const MCAsmInfo> AsmInfo(
- TheTarget->createMCAsmInfo(*MRI, TripleName));
+ TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
if (!AsmInfo)
reportError(Obj->getFileName(),
"no assembly info for target " + TripleName);
@@ -1802,6 +1810,7 @@ void printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
}
const StringRef FileName = O->getFileName();
+ const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(O);
for (auto I = O->symbol_begin(), E = O->symbol_end(); I != E; ++I) {
const SymbolRef &Symbol = *I;
uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName,
@@ -1811,8 +1820,23 @@ void printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName,
ArchiveName, ArchitectureName);
uint32_t Flags = Symbol.getFlags();
- section_iterator Section = unwrapOrError(Symbol.getSection(), FileName,
+
+ // Don't ask a Mach-O STAB symbol for its section unless you know that
+ // STAB symbol's section field refers to a valid section index. Otherwise
+ // the lookup may fail while trying to load a section that does not exist.
+ bool isSTAB = false;
+ if (MachO) {
+ DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
+ uint8_t NType = (MachO->is64Bit() ?
+ MachO->getSymbol64TableEntry(SymDRI).n_type:
+ MachO->getSymbolTableEntry(SymDRI).n_type);
+ if (NType & MachO::N_STAB)
+ isSTAB = true;
+ }
+ section_iterator Section = isSTAB ? O->section_end() :
+ unwrapOrError(Symbol.getSection(), FileName,
ArchiveName, ArchitectureName);
+
StringRef Name;
if (Type == SymbolRef::ST_Debug && Section != O->section_end()) {
if (Expected<StringRef> NameOrErr = Section->getName())
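
The two new guards in this file share one idiom: check the Mach-O nlist n_type for the N_STAB mask before asking a symbol for its section, because a STAB (debug) entry's section field is not guaranteed to hold a valid section index. A condensed sketch, assuming MachO and Symbol as bound in the hunks above:

  // Sketch: detect STAB (debug) symbols before calling Symbol.getSection().
  bool IsStab = false;
  if (MachO) {
    DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
    uint8_t NType = MachO->is64Bit()
                        ? MachO->getSymbol64TableEntry(SymDRI).n_type
                        : MachO->getSymbolTableEntry(SymDRI).n_type;
    IsStab = (NType & MachO::N_STAB) != 0;   // any of the N_STAB bits set
  }
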
diff --git a/contrib/llvm-project/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/contrib/llvm-project/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
index 4d82e0fd9174..bf725ad8d606 100644
--- a/contrib/llvm-project/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -1407,7 +1407,7 @@ Error DumpOutputStyle::dumpTypesFromObjectFile() {
P.formatLine("Local / Global hashes:");
TypeIndex TI(TypeIndex::FirstNonSimpleIndex);
- for (const auto &H : zip(LocalHashes, GlobalHashes)) {
+ for (auto H : zip(LocalHashes, GlobalHashes)) {
AutoIndent Indent2(P);
LocallyHashedType &L = std::get<0>(H);
GloballyHashedType &G = std::get<1>(H);
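
The one-token change above (`const auto &H` to `auto H`) is about zip's proxy iterator: dereferencing it materializes a temporary tuple of references, so a const reference would bind to a fresh temporary on every iteration, which newer compilers warn about. Taking the proxy by value only copies two references, not the hashed records; a sketch:

  // Sketch: zip()'s iterator dereferences to a temporary tuple of references.
  for (auto H : zip(LocalHashes, GlobalHashes)) {  // by value: copies the proxy
    LocallyHashedType &L = std::get<0>(H);   // still refers to the container
    GloballyHashedType &G = std::get<1>(H);  // element, not to a copy
    (void)L;                                 // use L and G as the loop body does
    (void)G;
  }
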
diff --git a/contrib/llvm-project/llvm/tools/llvm-pdbutil/InputFile.h b/contrib/llvm-project/llvm/tools/llvm-pdbutil/InputFile.h
index f25390c971d0..a5d2897f5600 100644
--- a/contrib/llvm-project/llvm/tools/llvm-pdbutil/InputFile.h
+++ b/contrib/llvm-project/llvm/tools/llvm-pdbutil/InputFile.h
@@ -43,7 +43,7 @@ class InputFile {
std::unique_ptr<NativeSession> PdbSession;
object::OwningBinary<object::Binary> CoffObject;
std::unique_ptr<MemoryBuffer> UnknownFile;
- PointerUnion3<PDBFile *, object::COFFObjectFile *, MemoryBuffer *> PdbOrObj;
+ PointerUnion<PDBFile *, object::COFFObjectFile *, MemoryBuffer *> PdbOrObj;
using TypeCollectionPtr = std::unique_ptr<codeview::LazyRandomTypeCollection>;
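
This hunk tracks the retirement of the fixed-arity PointerUnion3/PointerUnion4 aliases in favor of a variadic PointerUnion; only the spelling changes, the accessors keep working. A hedged, self-contained sketch with stand-in types (the alignas gives the union tag bits to discriminate three members):

  // Sketch: the variadic PointerUnion subsumes PointerUnion3/PointerUnion4.
  #include "llvm/ADT/PointerUnion.h"

  struct alignas(8) Pdb {};   // stand-ins for PDBFile, COFFObjectFile,
  struct alignas(8) Obj {};   // MemoryBuffer; alignment >= 8 leaves enough
  struct alignas(8) Buf {};   // low pointer bits for the discriminator

  void demo(Pdb *P) {
    llvm::PointerUnion<Pdb *, Obj *, Buf *> U = P;  // was: PointerUnion3<...>
    if (auto *Q = U.dyn_cast<Pdb *>())              // accessors are unchanged
      (void)Q;
  }
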
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
index 3e026f58871b..8f365c5ad082 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -884,7 +884,7 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
}
if (XData.X()) {
- const uint32_t Address = XData.ExceptionHandlerRVA();
+ const uint64_t Address = COFF.getImageBase() + XData.ExceptionHandlerRVA();
const uint32_t Parameter = XData.ExceptionHandlerParameter();
const size_t HandlerOffset = HeaderWords(XData)
+ (XData.E() ? 0 : XData.EpilogueCount())
@@ -896,7 +896,8 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
Symbol = getSymbol(COFF, Address, /*FunctionOnly=*/true);
if (!Symbol) {
ListScope EHS(SW, "ExceptionHandler");
- SW.printString("Routine", "(null)");
+ SW.printHex("Routine", Address);
+ SW.printHex("Parameter", Parameter);
return true;
}
@@ -925,7 +926,8 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
ErrorOr<SymbolRef> Function = getRelocatedSymbol(COFF, Section, Offset);
if (!Function)
- Function = getSymbol(COFF, RF.BeginAddress, /*FunctionOnly=*/true);
+ Function = getSymbol(COFF, COFF.getImageBase() + RF.BeginAddress,
+ /*FunctionOnly=*/true);
ErrorOr<SymbolRef> XDataRecord = getRelocatedSymbol(COFF, Section, Offset + 4);
if (!XDataRecord)
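
Both fixes above have the same root cause: the ExceptionHandlerRVA and BeginAddress fields are image-base-relative, while the addresses getSymbol() compares against are virtual addresses, so the image base must be added before the lookup. A one-line sketch of the conversion, using COFFObjectFile::getImageBase() as the hunks above do:

  #include "llvm/Object/COFF.h"

  // Sketch: xdata/pdata fields hold RVAs; symbol lookup wants VAs.
  uint64_t handlerVA(const llvm::object::COFFObjectFile &COFF, uint32_t RVA) {
    return COFF.getImageBase() + RVA;   // image base + RVA == virtual address
  }
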
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp
index 57144882c4b4..8ffb68283405 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -151,6 +151,41 @@ struct DynRegionInfo {
}
};
+namespace {
+struct VerdAux {
+ unsigned Offset;
+ std::string Name;
+};
+
+struct VerDef {
+ unsigned Offset;
+ unsigned Version;
+ unsigned Flags;
+ unsigned Ndx;
+ unsigned Cnt;
+ unsigned Hash;
+ std::string Name;
+ std::vector<VerdAux> AuxV;
+};
+
+struct VernAux {
+ unsigned Hash;
+ unsigned Flags;
+ unsigned Other;
+ unsigned Offset;
+ std::string Name;
+};
+
+struct VerNeed {
+ unsigned Version;
+ unsigned Cnt;
+ unsigned Offset;
+ std::string File;
+ std::vector<VernAux> AuxV;
+};
+
+} // namespace
+
template <typename ELFT> class ELFDumper : public ObjDumper {
public:
ELFDumper(const object::ELFObjectFile<ELFT> *ObjF, ScopedPrinter &Writer);
@@ -158,6 +193,7 @@ public:
void printFileHeaders() override;
void printSectionHeaders() override;
void printRelocations() override;
+ void printDependentLibs() override;
void printDynamicRelocations() override;
void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override;
void printHashSymbols() override;
@@ -221,13 +257,11 @@ private:
std::pair<const Elf_Phdr *, const Elf_Shdr *>
findDynamic(const ELFFile<ELFT> *Obj);
void loadDynamicTable(const ELFFile<ELFT> *Obj);
- void parseDynamicTable();
+ void parseDynamicTable(const ELFFile<ELFT> *Obj);
- StringRef getSymbolVersion(StringRef StrTab, const Elf_Sym *symb,
- bool &IsDefault) const;
- void LoadVersionMap() const;
- void LoadVersionNeeds(const Elf_Shdr *ec) const;
- void LoadVersionDefs(const Elf_Shdr *sec) const;
+ Expected<StringRef> getSymbolVersion(const Elf_Sym *symb,
+ bool &IsDefault) const;
+ Error LoadVersionMap() const;
const object::ELFObjectFile<ELFT> *ObjF;
DynRegionInfo DynRelRegion;
@@ -250,29 +284,11 @@ private:
const Elf_Shdr *SymbolVersionNeedSection = nullptr; // .gnu.version_r
const Elf_Shdr *SymbolVersionDefSection = nullptr; // .gnu.version_d
- // Records for each version index the corresponding Verdef or Vernaux entry.
- // This is filled the first time LoadVersionMap() is called.
- class VersionMapEntry : public PointerIntPair<const void *, 1> {
- public:
- // If the integer is 0, this is an Elf_Verdef*.
- // If the integer is 1, this is an Elf_Vernaux*.
- VersionMapEntry() : PointerIntPair<const void *, 1>(nullptr, 0) {}
- VersionMapEntry(const Elf_Verdef *verdef)
- : PointerIntPair<const void *, 1>(verdef, 0) {}
- VersionMapEntry(const Elf_Vernaux *vernaux)
- : PointerIntPair<const void *, 1>(vernaux, 1) {}
-
- bool isNull() const { return getPointer() == nullptr; }
- bool isVerdef() const { return !isNull() && getInt() == 0; }
- bool isVernaux() const { return !isNull() && getInt() == 1; }
- const Elf_Verdef *getVerdef() const {
- return isVerdef() ? (const Elf_Verdef *)getPointer() : nullptr;
- }
- const Elf_Vernaux *getVernaux() const {
- return isVernaux() ? (const Elf_Vernaux *)getPointer() : nullptr;
- }
+ struct VersionEntry {
+ std::string Name;
+ bool IsVerDef;
};
- mutable SmallVector<VersionMapEntry, 16> VersionMap;
+ mutable SmallVector<Optional<VersionEntry>, 16> VersionMap;
public:
Elf_Dyn_Range dynamic_table() const {
@@ -299,14 +315,14 @@ public:
Elf_Relr_Range dyn_relrs() const;
std::string getFullSymbolName(const Elf_Sym *Symbol, StringRef StrTable,
bool IsDynamic) const;
- void getSectionNameIndex(const Elf_Sym *Symbol, const Elf_Sym *FirstSym,
- StringRef &SectionName,
- unsigned &SectionIndex) const;
+ Expected<unsigned> getSymbolSectionIndex(const Elf_Sym *Symbol,
+ const Elf_Sym *FirstSym) const;
+ Expected<StringRef> getSymbolSectionName(const Elf_Sym *Symbol,
+ unsigned SectionIndex) const;
Expected<std::string> getStaticSymbolName(uint32_t Index) const;
std::string getDynamicString(uint64_t Value) const;
- StringRef getSymbolVersionByIndex(StringRef StrTab,
- uint32_t VersionSymbolIndex,
- bool &IsDefault) const;
+ Expected<StringRef> getSymbolVersionByIndex(uint32_t VersionSymbolIndex,
+ bool &IsDefault) const;
void printSymbolsHelper(bool IsDynamic) const;
void printDynamicEntry(raw_ostream &OS, uint64_t Type, uint64_t Value) const;
@@ -323,9 +339,307 @@ public:
const DynRegionInfo &getDynamicTableRegion() const { return DynamicTable; }
const Elf_Hash *getHashTable() const { return HashTable; }
const Elf_GnuHash *getGnuHashTable() const { return GnuHashTable; }
+
+ Expected<ArrayRef<Elf_Versym>> getVersionTable(const Elf_Shdr *Sec,
+ ArrayRef<Elf_Sym> *SymTab,
+ StringRef *StrTab) const;
+ Expected<std::vector<VerDef>>
+ getVersionDefinitions(const Elf_Shdr *Sec) const;
+ Expected<std::vector<VerNeed>>
+ getVersionDependencies(const Elf_Shdr *Sec) const;
};
template <class ELFT>
+static Expected<StringRef> getLinkAsStrtab(const ELFFile<ELFT> *Obj,
+ const typename ELFT::Shdr *Sec,
+ unsigned SecNdx) {
+ Expected<const typename ELFT::Shdr *> StrTabSecOrErr =
+ Obj->getSection(Sec->sh_link);
+ if (!StrTabSecOrErr)
+ return createError("invalid section linked to " +
+ object::getELFSectionTypeName(
+ Obj->getHeader()->e_machine, Sec->sh_type) +
+ " section with index " + Twine(SecNdx) + ": " +
+ toString(StrTabSecOrErr.takeError()));
+
+ Expected<StringRef> StrTabOrErr = Obj->getStringTable(*StrTabSecOrErr);
+ if (!StrTabOrErr)
+ return createError("invalid string table linked to " +
+ object::getELFSectionTypeName(
+ Obj->getHeader()->e_machine, Sec->sh_type) +
+ " section with index " + Twine(SecNdx) + ": " +
+ toString(StrTabOrErr.takeError()));
+ return *StrTabOrErr;
+}
+
+// Returns the linked symbol table and associated string table for a given section.
+template <class ELFT>
+static Expected<std::pair<typename ELFT::SymRange, StringRef>>
+getLinkAsSymtab(const ELFFile<ELFT> *Obj, const typename ELFT::Shdr *Sec,
+ unsigned SecNdx, unsigned ExpectedType) {
+ Expected<const typename ELFT::Shdr *> SymtabOrErr =
+ Obj->getSection(Sec->sh_link);
+ if (!SymtabOrErr)
+ return createError("invalid section linked to " +
+ object::getELFSectionTypeName(
+ Obj->getHeader()->e_machine, Sec->sh_type) +
+ " section with index " + Twine(SecNdx) + ": " +
+ toString(SymtabOrErr.takeError()));
+
+ if ((*SymtabOrErr)->sh_type != ExpectedType)
+ return createError(
+ "invalid section linked to " +
+ object::getELFSectionTypeName(Obj->getHeader()->e_machine,
+ Sec->sh_type) +
+ " section with index " + Twine(SecNdx) + ": expected " +
+ object::getELFSectionTypeName(Obj->getHeader()->e_machine,
+ ExpectedType) +
+ ", but got " +
+ object::getELFSectionTypeName(Obj->getHeader()->e_machine,
+ (*SymtabOrErr)->sh_type));
+
+ Expected<StringRef> StrTabOrErr =
+ getLinkAsStrtab(Obj, *SymtabOrErr, Sec->sh_link);
+ if (!StrTabOrErr)
+ return createError(
+ "can't get a string table for the symbol table linked to " +
+ object::getELFSectionTypeName(Obj->getHeader()->e_machine,
+ Sec->sh_type) +
+ " section with index " + Twine(SecNdx) + ": " +
+ toString(StrTabOrErr.takeError()));
+
+ Expected<typename ELFT::SymRange> SymsOrErr = Obj->symbols(*SymtabOrErr);
+ if (!SymsOrErr)
+ return createError(
+ "unable to read symbols from the symbol table with index " +
+ Twine(Sec->sh_link) + ": " + toString(SymsOrErr.takeError()));
+
+ return std::make_pair(*SymsOrErr, *StrTabOrErr);
+}
+
+template <class ELFT>
+Expected<ArrayRef<typename ELFT::Versym>>
+ELFDumper<ELFT>::getVersionTable(const Elf_Shdr *Sec, ArrayRef<Elf_Sym> *SymTab,
+ StringRef *StrTab) const {
+ assert((!SymTab && !StrTab) || (SymTab && StrTab));
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+ unsigned SecNdx = Sec - &cantFail(Obj->sections()).front();
+
+ if (uintptr_t(Obj->base() + Sec->sh_offset) % sizeof(uint16_t) != 0)
+ return createError("the SHT_GNU_versym section with index " +
+ Twine(SecNdx) + " is misaligned");
+
+ Expected<ArrayRef<Elf_Versym>> VersionsOrErr =
+ Obj->template getSectionContentsAsArray<Elf_Versym>(Sec);
+ if (!VersionsOrErr)
+ return createError(
+ "cannot read content of SHT_GNU_versym section with index " +
+ Twine(SecNdx) + ": " + toString(VersionsOrErr.takeError()));
+
+ Expected<std::pair<ArrayRef<Elf_Sym>, StringRef>> SymTabOrErr =
+ getLinkAsSymtab(Obj, Sec, SecNdx, SHT_DYNSYM);
+ if (!SymTabOrErr) {
+ ELFDumperStyle->reportUniqueWarning(SymTabOrErr.takeError());
+ return *VersionsOrErr;
+ }
+
+ if (SymTabOrErr->first.size() != VersionsOrErr->size())
+ ELFDumperStyle->reportUniqueWarning(
+ createError("SHT_GNU_versym section with index " + Twine(SecNdx) +
+ ": the number of entries (" + Twine(VersionsOrErr->size()) +
+ ") does not match the number of symbols (" +
+ Twine(SymTabOrErr->first.size()) +
+ ") in the symbol table with index " + Twine(Sec->sh_link)));
+
+ if (SymTab)
+ std::tie(*SymTab, *StrTab) = *SymTabOrErr;
+ return *VersionsOrErr;
+}
+
+template <class ELFT>
+Expected<std::vector<VerDef>>
+ELFDumper<ELFT>::getVersionDefinitions(const Elf_Shdr *Sec) const {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+ unsigned SecNdx = Sec - &cantFail(Obj->sections()).front();
+
+ Expected<StringRef> StrTabOrErr = getLinkAsStrtab(Obj, Sec, SecNdx);
+ if (!StrTabOrErr)
+ return StrTabOrErr.takeError();
+
+ Expected<ArrayRef<uint8_t>> ContentsOrErr = Obj->getSectionContents(Sec);
+ if (!ContentsOrErr)
+ return createError(
+ "cannot read content of SHT_GNU_verdef section with index " +
+ Twine(SecNdx) + ": " + toString(ContentsOrErr.takeError()));
+
+ const uint8_t *Start = ContentsOrErr->data();
+ const uint8_t *End = Start + ContentsOrErr->size();
+
+ auto ExtractNextAux = [&](const uint8_t *&VerdauxBuf,
+ unsigned VerDefNdx) -> Expected<VerdAux> {
+ if (VerdauxBuf + sizeof(Elf_Verdaux) > End)
+ return createError("invalid SHT_GNU_verdef section with index " +
+ Twine(SecNdx) + ": version definition " +
+ Twine(VerDefNdx) +
+ " refers to an auxiliary entry that goes past the end "
+ "of the section");
+
+ auto *Verdaux = reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
+ VerdauxBuf += Verdaux->vda_next;
+
+ VerdAux Aux;
+ Aux.Offset = VerdauxBuf - Start;
+ if (Verdaux->vda_name <= StrTabOrErr->size())
+ Aux.Name = StrTabOrErr->drop_front(Verdaux->vda_name);
+ else
+ Aux.Name = "<invalid vda_name: " + to_string(Verdaux->vda_name) + ">";
+ return Aux;
+ };
+
+ std::vector<VerDef> Ret;
+ const uint8_t *VerdefBuf = Start;
+ for (unsigned I = 1; I <= /*VerDefsNum=*/Sec->sh_info; ++I) {
+ if (VerdefBuf + sizeof(Elf_Verdef) > End)
+ return createError("invalid SHT_GNU_verdef section with index " +
+ Twine(SecNdx) + ": version definition " + Twine(I) +
+ " goes past the end of the section");
+
+ if (uintptr_t(VerdefBuf) % sizeof(uint32_t) != 0)
+ return createError(
+ "invalid SHT_GNU_verdef section with index " + Twine(SecNdx) +
+ ": found a misaligned version definition entry at offset 0x" +
+ Twine::utohexstr(VerdefBuf - Start));
+
+ unsigned Version = *reinterpret_cast<const Elf_Half *>(VerdefBuf);
+ if (Version != 1)
+ return createError("unable to dump SHT_GNU_verdef section with index " +
+ Twine(SecNdx) + ": version " + Twine(Version) +
+ " is not yet supported");
+
+ const Elf_Verdef *D = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
+ VerDef &VD = *Ret.emplace(Ret.end());
+ VD.Offset = VerdefBuf - Start;
+ VD.Version = D->vd_version;
+ VD.Flags = D->vd_flags;
+ VD.Ndx = D->vd_ndx;
+ VD.Cnt = D->vd_cnt;
+ VD.Hash = D->vd_hash;
+
+ const uint8_t *VerdauxBuf = VerdefBuf + D->vd_aux;
+ for (unsigned J = 0; J < D->vd_cnt; ++J) {
+ if (uintptr_t(VerdauxBuf) % sizeof(uint32_t) != 0)
+ return createError("invalid SHT_GNU_verdef section with index " +
+ Twine(SecNdx) +
+ ": found a misaligned auxiliary entry at offset 0x" +
+ Twine::utohexstr(VerdauxBuf - Start));
+
+ Expected<VerdAux> AuxOrErr = ExtractNextAux(VerdauxBuf, I);
+ if (!AuxOrErr)
+ return AuxOrErr.takeError();
+
+ if (J == 0)
+ VD.Name = AuxOrErr->Name;
+ else
+ VD.AuxV.push_back(*AuxOrErr);
+ }
+
+ VerdefBuf += D->vd_next;
+ }
+
+ return Ret;
+}
+
+template <class ELFT>
+Expected<std::vector<VerNeed>>
+ELFDumper<ELFT>::getVersionDependencies(const Elf_Shdr *Sec) const {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+ unsigned SecNdx = Sec - &cantFail(Obj->sections()).front();
+
+ StringRef StrTab;
+ Expected<StringRef> StrTabOrErr = getLinkAsStrtab(Obj, Sec, SecNdx);
+ if (!StrTabOrErr)
+ ELFDumperStyle->reportUniqueWarning(StrTabOrErr.takeError());
+ else
+ StrTab = *StrTabOrErr;
+
+ Expected<ArrayRef<uint8_t>> ContentsOrErr = Obj->getSectionContents(Sec);
+ if (!ContentsOrErr)
+ return createError(
+ "cannot read content of SHT_GNU_verneed section with index " +
+ Twine(SecNdx) + ": " + toString(ContentsOrErr.takeError()));
+
+ const uint8_t *Start = ContentsOrErr->data();
+ const uint8_t *End = Start + ContentsOrErr->size();
+ const uint8_t *VerneedBuf = Start;
+
+ std::vector<VerNeed> Ret;
+ for (unsigned I = 1; I <= /*VerneedNum=*/Sec->sh_info; ++I) {
+ if (VerneedBuf + sizeof(Elf_Verneed) > End)
+ return createError("invalid SHT_GNU_verneed section with index " +
+ Twine(SecNdx) + ": version dependency " + Twine(I) +
+ " goes past the end of the section");
+
+ if (uintptr_t(VerneedBuf) % sizeof(uint32_t) != 0)
+ return createError(
+ "invalid SHT_GNU_verneed section with index " + Twine(SecNdx) +
+ ": found a misaligned version dependency entry at offset 0x" +
+ Twine::utohexstr(VerneedBuf - Start));
+
+ unsigned Version = *reinterpret_cast<const Elf_Half *>(VerneedBuf);
+ if (Version != 1)
+ return createError("unable to dump SHT_GNU_verneed section with index " +
+ Twine(SecNdx) + ": version " + Twine(Version) +
+ " is not yet supported");
+
+ const Elf_Verneed *Verneed =
+ reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
+
+ VerNeed &VN = *Ret.emplace(Ret.end());
+ VN.Version = Verneed->vn_version;
+ VN.Cnt = Verneed->vn_cnt;
+ VN.Offset = VerneedBuf - Start;
+
+ if (Verneed->vn_file < StrTab.size())
+ VN.File = StrTab.drop_front(Verneed->vn_file);
+ else
+ VN.File = "<corrupt vn_file: " + to_string(Verneed->vn_file) + ">";
+
+ const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
+ for (unsigned J = 0; J < Verneed->vn_cnt; ++J) {
+ if (uintptr_t(VernauxBuf) % sizeof(uint32_t) != 0)
+ return createError("invalid SHT_GNU_verneed section with index " +
+ Twine(SecNdx) +
+ ": found a misaligned auxiliary entry at offset 0x" +
+ Twine::utohexstr(VernauxBuf - Start));
+
+ if (VernauxBuf + sizeof(Elf_Vernaux) > End)
+ return createError(
+ "invalid SHT_GNU_verneed section with index " + Twine(SecNdx) +
+ ": version dependency " + Twine(I) +
+ " refers to an auxiliary entry that goes past the end "
+ "of the section");
+
+ const Elf_Vernaux *Vernaux =
+ reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
+
+ VernAux &Aux = *VN.AuxV.emplace(VN.AuxV.end());
+ Aux.Hash = Vernaux->vna_hash;
+ Aux.Flags = Vernaux->vna_flags;
+ Aux.Other = Vernaux->vna_other;
+ Aux.Offset = VernauxBuf - Start;
+ if (StrTab.size() <= Vernaux->vna_name)
+ Aux.Name = "<corrupt>";
+ else
+ Aux.Name = StrTab.drop_front(Vernaux->vna_name);
+
+ VernauxBuf += Vernaux->vna_next;
+ }
+ VerneedBuf += Verneed->vn_next;
+ }
+ return Ret;
+}
+
+template <class ELFT>
void ELFDumper<ELFT>::printSymbolsHelper(bool IsDynamic) const {
StringRef StrTable, SymtabName;
size_t Entries = 0;
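
The new getVersionTable/getVersionDefinitions/getVersionDependencies helpers above parse the SHT_GNU_versym/verdef/verneed sections into the plain VerDef/VerNeed structs declared earlier and return Expected<> values, leaving each caller to decide between a hard failure and a warning. A sketch of the consuming pattern the printers later in this patch follow (Dumper, Sec and OS stand for the caller's context):

  // Sketch: how the GNU/LLVM output styles consume these helpers.
  Expected<std::vector<VerDef>> DefsOrErr =
      Dumper->getVersionDefinitions(Sec);
  if (!DefsOrErr) {
    reportUniqueWarning(DefsOrErr.takeError());  // degrade to a warning
    return;
  }
  for (const VerDef &Def : *DefsOrErr)
    OS << Def.Name << " (ndx " << Def.Ndx << ", cnt " << Def.Cnt << ")\n";
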
@@ -392,6 +706,7 @@ public:
virtual void printSymbols(const ELFFile<ELFT> *Obj, bool PrintSymbols,
bool PrintDynamicSymbols) = 0;
virtual void printHashSymbols(const ELFFile<ELFT> *Obj) {}
+ virtual void printDependentLibs(const ELFFile<ELFT> *Obj) = 0;
virtual void printDynamic(const ELFFile<ELFT> *Obj) {}
virtual void printDynamicRelocations(const ELFFile<ELFT> *Obj) = 0;
virtual void printSymtabMessage(const ELFFile<ELFT> *Obj, StringRef Name,
@@ -432,6 +747,8 @@ public:
virtual void printMipsABIFlags(const ELFObjectFile<ELFT> *Obj) = 0;
const ELFDumper<ELFT> *dumper() const { return Dumper; }
+ void reportUniqueWarning(Error Err) const;
+
protected:
std::function<Error(const Twine &Msg)> WarningHandler;
StringRef FileName;
@@ -460,6 +777,7 @@ public:
void printSymbols(const ELFO *Obj, bool PrintSymbols,
bool PrintDynamicSymbols) override;
void printHashSymbols(const ELFO *Obj) override;
+ void printDependentLibs(const ELFFile<ELFT> *Obj) override;
void printDynamic(const ELFFile<ELFT> *Obj) override;
void printDynamicRelocations(const ELFO *Obj) override;
void printSymtabMessage(const ELFO *Obj, StringRef Name, size_t Offset,
@@ -553,8 +871,19 @@ private:
bool checkPTDynamic(const Elf_Phdr &Phdr, const Elf_Shdr &Sec);
void printProgramHeaders(const ELFO *Obj);
void printSectionMapping(const ELFO *Obj);
+ void printGNUVersionSectionProlog(const ELFFile<ELFT> *Obj,
+ const typename ELFT::Shdr *Sec,
+ const Twine &Label, unsigned EntriesNum);
};
+template <class ELFT>
+void DumpStyle<ELFT>::reportUniqueWarning(Error Err) const {
+ handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
+ cantFail(WarningHandler(EI.message()),
+ "WarningHandler should always return ErrorSuccess");
+ });
+}
+
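
reportUniqueWarning above drains every Error through the dumper's WarningHandler, which is expected to de-duplicate messages and return Error::success(); cantFail documents that contract by aborting if the handler ever fails. A self-contained sketch of the same pattern, with a hypothetical warnOnce standing in for WarningHandler:

  #include "llvm/ADT/Twine.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"
  #include <set>
  #include <string>

  // Hypothetical de-duplicating sink standing in for WarningHandler.
  static llvm::Error warnOnce(const llvm::Twine &Msg) {
    static std::set<std::string> Seen;
    if (Seen.insert(Msg.str()).second)   // print each distinct message once
      llvm::errs() << "warning: " << Msg << "\n";
    return llvm::Error::success();
  }

  static void reportUniqueWarning(llvm::Error Err) {
    llvm::handleAllErrors(std::move(Err), [](const llvm::ErrorInfoBase &EI) {
      // A failing handler would be a programming error; cantFail aborts with
      // the given message if warnOnce ever returns a real Error.
      llvm::cantFail(warnOnce(EI.message()),
                     "WarningHandler should always return ErrorSuccess");
    });
  }
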
template <typename ELFT> class LLVMStyle : public DumpStyle<ELFT> {
public:
TYPEDEF_ELF_TYPES(ELFT)
@@ -569,6 +898,7 @@ public:
void printSectionHeaders(const ELFO *Obj) override;
void printSymbols(const ELFO *Obj, bool PrintSymbols,
bool PrintDynamicSymbols) override;
+ void printDependentLibs(const ELFFile<ELFT> *Obj) override;
void printDynamic(const ELFFile<ELFT> *Obj) override;
void printDynamicRelocations(const ELFO *Obj) override;
void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders,
@@ -595,6 +925,7 @@ private:
void printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel);
void printSymbols(const ELFO *Obj);
void printDynamicSymbols(const ELFO *Obj);
+ void printSymbolSection(const Elf_Sym *Symbol, const Elf_Sym *First);
void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First,
StringRef StrTable, bool IsDynamic,
bool /*NonVisibilityBitsUsed*/) override;
@@ -640,96 +971,51 @@ std::error_code createELFDumper(const object::ObjectFile *Obj,
} // end namespace llvm
-// Iterate through the versions needed section, and place each Elf_Vernaux
-// in the VersionMap according to its index.
-template <class ELFT>
-void ELFDumper<ELFT>::LoadVersionNeeds(const Elf_Shdr *Sec) const {
- unsigned VerneedSize = Sec->sh_size; // Size of section in bytes
- unsigned VerneedEntries = Sec->sh_info; // Number of Verneed entries
- const uint8_t *VerneedStart = reinterpret_cast<const uint8_t *>(
- ObjF->getELFFile()->base() + Sec->sh_offset);
- const uint8_t *VerneedEnd = VerneedStart + VerneedSize;
- // The first Verneed entry is at the start of the section.
- const uint8_t *VerneedBuf = VerneedStart;
- for (unsigned VerneedIndex = 0; VerneedIndex < VerneedEntries;
- ++VerneedIndex) {
- if (VerneedBuf + sizeof(Elf_Verneed) > VerneedEnd)
- report_fatal_error("Section ended unexpectedly while scanning "
- "version needed records.");
- const Elf_Verneed *Verneed =
- reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
- if (Verneed->vn_version != ELF::VER_NEED_CURRENT)
- report_fatal_error("Unexpected verneed version");
- // Iterate through the Vernaux entries
- const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
- for (unsigned VernauxIndex = 0; VernauxIndex < Verneed->vn_cnt;
- ++VernauxIndex) {
- if (VernauxBuf + sizeof(Elf_Vernaux) > VerneedEnd)
- report_fatal_error("Section ended unexpected while scanning auxiliary "
- "version needed records.");
- const Elf_Vernaux *Vernaux =
- reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
- size_t Index = Vernaux->vna_other & ELF::VERSYM_VERSION;
- if (Index >= VersionMap.size())
- VersionMap.resize(Index + 1);
- VersionMap[Index] = VersionMapEntry(Vernaux);
- VernauxBuf += Vernaux->vna_next;
- }
- VerneedBuf += Verneed->vn_next;
- }
-}
-
-// Iterate through the version definitions, and place each Elf_Verdef
-// in the VersionMap according to its index.
-template <class ELFT>
-void ELFDumper<ELFT>::LoadVersionDefs(const Elf_Shdr *Sec) const {
- unsigned VerdefSize = Sec->sh_size; // Size of section in bytes
- unsigned VerdefEntries = Sec->sh_info; // Number of Verdef entries
- const uint8_t *VerdefStart = reinterpret_cast<const uint8_t *>(
- ObjF->getELFFile()->base() + Sec->sh_offset);
- const uint8_t *VerdefEnd = VerdefStart + VerdefSize;
- // The first Verdef entry is at the start of the section.
- const uint8_t *VerdefBuf = VerdefStart;
- for (unsigned VerdefIndex = 0; VerdefIndex < VerdefEntries; ++VerdefIndex) {
- if (VerdefBuf + sizeof(Elf_Verdef) > VerdefEnd)
- report_fatal_error("Section ended unexpectedly while scanning "
- "version definitions.");
- const Elf_Verdef *Verdef = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
- if (Verdef->vd_version != ELF::VER_DEF_CURRENT)
- report_fatal_error("Unexpected verdef version");
- size_t Index = Verdef->vd_ndx & ELF::VERSYM_VERSION;
- if (Index >= VersionMap.size())
- VersionMap.resize(Index + 1);
- VersionMap[Index] = VersionMapEntry(Verdef);
- VerdefBuf += Verdef->vd_next;
- }
-}
-
-template <class ELFT> void ELFDumper<ELFT>::LoadVersionMap() const {
+template <class ELFT> Error ELFDumper<ELFT>::LoadVersionMap() const {
// If there is no dynamic symtab or version table, there is nothing to do.
if (!DynSymRegion.Addr || !SymbolVersionSection)
- return;
+ return Error::success();
// Has the VersionMap already been loaded?
if (!VersionMap.empty())
- return;
+ return Error::success();
// The first two version indexes are reserved.
// Index 0 is LOCAL, index 1 is GLOBAL.
- VersionMap.push_back(VersionMapEntry());
- VersionMap.push_back(VersionMapEntry());
+ VersionMap.push_back(VersionEntry());
+ VersionMap.push_back(VersionEntry());
+
+ auto InsertEntry = [this](unsigned N, StringRef Version, bool IsVerdef) {
+ if (N >= VersionMap.size())
+ VersionMap.resize(N + 1);
+ VersionMap[N] = {Version, IsVerdef};
+ };
- if (SymbolVersionDefSection)
- LoadVersionDefs(SymbolVersionDefSection);
+ if (SymbolVersionDefSection) {
+ Expected<std::vector<VerDef>> Defs =
+ this->getVersionDefinitions(SymbolVersionDefSection);
+ if (!Defs)
+ return Defs.takeError();
+ for (const VerDef &Def : *Defs)
+ InsertEntry(Def.Ndx & ELF::VERSYM_VERSION, Def.Name, true);
+ }
+
+ if (SymbolVersionNeedSection) {
+ Expected<std::vector<VerNeed>> Deps =
+ this->getVersionDependencies(SymbolVersionNeedSection);
+ if (!Deps)
+ return Deps.takeError();
+ for (const VerNeed &Dep : *Deps)
+ for (const VernAux &Aux : Dep.AuxV)
+ InsertEntry(Aux.Other & ELF::VERSYM_VERSION, Aux.Name, false);
+ }
- if (SymbolVersionNeedSection)
- LoadVersionNeeds(SymbolVersionNeedSection);
+ return Error::success();
}
template <typename ELFT>
-StringRef ELFDumper<ELFT>::getSymbolVersion(StringRef StrTab,
- const Elf_Sym *Sym,
- bool &IsDefault) const {
+Expected<StringRef> ELFDumper<ELFT>::getSymbolVersion(const Elf_Sym *Sym,
+ bool &IsDefault) const {
// This is a dynamic symbol. Look in the GNU symbol version table.
if (!SymbolVersionSection) {
// No version table.
@@ -746,7 +1032,7 @@ StringRef ELFDumper<ELFT>::getSymbolVersion(StringRef StrTab,
const Elf_Versym *Versym = unwrapOrError(
ObjF->getFileName(), ObjF->getELFFile()->template getEntry<Elf_Versym>(
SymbolVersionSection, EntryIndex));
- return this->getSymbolVersionByIndex(StrTab, Versym->vs_index, IsDefault);
+ return this->getSymbolVersionByIndex(Versym->vs_index, IsDefault);
}
static std::string maybeDemangle(StringRef Name) {
@@ -773,9 +1059,9 @@ ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
}
template <typename ELFT>
-StringRef ELFDumper<ELFT>::getSymbolVersionByIndex(StringRef StrTab,
- uint32_t SymbolVersionIndex,
- bool &IsDefault) const {
+Expected<StringRef>
+ELFDumper<ELFT>::getSymbolVersionByIndex(uint32_t SymbolVersionIndex,
+ bool &IsDefault) const {
size_t VersionIndex = SymbolVersionIndex & VERSYM_VERSION;
// Special markers for unversioned symbols.
@@ -785,24 +1071,18 @@ StringRef ELFDumper<ELFT>::getSymbolVersionByIndex(StringRef StrTab,
}
// Lookup this symbol in the version table.
- LoadVersionMap();
- if (VersionIndex >= VersionMap.size() || VersionMap[VersionIndex].isNull())
- reportError(createError("Invalid version entry"), ObjF->getFileName());
- const VersionMapEntry &Entry = VersionMap[VersionIndex];
-
- // Get the version name string.
- size_t NameOffset;
- if (Entry.isVerdef()) {
- // The first Verdaux entry holds the name.
- NameOffset = Entry.getVerdef()->getAux()->vda_name;
+ if (Error E = LoadVersionMap())
+ return std::move(E);
+ if (VersionIndex >= VersionMap.size() || !VersionMap[VersionIndex])
+ return createError("SHT_GNU_versym section refers to a version index " +
+ Twine(VersionIndex) + " which is missing");
+
+ const VersionEntry &Entry = *VersionMap[VersionIndex];
+ if (Entry.IsVerDef)
IsDefault = !(SymbolVersionIndex & VERSYM_HIDDEN);
- } else {
- NameOffset = Entry.getVernaux()->vna_name;
+ else
IsDefault = false;
- }
- if (NameOffset >= StrTab.size())
- reportError(createError("Invalid string offset"), ObjF->getFileName());
- return StrTab.data() + NameOffset;
+ return Entry.Name.c_str();
}
template <typename ELFT>
@@ -813,54 +1093,77 @@ std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym *Symbol,
unwrapOrError(ObjF->getFileName(), Symbol->getName(StrTable)));
if (SymbolName.empty() && Symbol->getType() == ELF::STT_SECTION) {
- unsigned SectionIndex;
- StringRef SectionName;
Elf_Sym_Range Syms = unwrapOrError(
ObjF->getFileName(), ObjF->getELFFile()->symbols(DotSymtabSec));
- getSectionNameIndex(Symbol, Syms.begin(), SectionName, SectionIndex);
- return SectionName;
+ Expected<unsigned> SectionIndex =
+ getSymbolSectionIndex(Symbol, Syms.begin());
+ if (!SectionIndex) {
+ ELFDumperStyle->reportUniqueWarning(SectionIndex.takeError());
+ return "<?>";
+ }
+ Expected<StringRef> NameOrErr = getSymbolSectionName(Symbol, *SectionIndex);
+ if (!NameOrErr) {
+ ELFDumperStyle->reportUniqueWarning(NameOrErr.takeError());
+ return ("<section " + Twine(*SectionIndex) + ">").str();
+ }
+ return *NameOrErr;
}
if (!IsDynamic)
return SymbolName;
bool IsDefault;
- StringRef Version = getSymbolVersion(StrTable, &*Symbol, IsDefault);
- if (!Version.empty()) {
+ Expected<StringRef> VersionOrErr = getSymbolVersion(&*Symbol, IsDefault);
+ if (!VersionOrErr) {
+ ELFDumperStyle->reportUniqueWarning(VersionOrErr.takeError());
+ return SymbolName + "@<corrupt>";
+ }
+
+ if (!VersionOrErr->empty()) {
SymbolName += (IsDefault ? "@@" : "@");
- SymbolName += Version;
+ SymbolName += *VersionOrErr;
}
return SymbolName;
}
template <typename ELFT>
-void ELFDumper<ELFT>::getSectionNameIndex(const Elf_Sym *Symbol,
- const Elf_Sym *FirstSym,
- StringRef &SectionName,
- unsigned &SectionIndex) const {
- SectionIndex = Symbol->st_shndx;
+Expected<unsigned>
+ELFDumper<ELFT>::getSymbolSectionIndex(const Elf_Sym *Symbol,
+ const Elf_Sym *FirstSym) const {
+ return Symbol->st_shndx == SHN_XINDEX
+ ? object::getExtendedSymbolTableIndex<ELFT>(Symbol, FirstSym,
+ ShndxTable)
+ : Symbol->st_shndx;
+}
+
+// If the Symbol has a reserved st_shndx other than SHN_XINDEX, return a
+// descriptive interpretation of the st_shndx value. Otherwise, return the name
+// of the section with index SectionIndex. This function assumes that if the
+// Symbol has st_shndx == SHN_XINDEX the SectionIndex will be the value derived
+// from the SHT_SYMTAB_SHNDX section.
+template <typename ELFT>
+Expected<StringRef>
+ELFDumper<ELFT>::getSymbolSectionName(const Elf_Sym *Symbol,
+ unsigned SectionIndex) const {
if (Symbol->isUndefined())
- SectionName = "Undefined";
- else if (Symbol->isProcessorSpecific())
- SectionName = "Processor Specific";
- else if (Symbol->isOSSpecific())
- SectionName = "Operating System Specific";
- else if (Symbol->isAbsolute())
- SectionName = "Absolute";
- else if (Symbol->isCommon())
- SectionName = "Common";
- else if (Symbol->isReserved() && SectionIndex != SHN_XINDEX)
- SectionName = "Reserved";
- else {
- if (SectionIndex == SHN_XINDEX)
- SectionIndex = unwrapOrError(ObjF->getFileName(),
- object::getExtendedSymbolTableIndex<ELFT>(
- Symbol, FirstSym, ShndxTable));
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- const typename ELFT::Shdr *Sec =
- unwrapOrError(ObjF->getFileName(), Obj->getSection(SectionIndex));
- SectionName = unwrapOrError(ObjF->getFileName(), Obj->getSectionName(Sec));
- }
+ return "Undefined";
+ if (Symbol->isProcessorSpecific())
+ return "Processor Specific";
+ if (Symbol->isOSSpecific())
+ return "Operating System Specific";
+ if (Symbol->isAbsolute())
+ return "Absolute";
+ if (Symbol->isCommon())
+ return "Common";
+ if (Symbol->isReserved() && Symbol->st_shndx != SHN_XINDEX)
+ return "Reserved";
+
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+ Expected<const Elf_Shdr *> SecOrErr =
+ Obj->getSection(SectionIndex);
+ if (!SecOrErr)
+ return SecOrErr.takeError();
+ return Obj->getSectionName(*SecOrErr);
}
template <class ELFO>
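
The refactor above splits the old getSectionNameIndex into two Expected-returning steps: resolve the (possibly SHN_XINDEX-extended) section index first, then map reserved st_shndx values to descriptive names before ever touching the section table. A standalone sketch of the reserved-index mapping it mirrors (SHN_* constants from llvm/BinaryFormat/ELF.h):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/BinaryFormat/ELF.h"

  // Sketch: descriptive names for reserved ELF st_shndx values.
  llvm::StringRef reservedShndxName(unsigned Shndx) {
    using namespace llvm::ELF;
    if (Shndx == SHN_UNDEF)  return "Undefined";
    if (Shndx == SHN_ABS)    return "Absolute";
    if (Shndx == SHN_COMMON) return "Common";
    if (Shndx >= SHN_LOPROC && Shndx <= SHN_HIPROC)
      return "Processor Specific";
    if (Shndx >= SHN_LOOS && Shndx <= SHN_HIOS)
      return "Operating System Specific";
    if (Shndx >= SHN_LORESERVE && Shndx != SHN_XINDEX)
      return "Reserved";           // SHN_XINDEX is resolved separately
    return "";                     // a regular section index
  }
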
@@ -1131,76 +1434,126 @@ static const char *getGroupType(uint32_t Flag) {
static const EnumEntry<unsigned> ElfSectionFlags[] = {
ENUM_ENT(SHF_WRITE, "W"),
ENUM_ENT(SHF_ALLOC, "A"),
- ENUM_ENT(SHF_EXCLUDE, "E"),
ENUM_ENT(SHF_EXECINSTR, "X"),
ENUM_ENT(SHF_MERGE, "M"),
ENUM_ENT(SHF_STRINGS, "S"),
ENUM_ENT(SHF_INFO_LINK, "I"),
ENUM_ENT(SHF_LINK_ORDER, "L"),
- ENUM_ENT(SHF_OS_NONCONFORMING, "o"),
+ ENUM_ENT(SHF_OS_NONCONFORMING, "O"),
ENUM_ENT(SHF_GROUP, "G"),
ENUM_ENT(SHF_TLS, "T"),
- ENUM_ENT(SHF_MASKOS, "o"),
- ENUM_ENT(SHF_MASKPROC, "p"),
- ENUM_ENT_1(SHF_COMPRESSED),
+ ENUM_ENT(SHF_COMPRESSED, "C"),
+ ENUM_ENT(SHF_EXCLUDE, "E"),
};
static const EnumEntry<unsigned> ElfXCoreSectionFlags[] = {
- LLVM_READOBJ_ENUM_ENT(ELF, XCORE_SHF_CP_SECTION),
- LLVM_READOBJ_ENUM_ENT(ELF, XCORE_SHF_DP_SECTION)
+ ENUM_ENT(XCORE_SHF_CP_SECTION, ""),
+ ENUM_ENT(XCORE_SHF_DP_SECTION, "")
};
static const EnumEntry<unsigned> ElfARMSectionFlags[] = {
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_ARM_PURECODE)
+ ENUM_ENT(SHF_ARM_PURECODE, "y")
};
static const EnumEntry<unsigned> ElfHexagonSectionFlags[] = {
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_HEX_GPREL)
+ ENUM_ENT(SHF_HEX_GPREL, "")
};
static const EnumEntry<unsigned> ElfMipsSectionFlags[] = {
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_NODUPES),
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_NAMES ),
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_LOCAL ),
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_NOSTRIP),
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_GPREL ),
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_MERGE ),
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_ADDR ),
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_STRING )
+ ENUM_ENT(SHF_MIPS_NODUPES, ""),
+ ENUM_ENT(SHF_MIPS_NAMES, ""),
+ ENUM_ENT(SHF_MIPS_LOCAL, ""),
+ ENUM_ENT(SHF_MIPS_NOSTRIP, ""),
+ ENUM_ENT(SHF_MIPS_GPREL, ""),
+ ENUM_ENT(SHF_MIPS_MERGE, ""),
+ ENUM_ENT(SHF_MIPS_ADDR, ""),
+ ENUM_ENT(SHF_MIPS_STRING, "")
};
static const EnumEntry<unsigned> ElfX86_64SectionFlags[] = {
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_X86_64_LARGE)
+ ENUM_ENT(SHF_X86_64_LARGE, "l")
};
-static std::string getGNUFlags(uint64_t Flags) {
+static std::vector<EnumEntry<unsigned>>
+getSectionFlagsForTarget(unsigned EMachine) {
+ std::vector<EnumEntry<unsigned>> Ret(std::begin(ElfSectionFlags),
+ std::end(ElfSectionFlags));
+ switch (EMachine) {
+ case EM_ARM:
+ Ret.insert(Ret.end(), std::begin(ElfARMSectionFlags),
+ std::end(ElfARMSectionFlags));
+ break;
+ case EM_HEXAGON:
+ Ret.insert(Ret.end(), std::begin(ElfHexagonSectionFlags),
+ std::end(ElfHexagonSectionFlags));
+ break;
+ case EM_MIPS:
+ Ret.insert(Ret.end(), std::begin(ElfMipsSectionFlags),
+ std::end(ElfMipsSectionFlags));
+ break;
+ case EM_X86_64:
+ Ret.insert(Ret.end(), std::begin(ElfX86_64SectionFlags),
+ std::end(ElfX86_64SectionFlags));
+ break;
+ case EM_XCORE:
+ Ret.insert(Ret.end(), std::begin(ElfXCoreSectionFlags),
+ std::end(ElfXCoreSectionFlags));
+ break;
+ default:
+ break;
+ }
+ return Ret;
+}
+
+static std::string getGNUFlags(unsigned EMachine, uint64_t Flags) {
+ // Here we are trying to build the flags string in the same way as GNU does.
+ // It is not that straightforward. Imagine we have sh_flags == 0x90000000.
+ // SHF_EXCLUDE ("E") has a value of 0x80000000 and SHF_MASKPROC is 0xf0000000.
+ // GNU readelf will not print "E" or "Ep" in this case, but will print just
+ // "p". It only will print "E" when no other processor flag is set.
std::string Str;
- for (auto Entry : ElfSectionFlags) {
- uint64_t Flag = Entry.Value & Flags;
- Flags &= ~Entry.Value;
- switch (Flag) {
- case ELF::SHF_WRITE:
- case ELF::SHF_ALLOC:
- case ELF::SHF_EXECINSTR:
- case ELF::SHF_MERGE:
- case ELF::SHF_STRINGS:
- case ELF::SHF_INFO_LINK:
- case ELF::SHF_LINK_ORDER:
- case ELF::SHF_OS_NONCONFORMING:
- case ELF::SHF_GROUP:
- case ELF::SHF_TLS:
- case ELF::SHF_EXCLUDE:
- Str += Entry.AltName;
- break;
- default:
- if (Flag & ELF::SHF_MASKOS)
- Str += "o";
- else if (Flag & ELF::SHF_MASKPROC)
- Str += "p";
- else if (Flag)
- Str += "x";
+ bool HasUnknownFlag = false;
+ bool HasOSFlag = false;
+ bool HasProcFlag = false;
+ std::vector<EnumEntry<unsigned>> FlagsList =
+ getSectionFlagsForTarget(EMachine);
+ while (Flags) {
+ // Take the least significant bit as a flag.
+ uint64_t Flag = Flags & -Flags;
+ Flags -= Flag;
+
+ // Find the flag in the known flags list.
+ auto I = llvm::find_if(FlagsList, [=](const EnumEntry<unsigned> &E) {
+ // Flags with empty names are not printed in GNU style output.
+ return E.Value == Flag && !E.AltName.empty();
+ });
+ if (I != FlagsList.end()) {
+ Str += I->AltName;
+ continue;
+ }
+
+ // If we did not find a matching regular flag, then this is an OS-specific
+ // flag, a processor-specific flag, or an unknown flag.
+ if (Flag & ELF::SHF_MASKOS) {
+ HasOSFlag = true;
+ Flags &= ~ELF::SHF_MASKOS;
+ } else if (Flag & ELF::SHF_MASKPROC) {
+ HasProcFlag = true;
+ // Mask off all the processor-specific bits. This removes the SHF_EXCLUDE
+ // bit if set so that it doesn't also get printed.
+ Flags &= ~ELF::SHF_MASKPROC;
+ } else {
+ HasUnknownFlag = true;
}
}
+
+ // "o", "p" and "x" are printed last.
+ if (HasOSFlag)
+ Str += "o";
+ if (HasProcFlag)
+ Str += "p";
+ if (HasUnknownFlag)
+ Str += "x";
return Str;
}
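
The rewritten getGNUFlags above walks sh_flags one set bit at a time: `Flags & -Flags` isolates the lowest set bit, and once a bit is classified as OS- or processor-specific the whole SHF_MASKOS/SHF_MASKPROC group is cleared so the area prints as a single "o" or "p". A standalone sketch of the bit-walking idiom:

  #include <cstdint>
  #include <cstdio>

  // Sketch: iterate over the set bits of a mask, lowest bit first.
  void walkBits(uint64_t Flags) {
    while (Flags) {
      uint64_t Flag = Flags & -Flags;   // lowest set bit (two's complement)
      Flags -= Flag;                    // clear it and continue
      std::printf("bit 0x%llx\n", (unsigned long long)Flag);
    }
  }
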
@@ -1237,6 +1590,7 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_STACK);
LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_RELRO);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_PROPERTY);
LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE);
LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED);
@@ -1261,6 +1615,7 @@ static std::string getElfPtType(unsigned Arch, unsigned Type) {
LLVM_READOBJ_PHDR_ENUM(ELF, PT_SUNW_UNWIND)
LLVM_READOBJ_PHDR_ENUM(ELF, PT_GNU_STACK)
LLVM_READOBJ_PHDR_ENUM(ELF, PT_GNU_RELRO)
+ LLVM_READOBJ_PHDR_ENUM(ELF, PT_GNU_PROPERTY)
default:
// All machine specific PT_* types
switch (Arch) {
@@ -1513,7 +1868,7 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
if (!DynamicPhdr || !DynamicSec) {
if ((DynamicPhdr && IsPhdrTableValid) || (DynamicSec && IsSecTableValid)) {
DynamicTable = DynamicPhdr ? FromPhdr : FromSec;
- parseDynamicTable();
+ parseDynamicTable(Obj);
} else {
reportWarning(createError("no valid dynamic table was found"),
ObjF->getFileName());
@@ -1554,7 +1909,7 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
DynamicTable = FromSec;
}
- parseDynamicTable();
+ parseDynamicTable(Obj);
}
template <typename ELFT>
@@ -1620,82 +1975,13 @@ ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
ELFDumperStyle.reset(new LLVMStyle<ELFT>(Writer, this));
}
-static const char *getTypeString(unsigned Arch, uint64_t Type) {
-#define DYNAMIC_TAG(n, v)
- switch (Arch) {
-
- case EM_AARCH64:
- switch (Type) {
-#define AARCH64_DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef AARCH64_DYNAMIC_TAG
- }
- break;
-
- case EM_HEXAGON:
- switch (Type) {
-#define HEXAGON_DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef HEXAGON_DYNAMIC_TAG
- }
- break;
-
- case EM_MIPS:
- switch (Type) {
-#define MIPS_DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef MIPS_DYNAMIC_TAG
- }
- break;
-
- case EM_PPC64:
- switch (Type) {
-#define PPC64_DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef PPC64_DYNAMIC_TAG
- }
- break;
- }
-#undef DYNAMIC_TAG
- switch (Type) {
-// Now handle all dynamic tags except the architecture specific ones
-#define AARCH64_DYNAMIC_TAG(name, value)
-#define MIPS_DYNAMIC_TAG(name, value)
-#define HEXAGON_DYNAMIC_TAG(name, value)
-#define PPC64_DYNAMIC_TAG(name, value)
-// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
-#define DYNAMIC_TAG_MARKER(name, value)
-#define DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef DYNAMIC_TAG
-#undef AARCH64_DYNAMIC_TAG
-#undef MIPS_DYNAMIC_TAG
-#undef HEXAGON_DYNAMIC_TAG
-#undef PPC64_DYNAMIC_TAG
-#undef DYNAMIC_TAG_MARKER
- default:
- return "unknown";
- }
-}
-
-template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
+template <typename ELFT>
+void ELFDumper<ELFT>::parseDynamicTable(const ELFFile<ELFT> *Obj) {
auto toMappedAddr = [&](uint64_t Tag, uint64_t VAddr) -> const uint8_t * {
auto MappedAddrOrError = ObjF->getELFFile()->toMappedAddr(VAddr);
if (!MappedAddrOrError) {
Error Err =
- createError("Unable to parse DT_" +
- Twine(getTypeString(
- ObjF->getELFFile()->getHeader()->e_machine, Tag)) +
+ createError("Unable to parse DT_" + Obj->getDynamicTagAsString(Tag) +
": " + llvm::toString(MappedAddrOrError.takeError()));
reportWarning(std::move(Err), ObjF->getFileName());
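
The local getTypeString table deleted above is replaced, here and in printDynamic below, by ELFFile<ELFT>::getDynamicTagAsString, which already folds in the architecture-specific DT_* tags. A sketch of the call, assuming Obj and Tag as in parseDynamicTable (Reason stands in for the underlying error text):

  // Sketch: dynamic tag names now come from ELFFile itself.
  std::string TagName = Obj->getDynamicTagAsString(Tag);  // e.g. "NEEDED"
  reportWarning(createError("Unable to parse DT_" + TagName + ": " + Reason),
                ObjF->getFileName());
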
@@ -1851,6 +2137,10 @@ template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
SymbolVersionNeedSection);
}
+template <class ELFT> void ELFDumper<ELFT>::printDependentLibs() {
+ ELFDumperStyle->printDependentLibs(ObjF->getELFFile());
+}
+
template <class ELFT> void ELFDumper<ELFT>::printDynamicRelocations() {
ELFDumperStyle->printDynamicRelocations(ObjF->getELFFile());
}
@@ -2771,8 +3061,8 @@ template <class ELFT> void GNUStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
OS << "\n";
Str = printEnum(e->e_ident[ELF::EI_OSABI], makeArrayRef(ElfOSABI));
printFields(OS, "OS/ABI:", Str);
- Str = "0x" + to_hexString(e->e_ident[ELF::EI_ABIVERSION]);
- printFields(OS, "ABI Version:", Str);
+ printFields(OS,
+ "ABI Version:", std::to_string(e->e_ident[ELF::EI_ABIVERSION]));
Str = printEnum(e->e_type, makeArrayRef(ElfObjectFileType));
printFields(OS, "Type:", Str);
Str = printEnum(e->e_machine, makeArrayRef(ElfMachineType));
@@ -3188,6 +3478,25 @@ static std::string getSectionTypeString(unsigned Arch, unsigned Type) {
return "";
}
+static void printSectionDescription(formatted_raw_ostream &OS,
+ unsigned EMachine) {
+ OS << "Key to Flags:\n";
+ OS << " W (write), A (alloc), X (execute), M (merge), S (strings), I "
+ "(info),\n";
+ OS << " L (link order), O (extra OS processing required), G (group), T "
+ "(TLS),\n";
+ OS << " C (compressed), x (unknown), o (OS specific), E (exclude),\n";
+
+ if (EMachine == EM_X86_64)
+ OS << " l (large), ";
+ else if (EMachine == EM_ARM)
+ OS << " y (purecode), ";
+ else
+ OS << " ";
+
+ OS << "p (processor specific)\n";
+}
+
template <class ELFT>
void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
unsigned Bias = ELFT::Is64Bits ? 0 : 8;
@@ -3218,7 +3527,7 @@ void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
Fields[4].Str = to_string(format_hex_no_prefix(Sec.sh_offset, 6));
Fields[5].Str = to_string(format_hex_no_prefix(Sec.sh_size, 6));
Fields[6].Str = to_string(format_hex_no_prefix(Sec.sh_entsize, 2));
- Fields[7].Str = getGNUFlags(Sec.sh_flags);
+ Fields[7].Str = getGNUFlags(Obj->getHeader()->e_machine, Sec.sh_flags);
Fields[8].Str = to_string(Sec.sh_link);
Fields[9].Str = to_string(Sec.sh_info);
Fields[10].Str = to_string(Sec.sh_addralign);
@@ -3238,13 +3547,7 @@ void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
OS << "\n";
++SectionIndex;
}
- OS << "Key to Flags:\n"
- << " W (write), A (alloc), X (execute), M (merge), S (strings), l "
- "(large)\n"
- << " I (info), L (link order), G (group), T (TLS), E (exclude),\
- x (unknown)\n"
- << " O (extra OS processing required) o (OS specific),\
- p (processor specific)\n";
+ printSectionDescription(OS, Obj->getHeader()->e_machine);
}
template <class ELFT>
@@ -3279,12 +3582,18 @@ std::string GNUStyle<ELFT>::getSymbolSectionNdx(const ELFO *Obj,
return "ABS";
case ELF::SHN_COMMON:
return "COM";
- case ELF::SHN_XINDEX:
- return to_string(format_decimal(
- unwrapOrError(this->FileName,
- object::getExtendedSymbolTableIndex<ELFT>(
- Symbol, FirstSym, this->dumper()->getShndxTable())),
- 3));
+ case ELF::SHN_XINDEX: {
+ Expected<uint32_t> IndexOrErr = object::getExtendedSymbolTableIndex<ELFT>(
+ Symbol, FirstSym, this->dumper()->getShndxTable());
+ if (!IndexOrErr) {
+ assert(Symbol->st_shndx == SHN_XINDEX &&
+ "getSymbolSectionIndex should only fail due to an invalid "
+ "SHT_SYMTAB_SHNDX table/reference");
+ this->reportUniqueWarning(IndexOrErr.takeError());
+ return "RSV[0xffff]";
+ }
+ return to_string(format_decimal(*IndexOrErr, 3));
+ }
default:
// Find if:
// Processor specific
@@ -3683,9 +3992,8 @@ template <class ELFT> void GNUStyle<ELFT>::printDynamic(const ELFO *Obj) {
OS << " Tag Type Name/Value\n";
for (auto Entry : Table) {
uintX_t Tag = Entry.getTag();
- std::string TypeString = std::string("(") +
- getTypeString(Obj->getHeader()->e_machine, Tag) +
- ")";
+ std::string TypeString =
+ std::string("(") + Obj->getDynamicTagAsString(Tag).c_str() + ")";
OS << " " << format_hex(Tag, Is64 ? 18 : 10)
<< format(" %-20s ", TypeString.c_str());
this->dumper()->printDynamicEntry(OS, Tag, Entry.getVal());
@@ -3762,18 +4070,29 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
}
template <class ELFT>
-static void printGNUVersionSectionProlog(formatted_raw_ostream &OS,
- const Twine &Name, unsigned EntriesNum,
- const ELFFile<ELFT> *Obj,
- const typename ELFT::Shdr *Sec,
- StringRef FileName) {
- StringRef SecName = unwrapOrError(FileName, Obj->getSectionName(Sec));
- OS << Name << " section '" << SecName << "' "
+void GNUStyle<ELFT>::printGNUVersionSectionProlog(
+ const ELFFile<ELFT> *Obj, const typename ELFT::Shdr *Sec,
+ const Twine &Label, unsigned EntriesNum) {
+ StringRef SecName = unwrapOrError(this->FileName, Obj->getSectionName(Sec));
+ OS << Label << " section '" << SecName << "' "
<< "contains " << EntriesNum << " entries:\n";
- const typename ELFT::Shdr *SymTab =
- unwrapOrError(FileName, Obj->getSection(Sec->sh_link));
- StringRef SymTabName = unwrapOrError(FileName, Obj->getSectionName(SymTab));
+ unsigned SecNdx = Sec - &cantFail(Obj->sections()).front();
+ StringRef SymTabName = "<corrupt>";
+
+ Expected<const typename ELFT::Shdr *> SymTabOrErr =
+ Obj->getSection(Sec->sh_link);
+ if (SymTabOrErr)
+ SymTabName =
+ unwrapOrError(this->FileName, Obj->getSectionName(*SymTabOrErr));
+ else
+ this->reportUniqueWarning(
+ createError("invalid section linked to " +
+ object::getELFSectionTypeName(Obj->getHeader()->e_machine,
+ Sec->sh_type) +
+ " section with index " + Twine(SecNdx) + ": " +
+ toString(SymTabOrErr.takeError())));
+
OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16)
<< " Offset: " << format_hex(Sec->sh_offset, 8)
<< " Link: " << Sec->sh_link << " (" << SymTabName << ")\n";
@@ -3785,46 +4104,51 @@ void GNUStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
if (!Sec)
return;
- unsigned Entries = Sec->sh_size / sizeof(Elf_Versym);
- printGNUVersionSectionProlog(OS, "Version symbols", Entries, Obj, Sec,
- this->FileName);
+ printGNUVersionSectionProlog(Obj, Sec, "Version symbols",
+ Sec->sh_size / sizeof(Elf_Versym));
+ Expected<ArrayRef<Elf_Versym>> VerTableOrErr =
+ this->dumper()->getVersionTable(Sec, /*SymTab=*/nullptr,
+ /*StrTab=*/nullptr);
+ if (!VerTableOrErr) {
+ this->reportUniqueWarning(VerTableOrErr.takeError());
+ return;
+ }
+
+ ArrayRef<Elf_Versym> VerTable = *VerTableOrErr;
+ std::vector<StringRef> Versions;
+ for (size_t I = 0, E = VerTable.size(); I < E; ++I) {
+ unsigned Ndx = VerTable[I].vs_index;
+ if (Ndx == VER_NDX_LOCAL || Ndx == VER_NDX_GLOBAL) {
+ Versions.emplace_back(Ndx == VER_NDX_LOCAL ? "*local*" : "*global*");
+ continue;
+ }
- const uint8_t *VersymBuf =
- reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
- const ELFDumper<ELFT> *Dumper = this->dumper();
- StringRef StrTable = Dumper->getDynamicStringTable();
+ bool IsDefault;
+ Expected<StringRef> NameOrErr =
+ this->dumper()->getSymbolVersionByIndex(Ndx, IsDefault);
+ if (!NameOrErr) {
+ unsigned SecNdx = Sec - &cantFail(Obj->sections()).front();
+ this->reportUniqueWarning(createError(
+ "unable to get a version for entry " + Twine(I) +
+ " of SHT_GNU_versym section with index " + Twine(SecNdx) + ": " +
+ toString(NameOrErr.takeError())));
+ Versions.emplace_back("<corrupt>");
+ continue;
+ }
+ Versions.emplace_back(*NameOrErr);
+ }
// readelf prints 4 entries per line.
+ uint64_t Entries = VerTable.size();
for (uint64_t VersymRow = 0; VersymRow < Entries; VersymRow += 4) {
OS << " " << format_hex_no_prefix(VersymRow, 3) << ":";
-
- for (uint64_t VersymIndex = 0;
- (VersymIndex < 4) && (VersymIndex + VersymRow) < Entries;
- ++VersymIndex) {
- const Elf_Versym *Versym =
- reinterpret_cast<const Elf_Versym *>(VersymBuf);
- switch (Versym->vs_index) {
- case 0:
- OS << " 0 (*local*) ";
- break;
- case 1:
- OS << " 1 (*global*) ";
- break;
- default:
- OS << format("%4x%c", Versym->vs_index & VERSYM_VERSION,
- Versym->vs_index & VERSYM_HIDDEN ? 'h' : ' ');
-
- bool IsDefault = true;
- std::string VersionName = Dumper->getSymbolVersionByIndex(
- StrTable, Versym->vs_index, IsDefault);
-
- if (!VersionName.empty())
- VersionName = "(" + VersionName + ")";
- else
- VersionName = "(*invalid*)";
- OS << left_justify(VersionName, 13);
- }
- VersymBuf += sizeof(Elf_Versym);
+ for (uint64_t I = 0; (I < 4) && (I + VersymRow) < Entries; ++I) {
+ unsigned Ndx = VerTable[VersymRow + I].vs_index;
+ OS << format("%4x%c", Ndx & VERSYM_VERSION,
+ Ndx & VERSYM_HIDDEN ? 'h' : ' ');
+ OS << left_justify("(" + std::string(Versions[VersymRow + I]) + ")", 13);
}
OS << '\n';
}
@@ -3858,42 +4182,25 @@ void GNUStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
if (!Sec)
return;
- unsigned VerDefsNum = Sec->sh_info;
- printGNUVersionSectionProlog(OS, "Version definition", VerDefsNum, Obj, Sec,
- this->FileName);
+ printGNUVersionSectionProlog(Obj, Sec, "Version definition", Sec->sh_info);
- const Elf_Shdr *StrTabSec =
- unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link));
- StringRef StringTable(
- reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
- (size_t)StrTabSec->sh_size);
-
- const uint8_t *VerdefBuf =
- unwrapOrError(this->FileName, Obj->getSectionContents(Sec)).data();
- const uint8_t *Begin = VerdefBuf;
-
- while (VerDefsNum--) {
- const Elf_Verdef *Verdef = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
- OS << format(" 0x%04x: Rev: %u Flags: %s Index: %u Cnt: %u",
- VerdefBuf - Begin, (unsigned)Verdef->vd_version,
- versionFlagToString(Verdef->vd_flags).c_str(),
- (unsigned)Verdef->vd_ndx, (unsigned)Verdef->vd_cnt);
-
- const uint8_t *VerdauxBuf = VerdefBuf + Verdef->vd_aux;
- const Elf_Verdaux *Verdaux =
- reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
- OS << format(" Name: %s\n",
- StringTable.drop_front(Verdaux->vda_name).data());
-
- for (unsigned I = 1; I < Verdef->vd_cnt; ++I) {
- VerdauxBuf += Verdaux->vda_next;
- Verdaux = reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
- OS << format(" 0x%04x: Parent %u: %s\n", VerdauxBuf - Begin, I,
- StringTable.drop_front(Verdaux->vda_name).data());
- }
+ Expected<std::vector<VerDef>> V = this->dumper()->getVersionDefinitions(Sec);
+ if (!V) {
+ this->reportUniqueWarning(V.takeError());
+ return;
+ }
- VerdefBuf += Verdef->vd_next;
+ for (const VerDef &Def : *V) {
+ OS << format(" 0x%04x: Rev: %u Flags: %s Index: %u Cnt: %u Name: %s\n",
+ Def.Offset, Def.Version,
+ versionFlagToString(Def.Flags).c_str(), Def.Ndx, Def.Cnt,
+ Def.Name.data());
+ unsigned I = 0;
+ for (const VerdAux &Aux : Def.AuxV)
+ OS << format(" 0x%04x: Parent %u: %s\n", Aux.Offset, ++I,
+ Aux.Name.data());
}
+
OS << '\n';
}
@@ -3904,42 +4211,22 @@ void GNUStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
return;
unsigned VerneedNum = Sec->sh_info;
- printGNUVersionSectionProlog(OS, "Version needs", VerneedNum, Obj, Sec,
- this->FileName);
-
- ArrayRef<uint8_t> SecData =
- unwrapOrError(this->FileName, Obj->getSectionContents(Sec));
-
- const Elf_Shdr *StrTabSec =
- unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link));
- StringRef StringTable = {
- reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
- (size_t)StrTabSec->sh_size};
-
- const uint8_t *VerneedBuf = SecData.data();
- for (unsigned I = 0; I < VerneedNum; ++I) {
- const Elf_Verneed *Verneed =
- reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
+ printGNUVersionSectionProlog(Obj, Sec, "Version needs", VerneedNum);
- OS << format(" 0x%04x: Version: %u File: %s Cnt: %u\n",
- reinterpret_cast<const uint8_t *>(Verneed) - SecData.begin(),
- (unsigned)Verneed->vn_version,
- StringTable.drop_front(Verneed->vn_file).data(),
- (unsigned)Verneed->vn_cnt);
-
- const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
- for (unsigned J = 0; J < Verneed->vn_cnt; ++J) {
- const Elf_Vernaux *Vernaux =
- reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
+ Expected<std::vector<VerNeed>> V =
+ this->dumper()->getVersionDependencies(Sec);
+ if (!V) {
+ this->reportUniqueWarning(V.takeError());
+ return;
+ }
- OS << format(" 0x%04x: Name: %s Flags: %s Version: %u\n",
- reinterpret_cast<const uint8_t *>(Vernaux) - SecData.begin(),
- StringTable.drop_front(Vernaux->vna_name).data(),
- versionFlagToString(Vernaux->vna_flags).c_str(),
- (unsigned)Vernaux->vna_other);
- VernauxBuf += Vernaux->vna_next;
- }
- VerneedBuf += Verneed->vn_next;
+ for (const VerNeed &VN : *V) {
+ OS << format(" 0x%04x: Version: %u File: %s Cnt: %u\n", VN.Offset,
+ VN.Version, VN.File.data(), VN.Cnt);
+ for (const VernAux &Aux : VN.AuxV)
+ OS << format(" 0x%04x: Name: %s Flags: %s Version: %u\n", Aux.Offset,
+ Aux.Name.data(), versionFlagToString(Aux.Flags).c_str(),
+ Aux.Other);
}
OS << '\n';
}
@@ -4667,7 +4954,7 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
continue;
PrintHeader(S.sh_offset, S.sh_size);
Error Err = Error::success();
- for (const auto &Note : Obj->notes(S, Err))
+ for (auto Note : Obj->notes(S, Err))
ProcessNote(Note);
if (Err)
reportError(std::move(Err), this->FileName);
@@ -4679,7 +4966,7 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
continue;
PrintHeader(P.p_offset, P.p_filesz);
Error Err = Error::success();
- for (const auto &Note : Obj->notes(P, Err))
+ for (auto Note : Obj->notes(P, Err))
ProcessNote(Note);
if (Err)
reportError(std::move(Err), this->FileName);
@@ -4692,6 +4979,11 @@ void GNUStyle<ELFT>::printELFLinkerOptions(const ELFFile<ELFT> *Obj) {
OS << "printELFLinkerOptions not implemented!\n";
}
+template <class ELFT>
+void GNUStyle<ELFT>::printDependentLibs(const ELFFile<ELFT> *Obj) {
+ OS << "printDependentLibs not implemented!\n";
+}
+
// Used for printing section names in places where possible errors can be
// ignored.
static StringRef getSectionName(const SectionRef &Sec) {
@@ -5345,6 +5637,8 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
int SectionIndex = -1;
ArrayRef<Elf_Shdr> Sections = unwrapOrError(this->FileName, Obj->sections());
const ELFObjectFile<ELFT> *ElfObj = this->dumper()->getElfObject();
+ std::vector<EnumEntry<unsigned>> FlagsList =
+ getSectionFlagsForTarget(Obj->getHeader()->e_machine);
for (const Elf_Shdr &Sec : Sections) {
StringRef Name = unwrapOrError(
ElfObj->getFileName(), Obj->getSectionName(&Sec, this->WarningHandler));
@@ -5355,35 +5649,7 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
"Type",
object::getELFSectionTypeName(Obj->getHeader()->e_machine, Sec.sh_type),
Sec.sh_type);
- std::vector<EnumEntry<unsigned>> SectionFlags(std::begin(ElfSectionFlags),
- std::end(ElfSectionFlags));
- switch (Obj->getHeader()->e_machine) {
- case EM_ARM:
- SectionFlags.insert(SectionFlags.end(), std::begin(ElfARMSectionFlags),
- std::end(ElfARMSectionFlags));
- break;
- case EM_HEXAGON:
- SectionFlags.insert(SectionFlags.end(),
- std::begin(ElfHexagonSectionFlags),
- std::end(ElfHexagonSectionFlags));
- break;
- case EM_MIPS:
- SectionFlags.insert(SectionFlags.end(), std::begin(ElfMipsSectionFlags),
- std::end(ElfMipsSectionFlags));
- break;
- case EM_X86_64:
- SectionFlags.insert(SectionFlags.end(), std::begin(ElfX86_64SectionFlags),
- std::end(ElfX86_64SectionFlags));
- break;
- case EM_XCORE:
- SectionFlags.insert(SectionFlags.end(), std::begin(ElfXCoreSectionFlags),
- std::end(ElfXCoreSectionFlags));
- break;
- default:
- // Nothing to do.
- break;
- }
- W.printFlags("Flags", Sec.sh_flags, makeArrayRef(SectionFlags));
+ W.printFlags("Flags", Sec.sh_flags, makeArrayRef(FlagsList));
W.printHex("Address", Sec.sh_addr);
W.printHex("Offset", Sec.sh_offset);
W.printNumber("Size", Sec.sh_size);
@@ -5427,13 +5693,34 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
}
template <class ELFT>
+void LLVMStyle<ELFT>::printSymbolSection(const Elf_Sym *Symbol,
+ const Elf_Sym *First) {
+ Expected<unsigned> SectionIndex =
+ this->dumper()->getSymbolSectionIndex(Symbol, First);
+ if (!SectionIndex) {
+ assert(Symbol->st_shndx == SHN_XINDEX &&
+ "getSymbolSectionIndex should only fail due to an invalid "
+ "SHT_SYMTAB_SHNDX table/reference");
+ this->reportUniqueWarning(SectionIndex.takeError());
+ W.printHex("Section", "Reserved", SHN_XINDEX);
+ return;
+ }
+
+ Expected<StringRef> SectionName =
+ this->dumper()->getSymbolSectionName(Symbol, *SectionIndex);
+ if (!SectionName) {
+ this->reportUniqueWarning(SectionName.takeError());
+ W.printHex("Section", "<?>", *SectionIndex);
+ } else {
+ W.printHex("Section", *SectionName, *SectionIndex);
+ }
+}
+
+template <class ELFT>
void LLVMStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
const Elf_Sym *First, StringRef StrTable,
bool IsDynamic,
bool /*NonVisibilityBitsUsed*/) {
- unsigned SectionIndex = 0;
- StringRef SectionName;
- this->dumper()->getSectionNameIndex(Symbol, First, SectionName, SectionIndex);
std::string FullSymbolName =
this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic);
unsigned char SymbolType = Symbol->getType();
@@ -5470,7 +5757,7 @@ void LLVMStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
}
W.printFlags("Other", Symbol->st_other, makeArrayRef(SymOtherFlags), 0x3u);
}
- W.printHex("Section", SectionName, SectionIndex);
+ printSymbolSection(Symbol, First);
}
template <class ELFT>
@@ -5509,12 +5796,10 @@ template <class ELFT> void LLVMStyle<ELFT>::printDynamic(const ELFFile<ELFT> *Ob
for (auto Entry : Table) {
uintX_t Tag = Entry.getTag();
W.startLine() << " " << format_hex(Tag, Is64 ? 18 : 10, true) << " "
- << format("%-21s",
- getTypeString(Obj->getHeader()->e_machine, Tag));
+ << format("%-21s", Obj->getDynamicTagAsString(Tag).c_str());
this->dumper()->printDynamicEntry(OS, Tag, Entry.getVal());
OS << "\n";
}
-
W.startLine() << "]\n";
}
@@ -5619,20 +5904,23 @@ void LLVMStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
if (!Sec)
return;
- const uint8_t *VersymBuf =
- reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
- const ELFDumper<ELFT> *Dumper = this->dumper();
- StringRef StrTable = Dumper->getDynamicStringTable();
+ StringRef StrTable;
+ ArrayRef<Elf_Sym> Syms;
+ Expected<ArrayRef<Elf_Versym>> VerTableOrErr =
+ this->dumper()->getVersionTable(Sec, &Syms, &StrTable);
+ if (!VerTableOrErr) {
+ this->reportUniqueWarning(VerTableOrErr.takeError());
+ return;
+ }
- // Same number of entries in the dynamic symbol table (DT_SYMTAB).
- for (const Elf_Sym &Sym : Dumper->dynamic_symbols()) {
+ if (StrTable.empty() || Syms.empty() || Syms.size() != VerTableOrErr->size())
+ return;
+
+ for (size_t I = 0, E = Syms.size(); I < E; ++I) {
DictScope S(W, "Symbol");
- const Elf_Versym *Versym = reinterpret_cast<const Elf_Versym *>(VersymBuf);
- std::string FullSymbolName =
- Dumper->getFullSymbolName(&Sym, StrTable, true /* IsDynamic */);
- W.printNumber("Version", Versym->vs_index & VERSYM_VERSION);
- W.printString("Name", FullSymbolName);
- VersymBuf += sizeof(Elf_Versym);
+ W.printNumber("Version", (*VerTableOrErr)[I].vs_index & VERSYM_VERSION);
+ W.printString("Name", this->dumper()->getFullSymbolName(
+ &Syms[I], StrTable, /*IsDynamic=*/true));
}
}
@@ -5643,44 +5931,22 @@ void LLVMStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
if (!Sec)
return;
- const uint8_t *SecStartAddress =
- reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
- const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size;
- const uint8_t *VerdefBuf = SecStartAddress;
- const Elf_Shdr *StrTab =
- unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link));
-
- unsigned VerDefsNum = Sec->sh_info;
- while (VerDefsNum--) {
- if (VerdefBuf + sizeof(Elf_Verdef) > SecEndAddress)
- // FIXME: report_fatal_error is not a good way to report error. We should
- // emit a parsing error here and below.
- report_fatal_error("invalid offset in the section");
+ Expected<std::vector<VerDef>> V = this->dumper()->getVersionDefinitions(Sec);
+ if (!V) {
+ this->reportUniqueWarning(V.takeError());
+ return;
+ }
- const Elf_Verdef *Verdef = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
+ for (const VerDef &D : *V) {
DictScope Def(W, "Definition");
- W.printNumber("Version", Verdef->vd_version);
- W.printEnum("Flags", Verdef->vd_flags, makeArrayRef(SymVersionFlags));
- W.printNumber("Index", Verdef->vd_ndx);
- W.printNumber("Hash", Verdef->vd_hash);
- W.printString("Name", StringRef(reinterpret_cast<const char *>(
- Obj->base() + StrTab->sh_offset +
- Verdef->getAux()->vda_name)));
- if (!Verdef->vd_cnt)
- report_fatal_error("at least one definition string must exist");
- if (Verdef->vd_cnt > 2)
- report_fatal_error("more than one predecessor is not expected");
-
- if (Verdef->vd_cnt == 2) {
- const uint8_t *VerdauxBuf =
- VerdefBuf + Verdef->vd_aux + Verdef->getAux()->vda_next;
- const Elf_Verdaux *Verdaux =
- reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
- W.printString("Predecessor",
- StringRef(reinterpret_cast<const char *>(
- Obj->base() + StrTab->sh_offset + Verdaux->vda_name)));
- }
- VerdefBuf += Verdef->vd_next;
+ W.printNumber("Version", D.Version);
+ W.printFlags("Flags", D.Flags, makeArrayRef(SymVersionFlags));
+ W.printNumber("Index", D.Ndx);
+ W.printNumber("Hash", D.Hash);
+ W.printString("Name", D.Name.c_str());
+ W.printList(
+ "Predecessors", D.AuxV,
+ [](raw_ostream &OS, const VerdAux &Aux) { OS << Aux.Name.c_str(); });
}
}
@@ -5691,38 +5957,27 @@ void LLVMStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
if (!Sec)
return;
- const uint8_t *SecData =
- reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
- const Elf_Shdr *StrTab =
- unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link));
+ Expected<std::vector<VerNeed>> V =
+ this->dumper()->getVersionDependencies(Sec);
+ if (!V) {
+ this->reportUniqueWarning(V.takeError());
+ return;
+ }
- const uint8_t *VerneedBuf = SecData;
- unsigned VerneedNum = Sec->sh_info;
- for (unsigned I = 0; I < VerneedNum; ++I) {
- const Elf_Verneed *Verneed =
- reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
+ for (const VerNeed &VN : *V) {
DictScope Entry(W, "Dependency");
- W.printNumber("Version", Verneed->vn_version);
- W.printNumber("Count", Verneed->vn_cnt);
- W.printString("FileName",
- StringRef(reinterpret_cast<const char *>(
- Obj->base() + StrTab->sh_offset + Verneed->vn_file)));
+ W.printNumber("Version", VN.Version);
+ W.printNumber("Count", VN.Cnt);
+ W.printString("FileName", VN.File.c_str());
- const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
ListScope L(W, "Entries");
- for (unsigned J = 0; J < Verneed->vn_cnt; ++J) {
- const Elf_Vernaux *Vernaux =
- reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
+ for (const VernAux &Aux : VN.AuxV) {
DictScope Entry(W, "Entry");
- W.printNumber("Hash", Vernaux->vna_hash);
- W.printEnum("Flags", Vernaux->vna_flags, makeArrayRef(SymVersionFlags));
- W.printNumber("Index", Vernaux->vna_other);
- W.printString("Name",
- StringRef(reinterpret_cast<const char *>(
- Obj->base() + StrTab->sh_offset + Vernaux->vna_name)));
- VernauxBuf += Vernaux->vna_next;
+ W.printNumber("Hash", Aux.Hash);
+ W.printFlags("Flags", Aux.Flags, makeArrayRef(SymVersionFlags));
+ W.printNumber("Index", Aux.Other);
+ W.printString("Name", Aux.Name.c_str());
}
- VerneedBuf += Verneed->vn_next;
}
}
@@ -5911,7 +6166,7 @@ void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
DictScope D(W, "NoteSection");
PrintHeader(S.sh_offset, S.sh_size);
Error Err = Error::success();
- for (const auto &Note : Obj->notes(S, Err))
+ for (auto Note : Obj->notes(S, Err))
ProcessNote(Note);
if (Err)
reportError(std::move(Err), this->FileName);
@@ -5924,7 +6179,7 @@ void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
DictScope D(W, "NoteSection");
PrintHeader(P.p_offset, P.p_filesz);
Error Err = Error::success();
- for (const auto &Note : Obj->notes(P, Err))
+ for (auto Note : Obj->notes(P, Err))
ProcessNote(Note);
if (Err)
reportError(std::move(Err), this->FileName);
@@ -5936,20 +6191,76 @@ template <class ELFT>
void LLVMStyle<ELFT>::printELFLinkerOptions(const ELFFile<ELFT> *Obj) {
ListScope L(W, "LinkerOptions");
+ unsigned I = -1;
for (const Elf_Shdr &Shdr : unwrapOrError(this->FileName, Obj->sections())) {
+ ++I;
if (Shdr.sh_type != ELF::SHT_LLVM_LINKER_OPTIONS)
continue;
ArrayRef<uint8_t> Contents =
unwrapOrError(this->FileName, Obj->getSectionContents(&Shdr));
- for (const uint8_t *P = Contents.begin(), *E = Contents.end(); P < E; ) {
- StringRef Key = StringRef(reinterpret_cast<const char *>(P));
- StringRef Value =
- StringRef(reinterpret_cast<const char *>(P) + Key.size() + 1);
+ if (Contents.empty())
+ continue;
+
+ if (Contents.back() != 0) {
+ reportWarning(createError("SHT_LLVM_LINKER_OPTIONS section at index " +
+ Twine(I) +
+ " is broken: the "
+ "content is not null-terminated"),
+ this->FileName);
+ continue;
+ }
+
+ SmallVector<StringRef, 16> Strings;
+ toStringRef(Contents.drop_back()).split(Strings, '\0');
+ if (Strings.size() % 2 != 0) {
+ reportWarning(
+ createError(
+ "SHT_LLVM_LINKER_OPTIONS section at index " + Twine(I) +
+ " is broken: an incomplete "
+ "key-value pair was found. The last possible key was: \"" +
+ Strings.back() + "\""),
+ this->FileName);
+ continue;
+ }
- W.printString(Key, Value);
+ for (size_t I = 0; I < Strings.size(); I += 2)
+ W.printString(Strings[I], Strings[I + 1]);
+ }
+}
+
+template <class ELFT>
+void LLVMStyle<ELFT>::printDependentLibs(const ELFFile<ELFT> *Obj) {
+ ListScope L(W, "DependentLibs");
+
+ auto Warn = [this](unsigned SecNdx, StringRef Msg) {
+ this->reportUniqueWarning(
+ createError("SHT_LLVM_DEPENDENT_LIBRARIES section at index " +
+ Twine(SecNdx) + " is broken: " + Msg));
+ };
+
+ unsigned I = -1;
+ for (const Elf_Shdr &Shdr : unwrapOrError(this->FileName, Obj->sections())) {
+ ++I;
+ if (Shdr.sh_type != ELF::SHT_LLVM_DEPENDENT_LIBRARIES)
+ continue;
- P = P + Key.size() + Value.size() + 2;
+ Expected<ArrayRef<uint8_t>> ContentsOrErr = Obj->getSectionContents(&Shdr);
+ if (!ContentsOrErr) {
+ Warn(I, toString(ContentsOrErr.takeError()));
+ continue;
+ }
+
+ ArrayRef<uint8_t> Contents = *ContentsOrErr;
+ if (!Contents.empty() && Contents.back() != 0) {
+ Warn(I, "the content is not null-terminated");
+ continue;
+ }
+
+ for (const uint8_t *I = Contents.begin(), *E = Contents.end(); I < E;) {
+ StringRef Lib((const char *)I);
+ W.printString(Lib);
+ I += Lib.size() + 1;
}
}
}
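
Both SHT_LLVM_LINKER_OPTIONS and SHT_LLVM_DEPENDENT_LIBRARIES carry a flat sequence of NUL-terminated strings, which is why both printers above reject content whose last byte is not 0. A minimal standalone sketch of that walk, same shape as the loop above, with illustrative names:

// Sketch: iterate NUL-terminated strings in a section blob.
// SHT_LLVM_DEPENDENT_LIBRARIES: one library specifier per string.
// SHT_LLVM_LINKER_OPTIONS: strings alternate key, value, key, value, ...
#include <cstdint>
#include <cstdio>
#include <cstring>

void walkStringBlob(const uint8_t *Begin, const uint8_t *End) {
  if (Begin == End || End[-1] != 0)
    return; // malformed: content must be NUL-terminated
  for (const uint8_t *I = Begin; I != End;) {
    const char *S = reinterpret_cast<const char *>(I);
    std::printf("%s\n", S);
    I += std::strlen(S) + 1; // skip the string plus its terminator
  }
}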
@@ -6016,13 +6327,7 @@ void LLVMStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
const Elf_Sym *Sym = Parser.getGotSym(&E);
W.printHex("Value", Sym->st_value);
W.printEnum("Type", Sym->getType(), makeArrayRef(ElfSymbolTypes));
-
- unsigned SectionIndex = 0;
- StringRef SectionName;
- this->dumper()->getSectionNameIndex(
- Sym, this->dumper()->dynamic_symbols().begin(), SectionName,
- SectionIndex);
- W.printHex("Section", SectionName, SectionIndex);
+ printSymbolSection(Sym, this->dumper()->dynamic_symbols().begin());
std::string SymName = this->dumper()->getFullSymbolName(
Sym, this->dumper()->getDynamicStringTable(), true);
@@ -6066,13 +6371,7 @@ void LLVMStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
const Elf_Sym *Sym = Parser.getPltSym(&E);
W.printHex("Value", Sym->st_value);
W.printEnum("Type", Sym->getType(), makeArrayRef(ElfSymbolTypes));
-
- unsigned SectionIndex = 0;
- StringRef SectionName;
- this->dumper()->getSectionNameIndex(
- Sym, this->dumper()->dynamic_symbols().begin(), SectionName,
- SectionIndex);
- W.printHex("Section", SectionName, SectionIndex);
+ printSymbolSection(Sym, this->dumper()->dynamic_symbols().begin());
std::string SymName =
this->dumper()->getFullSymbolName(Sym, Parser.getPltStrTable(), true);
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp
index 9e5ebd99ac37..6229b52693d8 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp
@@ -65,7 +65,7 @@ getSectionRefsByNameOrIndex(const object::ObjectFile *Obj,
SecIndex++;
}
- for (const std::pair<std::string, bool> &S : SecNames)
+ for (const std::pair<const std::string, bool> &S : SecNames)
if (!S.second)
reportWarning(
createError(formatv("could not find section '{0}'", S.first).str()),
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h
index 2ba441342499..3fc8d3e79ac1 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h
@@ -53,6 +53,7 @@ public:
virtual void printUnwindInfo() = 0;
// Only implemented for ELF at this time.
+ virtual void printDependentLibs() {}
virtual void printDynamicRelocations() { }
virtual void printDynamicTable() { }
virtual void printNeededLibraries() { }
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/Win64EHDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/Win64EHDumper.cpp
index fa268ce9d434..380baae2eeb4 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/Win64EHDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/Win64EHDumper.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "Win64EHDumper.h"
+#include "Error.h"
#include "llvm-readobj.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -111,6 +112,20 @@ static unsigned getNumUsedSlots(const UnwindCode &UnwindCode) {
}
}
+static std::error_code getSymbol(const COFFObjectFile &COFF, uint64_t VA,
+ object::SymbolRef &Sym) {
+ for (const auto &Symbol : COFF.symbols()) {
+ Expected<uint64_t> Address = Symbol.getAddress();
+ if (!Address)
+ return errorToErrorCode(Address.takeError());
+ if (*Address == VA) {
+ Sym = Symbol;
+ return readobj_error::success;
+ }
+ }
+ return readobj_error::unknown_symbol;
+}
+
static std::string formatSymbol(const Dumper::Context &Ctx,
const coff_section *Section, uint64_t Offset,
uint32_t Displacement) {
@@ -131,9 +146,22 @@ static std::string formatSymbol(const Dumper::Context &Ctx,
// TODO: Actually report errors helpfully.
consumeError(Name.takeError());
}
+ } else if (!getSymbol(Ctx.COFF, Ctx.COFF.getImageBase() + Displacement,
+ Symbol)) {
+ Expected<StringRef> Name = Symbol.getName();
+ if (Name) {
+ OS << *Name;
+ OS << format(" (0x%" PRIX64 ")", Ctx.COFF.getImageBase() + Displacement);
+ return OS.str();
+ } else {
+ consumeError(Name.takeError());
+ }
}
- OS << format(" (0x%" PRIX64 ")", Offset);
+ if (Displacement > 0)
+ OS << format("(0x%" PRIX64 ")", Ctx.COFF.getImageBase() + Displacement);
+ else
+ OS << format("(0x%" PRIX64 ")", Offset);
return OS.str();
}
@@ -159,6 +187,18 @@ static std::error_code resolveRelocation(const Dumper::Context &Ctx,
return std::error_code();
}
+static const object::coff_section *
+getSectionContaining(const COFFObjectFile &COFF, uint64_t VA) {
+ for (const auto &Section : COFF.sections()) {
+ uint64_t Address = Section.getAddress();
+ uint64_t Size = Section.getSize();
+
+ if (VA >= Address && (VA - Address) <= Size)
+ return COFF.getCOFFSection(Section);
+ }
+ return nullptr;
+}
+
namespace llvm {
namespace Win64EH {
void Dumper::printRuntimeFunctionEntry(const Context &Ctx,
@@ -284,9 +324,18 @@ void Dumper::printRuntimeFunction(const Context &Ctx,
DictScope RFS(SW, "RuntimeFunction");
printRuntimeFunctionEntry(Ctx, Section, SectionOffset, RF);
- const coff_section *XData;
+ const coff_section *XData = nullptr;
uint64_t Offset;
resolveRelocation(Ctx, Section, SectionOffset + 8, XData, Offset);
+ Offset = Offset + RF.UnwindInfoOffset;
+
+ if (!XData) {
+ uint64_t Address = Ctx.COFF.getImageBase() + RF.UnwindInfoOffset;
+ XData = getSectionContaining(Ctx.COFF, Address);
+ if (!XData)
+ return;
+ Offset = RF.UnwindInfoOffset - XData->VirtualAddress;
+ }
ArrayRef<uint8_t> Contents;
if (Error E = Ctx.COFF.getSectionContents(XData, Contents))
@@ -295,7 +344,6 @@ void Dumper::printRuntimeFunction(const Context &Ctx,
if (Contents.empty())
return;
- Offset = Offset + RF.UnwindInfoOffset;
if (Offset > Contents.size())
return;
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/XCOFFDumper.cpp
index fe95b6d1b494..1f9403665594 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/XCOFFDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/XCOFFDumper.cpp
@@ -49,13 +49,6 @@ private:
void printCsectAuxEnt32(const XCOFFCsectAuxEnt32 *AuxEntPtr);
void printSectAuxEntForStat(const XCOFFSectAuxEntForStat *AuxEntPtr);
void printSymbol(const SymbolRef &);
-
- // Least significant 3 bits are reserved.
- static constexpr unsigned SectionFlagsReservedMask = 0x7;
-
- // The low order 16 bits of section flags denotes the section type.
- static constexpr unsigned SectionFlagsTypeMask = 0xffffu;
-
void printRelocations(ArrayRef<XCOFFSectionHeader32> Sections);
const XCOFFObjectFile &Obj;
};
@@ -496,8 +489,7 @@ void XCOFFDumper::printSectionHeaders(ArrayRef<T> Sections) {
DictScope SecDS(W, "Section");
W.printNumber("Index", Index++);
-
- uint16_t SectionType = Sec.Flags & SectionFlagsTypeMask;
+ uint16_t SectionType = Sec.getSectionType();
switch (SectionType) {
case XCOFF::STYP_OVRFLO:
printOverflowSectionHeader(Sec);
@@ -513,8 +505,7 @@ void XCOFFDumper::printSectionHeaders(ArrayRef<T> Sections) {
printGenericSectionHeader(Sec);
break;
}
- // For now we just dump the section type portion of the flags.
- if (SectionType & SectionFlagsReservedMask)
+ if (Sec.isReservedSectionType())
W.printHex("Flags", "Reserved", SectionType);
else
W.printEnum("Type", SectionType, makeArrayRef(SectionTypeFlagsNames));
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp
index 4db13897879d..fadeec1072d6 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -58,6 +58,11 @@ namespace opts {
"--section-groups and --elf-hash-histogram."));
cl::alias AllShort("a", cl::desc("Alias for --all"), cl::aliasopt(All));
+ // --dependent-libraries
+ cl::opt<bool>
+ DependentLibraries("dependent-libraries",
+ cl::desc("Display the dependent libraries section"));
+
// --headers -e
cl::opt<bool>
Headers("headers",
@@ -366,7 +371,6 @@ LLVM_ATTRIBUTE_NORETURN static void error(Twine Msg) {
// Flush the standard output to print the error at a
// proper place.
fouts().flush();
- errs() << "\n";
WithColor::error(errs(), ToolName) << Msg << "\n";
exit(1);
}
@@ -390,7 +394,6 @@ void reportWarning(Error Err, StringRef Input) {
fouts().flush();
handleAllErrors(
createFileError(Input, std::move(Err)), [&](const ErrorInfoBase &EI) {
- errs() << "\n";
WithColor::warning(errs(), ToolName) << EI.message() << "\n";
});
}
@@ -489,6 +492,8 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
if (opts::VersionInfo)
Dumper->printVersionInfo();
if (Obj->isELF()) {
+ if (opts::DependentLibraries)
+ Dumper->printDependentLibs();
if (opts::ELFLinkerOptions)
Dumper->printELFLinkerOptions();
if (opts::ArchSpecificInfo)
diff --git a/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 3a36e7709483..9b84c46d3901 100644
--- a/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/SymbolSize.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DynamicLibrary.h"
@@ -749,7 +750,9 @@ static int linkAndVerify() {
if (!MRI)
ErrorAndExit("Unable to create target register info!");
- std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
+ MCTargetOptions MCOptions;
+ std::unique_ptr<MCAsmInfo> MAI(
+ TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
if (!MAI)
ErrorAndExit("Unable to create target asm info!");
diff --git a/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 54ce87d47979..96b2b72d8ba1 100644
--- a/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
#include <cstdio>
#include <cstring>
#include <string>
@@ -150,6 +151,12 @@ static cl::opt<std::string>
ClFallbackDebugPath("fallback-debug-path", cl::init(""),
cl::desc("Fallback path for debug binaries."));
+static cl::list<std::string>
+ ClDebugFileDirectory("debug-file-directory", cl::ZeroOrMore,
+ cl::value_desc("dir"),
+ cl::desc("Path to directory where to look for debug "
+ "files."));
+
static cl::opt<DIPrinter::OutputStyle>
ClOutputStyle("output-style", cl::init(DIPrinter::OutputStyle::LLVM),
cl::desc("Specify print style"),
@@ -222,7 +229,7 @@ static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer,
std::string ModuleName;
uint64_t Offset = 0;
if (!parseCommand(StringRef(InputString), Cmd, ModuleName, Offset)) {
- outs() << InputString;
+ outs() << InputString << "\n";
return;
}
@@ -283,8 +290,10 @@ int main(int argc, char **argv) {
}
llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded);
- cl::ParseCommandLineOptions(argc, argv, IsAddr2Line ? "llvm-addr2line\n"
- : "llvm-symbolizer\n");
+ cl::ParseCommandLineOptions(
+ argc, argv, IsAddr2Line ? "llvm-addr2line\n" : "llvm-symbolizer\n",
+ /*Errs=*/nullptr,
+ IsAddr2Line ? "LLVM_ADDR2LINE_OPTS" : "LLVM_SYMBOLIZER_OPTS");
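
(Usage sketch, reusing the ClOutputStyle flag defined above: setting LLVM_SYMBOLIZER_OPTS="--output-style=GNU" in the environment injects that option into every llvm-symbolizer run, while the llvm-addr2line entry point consults LLVM_ADDR2LINE_OPTS instead.)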
// If both --demangle and --no-demangle are specified then pick the last one.
if (ClNoDemangle.getPosition() > ClDemangle.getPosition())
@@ -299,6 +308,7 @@ int main(int argc, char **argv) {
Opts.DefaultArch = ClDefaultArch;
Opts.FallbackDebugPath = ClFallbackDebugPath;
Opts.DWPName = ClDwpName;
+ Opts.DebugFileDirectory = ClDebugFileDirectory;
for (const auto &hint : ClDsymHint) {
if (sys::path::extension(hint) == ".dSYM") {
@@ -319,7 +329,13 @@ int main(int argc, char **argv) {
char InputString[kMaxInputStringLength];
while (fgets(InputString, sizeof(InputString), stdin)) {
- symbolizeInput(InputString, Symbolizer, Printer);
+ // Strip newline characters.
+ std::string StrippedInputString(InputString);
+ StrippedInputString.erase(
+ std::remove_if(StrippedInputString.begin(), StrippedInputString.end(),
+ [](char c) { return c == '\r' || c == '\n'; }),
+ StrippedInputString.end());
+ symbolizeInput(StrippedInputString, Symbolizer, Printer);
outs().flush();
}
} else {
diff --git a/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp b/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp
index e37cd212377a..fcac33b23d4d 100644
--- a/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp
@@ -34,23 +34,20 @@ static cl::opt<bool>
AccountKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),
cl::sub(Account), cl::init(false));
static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing),
- cl::desc("Alias for -keep_going"),
- cl::sub(Account));
+ cl::desc("Alias for -keep_going"));
static cl::opt<bool> AccountDeduceSiblingCalls(
"deduce-sibling-calls",
cl::desc("Deduce sibling calls when unrolling function call stacks"),
cl::sub(Account), cl::init(false));
static cl::alias
AccountDeduceSiblingCalls2("d", cl::aliasopt(AccountDeduceSiblingCalls),
- cl::desc("Alias for -deduce_sibling_calls"),
- cl::sub(Account));
+ cl::desc("Alias for -deduce_sibling_calls"));
static cl::opt<std::string>
AccountOutput("output", cl::value_desc("output file"), cl::init("-"),
cl::desc("output file; use '-' for stdout"),
cl::sub(Account));
static cl::alias AccountOutput2("o", cl::aliasopt(AccountOutput),
- cl::desc("Alias for -output"),
- cl::sub(Account));
+ cl::desc("Alias for -output"));
enum class AccountOutputFormats { TEXT, CSV };
static cl::opt<AccountOutputFormats>
AccountOutputFormat("format", cl::desc("output format"),
@@ -60,8 +57,7 @@ static cl::opt<AccountOutputFormats>
"report stats in csv")),
cl::sub(Account));
static cl::alias AccountOutputFormat2("f", cl::desc("Alias of -format"),
- cl::aliasopt(AccountOutputFormat),
- cl::sub(Account));
+ cl::aliasopt(AccountOutputFormat));
enum class SortField {
FUNCID,
@@ -88,8 +84,7 @@ static cl::opt<SortField> AccountSortOutput(
clEnumValN(SortField::SUM, "sum", "sum of call durations"),
clEnumValN(SortField::FUNC, "func", "function names")));
static cl::alias AccountSortOutput2("s", cl::aliasopt(AccountSortOutput),
- cl::desc("Alias for -sort"),
- cl::sub(Account));
+ cl::desc("Alias for -sort"));
enum class SortDirection {
ASCENDING,
@@ -101,14 +96,13 @@ static cl::opt<SortDirection> AccountSortOrder(
clEnumValN(SortDirection::DESCENDING, "dsc", "descending")),
cl::sub(Account));
static cl::alias AccountSortOrder2("r", cl::aliasopt(AccountSortOrder),
- cl::desc("Alias for -sortorder"),
- cl::sub(Account));
+ cl::desc("Alias for -sortorder"));
static cl::opt<int> AccountTop("top", cl::desc("only show the top N results"),
cl::value_desc("N"), cl::sub(Account),
cl::init(-1));
static cl::alias AccountTop2("p", cl::desc("Alias for -top"),
- cl::aliasopt(AccountTop), cl::sub(Account));
+ cl::aliasopt(AccountTop));
static cl::opt<std::string>
AccountInstrMap("instr_map",
@@ -117,8 +111,7 @@ static cl::opt<std::string>
cl::value_desc("binary with xray_instr_map"),
cl::sub(Account), cl::init(""));
static cl::alias AccountInstrMap2("m", cl::aliasopt(AccountInstrMap),
- cl::desc("Alias for -instr_map"),
- cl::sub(Account));
+ cl::desc("Alias for -instr_map"));
namespace {
diff --git a/contrib/llvm-project/llvm/tools/llvm-xray/xray-converter.cpp b/contrib/llvm-project/llvm/tools/llvm-xray/xray-converter.cpp
index 7258245b95cc..c1a623f0f858 100644
--- a/contrib/llvm-project/llvm/tools/llvm-xray/xray-converter.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-xray/xray-converter.cpp
@@ -43,23 +43,20 @@ static cl::opt<ConvertFormats> ConvertOutputFormat(
"May be visualized with the Catapult trace viewer.")),
cl::sub(Convert));
static cl::alias ConvertOutputFormat2("f", cl::aliasopt(ConvertOutputFormat),
- cl::desc("Alias for -output-format"),
- cl::sub(Convert));
+ cl::desc("Alias for -output-format"));
static cl::opt<std::string>
ConvertOutput("output", cl::value_desc("output file"), cl::init("-"),
cl::desc("output file; use '-' for stdout"),
cl::sub(Convert));
static cl::alias ConvertOutput2("o", cl::aliasopt(ConvertOutput),
- cl::desc("Alias for -output"),
- cl::sub(Convert));
+ cl::desc("Alias for -output"));
static cl::opt<bool>
ConvertSymbolize("symbolize",
cl::desc("symbolize function ids from the input log"),
cl::init(false), cl::sub(Convert));
static cl::alias ConvertSymbolize2("y", cl::aliasopt(ConvertSymbolize),
- cl::desc("Alias for -symbolize"),
- cl::sub(Convert));
+ cl::desc("Alias for -symbolize"));
static cl::opt<std::string>
ConvertInstrMap("instr_map",
@@ -68,15 +65,13 @@ static cl::opt<std::string>
cl::value_desc("binary with xray_instr_map"),
cl::sub(Convert), cl::init(""));
static cl::alias ConvertInstrMap2("m", cl::aliasopt(ConvertInstrMap),
- cl::desc("Alias for -instr_map"),
- cl::sub(Convert));
+ cl::desc("Alias for -instr_map"));
static cl::opt<bool> ConvertSortInput(
"sort",
cl::desc("determines whether to sort input log records by timestamp"),
cl::sub(Convert), cl::init(true));
static cl::alias ConvertSortInput2("s", cl::aliasopt(ConvertSortInput),
- cl::desc("Alias for -sort"),
- cl::sub(Convert));
+ cl::desc("Alias for -sort"));
using llvm::yaml::Output;
diff --git a/contrib/llvm-project/llvm/tools/llvm-xray/xray-extract.cpp b/contrib/llvm-project/llvm/tools/llvm-xray/xray-extract.cpp
index 7800b88d9eeb..af9255af21c3 100644
--- a/contrib/llvm-project/llvm/tools/llvm-xray/xray-extract.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-xray/xray-extract.cpp
@@ -38,15 +38,13 @@ static cl::opt<std::string>
cl::desc("output file; use '-' for stdout"),
cl::sub(Extract));
static cl::alias ExtractOutput2("o", cl::aliasopt(ExtractOutput),
- cl::desc("Alias for -output"),
- cl::sub(Extract));
+ cl::desc("Alias for -output"));
static cl::opt<bool> ExtractSymbolize("symbolize", cl::value_desc("symbolize"),
cl::init(false),
cl::desc("symbolize functions"),
cl::sub(Extract));
static cl::alias ExtractSymbolize2("s", cl::aliasopt(ExtractSymbolize),
- cl::desc("alias for -symbolize"),
- cl::sub(Extract));
+ cl::desc("alias for -symbolize"));
namespace {
diff --git a/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph-diff.cpp b/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph-diff.cpp
index 116aa6869ec1..a1bca326930e 100644
--- a/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph-diff.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph-diff.cpp
@@ -41,22 +41,19 @@ static cl::opt<bool>
cl::desc("Keep going on errors encountered"),
cl::sub(GraphDiff), cl::init(false));
static cl::alias GraphDiffKeepGoingA("k", cl::aliasopt(GraphDiffKeepGoing),
- cl::desc("Alias for -keep-going"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -keep-going"));
static cl::opt<bool>
GraphDiffKeepGoing1("keep-going-1",
cl::desc("Keep going on errors encountered in trace 1"),
cl::sub(GraphDiff), cl::init(false));
static cl::alias GraphDiffKeepGoing1A("k1", cl::aliasopt(GraphDiffKeepGoing1),
- cl::desc("Alias for -keep-going-1"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -keep-going-1"));
static cl::opt<bool>
GraphDiffKeepGoing2("keep-going-2",
cl::desc("Keep going on errors encountered in trace 2"),
cl::sub(GraphDiff), cl::init(false));
static cl::alias GraphDiffKeepGoing2A("k2", cl::aliasopt(GraphDiffKeepGoing2),
- cl::desc("Alias for -keep-going-2"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -keep-going-2"));
static cl::opt<std::string>
GraphDiffInstrMap("instr-map",
@@ -65,8 +62,7 @@ static cl::opt<std::string>
cl::value_desc("binary with xray_instr_map or yaml"),
cl::sub(GraphDiff), cl::init(""));
static cl::alias GraphDiffInstrMapA("m", cl::aliasopt(GraphDiffInstrMap),
- cl::desc("Alias for -instr-map"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -instr-map"));
static cl::opt<std::string>
GraphDiffInstrMap1("instr-map-1",
cl::desc("binary with the instrumentation map, or "
@@ -74,8 +70,7 @@ static cl::opt<std::string>
cl::value_desc("binary with xray_instr_map or yaml"),
cl::sub(GraphDiff), cl::init(""));
static cl::alias GraphDiffInstrMap1A("m1", cl::aliasopt(GraphDiffInstrMap1),
- cl::desc("Alias for -instr-map-1"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -instr-map-1"));
static cl::opt<std::string>
GraphDiffInstrMap2("instr-map-2",
cl::desc("binary with the instrumentation map, or "
@@ -83,8 +78,7 @@ static cl::opt<std::string>
cl::value_desc("binary with xray_instr_map or yaml"),
cl::sub(GraphDiff), cl::init(""));
static cl::alias GraphDiffInstrMap2A("m2", cl::aliasopt(GraphDiffInstrMap2),
- cl::desc("Alias for -instr-map-2"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -instr-map-2"));
static cl::opt<bool> GraphDiffDeduceSiblingCalls(
"deduce-sibling-calls",
@@ -92,22 +86,21 @@ static cl::opt<bool> GraphDiffDeduceSiblingCalls(
cl::sub(GraphDiff), cl::init(false));
static cl::alias
GraphDiffDeduceSiblingCallsA("d", cl::aliasopt(GraphDiffDeduceSiblingCalls),
- cl::desc("Alias for -deduce-sibling-calls"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -deduce-sibling-calls"));
static cl::opt<bool> GraphDiffDeduceSiblingCalls1(
"deduce-sibling-calls-1",
cl::desc("Deduce sibling calls when unrolling function call stacks"),
cl::sub(GraphDiff), cl::init(false));
static cl::alias GraphDiffDeduceSiblingCalls1A(
"d1", cl::aliasopt(GraphDiffDeduceSiblingCalls1),
- cl::desc("Alias for -deduce-sibling-calls-1"), cl::sub(GraphDiff));
+ cl::desc("Alias for -deduce-sibling-calls-1"));
static cl::opt<bool> GraphDiffDeduceSiblingCalls2(
"deduce-sibling-calls-2",
cl::desc("Deduce sibling calls when unrolling function call stacks"),
cl::sub(GraphDiff), cl::init(false));
static cl::alias GraphDiffDeduceSiblingCalls2A(
"d2", cl::aliasopt(GraphDiffDeduceSiblingCalls2),
- cl::desc("Alias for -deduce-sibling-calls-2"), cl::sub(GraphDiff));
+ cl::desc("Alias for -deduce-sibling-calls-2"));
static cl::opt<GraphRenderer::StatType> GraphDiffEdgeLabel(
"edge-label", cl::desc("Output graphs with edges labeled with this field"),
@@ -130,8 +123,7 @@ static cl::opt<GraphRenderer::StatType> GraphDiffEdgeLabel(
clEnumValN(GraphRenderer::StatType::SUM, "sum",
"sum of call durations")));
static cl::alias GraphDiffEdgeLabelA("e", cl::aliasopt(GraphDiffEdgeLabel),
- cl::desc("Alias for -edge-label"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -edge-label"));
static cl::opt<GraphRenderer::StatType> GraphDiffEdgeColor(
"edge-color", cl::desc("Output graphs with edges colored by this field"),
@@ -154,8 +146,7 @@ static cl::opt<GraphRenderer::StatType> GraphDiffEdgeColor(
clEnumValN(GraphRenderer::StatType::SUM, "sum",
"sum of call durations")));
static cl::alias GraphDiffEdgeColorA("c", cl::aliasopt(GraphDiffEdgeColor),
- cl::desc("Alias for -edge-color"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -edge-color"));
static cl::opt<GraphRenderer::StatType> GraphDiffVertexLabel(
"vertex-label",
@@ -179,8 +170,7 @@ static cl::opt<GraphRenderer::StatType> GraphDiffVertexLabel(
clEnumValN(GraphRenderer::StatType::SUM, "sum",
"sum of call durations")));
static cl::alias GraphDiffVertexLabelA("v", cl::aliasopt(GraphDiffVertexLabel),
- cl::desc("Alias for -vertex-label"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -vertex-label"));
static cl::opt<GraphRenderer::StatType> GraphDiffVertexColor(
"vertex-color",
@@ -204,24 +194,21 @@ static cl::opt<GraphRenderer::StatType> GraphDiffVertexColor(
clEnumValN(GraphRenderer::StatType::SUM, "sum",
"sum of call durations")));
static cl::alias GraphDiffVertexColorA("b", cl::aliasopt(GraphDiffVertexColor),
- cl::desc("Alias for -vertex-color"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -vertex-color"));
static cl::opt<int> GraphDiffVertexLabelTrunc(
"vertex-label-trun", cl::desc("What length to truncate vertex labels to "),
cl::sub(GraphDiff), cl::init(40));
static cl::alias
GraphDiffVertexLabelTrunc1("t", cl::aliasopt(GraphDiffVertexLabelTrunc),
- cl::desc("Alias for -vertex-label-trun"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -vertex-label-trun"));
static cl::opt<std::string>
GraphDiffOutput("output", cl::value_desc("Output file"), cl::init("-"),
cl::desc("output file; use '-' for stdout"),
cl::sub(GraphDiff));
static cl::alias GraphDiffOutputA("o", cl::aliasopt(GraphDiffOutput),
- cl::desc("Alias for -output"),
- cl::sub(GraphDiff));
+ cl::desc("Alias for -output"));
Expected<GraphDiffRenderer> GraphDiffRenderer::Factory::getGraphDiffRenderer() {
GraphDiffRenderer R;
diff --git a/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph.cpp b/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph.cpp
index 0be511219c1a..f836f9ba54fc 100644
--- a/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-xray/xray-graph.cpp
@@ -30,14 +30,13 @@ static cl::opt<bool>
GraphKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),
cl::sub(GraphC), cl::init(false));
static cl::alias GraphKeepGoing2("k", cl::aliasopt(GraphKeepGoing),
- cl::desc("Alias for -keep-going"),
- cl::sub(GraphC));
+ cl::desc("Alias for -keep-going"));
static cl::opt<std::string>
GraphOutput("output", cl::value_desc("Output file"), cl::init("-"),
cl::desc("output file; use '-' for stdout"), cl::sub(GraphC));
static cl::alias GraphOutput2("o", cl::aliasopt(GraphOutput),
- cl::desc("Alias for -output"), cl::sub(GraphC));
+ cl::desc("Alias for -output"));
static cl::opt<std::string>
GraphInstrMap("instr_map",
@@ -46,8 +45,7 @@ static cl::opt<std::string>
cl::value_desc("binary with xray_instr_map"), cl::sub(GraphC),
cl::init(""));
static cl::alias GraphInstrMap2("m", cl::aliasopt(GraphInstrMap),
- cl::desc("alias for -instr_map"),
- cl::sub(GraphC));
+ cl::desc("alias for -instr_map"));
static cl::opt<bool> GraphDeduceSiblingCalls(
"deduce-sibling-calls",
@@ -55,8 +53,7 @@ static cl::opt<bool> GraphDeduceSiblingCalls(
cl::sub(GraphC), cl::init(false));
static cl::alias
GraphDeduceSiblingCalls2("d", cl::aliasopt(GraphDeduceSiblingCalls),
- cl::desc("Alias for -deduce-sibling-calls"),
- cl::sub(GraphC));
+ cl::desc("Alias for -deduce-sibling-calls"));
static cl::opt<GraphRenderer::StatType>
GraphEdgeLabel("edge-label",
@@ -80,8 +77,7 @@ static cl::opt<GraphRenderer::StatType>
clEnumValN(GraphRenderer::StatType::SUM, "sum",
"sum of call durations")));
static cl::alias GraphEdgeLabel2("e", cl::aliasopt(GraphEdgeLabel),
- cl::desc("Alias for -edge-label"),
- cl::sub(GraphC));
+ cl::desc("Alias for -edge-label"));
static cl::opt<GraphRenderer::StatType> GraphVertexLabel(
"vertex-label",
@@ -105,8 +101,7 @@ static cl::opt<GraphRenderer::StatType> GraphVertexLabel(
clEnumValN(GraphRenderer::StatType::SUM, "sum",
"sum of call durations")));
static cl::alias GraphVertexLabel2("v", cl::aliasopt(GraphVertexLabel),
- cl::desc("Alias for -edge-label"),
- cl::sub(GraphC));
+ cl::desc("Alias for -edge-label"));
static cl::opt<GraphRenderer::StatType> GraphEdgeColorType(
"color-edges",
@@ -130,8 +125,7 @@ static cl::opt<GraphRenderer::StatType> GraphEdgeColorType(
clEnumValN(GraphRenderer::StatType::SUM, "sum",
"sum of call durations")));
static cl::alias GraphEdgeColorType2("c", cl::aliasopt(GraphEdgeColorType),
- cl::desc("Alias for -color-edges"),
- cl::sub(GraphC));
+ cl::desc("Alias for -color-edges"));
static cl::opt<GraphRenderer::StatType> GraphVertexColorType(
"color-vertices",
@@ -155,8 +149,7 @@ static cl::opt<GraphRenderer::StatType> GraphVertexColorType(
clEnumValN(GraphRenderer::StatType::SUM, "sum",
"sum of call durations")));
static cl::alias GraphVertexColorType2("b", cl::aliasopt(GraphVertexColorType),
- cl::desc("Alias for -edge-label"),
- cl::sub(GraphC));
+ cl::desc("Alias for -edge-label"));
template <class T> T diff(T L, T R) { return std::max(L, R) - std::min(L, R); }
diff --git a/contrib/llvm-project/llvm/tools/llvm-xray/xray-stacks.cpp b/contrib/llvm-project/llvm/tools/llvm-xray/xray-stacks.cpp
index bcfc5cb1f1be..cf292887b6b8 100644
--- a/contrib/llvm-project/llvm/tools/llvm-xray/xray-stacks.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-xray/xray-stacks.cpp
@@ -42,8 +42,7 @@ static cl::opt<bool>
StackKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),
cl::sub(Stack), cl::init(false));
static cl::alias StackKeepGoing2("k", cl::aliasopt(StackKeepGoing),
- cl::desc("Alias for -keep-going"),
- cl::sub(Stack));
+ cl::desc("Alias for -keep-going"));
// TODO: Does there need to be an option to deduce tail or sibling calls?
@@ -53,8 +52,7 @@ static cl::opt<std::string> StacksInstrMap(
"Currently supports elf file instrumentation maps."),
cl::sub(Stack), cl::init(""));
static cl::alias StacksInstrMap2("m", cl::aliasopt(StacksInstrMap),
- cl::desc("Alias for -instr_map"),
- cl::sub(Stack));
+ cl::desc("Alias for -instr_map"));
static cl::opt<bool>
SeparateThreadStacks("per-thread-stacks",
@@ -72,8 +70,7 @@ static cl::opt<bool>
"By default separates stacks per-thread."),
cl::sub(Stack), cl::init(false));
static cl::alias DumpAllStacksShort("all", cl::aliasopt(DumpAllStacks),
- cl::desc("Alias for -all-stacks"),
- cl::sub(Stack));
+ cl::desc("Alias for -all-stacks"));
// TODO(kpw): Add other interesting formats. Perhaps chrome trace viewer format
// possibly with aggregations or just a linear trace of timings.
diff --git a/contrib/llvm-project/llvm/tools/opt/NewPMDriver.cpp b/contrib/llvm-project/llvm/tools/opt/NewPMDriver.cpp
index efe0bec35d72..ac04a32d93fd 100644
--- a/contrib/llvm-project/llvm/tools/opt/NewPMDriver.cpp
+++ b/contrib/llvm-project/llvm/tools/opt/NewPMDriver.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "NewPMDriver.h"
-#include "Debugify.h"
#include "PassPrinters.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -35,6 +34,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/Debugify.h"
using namespace llvm;
using namespace opt_tool;
@@ -202,11 +202,9 @@ static void registerEPCallbacks(PassBuilder &PB, bool VerifyEachPass,
});
}
-#ifdef LINK_POLLY_INTO_TOOLS
-namespace polly {
-void RegisterPollyPasses(PassBuilder &);
-}
-#endif
+#define HANDLE_EXTENSION(Ext) \
+ llvm::PassPluginLibraryInfo get##Ext##PluginInfo();
+#include "llvm/Support/Extension.def"
bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
ToolOutputFile *Out, ToolOutputFile *ThinLTOLinkOut,
@@ -290,9 +288,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
return false;
});
-#ifdef LINK_POLLY_INTO_TOOLS
- polly::RegisterPollyPasses(PB);
-#endif
+#define HANDLE_EXTENSION(Ext) \
+ get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB);
+#include "llvm/Support/Extension.def"
// Specially handle the alias analysis manager so that we can register
// a custom pipeline of AA passes with it.
diff --git a/contrib/llvm-project/llvm/tools/opt/PassPrinters.cpp b/contrib/llvm-project/llvm/tools/opt/PassPrinters.cpp
index 70da6a43f8d9..a877d9dc90f4 100644
--- a/contrib/llvm-project/llvm/tools/opt/PassPrinters.cpp
+++ b/contrib/llvm-project/llvm/tools/opt/PassPrinters.cpp
@@ -198,40 +198,6 @@ struct RegionPassPrinter : public RegionPass {
char RegionPassPrinter::ID = 0;
-struct BasicBlockPassPrinter : public BasicBlockPass {
- const PassInfo *PassToPrint;
- raw_ostream &Out;
- static char ID;
- std::string PassName;
- bool QuietPass;
-
- BasicBlockPassPrinter(const PassInfo *PI, raw_ostream &out, bool Quiet)
- : BasicBlockPass(ID), PassToPrint(PI), Out(out), QuietPass(Quiet) {
- std::string PassToPrintName = PassToPrint->getPassName();
- PassName = "BasicBlockPass Printer: " + PassToPrintName;
- }
-
- bool runOnBasicBlock(BasicBlock &BB) override {
- if (!QuietPass)
- Out << "Printing Analysis info for BasicBlock '" << BB.getName()
- << "': Pass " << PassToPrint->getPassName() << ":\n";
-
- // Get and print pass...
- getAnalysisID<Pass>(PassToPrint->getTypeInfo())
- .print(Out, BB.getParent()->getParent());
- return false;
- }
-
- StringRef getPassName() const override { return PassName; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(PassToPrint->getTypeInfo());
- AU.setPreservesAll();
- }
-};
-
-char BasicBlockPassPrinter::ID = 0;
-
} // end anonymous namespace
FunctionPass *llvm::createFunctionPassPrinter(const PassInfo *PI,
@@ -260,7 +226,3 @@ RegionPass *llvm::createRegionPassPrinter(const PassInfo *PI, raw_ostream &OS,
return new RegionPassPrinter(PI, OS, Quiet);
}
-BasicBlockPass *llvm::createBasicBlockPassPrinter(const PassInfo *PI,
- raw_ostream &OS, bool Quiet) {
- return new BasicBlockPassPrinter(PI, OS, Quiet);
-}
diff --git a/contrib/llvm-project/llvm/tools/opt/PassPrinters.h b/contrib/llvm-project/llvm/tools/opt/PassPrinters.h
index d4e7a4a97f31..692befbdae75 100644
--- a/contrib/llvm-project/llvm/tools/opt/PassPrinters.h
+++ b/contrib/llvm-project/llvm/tools/opt/PassPrinters.h
@@ -18,7 +18,6 @@
namespace llvm {
-class BasicBlockPass;
class CallGraphSCCPass;
class FunctionPass;
class ModulePass;
@@ -43,9 +42,6 @@ LoopPass *createLoopPassPrinter(const PassInfo *PI, raw_ostream &out,
RegionPass *createRegionPassPrinter(const PassInfo *PI, raw_ostream &out,
bool Quiet);
-BasicBlockPass *createBasicBlockPassPrinter(const PassInfo *PI,
- raw_ostream &out, bool Quiet);
-
} // end namespace llvm
#endif // LLVM_TOOLS_OPT_PASSPRINTERS_H
diff --git a/contrib/llvm-project/llvm/tools/opt/opt.cpp b/contrib/llvm-project/llvm/tools/opt/opt.cpp
index 15495a511d06..75a6cdc3892b 100644
--- a/contrib/llvm-project/llvm/tools/opt/opt.cpp
+++ b/contrib/llvm-project/llvm/tools/opt/opt.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "BreakpointPrinter.h"
-#include "Debugify.h"
#include "NewPMDriver.h"
#include "PassPrinters.h"
#include "llvm/ADT/Triple.h"
@@ -56,6 +55,7 @@
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Debugify.h"
#include <algorithm>
#include <memory>
using namespace llvm;
@@ -193,6 +193,12 @@ static cl::opt<bool>
DisableSimplifyLibCalls("disable-simplify-libcalls",
cl::desc("Disable simplify-libcalls"));
+static cl::list<std::string>
+DisableBuiltins("disable-builtin",
+ cl::desc("Disable specific target library builtin function"),
+ cl::ZeroOrMore);
+
+
static cl::opt<bool>
Quiet("q", cl::desc("Obsolete option"), cl::Hidden);
@@ -328,7 +334,7 @@ public:
PassKind Kind = P->getPassKind();
StringRef Name = P->getPassName();
- // TODO: Implement Debugify for BasicBlockPass, LoopPass.
+ // TODO: Implement Debugify for LoopPass.
switch (Kind) {
case PT_Function:
super::add(createDebugifyFunctionPass());
@@ -476,12 +482,32 @@ static TargetMachine* GetTargetMachine(Triple TheTriple, StringRef CPUStr,
getCodeModel(), GetCodeGenOptLevel());
}
-#ifdef LINK_POLLY_INTO_TOOLS
-namespace polly {
-void initializePollyPasses(llvm::PassRegistry &Registry);
-}
+#ifdef BUILD_EXAMPLES
+void initializeExampleIRTransforms(llvm::PassRegistry &Registry);
#endif
+
+void exportDebugifyStats(llvm::StringRef Path, const DebugifyStatsMap &Map) {
+ std::error_code EC;
+ raw_fd_ostream OS{Path, EC};
+ if (EC) {
+ errs() << "Could not open file: " << EC.message() << ", " << Path << '\n';
+ return;
+ }
+
+ OS << "Pass Name" << ',' << "# of missing debug values" << ','
+ << "# of missing locations" << ',' << "Missing/Expected value ratio" << ','
+ << "Missing/Expected location ratio" << '\n';
+ for (const auto &Entry : Map) {
+ StringRef Pass = Entry.first;
+ DebugifyStatistics Stats = Entry.second;
+
+ OS << Pass << ',' << Stats.NumDbgValuesMissing << ','
+ << Stats.NumDbgLocsMissing << ',' << Stats.getMissingValueRatio() << ','
+ << Stats.getEmptyLocationRatio() << '\n';
+ }
+}
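
The CSV above is keyed by pass name; in upstream opt this export is wired to the -debugify-each/-debugify-export flag pair (see the DebugifyEach/DebugifyExport check near the end of main below), so each row reports one pass's missing debug-value and debug-location counts together with their ratios.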
+
//===----------------------------------------------------------------------===//
// main for opt
//
@@ -535,9 +561,10 @@ int main(int argc, char **argv) {
initializeWasmEHPreparePass(Registry);
initializeWriteBitcodePassPass(Registry);
initializeHardwareLoopsPass(Registry);
+ initializeTypePromotionPass(Registry);
-#ifdef LINK_POLLY_INTO_TOOLS
- polly::initializePollyPasses(Registry);
+#ifdef BUILD_EXAMPLES
+ initializeExampleIRTransforms(Registry);
#endif
cl::ParseCommandLineOptions(argc, argv,
@@ -697,6 +724,19 @@ int main(int argc, char **argv) {
// The -disable-simplify-libcalls flag actually disables all builtin optzns.
if (DisableSimplifyLibCalls)
TLII.disableAllFunctions();
+ else {
+ // Disable individual builtin functions in TargetLibraryInfo.
+ LibFunc F;
+ for (auto &FuncName : DisableBuiltins)
+ if (TLII.getLibFunc(FuncName, F))
+ TLII.setUnavailable(F);
+ else {
+ errs() << argv[0] << ": cannot disable nonexistent builtin function "
+ << FuncName << '\n';
+ return 1;
+ }
+ }
+
Passes.add(new TargetLibraryInfoWrapperPass(TLII));
// Add internal analysis passes from the target machine.
@@ -790,9 +830,6 @@ int main(int argc, char **argv) {
if (AnalyzeOnly) {
switch (Kind) {
- case PT_BasicBlock:
- Passes.add(createBasicBlockPassPrinter(PassInf, Out->os(), Quiet));
- break;
case PT_Region:
Passes.add(createRegionPassPrinter(PassInf, Out->os(), Quiet));
break;
@@ -863,8 +900,10 @@ int main(int argc, char **argv) {
std::unique_ptr<raw_svector_ostream> BOS;
raw_ostream *OS = nullptr;
+ const bool ShouldEmitOutput = !NoOutput && !AnalyzeOnly;
+
// Write bitcode or assembly to the output as the last step...
- if (!NoOutput && !AnalyzeOnly) {
+ if (ShouldEmitOutput || RunTwice) {
assert(Out);
OS = &Out->os();
if (RunTwice) {
@@ -912,13 +951,16 @@ int main(int argc, char **argv) {
"Writing the result of the second run to the specified output.\n"
"To generate the one-run comparison binary, just run without\n"
"the compile-twice option\n";
- Out->os() << BOS->str();
- Out->keep();
+ if (ShouldEmitOutput) {
+ Out->os() << BOS->str();
+ Out->keep();
+ }
if (RemarksFile)
RemarksFile->keep();
return 1;
}
- Out->os() << BOS->str();
+ if (ShouldEmitOutput)
+ Out->os() << BOS->str();
}
if (DebugifyEach && !DebugifyExport.empty())
diff --git a/contrib/llvm-project/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 1d39b300091f..ccf0959389ba 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -3347,7 +3347,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
return true;
if (A.size() > B.size())
return false;
- for (const auto &Pair : zip(A, B)) {
+ for (auto Pair : zip(A, B)) {
if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
return true;
if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
diff --git a/contrib/llvm-project/llvm/utils/TableGen/AsmWriterEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/AsmWriterEmitter.cpp
index b5c7f35be0e5..58c0d32d44eb 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
@@ -274,13 +275,14 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
StringRef ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
bool PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget");
- O <<
- "/// printInstruction - This method is automatically generated by tablegen\n"
- "/// from the instruction set description.\n"
- "void " << Target.getName() << ClassName
- << "::printInstruction(const MCInst *MI, "
- << (PassSubtarget ? "const MCSubtargetInfo &STI, " : "")
- << "raw_ostream &O) {\n";
+ O << "/// printInstruction - This method is automatically generated by "
+ "tablegen\n"
+ "/// from the instruction set description.\n"
+ "void "
+ << Target.getName() << ClassName
+ << "::printInstruction(const MCInst *MI, uint64_t Address, "
+ << (PassSubtarget ? "const MCSubtargetInfo &STI, " : "")
+ << "raw_ostream &O) {\n";
// Build an aggregate string, and build a table of offsets into it.
SequenceToOffsetTable<std::string> StringTable;
@@ -616,17 +618,22 @@ namespace {
// they both have the same conditionals, in which case we cannot print out the
// alias for that pattern.
class IAPrinter {
- std::vector<std::string> Conds;
std::map<StringRef, std::pair<int, int>> OpMap;
+ std::vector<std::string> Conds;
+
std::string Result;
std::string AsmString;
+ unsigned NumMIOps;
+
public:
- IAPrinter(std::string R, std::string AS)
- : Result(std::move(R)), AsmString(std::move(AS)) {}
+ IAPrinter(std::string R, std::string AS, unsigned NumMIOps)
+ : Result(std::move(R)), AsmString(std::move(AS)), NumMIOps(NumMIOps) {}
- void addCond(const std::string &C) { Conds.push_back(C); }
+ void addCond(std::string C) { Conds.push_back(std::move(C)); }
+ ArrayRef<std::string> getConds() const { return Conds; }
+ size_t getCondCount() const { return Conds.size(); }
void addOperand(StringRef Op, int OpIdx, int PrintMethodIdx = -1) {
assert(OpIdx >= 0 && OpIdx < 0xFE && "Idx out of range");
@@ -635,6 +642,10 @@ public:
OpMap[Op] = std::make_pair(OpIdx, PrintMethodIdx);
}
+ unsigned getNumMIOps() { return NumMIOps; }
+
+ StringRef getResult() { return Result; }
+
bool isOpMapped(StringRef Op) { return OpMap.find(Op) != OpMap.end(); }
int getOpIndex(StringRef Op) { return OpMap[Op].first; }
std::pair<int, int> &getOpData(StringRef Op) { return OpMap[Op]; }
@@ -664,35 +675,16 @@ public:
return std::make_pair(StringRef(Start, I - Start), Next);
}
- void print(raw_ostream &O) {
- if (Conds.empty()) {
- O.indent(6) << "return true;\n";
- return;
- }
-
- O << "if (";
-
- for (std::vector<std::string>::iterator
- I = Conds.begin(), E = Conds.end(); I != E; ++I) {
- if (I != Conds.begin()) {
- O << " &&\n";
- O.indent(8);
- }
-
- O << *I;
- }
-
- O << ") {\n";
- O.indent(6) << "// " << Result << "\n";
-
+ std::string formatAliasString(uint32_t &UnescapedSize) {
// Directly mangle mapped operands into the string. Each operand is
// identified by a '$' sign followed by a byte identifying the number of the
// operand. We add one to the index to avoid zero bytes.
StringRef ASM(AsmString);
- SmallString<128> OutString;
- raw_svector_ostream OS(OutString);
+ std::string OutString;
+ raw_string_ostream OS(OutString);
for (StringRef::iterator I = ASM.begin(), E = ASM.end(); I != E;) {
OS << *I;
+ ++UnescapedSize;
if (*I == '$') {
StringRef Name;
std::tie(Name, I) = parseName(++I, E);
@@ -703,23 +695,25 @@ public:
if (PrintIndex == -1) {
// Can use the default printOperand route.
OS << format("\\x%02X", (unsigned char)OpIndex + 1);
- } else
+ ++UnescapedSize;
+ } else {
// 3 bytes if a PrintMethod is needed: 0xFF, the MCInst operand
// number, and which of our pre-detected Methods to call.
OS << format("\\xFF\\x%02X\\x%02X", OpIndex + 1, PrintIndex + 1);
+ UnescapedSize += 3;
+ }
} else {
++I;
}
}
- // Emit the string.
- O.indent(6) << "AsmString = \"" << OutString << "\";\n";
-
- O.indent(6) << "break;\n";
- O.indent(4) << '}';
+ OS.flush();
+ return OutString;
}
bool operator==(const IAPrinter &RHS) const {
+ if (NumMIOps != RHS.NumMIOps)
+ return false;
if (Conds.size() != RHS.Conds.size())
return false;
@@ -799,6 +793,9 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
DenseMap<const Record*, unsigned> MCOpPredicateMap;
for (auto &Aliases : AliasMap) {
+ // Collection of instruction alias rules. May contain ambiguous rules.
+ std::vector<IAPrinter> IAPs;
+
for (auto &Alias : Aliases.second) {
const CodeGenInstAlias &CGA = Alias.first;
unsigned LastOpNo = CGA.ResultInstOperandIndex.size();
@@ -808,33 +805,18 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
unsigned NumResultOps = CountNumOperands(FlatInstAsmString, Variant);
std::string FlatAliasAsmString =
- CodeGenInstruction::FlattenAsmStringVariants(CGA.AsmString,
- Variant);
+ CodeGenInstruction::FlattenAsmStringVariants(CGA.AsmString, Variant);
// Don't emit the alias if it has more operands than what it's aliasing.
if (NumResultOps < CountNumOperands(FlatAliasAsmString, Variant))
continue;
- IAPrinter IAP(CGA.Result->getAsString(), FlatAliasAsmString);
-
StringRef Namespace = Target.getName();
- std::vector<Record *> ReqFeatures;
- if (PassSubtarget) {
- // We only consider ReqFeatures predicates if PassSubtarget
- std::vector<Record *> RF =
- CGA.TheDef->getValueAsListOfDefs("Predicates");
- copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) {
- return R->getValueAsBit("AssemblerMatcherPredicate");
- });
- }
-
unsigned NumMIOps = 0;
for (auto &ResultInstOpnd : CGA.ResultInst->Operands)
NumMIOps += ResultInstOpnd.MINumOperands;
- std::string Cond;
- Cond = std::string("MI->getNumOperands() == ") + utostr(NumMIOps);
- IAP.addCond(Cond);
+ IAPrinter IAP(CGA.Result->getAsString(), FlatAliasAsmString, NumMIOps);
bool CantHandle = false;
@@ -858,7 +840,9 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
break;
}
- std::string Op = "MI->getOperand(" + utostr(MIOpNum) + ")";
+ // Ignore unchecked result operands.
+ while (IAP.getCondCount() < MIOpNum)
+ IAP.addCond("AliasPatternCond::K_Ignore, 0");
const CodeGenInstAlias::ResultOperand &RO = CGA.ResultOperands[i];
@@ -885,19 +869,17 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
if (Rec->isSubClassOf("RegisterOperand"))
Rec = Rec->getValueAsDef("RegClass");
if (Rec->isSubClassOf("RegisterClass")) {
- IAP.addCond(Op + ".isReg()");
-
if (!IAP.isOpMapped(ROName)) {
IAP.addOperand(ROName, MIOpNum, PrintMethodIdx);
Record *R = CGA.ResultOperands[i].getRecord();
if (R->isSubClassOf("RegisterOperand"))
R = R->getValueAsDef("RegClass");
- Cond = std::string("MRI.getRegClass(") + Target.getName().str() +
- "::" + R->getName().str() + "RegClassID).contains(" + Op +
- ".getReg())";
+ IAP.addCond(formatv(
+ "AliasPatternCond::K_RegClass, {0}::{1}RegClassID", Namespace,
+ R->getName()));
} else {
- Cond = Op + ".getReg() == MI->getOperand(" +
- utostr(IAP.getOpIndex(ROName)) + ").getReg()";
+ IAP.addCond(formatv("AliasPatternCond::K_TiedReg, {0}",
+ IAP.getOpIndex(ROName)));
}
} else {
// Assume all printable operands are desired for now. This can be
@@ -914,21 +896,19 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
} else
break; // No conditions on this operand at all
}
- Cond = (Target.getName() + ClassName + "ValidateMCOperand(" + Op +
- ", STI, " + utostr(Entry) + ")")
- .str();
+ IAP.addCond(formatv("AliasPatternCond::K_Custom, {0}", Entry));
}
- // for all subcases of ResultOperand::K_Record:
- IAP.addCond(Cond);
break;
}
case CodeGenInstAlias::ResultOperand::K_Imm: {
// Just because the alias has an immediate result doesn't mean the
// MCInst will. An MCExpr could be present, for example.
- IAP.addCond(Op + ".isImm()");
-
- Cond = Op + ".getImm() == " + itostr(CGA.ResultOperands[i].getImm());
- IAP.addCond(Cond);
+ auto Imm = CGA.ResultOperands[i].getImm();
+ int32_t Imm32 = int32_t(Imm);
+ if (Imm != Imm32)
+ PrintFatalError("Matching an alias with an immediate out of the "
+ "range of int32_t is not supported");
+ IAP.addCond(formatv("AliasPatternCond::K_Imm, uint32_t({0})", Imm32));
break;
}
case CodeGenInstAlias::ResultOperand::K_Reg:
@@ -939,9 +919,9 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
break;
}
- Cond = Op + ".getReg() == " + Target.getName().str() + "::" +
- CGA.ResultOperands[i].getRegister()->getName().str();
- IAP.addCond(Cond);
+ StringRef Reg = CGA.ResultOperands[i].getRegister()->getName();
+ IAP.addCond(
+ formatv("AliasPatternCond::K_Reg, {0}::{1}", Namespace, Reg));
break;
}
@@ -950,6 +930,16 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
if (CantHandle) continue;
+ std::vector<Record *> ReqFeatures;
+ if (PassSubtarget) {
+ // We only consider ReqFeatures predicates if PassSubtarget
+ std::vector<Record *> RF =
+ CGA.TheDef->getValueAsListOfDefs("Predicates");
+ copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) {
+ return R->getValueAsBit("AssemblerMatcherPredicate");
+ });
+ }
+
for (auto I = ReqFeatures.cbegin(); I != ReqFeatures.cend(); I++) {
Record *R = *I;
StringRef AsmCondString = R->getValueAsString("AssemblerCondString");
@@ -959,16 +949,12 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
SplitString(AsmCondString, Ops, ",");
assert(!Ops.empty() && "AssemblerCondString cannot be empty");
- for (auto &Op : Ops) {
+ for (StringRef Op : Ops) {
assert(!Op.empty() && "Empty operator");
- if (Op[0] == '!')
- Cond = ("!STI.getFeatureBits()[" + Namespace + "::" + Op.substr(1) +
- "]")
- .str();
- else
- Cond =
- ("STI.getFeatureBits()[" + Namespace + "::" + Op + "]").str();
- IAP.addCond(Cond);
+ bool IsNeg = Op[0] == '!';
+ StringRef Feature = Op.drop_front(IsNeg ? 1 : 0);
+ IAP.addCond(formatv("AliasPatternCond::K_{0}Feature, {1}::{2}",
+ IsNeg ? "Neg" : "", Namespace, Feature));
}
}
@@ -988,13 +974,32 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
<< " *MI, " << (PassSubtarget ? "const MCSubtargetInfo &STI, " : "")
<< "raw_ostream &OS) {\n";
- std::string Cases;
- raw_string_ostream CasesO(Cases);
-
- for (auto &Entry : IAPrinterMap) {
- std::vector<IAPrinter> &IAPs = Entry.second;
+ std::string PatternsForOpcode;
+ raw_string_ostream OpcodeO(PatternsForOpcode);
+
+ unsigned PatternCount = 0;
+ std::string Patterns;
+ raw_string_ostream PatternO(Patterns);
+
+ unsigned CondCount = 0;
+ std::string Conds;
+ raw_string_ostream CondO(Conds);
+
+ // All flattened alias strings.
+ std::map<std::string, uint32_t> AsmStringOffsets;
+ std::vector<std::pair<uint32_t, std::string>> AsmStrings;
+ size_t AsmStringsSize = 0;
+
+ // Iterate over the opcodes in enum order so they are sorted by opcode for
+ // binary search.
+ for (const CodeGenInstruction *Inst : NumberedInstructions) {
+ auto It = IAPrinterMap.find(getQualifiedName(Inst->TheDef));
+ if (It == IAPrinterMap.end())
+ continue;
+ std::vector<IAPrinter> &IAPs = It->second;
std::vector<IAPrinter*> UniqueIAPs;
+ // Remove any ambiguous alias rules.
for (auto &LHS : IAPs) {
bool IsDup = false;
for (const auto &RHS : IAPs) {
@@ -1010,18 +1015,43 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
if (UniqueIAPs.empty()) continue;
- CasesO.indent(2) << "case " << Entry.first << ":\n";
+ unsigned PatternStart = PatternCount;
+
+ // Insert the pattern start and opcode in the pattern list for debugging.
+ PatternO << formatv(" // {0} - {1}\n", It->first, PatternStart);
for (IAPrinter *IAP : UniqueIAPs) {
- CasesO.indent(4);
- IAP->print(CasesO);
- CasesO << '\n';
+ // Start each condition list with a comment of the resulting pattern that
+ // we're trying to match.
+ unsigned CondStart = CondCount;
+ CondO << formatv(" // {0} - {1}\n", IAP->getResult(), CondStart);
+ for (const auto &Cond : IAP->getConds())
+ CondO << " {" << Cond << "},\n";
+ CondCount += IAP->getCondCount();
+
+ // After operands have been examined, re-encode the alias string with
+ // escapes indicating how operands should be printed.
+ uint32_t UnescapedSize = 0;
+ std::string EncodedAsmString = IAP->formatAliasString(UnescapedSize);
+ auto Insertion =
+ AsmStringOffsets.insert({EncodedAsmString, AsmStringsSize});
+ if (Insertion.second) {
+ // If the string is new, add it to the vector.
+ AsmStrings.push_back({AsmStringsSize, EncodedAsmString});
+ AsmStringsSize += UnescapedSize + 1;
+ }
+ unsigned AsmStrOffset = Insertion.first->second;
+
+ PatternO << formatv(" {{{0}, {1}, {2}, {3} },\n", AsmStrOffset,
+ CondStart, IAP->getNumMIOps(), IAP->getCondCount());
+ ++PatternCount;
}
- CasesO.indent(4) << "return false;\n";
+ OpcodeO << formatv(" {{{0}, {1}, {2} },\n", It->first, PatternStart,
+ PatternCount - PatternStart);
}
- if (CasesO.str().empty()) {
+ if (OpcodeO.str().empty()) {
O << HeaderO.str();
O << " return false;\n";
O << "}\n\n";
@@ -1029,6 +1059,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
return;
}
+ // Forward declare the validation method if needed.
if (!MCOpPredicates.empty())
O << "static bool " << Target.getName() << ClassName
<< "ValidateMCOperand(const MCOperand &MCOp,\n"
@@ -1036,11 +1067,52 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
<< " unsigned PredicateIndex);\n";
O << HeaderO.str();
- O.indent(2) << "const char *AsmString;\n";
- O.indent(2) << "switch (MI->getOpcode()) {\n";
- O.indent(2) << "default: return false;\n";
- O << CasesO.str();
- O.indent(2) << "}\n\n";
+ O.indent(2) << "static const PatternsForOpcode OpToPatterns[] = {\n";
+ O << OpcodeO.str();
+ O.indent(2) << "};\n\n";
+ O.indent(2) << "static const AliasPattern Patterns[] = {\n";
+ O << PatternO.str();
+ O.indent(2) << "};\n\n";
+ O.indent(2) << "static const AliasPatternCond Conds[] = {\n";
+ O << CondO.str();
+ O.indent(2) << "};\n\n";
+ O.indent(2) << "static const char AsmStrings[] =\n";
+ for (const auto &P : AsmStrings) {
+ O.indent(4) << "/* " << P.first << " */ \"" << P.second << "\\0\"\n";
+ }
+
+ O.indent(2) << ";\n\n";
+
+ // Assert that the opcode table is sorted. Use a static local constructor to
+ // ensure that the check only happens once on first run.
+ O << "#ifndef NDEBUG\n";
+ O.indent(2) << "static struct SortCheck {\n";
+ O.indent(2) << " SortCheck(ArrayRef<PatternsForOpcode> OpToPatterns) {\n";
+ O.indent(2) << " assert(std::is_sorted(\n";
+ O.indent(2) << " OpToPatterns.begin(), OpToPatterns.end(),\n";
+ O.indent(2) << " [](const PatternsForOpcode &L, const "
+ "PatternsForOpcode &R) {\n";
+ O.indent(2) << " return L.Opcode < R.Opcode;\n";
+ O.indent(2) << " }) &&\n";
+ O.indent(2) << " \"tablegen failed to sort opcode patterns\");\n";
+ O.indent(2) << " }\n";
+ O.indent(2) << "} sortCheckVar(OpToPatterns);\n";
+ O << "#endif\n\n";
+
+ O.indent(2) << "AliasMatchingData M {\n";
+ O.indent(2) << " makeArrayRef(OpToPatterns),\n";
+ O.indent(2) << " makeArrayRef(Patterns),\n";
+ O.indent(2) << " makeArrayRef(Conds),\n";
+ O.indent(2) << " StringRef(AsmStrings, array_lengthof(AsmStrings)),\n";
+ if (MCOpPredicates.empty())
+ O.indent(2) << " nullptr,\n";
+ else
+ O.indent(2) << " &" << Target.getName() << ClassName << "ValidateMCOperand,\n";
+ O.indent(2) << "};\n";
+
+ O.indent(2) << "const char *AsmString = matchAliasPatterns(MI, "
+ << (PassSubtarget ? "&STI" : "nullptr") << ", M);\n";
+ O.indent(2) << "if (!AsmString) return false;\n\n";
// Code that prints the alias, replacing the operands with the ones from the
// MCInst.
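The rewrite above replaces a generated switch full of string-built conditions with four data tables consumed by matchAliasPatterns: a PatternsForOpcode array sorted by opcode (hence the static is_sorted check), a flat AliasPattern array, a flat AliasPatternCond array, and one deduplicated blob of NUL-separated asm strings in which operands are re-encoded as escape bytes ('$name' becomes opindex+1, or 0xFF plus operand and print-method indices when a custom PrintMethod is needed). A sketch of the record shapes the emitter writes out (field names follow the generated initializers; the exact widths are an assumption here, as the authoritative definitions live with matchAliasPatterns in LLVM's MC layer):

    #include <cstdint>

    struct PatternsForOpcode {
      uint32_t Opcode;       // binary-search key; the table is sorted on this
      uint16_t PatternStart; // first entry in the Patterns[] table
      uint16_t NumPatterns;  // patterns to try for this opcode, in order
    };

    struct AliasPattern {
      uint32_t AsmStrOffset;   // offset into the NUL-separated AsmStrings blob
      uint32_t AliasCondStart; // first entry in the Conds[] table
      uint8_t NumOperands;     // required MI->getNumOperands()
      uint8_t NumConds;        // all conditions must hold for a match
    };

    struct AliasPatternCond {
      // Kinds correspond to the strings emitted by addCond() above:
      // K_Feature, K_NegFeature, K_Ignore, K_RegClass, K_TiedReg, K_Reg,
      // K_Imm, and K_Custom.
      int Kind;
      uint32_t Value; // register, reg-class ID, feature bit, or immediate
    };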
diff --git a/contrib/llvm-project/llvm/utils/TableGen/AsmWriterInst.h b/contrib/llvm-project/llvm/utils/TableGen/AsmWriterInst.h
index 7d88e5a9d037..a59112efea44 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/AsmWriterInst.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/AsmWriterInst.h
@@ -36,7 +36,7 @@ namespace llvm {
/// MiOpNo - For isMachineInstrOperand, this is the operand number of the
/// machine instruction.
- unsigned MIOpNo;
+ unsigned MIOpNo = 0;
/// Str - For isLiteralTextOperand, this IS the literal text. For
/// isMachineInstrOperand, this is the PrinterMethodName for the operand.
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp
index 42f69cb253d2..68cb8f181e62 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -563,7 +563,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
return true;
if (A.size() > B.size())
return false;
- for (const auto &Pair : zip(A, B)) {
+ for (auto Pair : zip(A, B)) {
if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
return true;
if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index 46f986ca0176..7e0ba98da94a 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include <algorithm>
@@ -481,12 +482,12 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
if (any_of(S, isIntegerOrPtr) && any_of(S, isIntegerOrPtr)) {
auto NotInt = [](MVT VT) { return !isIntegerOrPtr(VT); };
- Changed |= berase_if(S, NotInt) |
- berase_if(B, NotInt);
+ Changed |= berase_if(S, NotInt);
+ Changed |= berase_if(B, NotInt);
} else if (any_of(S, isFloatingPoint) && any_of(B, isFloatingPoint)) {
auto NotFP = [](MVT VT) { return !isFloatingPoint(VT); };
- Changed |= berase_if(S, NotFP) |
- berase_if(B, NotFP);
+ Changed |= berase_if(S, NotFP);
+ Changed |= berase_if(B, NotFP);
} else if (S.empty() || B.empty()) {
Changed = !S.empty() || !B.empty();
S.clear();
@@ -497,24 +498,30 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
}
if (none_of(S, isVector) || none_of(B, isVector)) {
- Changed |= berase_if(S, isVector) |
- berase_if(B, isVector);
+ Changed |= berase_if(S, isVector);
+ Changed |= berase_if(B, isVector);
}
}
auto LT = [](MVT A, MVT B) -> bool {
- return A.getScalarSizeInBits() < B.getScalarSizeInBits() ||
- (A.getScalarSizeInBits() == B.getScalarSizeInBits() &&
- A.getSizeInBits() < B.getSizeInBits());
+ // Always treat non-scalable MVTs as smaller than scalable MVTs for the
+ // purposes of ordering.
+ auto ASize = std::make_tuple(A.isScalableVector(), A.getScalarSizeInBits(),
+ A.getSizeInBits());
+ auto BSize = std::make_tuple(B.isScalableVector(), B.getScalarSizeInBits(),
+ B.getSizeInBits());
+ return ASize < BSize;
};
- auto LE = [&LT](MVT A, MVT B) -> bool {
+ auto SameKindLE = [](MVT A, MVT B) -> bool {
// This function is used when removing elements: when a vector is compared
- // to a non-vector, it should return false (to avoid removal).
- if (A.isVector() != B.isVector())
+ // to a non-vector or a scalable vector to any non-scalable MVT, it should
+ // return false (to avoid removal).
+ if (std::make_tuple(A.isVector(), A.isScalableVector()) !=
+ std::make_tuple(B.isVector(), B.isScalableVector()))
return false;
- return LT(A, B) || (A.getScalarSizeInBits() == B.getScalarSizeInBits() &&
- A.getSizeInBits() == B.getSizeInBits());
+ return std::make_tuple(A.getScalarSizeInBits(), A.getSizeInBits()) <=
+ std::make_tuple(B.getScalarSizeInBits(), B.getSizeInBits());
};
for (unsigned M : Modes) {
@@ -524,25 +531,29 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
// smaller-or-equal than MinS.
auto MinS = min_if(S.begin(), S.end(), isScalar, LT);
if (MinS != S.end())
- Changed |= berase_if(B, std::bind(LE, std::placeholders::_1, *MinS));
+ Changed |= berase_if(B, std::bind(SameKindLE,
+ std::placeholders::_1, *MinS));
// MaxS = max scalar in Big, remove all scalars from Small that are
// larger than MaxS.
auto MaxS = max_if(B.begin(), B.end(), isScalar, LT);
if (MaxS != B.end())
- Changed |= berase_if(S, std::bind(LE, *MaxS, std::placeholders::_1));
+ Changed |= berase_if(S, std::bind(SameKindLE,
+ *MaxS, std::placeholders::_1));
// MinV = min vector in Small, remove all vectors from Big that are
// smaller-or-equal than MinV.
auto MinV = min_if(S.begin(), S.end(), isVector, LT);
if (MinV != S.end())
- Changed |= berase_if(B, std::bind(LE, std::placeholders::_1, *MinV));
+ Changed |= berase_if(B, std::bind(SameKindLE,
+ std::placeholders::_1, *MinV));
// MaxV = max vector in Big, remove all vectors from Small that are
// larger than MaxV.
auto MaxV = max_if(B.begin(), B.end(), isVector, LT);
if (MaxV != B.end())
- Changed |= berase_if(S, std::bind(LE, *MaxV, std::placeholders::_1));
+ Changed |= berase_if(S, std::bind(SameKindLE,
+ *MaxV, std::placeholders::_1));
}
return Changed;
@@ -625,7 +636,7 @@ bool TypeInfer::EnforceVectorSubVectorTypeIs(TypeSetByHwMode &Vec,
/// Return true if S has no element (vector type) that T is a sub-vector of,
/// i.e. has the same element type as T and more elements.
auto NoSubV = [&IsSubVec](const TypeSetByHwMode::SetType &S, MVT T) -> bool {
- for (const auto &I : S)
+ for (auto I : S)
if (IsSubVec(T, I))
return false;
return true;
@@ -634,7 +645,7 @@ bool TypeInfer::EnforceVectorSubVectorTypeIs(TypeSetByHwMode &Vec,
/// Return true if S has no element (vector type) that T is a super-vector
/// of, i.e. has the same element type as T and fewer elements.
auto NoSupV = [&IsSubVec](const TypeSetByHwMode::SetType &S, MVT T) -> bool {
- for (const auto &I : S)
+ for (auto I : S)
if (IsSubVec(I, T))
return false;
return true;
@@ -1304,13 +1315,29 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const {
// Handle arbitrary node predicates.
assert(hasPredCode() && "Don't have any predicate code!");
+
+ // If this is using PatFrags, there are multiple trees to search. They should
+ // all have the same class. FIXME: Is there a way to find a common
+ // superclass?
StringRef ClassName;
- if (PatFragRec->getOnlyTree()->isLeaf())
- ClassName = "SDNode";
- else {
- Record *Op = PatFragRec->getOnlyTree()->getOperator();
- ClassName = PatFragRec->getDAGPatterns().getSDNodeInfo(Op).getSDClassName();
+ for (const auto &Tree : PatFragRec->getTrees()) {
+ StringRef TreeClassName;
+ if (Tree->isLeaf())
+ TreeClassName = "SDNode";
+ else {
+ Record *Op = Tree->getOperator();
+ const SDNodeInfo &Info = PatFragRec->getDAGPatterns().getSDNodeInfo(Op);
+ TreeClassName = Info.getSDClassName();
+ }
+
+ if (ClassName.empty())
+ ClassName = TreeClassName;
+ else if (ClassName != TreeClassName) {
+ PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
+ "PatFrags trees do not have consistent class");
+ }
}
+
std::string Result;
if (ClassName == "SDNode")
Result = " SDNode *N = Node;\n";
@@ -3017,8 +3044,7 @@ CodeGenDAGPatterns::CodeGenDAGPatterns(RecordKeeper &R,
: Records(R), Target(R), LegalVTS(Target.getLegalValueTypes()),
PatternRewriter(PatternRewriter) {
- Intrinsics = CodeGenIntrinsicTable(Records, false);
- TgtIntrinsics = CodeGenIntrinsicTable(Records, true);
+ Intrinsics = CodeGenIntrinsicTable(Records);
ParseNodeInfo();
ParseNodeTransforms();
ParseComplexPatterns();
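The comparator changes above lean on lexicographic tuple ordering: scalability is the most significant key, so every fixed-length MVT sorts below every scalable one, with scalar size and total size breaking ties, and SameKindLE additionally refuses to compare across vector/scalable kinds at all. A minimal sketch of the idiom, with a stand-in for MVT:

    #include <tuple>

    struct VT { // stand-in for llvm::MVT
      bool Scalable;
      unsigned ScalarBits;
      unsigned TotalBits;
    };

    // Non-scalable types always order before scalable ones; ties fall
    // through to scalar size, then total size -- exactly the field order
    // of the tuples.
    static bool smallerThan(const VT &A, const VT &B) {
      return std::make_tuple(A.Scalable, A.ScalarBits, A.TotalBits) <
             std::make_tuple(B.Scalable, B.ScalarBits, B.TotalBits);
    }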
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.h b/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.h
index 80fc932a7a50..2c081b670609 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -194,6 +194,7 @@ struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> {
TypeSetByHwMode() = default;
TypeSetByHwMode(const TypeSetByHwMode &VTS) = default;
+ TypeSetByHwMode &operator=(const TypeSetByHwMode &) = default;
TypeSetByHwMode(MVT::SimpleValueType VT)
: TypeSetByHwMode(ValueTypeByHwMode(VT)) {}
TypeSetByHwMode(ValueTypeByHwMode VT)
@@ -1144,7 +1145,6 @@ class CodeGenDAGPatterns {
RecordKeeper &Records;
CodeGenTarget Target;
CodeGenIntrinsicTable Intrinsics;
- CodeGenIntrinsicTable TgtIntrinsics;
std::map<Record*, SDNodeInfo, LessRecordByID> SDNodes;
std::map<Record*, std::pair<Record*, std::string>, LessRecordByID>
@@ -1195,12 +1195,6 @@ public:
return F->second;
}
- typedef std::map<Record*, NodeXForm, LessRecordByID>::const_iterator
- nx_iterator;
- nx_iterator nx_begin() const { return SDNodeXForms.begin(); }
- nx_iterator nx_end() const { return SDNodeXForms.end(); }
-
-
const ComplexPattern &getComplexPattern(Record *R) const {
auto F = ComplexPatterns.find(R);
assert(F != ComplexPatterns.end() && "Unknown addressing mode!");
@@ -1210,24 +1204,18 @@ public:
const CodeGenIntrinsic &getIntrinsic(Record *R) const {
for (unsigned i = 0, e = Intrinsics.size(); i != e; ++i)
if (Intrinsics[i].TheDef == R) return Intrinsics[i];
- for (unsigned i = 0, e = TgtIntrinsics.size(); i != e; ++i)
- if (TgtIntrinsics[i].TheDef == R) return TgtIntrinsics[i];
llvm_unreachable("Unknown intrinsic!");
}
const CodeGenIntrinsic &getIntrinsicInfo(unsigned IID) const {
if (IID-1 < Intrinsics.size())
return Intrinsics[IID-1];
- if (IID-Intrinsics.size()-1 < TgtIntrinsics.size())
- return TgtIntrinsics[IID-Intrinsics.size()-1];
llvm_unreachable("Bad intrinsic ID!");
}
unsigned getIntrinsicID(Record *R) const {
for (unsigned i = 0, e = Intrinsics.size(); i != e; ++i)
if (Intrinsics[i].TheDef == R) return i;
- for (unsigned i = 0, e = TgtIntrinsics.size(); i != e; ++i)
- if (TgtIntrinsics[i].TheDef == R) return i + Intrinsics.size();
llvm_unreachable("Unknown intrinsic!");
}
@@ -1284,8 +1272,6 @@ public:
return intrinsic_wo_chain_sdnode;
}
- bool hasTargetIntrinsics() { return !TgtIntrinsics.empty(); }
-
unsigned allocateScope() { return ++NumScopes; }
bool operandHasDefault(Record *Op) const {
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.cpp
index fde946d06589..6bb4dbb511b6 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.cpp
@@ -396,6 +396,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
hasNoSchedulingInfo = R->getValueAsBit("hasNoSchedulingInfo");
FastISelShouldIgnore = R->getValueAsBit("FastISelShouldIgnore");
variadicOpsAreDefs = R->getValueAsBit("variadicOpsAreDefs");
+ isAuthenticated = R->getValueAsBit("isAuthenticated");
bool Unset;
mayLoad = R->getValueAsBitOrUnset("mayLoad", Unset);
@@ -504,16 +505,18 @@ FlattenAsmStringVariants(StringRef Cur, unsigned Variant) {
return Res;
}
-bool CodeGenInstruction::isOperandAPointer(unsigned i) const {
- if (DagInit *ConstraintList = TheDef->getValueAsDag("InOperandList")) {
- if (i < ConstraintList->getNumArgs()) {
- if (DefInit *Constraint = dyn_cast<DefInit>(ConstraintList->getArg(i))) {
- return Constraint->getDef()->isSubClassOf("TypedOperand") &&
- Constraint->getDef()->getValueAsBit("IsPointer");
- }
- }
- }
- return false;
+bool CodeGenInstruction::isOperandImpl(unsigned i,
+ StringRef PropertyName) const {
+ DagInit *ConstraintList = TheDef->getValueAsDag("InOperandList");
+ if (!ConstraintList || i >= ConstraintList->getNumArgs())
+ return false;
+
+ DefInit *Constraint = dyn_cast<DefInit>(ConstraintList->getArg(i));
+ if (!Constraint)
+ return false;
+
+ return Constraint->getDef()->isSubClassOf("TypedOperand") &&
+ Constraint->getDef()->getValueAsBit(PropertyName);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.h b/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.h
index 2cb28425df7a..1f08ce481a89 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenInstruction.h
@@ -29,10 +29,11 @@ template <typename T> class ArrayRef;
class CGIOperandList {
public:
class ConstraintInfo {
- enum { None, EarlyClobber, Tied } Kind;
- unsigned OtherTiedOperand;
+ enum { None, EarlyClobber, Tied } Kind = None;
+ unsigned OtherTiedOperand = 0;
+
public:
- ConstraintInfo() : Kind(None) {}
+ ConstraintInfo() = default;
static ConstraintInfo getEarlyClobber() {
ConstraintInfo I;
@@ -277,6 +278,7 @@ template <typename T> class ArrayRef;
bool hasChain : 1;
bool hasChain_Inferred : 1;
bool variadicOpsAreDefs : 1;
+ bool isAuthenticated : 1;
std::string DeprecatedReason;
bool HasComplexDeprecationPredicate;
@@ -308,7 +310,17 @@ template <typename T> class ArrayRef;
// This can be used on instructions that use typeN or ptypeN to identify
// operands that should be considered as pointers even though SelectionDAG
// didn't make a distinction between integers and pointers.
- bool isOperandAPointer(unsigned i) const;
+ bool isOperandAPointer(unsigned i) const {
+ return isOperandImpl(i, "IsPointer");
+ }
+
+ /// Check if the operand is required to be an immediate.
+ bool isOperandImmArg(unsigned i) const {
+ return isOperandImpl(i, "IsImmediate");
+ }
+
+ private:
+ bool isOperandImpl(unsigned i, StringRef PropertyName) const;
};
@@ -332,9 +344,9 @@ template <typename T> class ArrayRef;
struct ResultOperand {
private:
std::string Name;
- Record *R;
+ Record *R = nullptr;
+ int64_t Imm = 0;
- int64_t Imm;
public:
enum {
K_Record,
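With isOperandAPointer reduced to a wrapper over isOperandImpl, both operand-property queries share one lookup into the TypedOperand record behind the i-th input operand. A hedged usage sketch (CGI stands for any populated CodeGenInstruction; the branches are illustrative):

    // Query declared properties of the i-th input operand.
    if (CGI.isOperandAPointer(i)) {
      // Declared with IsPointer: treat as a pointer even though
      // SelectionDAG does not distinguish integers from pointers.
    } else if (CGI.isOperandImmArg(i)) {
      // Declared with IsImmediate: must be a compile-time immediate.
    }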
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenIntrinsics.h b/contrib/llvm-project/llvm/utils/TableGen/CodeGenIntrinsics.h
index 83e780671b43..723bbe0cc23d 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenIntrinsics.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -162,6 +162,8 @@ struct CodeGenIntrinsic {
/// Note that this requires that \p IS.ParamVTs is available.
bool isParamAPointer(unsigned ParamIdx) const;
+ bool isParamImmArg(unsigned ParamIdx) const;
+
CodeGenIntrinsic(Record *R);
};
@@ -176,7 +178,7 @@ public:
};
std::vector<TargetSet> Targets;
- explicit CodeGenIntrinsicTable(const RecordKeeper &RC, bool TargetOnly);
+ explicit CodeGenIntrinsicTable(const RecordKeeper &RC);
CodeGenIntrinsicTable() = default;
bool empty() const { return Intrinsics.empty(); }
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.h b/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.h
index 6d933baec2ae..a8e9e0fbccbe 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenRegisters.h
@@ -635,9 +635,11 @@ namespace llvm {
CodeGenSubRegIndex *
getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex *, 8>&);
- const std::deque<CodeGenRegister> &getRegisters() { return Registers; }
+ const std::deque<CodeGenRegister> &getRegisters() const {
+ return Registers;
+ }
- const StringMap<CodeGenRegister*> &getRegistersByName() {
+ const StringMap<CodeGenRegister *> &getRegistersByName() const {
return RegistersByName;
}
@@ -686,7 +688,7 @@ namespace llvm {
// Native units are the singular unit of a leaf register. Register aliasing
// is completely characterized by native units. Adopted units exist to give
// a register additional weight but don't affect aliasing.
- bool isNativeUnit(unsigned RUID) {
+ bool isNativeUnit(unsigned RUID) const {
return RUID < NumNativeRegUnits;
}
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp b/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp
index fa8b842c97f9..acfb143120af 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -260,7 +260,7 @@ Record *CodeGenTarget::getAsmParser() const {
return LI[AsmParserNum];
}
-/// getAsmParserVariant - Return the AssmblyParserVariant definition for
+/// getAsmParserVariant - Return the AssemblyParserVariant definition for
/// this target.
///
Record *CodeGenTarget::getAsmParserVariant(unsigned i) const {
@@ -272,7 +272,7 @@ Record *CodeGenTarget::getAsmParserVariant(unsigned i) const {
return LI[i];
}
-/// getAsmParserVariantCount - Return the AssmblyParserVariant definition
+/// getAsmParserVariantCount - Return the AssemblyParserVariant definition
/// available for this target.
///
unsigned CodeGenTarget::getAsmParserVariantCount() const {
@@ -574,17 +574,14 @@ ComplexPattern::ComplexPattern(Record *R) {
// CodeGenIntrinsic Implementation
//===----------------------------------------------------------------------===//
-CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC,
- bool TargetOnly) {
+CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC) {
std::vector<Record*> Defs = RC.getAllDerivedDefinitions("Intrinsic");
Intrinsics.reserve(Defs.size());
- for (unsigned I = 0, e = Defs.size(); I != e; ++I) {
- bool isTarget = Defs[I]->getValueAsBit("isTarget");
- if (isTarget == TargetOnly)
- Intrinsics.push_back(CodeGenIntrinsic(Defs[I]));
- }
+ for (unsigned I = 0, e = Defs.size(); I != e; ++I)
+ Intrinsics.push_back(CodeGenIntrinsic(Defs[I]));
+
llvm::sort(Intrinsics,
[](const CodeGenIntrinsic &LHS, const CodeGenIntrinsic &RHS) {
return std::tie(LHS.TargetPrefix, LHS.Name) <
@@ -820,3 +817,9 @@ bool CodeGenIntrinsic::isParamAPointer(unsigned ParamIdx) const {
MVT ParamType = MVT(IS.ParamVTs[ParamIdx]);
return ParamType == MVT::iPTR || ParamType == MVT::iPTRAny;
}
+
+bool CodeGenIntrinsic::isParamImmArg(unsigned ParamIdx) const {
+ std::pair<unsigned, ArgAttribute> Val = {ParamIdx, ImmArg};
+ return std::binary_search(ArgumentAttributes.begin(),
+ ArgumentAttributes.end(), Val);
+}
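isParamImmArg above relies on std::binary_search, so ArgumentAttributes must stay sorted; std::pair's default ordering (index first, then attribute) makes a sorted vector of (param-index, attribute) pairs directly searchable. A minimal sketch with a stand-in attribute enum:

    #include <algorithm>
    #include <utility>
    #include <vector>

    enum ArgAttribute { NoCapture, ImmArg }; // stand-in; the real enum is larger

    using ArgAttrList = std::vector<std::pair<unsigned, ArgAttribute>>;

    // Precondition: Attrs is sorted, matching how CodeGenIntrinsic
    // accumulates ArgumentAttributes.
    bool isParamImmArg(const ArgAttrList &Attrs, unsigned ParamIdx) {
      std::pair<unsigned, ArgAttribute> Key{ParamIdx, ImmArg};
      return std::binary_search(Attrs.begin(), Attrs.end(), Key);
    }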
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.h b/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.h
index d52ffac4ce6c..6c89f34c50ec 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/CodeGenTarget.h
@@ -86,12 +86,12 @@ public:
///
Record *getAsmParser() const;
- /// getAsmParserVariant - Return the AssmblyParserVariant definition for
+ /// getAsmParserVariant - Return the AssemblyParserVariant definition for
/// this target.
///
Record *getAsmParserVariant(unsigned i) const;
- /// getAsmParserVariantCount - Return the AssmblyParserVariant definition
+ /// getAsmParserVariantCount - Return the AssemblyParserVariant definition
/// available for this target.
///
unsigned getAsmParserVariantCount() const;
diff --git a/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherGen.cpp
index 49c09c7d195e..6a86868a9bcd 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherGen.cpp
@@ -1047,7 +1047,6 @@ void MatcherGen::EmitResultCode() {
}
}
- assert(Ops.size() >= NumSrcResults && "Didn't provide enough results");
SmallVector<unsigned, 8> Results(Ops);
// Apply result permutation.
diff --git a/contrib/llvm-project/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
index ccb4ef1b9678..018bda1b6090 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -16,15 +16,16 @@
#define DEBUG_TYPE "dfa-emitter"
+#include "CodeGenSchedule.h"
#include "CodeGenTarget.h"
#include "DFAEmitter.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
#include <cassert>
#include <cstdint>
#include <map>
@@ -35,216 +36,101 @@
using namespace llvm;
-// --------------------------------------------------------------------
-// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
-
-// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput.
-// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer.
-//
-// e.g. terms x resource bit combinations that fit in uint32_t:
-// 4 terms x 8 bits = 32 bits
-// 3 terms x 10 bits = 30 bits
-// 2 terms x 16 bits = 32 bits
-//
-// e.g. terms x resource bit combinations that fit in uint64_t:
-// 8 terms x 8 bits = 64 bits
-// 7 terms x 9 bits = 63 bits
-// 6 terms x 10 bits = 60 bits
-// 5 terms x 12 bits = 60 bits
-// 4 terms x 16 bits = 64 bits <--- current
-// 3 terms x 21 bits = 63 bits
-// 2 terms x 32 bits = 64 bits
-//
-#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms.
-#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term.
-
-typedef uint64_t DFAInput;
-typedef int64_t DFAStateInput;
-#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable.
+// We use a uint64_t to represent a resource bitmask.
+#define DFA_MAX_RESOURCES 64
namespace {
+using ResourceVector = SmallVector<uint64_t, 4>;
- DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
- return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
- }
-
- /// Return the DFAInput for an instruction class input vector.
- /// This function is used in both DFAPacketizer.cpp and in
- /// DFAPacketizerEmitter.cpp.
- DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
- DFAInput InsnInput = 0;
- assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
- "Exceeded maximum number of DFA terms");
- for (auto U : InsnClass)
- InsnInput = addDFAFuncUnits(InsnInput, U);
- return InsnInput;
- }
-
-} // end anonymous namespace
+struct ScheduleClass {
+ /// The parent itinerary index (processor model ID).
+ unsigned ItineraryID;
-// --------------------------------------------------------------------
+ /// Index within this itinerary of the schedule class.
+ unsigned Idx;
-#ifndef NDEBUG
-// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
-//
-// dbgsInsnClass - When debugging, print instruction class stages.
-//
-void dbgsInsnClass(const std::vector<unsigned> &InsnClass);
-//
-// dbgsStateInfo - When debugging, print the set of state info.
-//
-void dbgsStateInfo(const std::set<unsigned> &stateInfo);
-//
-// dbgsIndent - When debugging, indent by the specified amount.
-//
-void dbgsIndent(unsigned indent);
-#endif
+ /// The index of Resources within the uniqued set of required resources.
+ unsigned ResourcesIdx;
-//
-// class DFAPacketizerEmitter: class that generates and prints out the DFA
-// for resource tracking.
-//
-namespace {
+ /// Conjunctive list of resource requirements:
+ /// {a|b, b|c} => (a OR b) AND (b OR c).
+ /// Resources are unique across all itineraries.
+ ResourceVector Resources;
+};
+// Generates and prints out the DFA for resource tracking.
class DFAPacketizerEmitter {
private:
std::string TargetName;
- //
- // allInsnClasses is the set of all possible resources consumed by an
- // InstrStage.
- //
- std::vector<std::vector<unsigned>> allInsnClasses;
RecordKeeper &Records;
+ UniqueVector<ResourceVector> UniqueResources;
+ std::vector<ScheduleClass> ScheduleClasses;
+ std::map<std::string, uint64_t> FUNameToBitsMap;
+ std::map<unsigned, uint64_t> ComboBitToBitsMap;
+
public:
DFAPacketizerEmitter(RecordKeeper &R);
- //
- // collectAllFuncUnits - Construct a map of function unit names to bits.
- //
- int collectAllFuncUnits(std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- int &maxResources,
- raw_ostream &OS);
-
- //
- // collectAllComboFuncs - Construct a map from a combo function unit bit to
- // the bits of all included functional units.
- //
- int collectAllComboFuncs(std::vector<Record*> &ComboFuncList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- std::map<unsigned, unsigned> &ComboBitToBitsMap,
- raw_ostream &OS);
-
- //
- // collectOneInsnClass - Populate allInsnClasses with one instruction class.
- //
- int collectOneInsnClass(const std::string &ProcName,
- std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- Record *ItinData,
- raw_ostream &OS);
-
- //
- // collectAllInsnClasses - Populate allInsnClasses which is a set of units
- // used in each stage.
- //
- int collectAllInsnClasses(const std::string &ProcName,
- std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- std::vector<Record*> &ItinDataList,
- int &maxStages,
- raw_ostream &OS);
+ // Construct a map of function unit names to bits.
+ int collectAllFuncUnits(
+ ArrayRef<const CodeGenProcModel *> ProcModels);
+
+ // Construct a map from a combo function unit bit to the bits of all included
+ // functional units.
+ int collectAllComboFuncs(ArrayRef<Record *> ComboFuncList);
+
+ ResourceVector getResourcesForItinerary(Record *Itinerary);
+ void createScheduleClasses(unsigned ItineraryIdx, const RecVec &Itineraries);
// Emit code for a subset of itineraries.
void emitForItineraries(raw_ostream &OS,
- std::vector<Record *> &ProcItinList,
+ std::vector<const CodeGenProcModel *> &ProcItinList,
std::string DFAName);
void run(raw_ostream &OS);
};
} // end anonymous namespace
-#ifndef NDEBUG
-// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
-//
-// dbgsInsnClass - When debugging, print instruction class stages.
-//
-void dbgsInsnClass(const std::vector<unsigned> &InsnClass) {
- LLVM_DEBUG(dbgs() << "InsnClass: ");
- for (unsigned i = 0; i < InsnClass.size(); ++i) {
- if (i > 0) {
- LLVM_DEBUG(dbgs() << ", ");
- }
- LLVM_DEBUG(dbgs() << "0x" << Twine::utohexstr(InsnClass[i]));
- }
- DFAInput InsnInput = getDFAInsnInput(InsnClass);
- LLVM_DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")");
-}
+DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R)
+ : TargetName(CodeGenTarget(R).getName()), Records(R) {}
-//
-// dbgsIndent - When debugging, indent by the specified amount.
-//
-void dbgsIndent(unsigned indent) {
- for (unsigned i = 0; i < indent; ++i) {
- LLVM_DEBUG(dbgs() << " ");
- }
-}
-#endif // NDEBUG
-
-DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
- TargetName(CodeGenTarget(R).getName()), Records(R) {}
-
-//
-// collectAllFuncUnits - Construct a map of function unit names to bits.
-//
int DFAPacketizerEmitter::collectAllFuncUnits(
- std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- int &maxFUs,
- raw_ostream &OS) {
+ ArrayRef<const CodeGenProcModel *> ProcModels) {
LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
"----------------------\n");
LLVM_DEBUG(dbgs() << "collectAllFuncUnits");
- LLVM_DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n");
+ LLVM_DEBUG(dbgs() << " (" << ProcModels.size() << " itineraries)\n");
+
+ std::set<Record *> ProcItinList;
+ for (const CodeGenProcModel *Model : ProcModels)
+ ProcItinList.insert(Model->ItinsDef);
int totalFUs = 0;
// Parse functional units for all the itineraries.
- for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
- Record *Proc = ProcItinList[i];
- std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
+ for (Record *Proc : ProcItinList) {
+ std::vector<Record *> FUs = Proc->getValueAsListOfDefs("FU");
- LLVM_DEBUG(dbgs() << " FU:" << i << " (" << FUs.size() << " FUs) "
- << Proc->getName());
+ LLVM_DEBUG(dbgs() << " FU:"
+ << " (" << FUs.size() << " FUs) " << Proc->getName());
// Convert macros to bits for each stage.
unsigned numFUs = FUs.size();
for (unsigned j = 0; j < numFUs; ++j) {
- assert ((j < DFA_MAX_RESOURCES) &&
- "Exceeded maximum number of representable resources");
- unsigned FuncResources = (unsigned) (1U << j);
+ assert((j < DFA_MAX_RESOURCES) &&
+ "Exceeded maximum number of representable resources");
+ uint64_t FuncResources = 1ULL << j;
FUNameToBitsMap[FUs[j]->getName()] = FuncResources;
LLVM_DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x"
<< Twine::utohexstr(FuncResources));
}
- if (((int) numFUs) > maxFUs) {
- maxFUs = numFUs;
- }
totalFUs += numFUs;
LLVM_DEBUG(dbgs() << "\n");
}
return totalFUs;
}
-//
-// collectAllComboFuncs - Construct a map from a combo function unit bit to
-// the bits of all included functional units.
-//
-int DFAPacketizerEmitter::collectAllComboFuncs(
- std::vector<Record*> &ComboFuncList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- std::map<unsigned, unsigned> &ComboBitToBitsMap,
- raw_ostream &OS) {
+int DFAPacketizerEmitter::collectAllComboFuncs(ArrayRef<Record *> ComboFuncList) {
LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
"----------------------\n");
LLVM_DEBUG(dbgs() << "collectAllComboFuncs");
@@ -253,27 +139,27 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
int numCombos = 0;
for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) {
Record *Func = ComboFuncList[i];
- std::vector<Record*> FUs = Func->getValueAsListOfDefs("CFD");
+ std::vector<Record *> FUs = Func->getValueAsListOfDefs("CFD");
LLVM_DEBUG(dbgs() << " CFD:" << i << " (" << FUs.size() << " combo FUs) "
<< Func->getName() << "\n");
// Convert macros to bits for each stage.
for (unsigned j = 0, N = FUs.size(); j < N; ++j) {
- assert ((j < DFA_MAX_RESOURCES) &&
- "Exceeded maximum number of DFA resources");
+ assert((j < DFA_MAX_RESOURCES) &&
+ "Exceeded maximum number of DFA resources");
Record *FuncData = FUs[j];
Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc");
- const std::vector<Record*> &FuncList =
- FuncData->getValueAsListOfDefs("FuncList");
+ const std::vector<Record *> &FuncList =
+ FuncData->getValueAsListOfDefs("FuncList");
const std::string &ComboFuncName = ComboFunc->getName();
- unsigned ComboBit = FUNameToBitsMap[ComboFuncName];
- unsigned ComboResources = ComboBit;
+ uint64_t ComboBit = FUNameToBitsMap[ComboFuncName];
+ uint64_t ComboResources = ComboBit;
LLVM_DEBUG(dbgs() << " combo: " << ComboFuncName << ":0x"
<< Twine::utohexstr(ComboResources) << "\n");
for (unsigned k = 0, M = FuncList.size(); k < M; ++k) {
std::string FuncName = FuncList[k]->getName();
- unsigned FuncResources = FUNameToBitsMap[FuncName];
+ uint64_t FuncResources = FUNameToBitsMap[FuncName];
LLVM_DEBUG(dbgs() << " " << FuncName << ":0x"
<< Twine::utohexstr(FuncResources) << "\n");
ComboResources |= FuncResources;
@@ -288,101 +174,33 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
return numCombos;
}
-//
-// collectOneInsnClass - Populate allInsnClasses with one instruction class
-//
-int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
- std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- Record *ItinData,
- raw_ostream &OS) {
- const std::vector<Record*> &StageList =
- ItinData->getValueAsListOfDefs("Stages");
-
- // The number of stages.
- unsigned NStages = StageList.size();
-
- LLVM_DEBUG(dbgs() << " " << ItinData->getValueAsDef("TheClass")->getName()
- << "\n");
-
- std::vector<unsigned> UnitBits;
-
- // Compute the bitwise or of each unit used in this stage.
- for (unsigned i = 0; i < NStages; ++i) {
- const Record *Stage = StageList[i];
-
- // Get unit list.
- const std::vector<Record*> &UnitList =
- Stage->getValueAsListOfDefs("Units");
-
- LLVM_DEBUG(dbgs() << " stage:" << i << " [" << UnitList.size()
- << " units]:");
- unsigned dbglen = 26; // cursor after stage dbgs
-
- // Compute the bitwise or of each unit used in this stage.
- unsigned UnitBitValue = 0;
- for (unsigned j = 0, M = UnitList.size(); j < M; ++j) {
- // Conduct bitwise or.
- std::string UnitName = UnitList[j]->getName();
- LLVM_DEBUG(dbgs() << " " << j << ":" << UnitName);
- dbglen += 3 + UnitName.length();
- assert(FUNameToBitsMap.count(UnitName));
- UnitBitValue |= FUNameToBitsMap[UnitName];
- }
-
- if (UnitBitValue != 0)
- UnitBits.push_back(UnitBitValue);
-
- while (dbglen <= 64) { // line up bits dbgs
- dbglen += 8;
- LLVM_DEBUG(dbgs() << "\t");
+ResourceVector
+DFAPacketizerEmitter::getResourcesForItinerary(Record *Itinerary) {
+ ResourceVector Resources;
+ assert(Itinerary);
+ for (Record *StageDef : Itinerary->getValueAsListOfDefs("Stages")) {
+ uint64_t StageResources = 0;
+ for (Record *Unit : StageDef->getValueAsListOfDefs("Units")) {
+ StageResources |= FUNameToBitsMap[Unit->getName()];
}
- LLVM_DEBUG(dbgs() << " (bits: 0x" << Twine::utohexstr(UnitBitValue)
- << ")\n");
+ if (StageResources != 0)
+ Resources.push_back(StageResources);
}
-
- if (!UnitBits.empty())
- allInsnClasses.push_back(UnitBits);
-
- LLVM_DEBUG({
- dbgs() << " ";
- dbgsInsnClass(UnitBits);
- dbgs() << "\n";
- });
-
- return NStages;
+ return Resources;
}
-//
-// collectAllInsnClasses - Populate allInsnClasses which is a set of units
-// used in each stage.
-//
-int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName,
- std::vector<Record*> &ProcItinList,
- std::map<std::string, unsigned> &FUNameToBitsMap,
- std::vector<Record*> &ItinDataList,
- int &maxStages,
- raw_ostream &OS) {
- // Collect all instruction classes.
- unsigned M = ItinDataList.size();
-
- int numInsnClasses = 0;
- LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
- "----------------------\n"
- << "collectAllInsnClasses " << ProcName << " (" << M
- << " classes)\n");
-
- // Collect stages for each instruction class for all itinerary data
- for (unsigned j = 0; j < M; j++) {
- Record *ItinData = ItinDataList[j];
- int NStages = collectOneInsnClass(ProcName, ProcItinList,
- FUNameToBitsMap, ItinData, OS);
- if (NStages > maxStages) {
- maxStages = NStages;
+void DFAPacketizerEmitter::createScheduleClasses(unsigned ItineraryIdx,
+ const RecVec &Itineraries) {
+ unsigned Idx = 0;
+ for (Record *Itinerary : Itineraries) {
+ if (!Itinerary) {
+ ScheduleClasses.push_back({ItineraryIdx, Idx++, 0, ResourceVector{}});
+ continue;
}
- numInsnClasses++;
+ ResourceVector Resources = getResourcesForItinerary(Itinerary);
+ ScheduleClasses.push_back(
+ {ItineraryIdx, Idx++, UniqueResources.insert(Resources), Resources});
}
- return numInsnClasses;
}
//
@@ -393,19 +211,17 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
<< "#include \"llvm/CodeGen/DFAPacketizer.h\"\n";
OS << "namespace llvm {\n";
- OS << "\n// Input format:\n";
- OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS
- << "\t// maximum AND'ed resource terms\n";
- OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES
- << "\t// maximum resource bits in one term\n";
-
- // Collect processor iteraries.
- std::vector<Record*> ProcItinList =
- Records.getAllDerivedDefinitions("ProcessorItineraries");
+ CodeGenTarget CGT(Records);
+ CodeGenSchedModels CGS(Records, CGT);
- std::unordered_map<std::string, std::vector<Record*>> ItinsByNamespace;
- for (Record *R : ProcItinList)
- ItinsByNamespace[R->getValueAsString("PacketizerNamespace")].push_back(R);
+ std::unordered_map<std::string, std::vector<const CodeGenProcModel *>>
+ ItinsByNamespace;
+ for (const CodeGenProcModel &ProcModel : CGS.procModels()) {
+ if (ProcModel.hasItineraries()) {
+ auto NS = ProcModel.ItinsDef->getValueAsString("PacketizerNamespace");
+ ItinsByNamespace[NS].push_back(&ProcModel);
+ }
+ }
for (auto &KV : ItinsByNamespace)
emitForItineraries(OS, KV.second, KV.first);
@@ -413,80 +229,68 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
}
void DFAPacketizerEmitter::emitForItineraries(
- raw_ostream &OS, std::vector<Record *> &ProcItinList,
+ raw_ostream &OS, std::vector<const CodeGenProcModel *> &ProcModels,
std::string DFAName) {
- //
- // Collect the Functional units.
- //
- std::map<std::string, unsigned> FUNameToBitsMap;
- int maxResources = 0;
- collectAllFuncUnits(ProcItinList,
- FUNameToBitsMap, maxResources, OS);
-
- //
- // Collect the Combo Functional units.
- //
- std::map<unsigned, unsigned> ComboBitToBitsMap;
- std::vector<Record*> ComboFuncList =
- Records.getAllDerivedDefinitions("ComboFuncUnits");
- collectAllComboFuncs(ComboFuncList, FUNameToBitsMap, ComboBitToBitsMap, OS);
-
- //
- // Collect the itineraries.
- //
- int maxStages = 0;
- int numInsnClasses = 0;
- for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
- Record *Proc = ProcItinList[i];
-
- // Get processor itinerary name.
- const std::string &ProcName = Proc->getName();
-
- // Skip default.
- if (ProcName == "NoItineraries")
- continue;
-
- // Sanity check for at least one instruction itinerary class.
- unsigned NItinClasses =
- Records.getAllDerivedDefinitions("InstrItinClass").size();
- if (NItinClasses == 0)
- return;
+ OS << "} // end namespace llvm\n\n";
+ OS << "namespace {\n";
+ collectAllFuncUnits(ProcModels);
+ collectAllComboFuncs(Records.getAllDerivedDefinitions("ComboFuncUnits"));
- // Get itinerary data list.
- std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID");
+ // Collect the itineraries.
+ DenseMap<const CodeGenProcModel *, unsigned> ProcModelStartIdx;
+ for (const CodeGenProcModel *Model : ProcModels) {
+ assert(Model->hasItineraries());
+ ProcModelStartIdx[Model] = ScheduleClasses.size();
+ createScheduleClasses(Model->Index, Model->ItinDefList);
+ }
- // Collect all instruction classes
- numInsnClasses += collectAllInsnClasses(ProcName, ProcItinList,
- FUNameToBitsMap, ItinDataList, maxStages, OS);
+ // Output the mapping from ScheduleClass to ResourcesIdx.
+ unsigned Idx = 0;
+ OS << "unsigned " << TargetName << DFAName << "ResourceIndices[] = {";
+ for (const ScheduleClass &SC : ScheduleClasses) {
+ if (Idx++ % 32 == 0)
+ OS << "\n ";
+ OS << SC.ResourcesIdx << ", ";
+ }
+ OS << "\n};\n\n";
+
+ // And the mapping from Itinerary index into the previous table.
+ OS << "unsigned " << TargetName << DFAName
+ << "ProcResourceIndexStart[] = {\n";
+ OS << " 0, // NoSchedModel\n";
+ for (const CodeGenProcModel *Model : ProcModels) {
+ OS << " " << ProcModelStartIdx[Model] << ", // " << Model->ModelName
+ << "\n";
}
+ OS << ScheduleClasses.size() << "\n};\n\n";
// The type of a state in the nondeterministic automaton we're defining.
- using NfaStateTy = unsigned;
+ using NfaStateTy = uint64_t;
// Given a resource state, return all resource states by applying
// InsnClass.
- auto applyInsnClass = [&](ArrayRef<unsigned> InsnClass,
- NfaStateTy State) -> std::deque<unsigned> {
- std::deque<unsigned> V(1, State);
+ auto applyInsnClass = [&](const ResourceVector &InsnClass,
+ NfaStateTy State) -> std::deque<NfaStateTy> {
+ std::deque<NfaStateTy> V(1, State);
// Apply every stage in the class individually.
- for (unsigned Stage : InsnClass) {
+ for (NfaStateTy Stage : InsnClass) {
// Apply this stage to every existing member of V in turn.
size_t Sz = V.size();
for (unsigned I = 0; I < Sz; ++I) {
- unsigned S = V.front();
+ NfaStateTy S = V.front();
V.pop_front();
// For this stage, state combination, try all possible resources.
for (unsigned J = 0; J < DFA_MAX_RESOURCES; ++J) {
- unsigned ResourceMask = 1U << J;
+ NfaStateTy ResourceMask = 1ULL << J;
if ((ResourceMask & Stage) == 0)
// This resource isn't required by this stage.
continue;
- unsigned Combo = ComboBitToBitsMap[ResourceMask];
+ NfaStateTy Combo = ComboBitToBitsMap[ResourceMask];
if (Combo && ((~S & Combo) != Combo))
// This combo unit's bits are not available.
continue;
- unsigned ResultingResourceState = S | ResourceMask | Combo;
+ NfaStateTy ResultingResourceState = S | ResourceMask | Combo;
if (ResultingResourceState == S)
continue;
V.push_back(ResultingResourceState);
@@ -499,9 +303,9 @@ void DFAPacketizerEmitter::emitForItineraries(
// Given a resource state, return a quick (conservative) guess as to whether
// InsnClass can be applied. This is a filter for the more heavyweight
// applyInsnClass.
- auto canApplyInsnClass = [](ArrayRef<unsigned> InsnClass,
+ auto canApplyInsnClass = [](const ResourceVector &InsnClass,
NfaStateTy State) -> bool {
- for (unsigned Resources : InsnClass) {
+ for (NfaStateTy Resources : InsnClass) {
if ((State | Resources) == State)
return false;
}
@@ -515,20 +319,18 @@ void DFAPacketizerEmitter::emitForItineraries(
while (!Worklist.empty()) {
NfaStateTy State = Worklist.front();
Worklist.pop_front();
- for (unsigned i = 0; i < allInsnClasses.size(); i++) {
- const std::vector<unsigned> &InsnClass = allInsnClasses[i];
- if (!canApplyInsnClass(InsnClass, State))
+ for (const ResourceVector &Resources : UniqueResources) {
+ if (!canApplyInsnClass(Resources, State))
continue;
- for (unsigned NewState : applyInsnClass(InsnClass, State)) {
+ unsigned ResourcesID = UniqueResources.idFor(Resources);
+ for (uint64_t NewState : applyInsnClass(Resources, State)) {
if (SeenStates.emplace(NewState).second)
Worklist.emplace_back(NewState);
- Emitter.addTransition(State, NewState, getDFAInsnInput(InsnClass));
+ Emitter.addTransition(State, NewState, ResourcesID);
}
}
}
- OS << "} // end namespace llvm\n\n";
- OS << "namespace {\n";
std::string TargetAndDFAName = TargetName + DFAName;
Emitter.emit(TargetAndDFAName, OS);
OS << "} // end anonymous namespace\n\n";
@@ -541,7 +343,13 @@ void DFAPacketizerEmitter::emitForItineraries(
<< " static Automaton<uint64_t> A(ArrayRef<" << TargetAndDFAName
<< "Transition>(" << TargetAndDFAName << "Transitions), "
<< TargetAndDFAName << "TransitionInfo);\n"
- << " return new DFAPacketizer(IID, A);\n"
+ << " unsigned ProcResIdxStart = " << TargetAndDFAName
+ << "ProcResourceIndexStart[IID->SchedModel.ProcID];\n"
+ << " unsigned ProcResIdxNum = " << TargetAndDFAName
+ << "ProcResourceIndexStart[IID->SchedModel.ProcID + 1] - "
+ "ProcResIdxStart;\n"
+ << " return new DFAPacketizer(IID, A, {&" << TargetAndDFAName
+ << "ResourceIndices[ProcResIdxStart], ProcResIdxNum});\n"
<< "\n}\n\n";
}
diff --git a/contrib/llvm-project/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
index ac69b431607d..21ec5897ea50 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -1058,10 +1058,9 @@ unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
// 1: Water (the bit value does not affect decoding)
// 2: Island (well-known bit value needed for decoding)
int State = 0;
- int64_t Val = -1;
for (unsigned i = 0; i < BitWidth; ++i) {
- Val = Value(Insn[i]);
+ int64_t Val = Value(Insn[i]);
bool Filtered = PositionFiltered(i);
switch (State) {
default: llvm_unreachable("Unreachable code!");
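For readers unfamiliar with the island terminology, a small illustrative example (hypothetical encoding): in an 8-bit pattern where bits [7:6] must be 0b01, bits [3:2] must be 0b10, and the remaining positions are filtered or don't-care, getIslands reports two islands separated by water:

    bit index:  7 6 5 4 3 2 1 0
    bit value:  0 1 ? ? 1 0 ? ?     (? = water, no effect on decoding)

    islands: bits [7:6] = 0b01 and bits [3:2] = 0b10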
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GICombinerEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/GICombinerEmitter.cpp
index 5dc4d6b07740..34eb4edac8de 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/GICombinerEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/GICombinerEmitter.cpp
@@ -11,8 +11,11 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/Timer.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/StringMatcher.h"
@@ -20,6 +23,9 @@
#include "CodeGenTarget.h"
#include "GlobalISel/CodeExpander.h"
#include "GlobalISel/CodeExpansions.h"
+#include "GlobalISel/GIMatchDag.h"
+#include "GlobalISel/GIMatchTree.h"
+#include <cinttypes> // for the PRIu64 used by the anonymous-name formatters
+#include <cstdint>
using namespace llvm;
@@ -38,10 +44,63 @@ static cl::opt<bool> ShowExpansions(
"gicombiner-show-expansions",
cl::desc("Use C++ comments to indicate occurence of code expansion"),
cl::cat(GICombinerEmitterCat));
+static cl::opt<bool> StopAfterParse(
+ "gicombiner-stop-after-parse",
+ cl::desc("Stop processing after parsing rules and dump state"),
+ cl::cat(GICombinerEmitterCat));
+static cl::opt<bool> StopAfterBuild(
+ "gicombiner-stop-after-build",
+ cl::desc("Stop processing after building the match tree"),
+ cl::cat(GICombinerEmitterCat));
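Both options are debugging aids rather than part of a normal build. Assuming the usual llvm-tblgen driver registration for this backend, an invocation would look roughly like the following (backend option name shown for illustration; check TableGen.cpp for the exact spelling):

    llvm-tblgen -gen-global-isel-combiner -gicombiner-stop-after-parse \
        -I llvm/include MyTarget.td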
namespace {
using RuleID = uint64_t;
+// We're going to be referencing the same small strings quite a lot for operand
+// names and the like. Make their lifetime management simple with a global
+// string table.
+StringSet<> StrTab;
+
+StringRef insertStrTab(StringRef S) {
+ if (S.empty())
+ return S;
+ return StrTab.insert(S).first->first();
+}
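The practical guarantee here is that equal strings come back referring to one stable buffer owned by the table, so the returned StringRefs outlive any temporary std::string they were built from. A minimal sketch of that behaviour (hypothetical strings, reusing the two helpers above):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSet.h"
    #include <cassert>
    #include <string>
    using namespace llvm;

    static StringSet<> StrTab;

    static StringRef insertStrTab(StringRef S) {
      if (S.empty())
        return S;
      return StrTab.insert(S).first->first();
    }

    int main() {
      std::string A = "operand"; // two distinct buffers, equal contents
      std::string B = "operand";
      StringRef IA = insertStrTab(A);
      StringRef IB = insertStrTab(B);
      assert(IA.data() == IB.data() && "interned copies share one buffer");
      return 0;
    }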
+
+class format_partition_name {
+ const GIMatchTree &Tree;
+ unsigned Idx;
+
+public:
+ format_partition_name(const GIMatchTree &Tree, unsigned Idx)
+ : Tree(Tree), Idx(Idx) {}
+ void print(raw_ostream &OS) const {
+ Tree.getPartitioner()->emitPartitionName(OS, Idx);
+ }
+};
+raw_ostream &operator<<(raw_ostream &OS, const format_partition_name &Fmt) {
+ Fmt.print(OS);
+ return OS;
+}
+
+/// Declares data that is passed from the match stage to the apply stage.
+class MatchDataInfo {
+  /// The symbol used in the tablegen patterns.
+  StringRef PatternSymbol;
+  /// The data type for the variable.
+ StringRef Type;
+ /// The name of the variable as declared in the generated matcher.
+ std::string VariableName;
+
+public:
+ MatchDataInfo(StringRef PatternSymbol, StringRef Type, StringRef VariableName)
+ : PatternSymbol(PatternSymbol), Type(Type), VariableName(VariableName) {}
+
+  StringRef getPatternSymbol() const { return PatternSymbol; }
+  StringRef getType() const { return Type; }
+  StringRef getVariableName() const { return VariableName; }
+};
+
class RootInfo {
StringRef PatternSymbol;
@@ -52,12 +111,32 @@ public:
};
class CombineRule {
+public:
+ using const_matchdata_iterator = std::vector<MatchDataInfo>::const_iterator;
+
+ struct VarInfo {
+ const GIMatchDagInstr *N;
+ const GIMatchDagOperand *Op;
+ const DagInit *Matcher;
+
+ public:
+ VarInfo(const GIMatchDagInstr *N, const GIMatchDagOperand *Op,
+ const DagInit *Matcher)
+ : N(N), Op(Op), Matcher(Matcher) {}
+ };
+
protected:
  /// A unique ID for this rule.
  /// IDs are used for debugging and run-time disabling of rules, among other
  /// things.
RuleID ID;
+ /// A unique ID that can be used for anonymous objects belonging to this rule.
+ /// Used to create unique names in makeNameForAnon*() without making tests
+ /// overly fragile.
+ unsigned UID = 0;
+
/// The record defining this rule.
const Record &TheDef;
@@ -67,27 +146,135 @@ protected:
/// from the bottom of the function to the top.
std::vector<RootInfo> Roots;
+ GIMatchDag MatchDag;
+
/// A block of arbitrary C++ to finish testing the match.
/// FIXME: This is a temporary measure until we have actual pattern matching
const CodeInit *MatchingFixupCode = nullptr;
+
+ /// The MatchData defined by the match stage and required by the apply stage.
+ /// This allows the plumbing of arbitrary data from C++ predicates between the
+ /// stages.
+ ///
+ /// For example, suppose you have:
+ /// %A = <some-constant-expr>
+ /// %0 = G_ADD %1, %A
+ /// you could define a GIMatchPredicate that walks %A, constant folds as much
+ /// as possible and returns an APInt containing the discovered constant. You
+ /// could then declare:
+ /// def apint : GIDefMatchData<"APInt">;
+ /// add it to the rule with:
+ /// (defs root:$root, apint:$constant)
+ /// evaluate it in the pattern with a C++ function that takes a
+ /// MachineOperand& and an APInt& with:
+ /// (match [{MIR %root = G_ADD %0, %A }],
+ /// (constantfold operand:$A, apint:$constant))
+ /// and finally use it in the apply stage with:
+ /// (apply (create_operand
+ /// [{ MachineOperand::CreateImm(${constant}.getZExtValue());
+  ///                  }], apint:$constant),
+ /// [{MIR %root = FOO %0, %constant }])
+ std::vector<MatchDataInfo> MatchDataDecls;
+
+ void declareMatchData(StringRef PatternSymbol, StringRef Type,
+ StringRef VarName);
+
+ bool parseInstructionMatcher(const CodeGenTarget &Target, StringInit *ArgName,
+ const Init &Arg,
+ StringMap<std::vector<VarInfo>> &NamedEdgeDefs,
+ StringMap<std::vector<VarInfo>> &NamedEdgeUses);
+ bool parseWipMatchOpcodeMatcher(const CodeGenTarget &Target,
+ StringInit *ArgName, const Init &Arg);
+
public:
- CombineRule(const CodeGenTarget &Target, RuleID ID, const Record &R)
- : ID(ID), TheDef(R) {}
+ CombineRule(const CodeGenTarget &Target, GIMatchDagContext &Ctx, RuleID ID,
+ const Record &R)
+ : ID(ID), TheDef(R), MatchDag(Ctx) {}
+ CombineRule(const CombineRule &) = delete;
+
bool parseDefs();
bool parseMatcher(const CodeGenTarget &Target);
RuleID getID() const { return ID; }
+ unsigned allocUID() { return UID++; }
StringRef getName() const { return TheDef.getName(); }
const Record &getDef() const { return TheDef; }
const CodeInit *getMatchingFixupCode() const { return MatchingFixupCode; }
size_t getNumRoots() const { return Roots.size(); }
+ GIMatchDag &getMatchDag() { return MatchDag; }
+ const GIMatchDag &getMatchDag() const { return MatchDag; }
+
using const_root_iterator = std::vector<RootInfo>::const_iterator;
const_root_iterator roots_begin() const { return Roots.begin(); }
const_root_iterator roots_end() const { return Roots.end(); }
iterator_range<const_root_iterator> roots() const {
return llvm::make_range(Roots.begin(), Roots.end());
}
+
+ iterator_range<const_matchdata_iterator> matchdata_decls() const {
+ return make_range(MatchDataDecls.begin(), MatchDataDecls.end());
+ }
+
+  /// Export expansions for this rule.
+ void declareExpansions(CodeExpansions &Expansions) const {
+ for (const auto &I : matchdata_decls())
+ Expansions.declare(I.getPatternSymbol(), I.getVariableName());
+ }
+
+ /// The matcher will begin from the roots and will perform the match by
+ /// traversing the edges to cover the whole DAG. This function reverses DAG
+ /// edges such that everything is reachable from a root. This is part of the
+ /// preparation work for flattening the DAG into a tree.
+ void reorientToRoots() {
+ SmallSet<const GIMatchDagInstr *, 5> Roots;
+ SmallSet<const GIMatchDagInstr *, 5> Visited;
+ SmallSet<GIMatchDagEdge *, 20> EdgesRemaining;
+
+ for (auto &I : MatchDag.roots()) {
+ Roots.insert(I);
+ Visited.insert(I);
+ }
+ for (auto &I : MatchDag.edges())
+ EdgesRemaining.insert(I);
+
+ bool Progressed = false;
+ SmallSet<GIMatchDagEdge *, 20> EdgesToRemove;
+ while (!EdgesRemaining.empty()) {
+ for (auto EI = EdgesRemaining.begin(), EE = EdgesRemaining.end();
+ EI != EE; ++EI) {
+ if (Visited.count((*EI)->getFromMI())) {
+ if (Roots.count((*EI)->getToMI()))
+ PrintError(TheDef.getLoc(), "One or more roots are unnecessary");
+ Visited.insert((*EI)->getToMI());
+ EdgesToRemove.insert(*EI);
+ Progressed = true;
+ }
+ }
+ for (GIMatchDagEdge *ToRemove : EdgesToRemove)
+ EdgesRemaining.erase(ToRemove);
+ EdgesToRemove.clear();
+
+      for (auto EI = EdgesRemaining.begin(), EE = EdgesRemaining.end();
+           EI != EE; ++EI) {
+        if (Visited.count((*EI)->getToMI())) {
+          (*EI)->reverse();
+          Visited.insert((*EI)->getToMI());
+          EdgesToRemove.insert(*EI);
+          Progressed = true;
+        }
+      }
+      // Erase outside the loop above so its iterators remain valid.
+      for (GIMatchDagEdge *ToRemove : EdgesToRemove)
+        EdgesRemaining.erase(ToRemove);
+      EdgesToRemove.clear();
+
+ if (!Progressed) {
+ LLVM_DEBUG(dbgs() << "No progress\n");
+ return;
+ }
+ Progressed = false;
+ }
+ }
};
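A short worked trace of reorientToRoots (hypothetical rule): take a root R and nodes A and B, with the parser having recorded the edges A -> R and A -> B. Each pass makes two sweeps: the first keeps any edge leaving the visited set and marks its target visited; the second reverses any edge pointing into the visited set.

    Visited = {R}          edges: A->R, A->B
    pass 1, sweep 2:  A->R points into Visited  =>  reversed to R->A; visit A
    pass 2, sweep 1:  A->B now leaves Visited   =>  kept as-is; visit B
    done: every edge is oriented away from the root R

If a full pass changes nothing (a disconnected DAG), the loop gives up rather than spinning.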
/// A convenience function to check that an Init refers to a specific def. This
@@ -111,6 +298,53 @@ static Record *getDefOfSubClass(const Init &N, StringRef Cls) {
return nullptr;
}
+/// A convenience function to check that an Init refers to a dag whose operator
+/// is a specific def and coerce it to a dag if it is. This is primarily useful
+/// for testing for subclasses of GIMatchKind and similar in DagInits, since
+/// DagInits support any type inside them.
+static const DagInit *getDagWithSpecificOperator(const Init &N,
+ StringRef Name) {
+ if (const DagInit *I = dyn_cast<DagInit>(&N))
+ if (I->getNumArgs() > 0)
+ if (const DefInit *OpI = dyn_cast<DefInit>(I->getOperator()))
+ if (OpI->getDef()->getName() == Name)
+ return I;
+ return nullptr;
+}
+
+/// A convenience function to check that an Init refers to a dag whose operator
+/// is a def that is a subclass of the given class and coerce it to a dag if it
+/// is. This is primarily useful for testing for subclasses of GIMatchKind and
+/// similar in DagInits, since DagInits support any type inside them.
+static const DagInit *getDagWithOperatorOfSubClass(const Init &N,
+ StringRef Cls) {
+ if (const DagInit *I = dyn_cast<DagInit>(&N))
+ if (I->getNumArgs() > 0)
+ if (const DefInit *OpI = dyn_cast<DefInit>(I->getOperator()))
+ if (OpI->getDef()->isSubClassOf(Cls))
+ return I;
+ return nullptr;
+}
+
+StringRef makeNameForAnonInstr(CombineRule &Rule) {
+ return insertStrTab(to_string(
+ format("__anon%" PRIu64 "_%u", Rule.getID(), Rule.allocUID())));
+}
+
+StringRef makeDebugName(CombineRule &Rule, StringRef Name) {
+  return insertStrTab(Name.empty() ? makeNameForAnonInstr(Rule) : Name);
+}
+
+StringRef makeNameForAnonPredicate(CombineRule &Rule) {
+ return insertStrTab(to_string(
+ format("__anonpred%" PRIu64 "_%u", Rule.getID(), Rule.allocUID())));
+}
+
+void CombineRule::declareMatchData(StringRef PatternSymbol, StringRef Type,
+ StringRef VarName) {
+ MatchDataDecls.emplace_back(PatternSymbol, Type, VarName);
+}
+
bool CombineRule::parseDefs() {
NamedRegionTimer T("parseDefs", "Time spent parsing the defs", "Rule Parsing",
"Time spent on rule parsing", TimeRegions);
@@ -128,6 +362,17 @@ bool CombineRule::parseDefs() {
continue;
}
+ // Subclasses of GIDefMatchData should declare that this rule needs to pass
+ // data from the match stage to the apply stage, and ensure that the
+ // generated matcher has a suitable variable for it to do so.
+ if (Record *MatchDataRec =
+ getDefOfSubClass(*Defs->getArg(I), "GIDefMatchData")) {
+ declareMatchData(Defs->getArgNameStr(I),
+ MatchDataRec->getValueAsString("Type"),
+ llvm::to_string(llvm::format("MatchData%" PRIu64, ID)));
+ continue;
+ }
+
// Otherwise emit an appropriate error message.
if (getDefOfSubClass(*Defs->getArg(I), "GIDefKind"))
PrintError(TheDef.getLoc(),
@@ -149,9 +394,104 @@ bool CombineRule::parseDefs() {
return true;
}
+// Parse an (Instruction $a:Arg1, $b:Arg2, ...) matcher. Edges are formed
+// between matching operand names between different matchers.
+bool CombineRule::parseInstructionMatcher(
+ const CodeGenTarget &Target, StringInit *ArgName, const Init &Arg,
+ StringMap<std::vector<VarInfo>> &NamedEdgeDefs,
+ StringMap<std::vector<VarInfo>> &NamedEdgeUses) {
+ if (const DagInit *Matcher =
+ getDagWithOperatorOfSubClass(Arg, "Instruction")) {
+ auto &Instr =
+ Target.getInstruction(Matcher->getOperatorAsDef(TheDef.getLoc()));
+
+ StringRef Name = ArgName ? ArgName->getValue() : "";
+
+ GIMatchDagInstr *N =
+ MatchDag.addInstrNode(makeDebugName(*this, Name), insertStrTab(Name),
+ MatchDag.getContext().makeOperandList(Instr));
+
+ N->setOpcodeAnnotation(&Instr);
+ const auto &P = MatchDag.addPredicateNode<GIMatchDagOpcodePredicate>(
+ makeNameForAnonPredicate(*this), Instr);
+ MatchDag.addPredicateDependency(N, nullptr, P, &P->getOperandInfo()["mi"]);
+ unsigned OpIdx = 0;
+ for (const auto &NameInit : Matcher->getArgNames()) {
+ StringRef Name = insertStrTab(NameInit->getAsUnquotedString());
+ if (Name.empty())
+ continue;
+ N->assignNameToOperand(OpIdx, Name);
+
+      // Record the endpoints of any named edges. We'll add the Cartesian
+ // product of edges later.
+ const auto &InstrOperand = N->getOperandInfo()[OpIdx];
+ if (InstrOperand.isDef()) {
+ NamedEdgeDefs.try_emplace(Name);
+ NamedEdgeDefs[Name].emplace_back(N, &InstrOperand, Matcher);
+ } else {
+ NamedEdgeUses.try_emplace(Name);
+ NamedEdgeUses[Name].emplace_back(N, &InstrOperand, Matcher);
+ }
+
+ if (InstrOperand.isDef()) {
+ if (find_if(Roots, [&](const RootInfo &X) {
+ return X.getPatternSymbol() == Name;
+ }) != Roots.end()) {
+ N->setMatchRoot();
+ }
+ }
+
+ OpIdx++;
+ }
+
+ return true;
+ }
+ return false;
+}
+
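To make the edge bookkeeping concrete, consider an illustrative pair of matchers (not taken from the source):

    (match (G_TRUNC $tmp, $src),
           (G_ZEXT  $dst, $tmp))

The first operand of each instruction is a def, so $tmp is recorded in NamedEdgeDefs for the G_TRUNC node and in NamedEdgeUses for the G_ZEXT node; parseMatcher later joins the two lists into a single use -> def edge named tmp. $src and $dst have no counterpart on the other side and so produce no edges.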
+// Parse the wip_match_opcode placeholder that's temporarily present in lieu of
+// implementing macros or choices between two matchers.
+bool CombineRule::parseWipMatchOpcodeMatcher(const CodeGenTarget &Target,
+ StringInit *ArgName,
+ const Init &Arg) {
+ if (const DagInit *Matcher =
+ getDagWithSpecificOperator(Arg, "wip_match_opcode")) {
+ StringRef Name = ArgName ? ArgName->getValue() : "";
+
+ GIMatchDagInstr *N =
+ MatchDag.addInstrNode(makeDebugName(*this, Name), insertStrTab(Name),
+ MatchDag.getContext().makeEmptyOperandList());
+
+ if (find_if(Roots, [&](const RootInfo &X) {
+ return ArgName && X.getPatternSymbol() == ArgName->getValue();
+ }) != Roots.end()) {
+ N->setMatchRoot();
+ }
+
+ const auto &P = MatchDag.addPredicateNode<GIMatchDagOneOfOpcodesPredicate>(
+ makeNameForAnonPredicate(*this));
+ MatchDag.addPredicateDependency(N, nullptr, P, &P->getOperandInfo()["mi"]);
+ // Each argument is an opcode that will pass this predicate. Add them all to
+    // the predicate implementation.
+ for (const auto &Arg : Matcher->getArgs()) {
+ Record *OpcodeDef = getDefOfSubClass(*Arg, "Instruction");
+ if (OpcodeDef) {
+ P->addOpcode(&Target.getInstruction(OpcodeDef));
+ continue;
+ }
+ PrintError(TheDef.getLoc(),
+ "Arguments to wip_match_opcode must be instructions");
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
bool CombineRule::parseMatcher(const CodeGenTarget &Target) {
NamedRegionTimer T("parseMatcher", "Time spent parsing the matcher",
"Rule Parsing", "Time spent on rule parsing", TimeRegions);
+ StringMap<std::vector<VarInfo>> NamedEdgeDefs;
+ StringMap<std::vector<VarInfo>> NamedEdgeUses;
DagInit *Matchers = TheDef.getValueAsDag("Match");
if (Matchers->getOperatorAsDef(TheDef.getLoc())->getName() != "match") {
@@ -167,12 +507,22 @@ bool CombineRule::parseMatcher(const CodeGenTarget &Target) {
// The match section consists of a list of matchers and predicates. Parse each
// one and add the equivalent GIMatchDag nodes, predicates, and edges.
for (unsigned I = 0; I < Matchers->getNumArgs(); ++I) {
+ if (parseInstructionMatcher(Target, Matchers->getArgName(I),
+ *Matchers->getArg(I), NamedEdgeDefs,
+ NamedEdgeUses))
+ continue;
+
+ if (parseWipMatchOpcodeMatcher(Target, Matchers->getArgName(I),
+ *Matchers->getArg(I)))
+ continue;
+
// Parse arbitrary C++ code we have in lieu of supporting MIR matching
if (const CodeInit *CodeI = dyn_cast<CodeInit>(Matchers->getArg(I))) {
assert(!MatchingFixupCode &&
"Only one block of arbitrary code is currently permitted");
MatchingFixupCode = CodeI;
+ MatchDag.setHasPostMatchPredicate(true);
continue;
}
@@ -182,6 +532,63 @@ bool CombineRule::parseMatcher(const CodeGenTarget &Target) {
PrintNote("Pattern was `" + Matchers->getArg(I)->getAsString() + "'");
return false;
}
+
+  // Add the Cartesian product of use -> def edges.
+ bool FailedToAddEdges = false;
+ for (const auto &NameAndDefs : NamedEdgeDefs) {
+ if (NameAndDefs.getValue().size() > 1) {
+ PrintError(TheDef.getLoc(),
+ "Two different MachineInstrs cannot def the same vreg");
+      for (const auto &NameAndDefOp : NameAndDefs.getValue())
+        PrintNote("in " + to_string(*NameAndDefOp.N) + " created from " +
+                  to_string(*NameAndDefOp.Matcher));
+ FailedToAddEdges = true;
+ }
+ const auto &Uses = NamedEdgeUses[NameAndDefs.getKey()];
+ for (const VarInfo &DefVar : NameAndDefs.getValue()) {
+ for (const VarInfo &UseVar : Uses) {
+        MatchDag.addEdge(insertStrTab(NameAndDefs.getKey()), UseVar.N,
+                         UseVar.Op, DefVar.N, DefVar.Op);
+ }
+ }
+ }
+ if (FailedToAddEdges)
+ return false;
+
+ // If a variable is referenced in multiple use contexts then we need a
+ // predicate to confirm they are the same operand. We can elide this if it's
+ // also referenced in a def context and we're traversing the def-use chain
+ // from the def to the uses but we can't know which direction we're going
+ // until after reorientToRoots().
+ for (const auto &NameAndUses : NamedEdgeUses) {
+ const auto &Uses = NameAndUses.getValue();
+ if (Uses.size() > 1) {
+ const auto &LeadingVar = Uses.front();
+ for (const auto &Var : ArrayRef<VarInfo>(Uses).drop_front()) {
+ // Add a predicate for each pair until we've covered the whole
+ // equivalence set. We could test the whole set in a single predicate
+ // but that means we can't test any equivalence until all the MO's are
+ // available which can lead to wasted work matching the DAG when this
+ // predicate can already be seen to have failed.
+ //
+ // We have a similar problem due to the need to wait for a particular MO
+ // before being able to test any of them. However, that is mitigated by
+ // the order in which we build the DAG. We build from the roots outwards
+ // so by using the first recorded use in all the predicates, we are
+ // making the dependency on one of the earliest visited references in
+ // the DAG. It's not guaranteed once the generated matcher is optimized
+        // (because factoring the common portions of rules might change the
+ // visit order) but this should mean that these predicates depend on the
+ // first MO to become available.
+ const auto &P = MatchDag.addPredicateNode<GIMatchDagSameMOPredicate>(
+ makeNameForAnonPredicate(*this));
+ MatchDag.addPredicateDependency(LeadingVar.N, LeadingVar.Op, P,
+ &P->getOperandInfo()["mi0"]);
+ MatchDag.addPredicateDependency(Var.N, Var.Op, P,
+ &P->getOperandInfo()["mi1"]);
+ }
+ }
+ }
return true;
}
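As a concrete illustration of the pairing strategy (hypothetical names): if $x is bound by three separate use operands U0, U1, and U2, the loop above emits two GIMatchDagSameMOPredicate checks, (U0, U1) and (U0, U2), both anchored on the first recorded use; equality of (U1, U2) follows transitively and is never tested directly.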
@@ -190,6 +597,8 @@ class GICombinerEmitter {
const CodeGenTarget &Target;
Record *Combiner;
std::vector<std::unique_ptr<CombineRule>> Rules;
+ GIMatchDagContext MatchDagCtx;
+
std::unique_ptr<CombineRule> makeCombineRule(const Record &R);
void gatherRules(std::vector<std::unique_ptr<CombineRule>> &ActiveRules,
@@ -208,7 +617,9 @@ public:
/// Emit the name matcher (guarded by #ifndef NDEBUG) used to disable rules in
/// response to the generated cl::opt.
void emitNameMatcher(raw_ostream &OS) const;
- void generateCodeForRule(raw_ostream &OS, const CombineRule *Rule,
+
+  void generateDeclarationsCodeForTree(raw_ostream &OS,
+                                       const GIMatchTree &Tree) const;
+ void generateCodeForTree(raw_ostream &OS, const GIMatchTree &Tree,
StringRef Indent) const;
};
@@ -246,12 +657,42 @@ void GICombinerEmitter::emitNameMatcher(raw_ostream &OS) const {
std::unique_ptr<CombineRule>
GICombinerEmitter::makeCombineRule(const Record &TheDef) {
std::unique_ptr<CombineRule> Rule =
- std::make_unique<CombineRule>(Target, NumPatternTotal, TheDef);
+ std::make_unique<CombineRule>(Target, MatchDagCtx, NumPatternTotal, TheDef);
if (!Rule->parseDefs())
return nullptr;
if (!Rule->parseMatcher(Target))
return nullptr;
+
+ Rule->reorientToRoots();
+
+ LLVM_DEBUG({
+ dbgs() << "Parsed rule defs/match for '" << Rule->getName() << "'\n";
+ Rule->getMatchDag().dump();
+ Rule->getMatchDag().writeDOTGraph(dbgs(), Rule->getName());
+ });
+ if (StopAfterParse)
+ return Rule;
+
+ // For now, don't support traversing from def to use. We'll come back to
+ // this later once we have the algorithm changes to support it.
+ bool EmittedDefToUseError = false;
+ for (const auto &E : Rule->getMatchDag().edges()) {
+ if (E->isDefToUse()) {
+ if (!EmittedDefToUseError) {
+ PrintError(
+ TheDef.getLoc(),
+ "Generated state machine cannot lookup uses from a def (yet)");
+ EmittedDefToUseError = true;
+ }
+ PrintNote("Node " + to_string(*E->getFromMI()));
+ PrintNote("Node " + to_string(*E->getToMI()));
+ PrintNote("Edge " + to_string(*E));
+ }
+ }
+ if (EmittedDefToUseError)
+ return nullptr;
+
// For now, don't support multi-root rules. We'll come back to this later
// once we have the algorithm changes to support it.
if (Rule->getNumRoots() > 1) {
@@ -279,19 +720,43 @@ void GICombinerEmitter::gatherRules(
}
}
-void GICombinerEmitter::generateCodeForRule(raw_ostream &OS,
- const CombineRule *Rule,
+void GICombinerEmitter::generateCodeForTree(raw_ostream &OS,
+ const GIMatchTree &Tree,
StringRef Indent) const {
- {
+ if (Tree.getPartitioner() != nullptr) {
+ Tree.getPartitioner()->generatePartitionSelectorCode(OS, Indent);
+ for (const auto &EnumChildren : enumerate(Tree.children())) {
+ OS << Indent << "if (Partition == " << EnumChildren.index() << " /* "
+ << format_partition_name(Tree, EnumChildren.index()) << " */) {\n";
+ generateCodeForTree(OS, EnumChildren.value(), (Indent + " ").str());
+ OS << Indent << "}\n";
+ }
+ return;
+ }
+
+ bool AnyFullyTested = false;
+ for (const auto &Leaf : Tree.possible_leaves()) {
+ OS << Indent << "// Leaf name: " << Leaf.getName() << "\n";
+
+ const CombineRule *Rule = Leaf.getTargetData<CombineRule>();
const Record &RuleDef = Rule->getDef();
OS << Indent << "// Rule: " << RuleDef.getName() << "\n"
<< Indent << "if (!isRuleDisabled(" << Rule->getID() << ")) {\n";
CodeExpansions Expansions;
- for (const RootInfo &Root : Rule->roots()) {
- Expansions.declare(Root.getPatternSymbol(), "MI");
+ for (const auto &VarBinding : Leaf.var_bindings()) {
+ if (VarBinding.isInstr())
+ Expansions.declare(VarBinding.getName(),
+ "MIs[" + to_string(VarBinding.getInstrID()) + "]");
+ else
+ Expansions.declare(VarBinding.getName(),
+ "MIs[" + to_string(VarBinding.getInstrID()) +
+ "]->getOperand(" +
+ to_string(VarBinding.getOpIdx()) + ")");
}
+ Rule->declareExpansions(Expansions);
+
DagInit *Applyer = RuleDef.getValueAsDag("Apply");
if (Applyer->getOperatorAsDef(RuleDef.getLoc())->getName() !=
"apply") {
@@ -301,6 +766,29 @@ void GICombinerEmitter::generateCodeForRule(raw_ostream &OS,
OS << Indent << " if (1\n";
+ // Attempt to emit code for any untested predicates left over. Note that
+ // isFullyTested() will remain false even if we succeed here and therefore
+ // combine rule elision will not be performed. This is because we do not
+ // know if there's any connection between the predicates for each leaf and
+ // therefore can't tell if one makes another unreachable. Ideally, the
+ // partitioner(s) would be sufficiently complete to prevent us from having
+ // untested predicates left over.
+ for (const GIMatchDagPredicate *Predicate : Leaf.untested_predicates()) {
+ if (Predicate->generateCheckCode(OS, (Indent + " ").str(),
+ Expansions))
+ continue;
+ PrintError(RuleDef.getLoc(),
+ "Unable to test predicate used in rule");
+ PrintNote(SMLoc(),
+ "This indicates an incomplete implementation in tablegen");
+ Predicate->print(errs());
+ errs() << "\n";
+ OS << Indent
+ << "llvm_unreachable(\"TableGen did not emit complete code for this "
+ "path\");\n";
+ break;
+ }
+
if (Rule->getMatchingFixupCode() &&
!Rule->getMatchingFixupCode()->getValue().empty()) {
// FIXME: Single-use lambda's like this are a serious compile-time
@@ -332,11 +820,64 @@ void GICombinerEmitter::generateCodeForRule(raw_ostream &OS,
}
OS << Indent << "}\n";
+
+ assert(Leaf.isFullyTraversed());
+
+  // If we didn't have any predicates left over, and we're not using the
+  // trap-door that accepts arbitrary C++ code while we migrate to the
+  // declarative style, then we know that subsequent leaves are unreachable.
+ if (Leaf.isFullyTested() &&
+ (!Rule->getMatchingFixupCode() ||
+ Rule->getMatchingFixupCode()->getValue().empty())) {
+ AnyFullyTested = true;
+ OS << Indent
+ << "llvm_unreachable(\"Combine rule elision was incorrect\");\n"
+ << Indent << "return false;\n";
+ }
}
+ if (!AnyFullyTested)
+ OS << Indent << "return false;\n";
}
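The recursion above is easiest to picture from its output. For a tree with one partitioner and two children, the emitted fragment has roughly this shape (rule and partition names hypothetical; the selector statement itself comes from generatePartitionSelectorCode), and leaf-level subtrees additionally end with a return false unless some leaf was fully tested:

    // ...partitioner-specific code assigning Partition...
    if (Partition == 0 /* opcode == G_ADD */) {
      // leaf code for child 0: rule-disable check, predicates, apply
    }
    if (Partition == 1 /* opcode == G_SUB */) {
      // leaf code for child 1
    }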
void GICombinerEmitter::run(raw_ostream &OS) {
gatherRules(Rules, Combiner->getValueAsListOfDefs("Rules"));
+ if (StopAfterParse) {
+ MatchDagCtx.print(errs());
+ PrintNote(Combiner->getLoc(),
+ "Terminating due to -gicombiner-stop-after-parse");
+ return;
+ }
+ if (ErrorsPrinted)
+ PrintFatalError(Combiner->getLoc(), "Failed to parse one or more rules");
+ LLVM_DEBUG(dbgs() << "Optimizing tree for " << Rules.size() << " rules\n");
+ std::unique_ptr<GIMatchTree> Tree;
+ {
+ NamedRegionTimer T("Optimize", "Time spent optimizing the combiner",
+ "Code Generation", "Time spent generating code",
+ TimeRegions);
+
+ GIMatchTreeBuilder TreeBuilder(0);
+ for (const auto &Rule : Rules) {
+ bool HadARoot = false;
+ for (const auto &Root : enumerate(Rule->getMatchDag().roots())) {
+ TreeBuilder.addLeaf(Rule->getName(), Root.index(), Rule->getMatchDag(),
+ Rule.get());
+ HadARoot = true;
+ }
+ if (!HadARoot)
+ PrintFatalError(Rule->getDef().getLoc(), "All rules must have a root");
+ }
+
+ Tree = TreeBuilder.run();
+ }
+ if (StopAfterBuild) {
+ Tree->writeDOTGraph(outs());
+ PrintNote(Combiner->getLoc(),
+ "Terminating due to -gicombiner-stop-after-build");
+ return;
+ }
+
NamedRegionTimer T("Emit", "Time spent emitting the combiner",
"Code Generation", "Time spent generating code",
TimeRegions);
@@ -359,7 +900,8 @@ void GICombinerEmitter::run(raw_ostream &OS) {
<< " bool tryCombineAll(\n"
<< " GISelChangeObserver &Observer,\n"
<< " MachineInstr &MI,\n"
- << " MachineIRBuilder &B) const;\n"
+ << " MachineIRBuilder &B,\n"
+ << " CombinerHelper &Helper) const;\n"
<< "};\n\n";
emitNameMatcher(OS);
@@ -415,15 +957,22 @@ void GICombinerEmitter::run(raw_ostream &OS) {
OS << "bool " << getClassName() << "::tryCombineAll(\n"
<< " GISelChangeObserver &Observer,\n"
<< " MachineInstr &MI,\n"
- << " MachineIRBuilder &B) const {\n"
- << " CombinerHelper Helper(Observer, B);\n"
+ << " MachineIRBuilder &B,\n"
+ << " CombinerHelper &Helper) const {\n"
<< " MachineBasicBlock *MBB = MI.getParent();\n"
<< " MachineFunction *MF = MBB->getParent();\n"
<< " MachineRegisterInfo &MRI = MF->getRegInfo();\n"
+ << " SmallVector<MachineInstr *, 8> MIs = { &MI };\n\n"
<< " (void)MBB; (void)MF; (void)MRI;\n\n";
+ OS << " // Match data\n";
for (const auto &Rule : Rules)
- generateCodeForRule(OS, Rule.get(), " ");
+ for (const auto &I : Rule->matchdata_decls())
+ OS << " " << I.getType() << " " << I.getVariableName() << ";\n";
+ OS << "\n";
+
+ OS << " int Partition = -1;\n";
+ generateCodeForTree(OS, *Tree, " ");
OS << "\n return false;\n"
<< "}\n"
<< "#endif // ifdef " << Name.upper() << "_GENCOMBINERHELPER_CPP\n";
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp
new file mode 100644
index 000000000000..a3a9b7d8b037
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp
@@ -0,0 +1,138 @@
+//===- GIMatchDag.cpp - A DAG representation of a pattern to be matched ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "GIMatchDag.h"
+
+#include "llvm/Support/Format.h"
+#include "llvm/TableGen/Record.h"
+#include "../CodeGenInstruction.h"
+
+using namespace llvm;
+
+void GIMatchDag::writeDOTGraph(raw_ostream &OS, StringRef ID) const {
+ const auto writePorts = [&](StringRef Prefix,
+ const GIMatchDagOperandList &Operands) {
+ StringRef Separator = "";
+ OS << "{";
+ for (const auto &Op : enumerate(Operands)) {
+ OS << Separator << "<" << Prefix << format("%d", Op.index()) << ">"
+ << "#" << Op.index() << " $" << Op.value().getName();
+ Separator = "|";
+ }
+ OS << "}";
+ };
+
+ OS << "digraph \"" << ID << "\" {\n"
+ << " rankdir=\"BT\"\n";
+ for (const auto &N : InstrNodes) {
+ OS << " " << format("Node%p", &*N) << " [shape=record,label=\"{";
+ writePorts("s", N->getOperandInfo());
+ OS << "|" << N->getName();
+ if (N->getOpcodeAnnotation())
+ OS << "|" << N->getOpcodeAnnotation()->TheDef->getName();
+ if (N->isMatchRoot())
+ OS << "|Match starts here";
+ OS << "|";
+ SmallVector<std::pair<unsigned, StringRef>, 8> ToPrint;
+ for (const auto &Assignment : N->user_assigned_operand_names())
+ ToPrint.emplace_back(Assignment.first, Assignment.second);
+    llvm::sort(ToPrint);
+ StringRef Separator = "";
+ for (const auto &Assignment : ToPrint) {
+ OS << Separator << "$" << Assignment.second << "=getOperand("
+ << Assignment.first << ")";
+ Separator = ", ";
+ }
+ OS << format("|%p|", &N);
+ writePorts("d", N->getOperandInfo());
+ OS << "}\"";
+ if (N->isMatchRoot())
+ OS << ",color=red";
+ OS << "]\n";
+ }
+
+ for (const auto &E : Edges) {
+ const char *FromFmt = "Node%p:s%d:n";
+ const char *ToFmt = "Node%p:d%d:s";
+ if (E->getFromMO()->isDef() && !E->getToMO()->isDef())
+ std::swap(FromFmt, ToFmt);
+ auto From = format(FromFmt, E->getFromMI(), E->getFromMO()->getIdx());
+ auto To = format(ToFmt, E->getToMI(), E->getToMO()->getIdx());
+ if (E->getFromMO()->isDef() && !E->getToMO()->isDef())
+ std::swap(From, To);
+
+ OS << " " << From << " -> " << To << " [label=\"$" << E->getName();
+ if (E->getFromMO()->isDef() == E->getToMO()->isDef())
+ OS << " INVALID EDGE!";
+ OS << "\"";
+ if (E->getFromMO()->isDef() == E->getToMO()->isDef())
+ OS << ",color=red";
+ else if (E->getFromMO()->isDef() && !E->getToMO()->isDef())
+ OS << ",dir=back,arrowtail=crow";
+ OS << "]\n";
+ }
+
+ for (const auto &N : PredicateNodes) {
+ OS << " " << format("Pred%p", &*N) << " [shape=record,label=\"{";
+ writePorts("s", N->getOperandInfo());
+ OS << "|" << N->getName() << "|";
+ N->printDescription(OS);
+ OS << format("|%p|", &N);
+ writePorts("d", N->getOperandInfo());
+ OS << "}\",style=dotted]\n";
+ }
+
+ for (const auto &E : PredicateDependencies) {
+ const char *FromMIFmt = "Node%p:e";
+ const char *FromMOFmt = "Node%p:s%d:n";
+ const char *ToFmt = "Pred%p:d%d:s";
+ auto To = format(ToFmt, E->getPredicate(), E->getPredicateOp()->getIdx());
+ auto Style = "[style=dotted]";
+ if (E->getRequiredMO()) {
+ auto From =
+ format(FromMOFmt, E->getRequiredMI(), E->getRequiredMO()->getIdx());
+ OS << " " << From << " -> " << To << " " << Style << "\n";
+ continue;
+ }
+ auto From = format(FromMIFmt, E->getRequiredMI());
+ OS << " " << From << " -> " << To << " " << Style << "\n";
+ }
+
+ OS << "}\n";
+}
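For orientation, the DOT emitted for a trivial single-instruction DAG looks roughly like this (addresses and names illustrative, label abridged): each record node carries a row of source ports (s0, s1, ...) and destination ports (d0, d1, ...) per operand so edges can attach to specific operands, and match roots are drawn in red.

    digraph "trivial" {
      rankdir="BT"
      Node0x1f00 [shape=record,
        label="{{<s0>#0 $dst|<s1>#1 $src}|__anon0_0|G_ZEXT|Match starts here|...}",
        color=red]
    }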
+
+LLVM_DUMP_METHOD void GIMatchDag::print(raw_ostream &OS) const {
+ OS << "matchdag {\n";
+ for (const auto &N : InstrNodes) {
+ OS << " ";
+ N->print(OS);
+ OS << "\n";
+ }
+ for (const auto &E : Edges) {
+ OS << " ";
+ E->print(OS);
+ OS << "\n";
+ }
+
+ for (const auto &P : PredicateNodes) {
+ OS << " ";
+ P->print(OS);
+ OS << "\n";
+ }
+ for (const auto &D : PredicateDependencies) {
+ OS << " ";
+ D->print(OS);
+ OS << "\n";
+ }
+ OS << "}\n";
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const GIMatchDag &G) {
+ G.print(OS);
+ return OS;
+}
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.h b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
new file mode 100644
index 000000000000..567580540877
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
@@ -0,0 +1,243 @@
+//===- GIMatchDag.h - Represent a DAG to be matched -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAG_H
+#define LLVM_UTILS_TABLEGEN_GIMATCHDAG_H
+
+#include "GIMatchDagEdge.h"
+#include "GIMatchDagInstr.h"
+#include "GIMatchDagOperands.h"
+#include "GIMatchDagPredicate.h"
+#include "GIMatchDagPredicateDependencyEdge.h"
+
+namespace llvm {
+class GIMatchDag;
+
+/// This class manages lifetimes for data associated with the GIMatchDag object.
+class GIMatchDagContext {
+ GIMatchDagOperandListContext OperandListCtx;
+
+public:
+ const GIMatchDagOperandList &makeEmptyOperandList() {
+ return OperandListCtx.makeEmptyOperandList();
+ }
+
+ const GIMatchDagOperandList &makeOperandList(const CodeGenInstruction &I) {
+ return OperandListCtx.makeOperandList(I);
+ }
+
+ const GIMatchDagOperandList &makeMIPredicateOperandList() {
+ return OperandListCtx.makeMIPredicateOperandList();
+ }
+
+ const GIMatchDagOperandList &makeTwoMOPredicateOperandList() {
+ return OperandListCtx.makeTwoMOPredicateOperandList();
+ }
+
+ void print(raw_ostream &OS) const {
+ OperandListCtx.print(OS);
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(errs()); }
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+};
+
+class GIMatchDag {
+public:
+ using InstrNodesVec = std::vector<std::unique_ptr<GIMatchDagInstr>>;
+ using instr_node_iterator = raw_pointer_iterator<InstrNodesVec::iterator>;
+ using const_instr_node_iterator =
+ raw_pointer_iterator<InstrNodesVec::const_iterator>;
+
+ using EdgesVec = std::vector<std::unique_ptr<GIMatchDagEdge>>;
+ using edge_iterator = raw_pointer_iterator<EdgesVec::iterator>;
+ using const_edge_iterator = raw_pointer_iterator<EdgesVec::const_iterator>;
+
+ using PredicateNodesVec = std::vector<std::unique_ptr<GIMatchDagPredicate>>;
+ using predicate_iterator = raw_pointer_iterator<PredicateNodesVec::iterator>;
+ using const_predicate_iterator =
+ raw_pointer_iterator<PredicateNodesVec::const_iterator>;
+
+ using PredicateDependencyEdgesVec =
+ std::vector<std::unique_ptr<GIMatchDagPredicateDependencyEdge>>;
+ using predicate_edge_iterator =
+ raw_pointer_iterator<PredicateDependencyEdgesVec::iterator>;
+ using const_predicate_edge_iterator =
+ raw_pointer_iterator<PredicateDependencyEdgesVec::const_iterator>;
+
+protected:
+ GIMatchDagContext &Ctx;
+ InstrNodesVec InstrNodes;
+ PredicateNodesVec PredicateNodes;
+ EdgesVec Edges;
+ PredicateDependencyEdgesVec PredicateDependencies;
+ std::vector<GIMatchDagInstr *> MatchRoots;
+ // FIXME: This is a temporary measure while we still accept arbitrary code
+ // blocks to fix up the matcher while it's being developed.
+ bool HasPostMatchPredicate = false;
+
+public:
+  GIMatchDag(GIMatchDagContext &Ctx) : Ctx(Ctx) {}
+ GIMatchDag(const GIMatchDag &) = delete;
+
+ GIMatchDagContext &getContext() const { return Ctx; }
+ edge_iterator edges_begin() {
+ return raw_pointer_iterator<EdgesVec::iterator>(Edges.begin());
+ }
+ edge_iterator edges_end() {
+ return raw_pointer_iterator<EdgesVec::iterator>(Edges.end());
+ }
+ const_edge_iterator edges_begin() const {
+ return raw_pointer_iterator<EdgesVec::const_iterator>(Edges.begin());
+ }
+ const_edge_iterator edges_end() const {
+ return raw_pointer_iterator<EdgesVec::const_iterator>(Edges.end());
+ }
+ iterator_range<edge_iterator> edges() {
+ return make_range(edges_begin(), edges_end());
+ }
+ iterator_range<const_edge_iterator> edges() const {
+ return make_range(edges_begin(), edges_end());
+ }
+ iterator_range<std::vector<GIMatchDagInstr *>::iterator> roots() {
+ return make_range(MatchRoots.begin(), MatchRoots.end());
+ }
+ iterator_range<std::vector<GIMatchDagInstr *>::const_iterator> roots() const {
+ return make_range(MatchRoots.begin(), MatchRoots.end());
+ }
+
+ instr_node_iterator instr_nodes_begin() {
+ return raw_pointer_iterator<InstrNodesVec::iterator>(InstrNodes.begin());
+ }
+ instr_node_iterator instr_nodes_end() {
+ return raw_pointer_iterator<InstrNodesVec::iterator>(InstrNodes.end());
+ }
+ const_instr_node_iterator instr_nodes_begin() const {
+ return raw_pointer_iterator<InstrNodesVec::const_iterator>(
+ InstrNodes.begin());
+ }
+ const_instr_node_iterator instr_nodes_end() const {
+ return raw_pointer_iterator<InstrNodesVec::const_iterator>(
+ InstrNodes.end());
+ }
+ iterator_range<instr_node_iterator> instr_nodes() {
+ return make_range(instr_nodes_begin(), instr_nodes_end());
+ }
+ iterator_range<const_instr_node_iterator> instr_nodes() const {
+ return make_range(instr_nodes_begin(), instr_nodes_end());
+ }
+ predicate_edge_iterator predicate_edges_begin() {
+ return raw_pointer_iterator<PredicateDependencyEdgesVec::iterator>(
+ PredicateDependencies.begin());
+ }
+ predicate_edge_iterator predicate_edges_end() {
+ return raw_pointer_iterator<PredicateDependencyEdgesVec::iterator>(
+ PredicateDependencies.end());
+ }
+ const_predicate_edge_iterator predicate_edges_begin() const {
+ return raw_pointer_iterator<PredicateDependencyEdgesVec::const_iterator>(
+ PredicateDependencies.begin());
+ }
+ const_predicate_edge_iterator predicate_edges_end() const {
+ return raw_pointer_iterator<PredicateDependencyEdgesVec::const_iterator>(
+ PredicateDependencies.end());
+ }
+ iterator_range<predicate_edge_iterator> predicate_edges() {
+ return make_range(predicate_edges_begin(), predicate_edges_end());
+ }
+ iterator_range<const_predicate_edge_iterator> predicate_edges() const {
+ return make_range(predicate_edges_begin(), predicate_edges_end());
+ }
+ predicate_iterator predicates_begin() {
+ return raw_pointer_iterator<PredicateNodesVec::iterator>(
+ PredicateNodes.begin());
+ }
+ predicate_iterator predicates_end() {
+ return raw_pointer_iterator<PredicateNodesVec::iterator>(
+ PredicateNodes.end());
+ }
+ const_predicate_iterator predicates_begin() const {
+ return raw_pointer_iterator<PredicateNodesVec::const_iterator>(
+ PredicateNodes.begin());
+ }
+ const_predicate_iterator predicates_end() const {
+ return raw_pointer_iterator<PredicateNodesVec::const_iterator>(
+ PredicateNodes.end());
+ }
+ iterator_range<predicate_iterator> predicates() {
+ return make_range(predicates_begin(), predicates_end());
+ }
+ iterator_range<const_predicate_iterator> predicates() const {
+ return make_range(predicates_begin(), predicates_end());
+ }
+
+ template <class... Args> GIMatchDagInstr *addInstrNode(Args &&... args) {
+ auto Obj =
+ std::make_unique<GIMatchDagInstr>(*this, std::forward<Args>(args)...);
+ auto ObjRaw = Obj.get();
+ InstrNodes.push_back(std::move(Obj));
+ return ObjRaw;
+ }
+
+ template <class T, class... Args>
+ T *addPredicateNode(Args &&... args) {
+ auto Obj = std::make_unique<T>(getContext(), std::forward<Args>(args)...);
+ auto ObjRaw = Obj.get();
+ PredicateNodes.push_back(std::move(Obj));
+ return ObjRaw;
+ }
+
+ template <class... Args> GIMatchDagEdge *addEdge(Args &&... args) {
+ auto Obj = std::make_unique<GIMatchDagEdge>(std::forward<Args>(args)...);
+ auto ObjRaw = Obj.get();
+ Edges.push_back(std::move(Obj));
+ return ObjRaw;
+ }
+
+ template <class... Args>
+ GIMatchDagPredicateDependencyEdge *addPredicateDependency(Args &&... args) {
+ auto Obj = std::make_unique<GIMatchDagPredicateDependencyEdge>(
+ std::forward<Args>(args)...);
+ auto ObjRaw = Obj.get();
+ PredicateDependencies.push_back(std::move(Obj));
+ return ObjRaw;
+ }
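All four add* members above share one ownership idiom: the DAG stores the unique_ptr (and therefore controls the object's lifetime) while the caller receives a stable raw pointer for wiring up edges and predicate dependencies. A generic sketch of the pattern (hypothetical Node/Graph types):

    #include <memory>
    #include <utility>
    #include <vector>

    struct Node {
      int Val;
      explicit Node(int V) : Val(V) {}
    };

    struct Graph {
      std::vector<std::unique_ptr<Node>> Nodes;

      // Construct in place, retain ownership, return a non-owning pointer.
      template <class... Args> Node *addNode(Args &&... A) {
        auto Obj = std::make_unique<Node>(std::forward<Args>(A)...);
        Node *Raw = Obj.get();
        Nodes.push_back(std::move(Obj));
        return Raw;
      }
    };

    int main() {
      Graph G;
      Node *N = G.addNode(42); // valid until G is destroyed
      return N->Val == 42 ? 0 : 1;
    }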
+
+ size_t getInstrNodeIdx(instr_node_iterator I) {
+ return std::distance(instr_nodes_begin(), I);
+ }
+ size_t getInstrNodeIdx(const_instr_node_iterator I) const {
+ return std::distance(instr_nodes_begin(), I);
+ }
+ size_t getNumInstrNodes() const { return InstrNodes.size(); }
+ size_t getNumEdges() const { return Edges.size(); }
+ size_t getNumPredicates() const { return PredicateNodes.size(); }
+
+ void setHasPostMatchPredicate(bool V) { HasPostMatchPredicate = V; }
+ bool hasPostMatchPredicate() const { return HasPostMatchPredicate; }
+
+ void addMatchRoot(GIMatchDagInstr *N) { MatchRoots.push_back(N); }
+
+ LLVM_DUMP_METHOD void print(raw_ostream &OS) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(errs()); }
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+ void writeDOTGraph(raw_ostream &OS, StringRef ID) const;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const GIMatchDag &G);
+
+} // end namespace llvm
+
+#endif // ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAG_H
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp
new file mode 100644
index 000000000000..e59cb3aae49a
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp
@@ -0,0 +1,38 @@
+//===- GIMatchDagEdge.cpp - An edge describing a def/use lookup -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "GIMatchDagEdge.h"
+#include "GIMatchDagInstr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+LLVM_DUMP_METHOD void GIMatchDagEdge::print(raw_ostream &OS) const {
+ OS << getFromMI()->getName() << "[" << getFromMO()->getName() << "] --["
+ << Name << "]--> " << getToMI()->getName() << "[" << getToMO()->getName()
+ << "]";
+}
+
+bool GIMatchDagEdge::isDefToUse() const {
+ // Def -> Def is invalid so we only need to check FromMO.
+ return FromMO->isDef();
+}
+
+void GIMatchDagEdge::reverse() {
+ std::swap(FromMI, ToMI);
+ std::swap(FromMO, ToMO);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void GIMatchDagEdge::dump() const { print(errs()); }
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const GIMatchDagEdge &E) {
+ E.print(OS);
+ return OS;
+}
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.h b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.h
new file mode 100644
index 000000000000..8e845ff0a51e
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.h
@@ -0,0 +1,70 @@
+//===- GIMatchDagEdge.h - Represent a def/use edge between instructions --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGEDGE_H
+#define LLVM_UTILS_TABLEGEN_GIMATCHDAGEDGE_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+class raw_ostream;
+class GIMatchDagInstr;
+class GIMatchDagOperand;
+
+/// Represents an edge that connects two instructions together via a pair of
+/// operands. For example:
+/// %a = FOO ...
+/// %0 = BAR %a
+/// %1 = BAZ %a
+/// would have two edges for %a like so:
+/// BAR:Op#1 --[a]----> Op#0:FOO
+/// ^
+/// BAZ:Op#1 --[a]------/
+/// Ideally, all edges in the DAG are from a use to a def as this is a many
+/// to one edge but edges from defs to uses are supported too.
+class GIMatchDagEdge {
+ /// The name of the edge. For example,
+ /// (FOO $a, $b, $c)
+ /// (BAR $d, $e, $a)
+ /// will create an edge named 'a' to connect FOO to BAR. Although the name
+ /// refers to the edge, the canonical value of 'a' is the operand that defines
+ /// it.
+ StringRef Name;
+ const GIMatchDagInstr *FromMI;
+ const GIMatchDagOperand *FromMO;
+ const GIMatchDagInstr *ToMI;
+ const GIMatchDagOperand *ToMO;
+
+public:
+ GIMatchDagEdge(StringRef Name, const GIMatchDagInstr *FromMI, const GIMatchDagOperand *FromMO,
+ const GIMatchDagInstr *ToMI, const GIMatchDagOperand *ToMO)
+ : Name(Name), FromMI(FromMI), FromMO(FromMO), ToMI(ToMI), ToMO(ToMO) {}
+
+ StringRef getName() const { return Name; }
+ const GIMatchDagInstr *getFromMI() const { return FromMI; }
+ const GIMatchDagOperand *getFromMO() const { return FromMO; }
+ const GIMatchDagInstr *getToMI() const { return ToMI; }
+ const GIMatchDagOperand *getToMO() const { return ToMO; }
+
+ /// Flip the direction of the edge.
+ void reverse();
+
+ /// Does this edge run from a def to (one of many) uses?
+ bool isDefToUse() const;
+
+ LLVM_DUMP_METHOD void print(raw_ostream &OS) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const;
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const GIMatchDagEdge &E);
+
+} // end namespace llvm
+#endif // ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGEDGE_H
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.cpp
new file mode 100644
index 000000000000..218b741be20c
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.cpp
@@ -0,0 +1,48 @@
+//===- GIMatchDagInstr.cpp - An instruction to be matched -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "GIMatchDagInstr.h"
+#include "../CodeGenInstruction.h"
+#include "GIMatchDag.h"
+#include "llvm/TableGen/Record.h"
+
+using namespace llvm;
+
+void GIMatchDagInstr::print(raw_ostream &OS) const {
+ OS << "(";
+ if (const auto *Annotation = getOpcodeAnnotation())
+ OS << Annotation->TheDef->getName();
+ else
+ OS << "<unknown>";
+ OS << " ";
+ OperandInfo.print(OS);
+ OS << "):$" << Name;
+ if (!UserAssignedNamesForOperands.empty()) {
+ OS << " // ";
+ SmallVector<std::pair<unsigned, StringRef>, 8> ToPrint;
+ for (const auto &Assignment : UserAssignedNamesForOperands)
+ ToPrint.emplace_back(Assignment.first, Assignment.second);
+    llvm::sort(ToPrint);
+ StringRef Separator = "";
+ for (const auto &Assignment : ToPrint) {
+ OS << Separator << "$" << Assignment.second << "=getOperand("
+ << Assignment.first << ")";
+ Separator = ", ";
+ }
+ }
+}
+
+void GIMatchDagInstr::setMatchRoot() {
+ IsMatchRoot = true;
+ Dag.addMatchRoot(this);
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const GIMatchDagInstr &N) {
+ N.print(OS);
+ return OS;
+}
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
new file mode 100644
index 000000000000..4a07767a2e19
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
@@ -0,0 +1,115 @@
+//===- GIMatchDagInstr.h - Represent an instruction to be matched ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGINSTR_H
+#define LLVM_UTILS_TABLEGEN_GIMATCHDAGINSTR_H
+
+#include "GIMatchDagOperands.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+class GIMatchDag;
+
+/// Represents an instruction in the match DAG. This object knows very little
+/// about the actual instruction to be matched as the bulk of that is in
+/// predicates that are associated with the match DAG. It merely knows the names
+/// and indices of any operands that need to be matched in order to allow edges
+/// to link to them.
+///
+/// Instances of this class are owned by the GIMatchDag and are not
+/// shareable between instances of GIMatchDag. This is because the Name,
+/// IsMatchRoot, and OpcodeAnnotation are likely to differ between GIMatchDag
+/// instances.
+class GIMatchDagInstr {
+public:
+ using const_user_assigned_operand_names_iterator =
+ DenseMap<unsigned, StringRef>::const_iterator;
+
+protected:
+ /// The match DAG this instruction belongs to.
+ GIMatchDag &Dag;
+
+ /// The name of the instruction in the pattern. For example:
+ /// (FOO $a, $b, $c):$name
+ /// will cause name to be assigned to this member. Anonymous instructions will
+ /// have a name assigned for debugging purposes.
+ StringRef Name;
+
+ /// The name of the instruction in the pattern as assigned by the user. For
+ /// example:
+ /// (FOO $a, $b, $c):$name
+ /// will cause name to be assigned to this member. If a name is not provided,
+ /// this will be empty. This name is used to bind variables from rules to the
+ /// matched instruction.
+ StringRef UserAssignedName;
+
+ /// The name of each operand (if any) that was assigned by the user. For
+ /// example:
+ /// (FOO $a, $b, $c):$name
+ /// will cause {0, "a"}, {1, "b"}, {2, "c} to be inserted into this map.
+ DenseMap<unsigned, StringRef> UserAssignedNamesForOperands;
+
+ /// The operand list for this instruction. This object may be shared with
+ /// other instructions of a similar 'shape'.
+ const GIMatchDagOperandList &OperandInfo;
+
+ /// For debugging purposes, it's helpful to have access to a description of
+ /// the Opcode. However, this object shouldn't use it for more than debugging
+ /// output since predicates are expected to be handled outside the DAG.
+  CodeGenInstruction *OpcodeAnnotation = nullptr;
+
+ /// When true, this instruction will be a starting point for a match attempt.
+ bool IsMatchRoot = false;
+
+public:
+ GIMatchDagInstr(GIMatchDag &Dag, StringRef Name, StringRef UserAssignedName,
+ const GIMatchDagOperandList &OperandInfo)
+ : Dag(Dag), Name(Name), UserAssignedName(UserAssignedName),
+ OperandInfo(OperandInfo) {}
+
+ const GIMatchDagOperandList &getOperandInfo() const { return OperandInfo; }
+ StringRef getName() const { return Name; }
+ StringRef getUserAssignedName() const { return UserAssignedName; }
+ void assignNameToOperand(unsigned Idx, StringRef Name) {
+ assert(UserAssignedNamesForOperands[Idx].empty() && "Cannot assign twice");
+ UserAssignedNamesForOperands[Idx] = Name;
+ }
+
+ const_user_assigned_operand_names_iterator
+ user_assigned_operand_names_begin() const {
+ return UserAssignedNamesForOperands.begin();
+ }
+ const_user_assigned_operand_names_iterator
+ user_assigned_operand_names_end() const {
+ return UserAssignedNamesForOperands.end();
+ }
+ iterator_range<const_user_assigned_operand_names_iterator>
+ user_assigned_operand_names() const {
+ return make_range(user_assigned_operand_names_begin(),
+ user_assigned_operand_names_end());
+ }
+
+ /// Mark this instruction as being a root of the match. This means that the
+ /// matcher will start from this node when attempting to match MIR.
+ void setMatchRoot();
+ bool isMatchRoot() const { return IsMatchRoot; }
+
+ void setOpcodeAnnotation(CodeGenInstruction *I) { OpcodeAnnotation = I; }
+ CodeGenInstruction *getOpcodeAnnotation() const { return OpcodeAnnotation; }
+
+ void print(raw_ostream &OS) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(errs()); }
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const GIMatchDagInstr &N);
+
+} // end namespace llvm
+#endif // ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGINSTR_H
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.cpp
new file mode 100644
index 000000000000..e79e4686b91e
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.cpp
@@ -0,0 +1,153 @@
+//===- GIMatchDagOperands.cpp - A shared operand list for nodes -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "GIMatchDagOperands.h"
+
+#include "../CodeGenInstruction.h"
+
+using namespace llvm;
+
+void GIMatchDagOperand::Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, Idx, Name, IsDef);
+}
+
+void GIMatchDagOperand::Profile(FoldingSetNodeID &ID, size_t Idx,
+ StringRef Name, bool IsDef) {
+ ID.AddInteger(Idx);
+ ID.AddString(Name);
+ ID.AddBoolean(IsDef);
+}
+
+void GIMatchDagOperandList::add(StringRef Name, unsigned Idx, bool IsDef) {
+ assert(Idx == Operands.size() && "Operands added in wrong order");
+ Operands.emplace_back(Operands.size(), Name, IsDef);
+ OperandsByName.try_emplace(Operands.back().getName(), Operands.size() - 1);
+}
+
+void GIMatchDagOperandList::Profile(FoldingSetNodeID &ID) const {
+ for (const auto &I : enumerate(Operands))
+ GIMatchDagOperand::Profile(ID, I.index(), I.value().getName(),
+ I.value().isDef());
+}
+
+void GIMatchDagOperandList::print(raw_ostream &OS) const {
+ if (Operands.empty()) {
+ OS << "<empty>";
+ return;
+ }
+ StringRef Separator = "";
+ for (const auto &I : Operands) {
+ OS << Separator << I.getIdx() << ":" << I.getName();
+ if (I.isDef())
+ OS << "<def>";
+ Separator = ", ";
+ }
+}
+
+const GIMatchDagOperandList::value_type &GIMatchDagOperandList::
+operator[](StringRef K) const {
+  const auto I = OperandsByName.find(K);
+ assert(I != OperandsByName.end() && "Operand not found by name");
+ return Operands[I->second];
+}
+
+const GIMatchDagOperandList &
+GIMatchDagOperandListContext::makeEmptyOperandList() {
+ FoldingSetNodeID ID;
+
+ void *InsertPoint;
+ GIMatchDagOperandList *Value =
+ OperandLists.FindNodeOrInsertPos(ID, InsertPoint);
+ if (Value)
+ return *Value;
+
+ std::unique_ptr<GIMatchDagOperandList> NewValue =
+ std::make_unique<GIMatchDagOperandList>();
+ OperandLists.InsertNode(NewValue.get(), InsertPoint);
+ OperandListsOwner.push_back(std::move(NewValue));
+ return *OperandListsOwner.back().get();
+}
+
+const GIMatchDagOperandList &
+GIMatchDagOperandListContext::makeOperandList(const CodeGenInstruction &I) {
+ FoldingSetNodeID ID;
+ for (unsigned i = 0; i < I.Operands.size(); ++i)
+ GIMatchDagOperand::Profile(ID, i, I.Operands[i].Name,
+ i < I.Operands.NumDefs);
+
+ void *InsertPoint;
+ GIMatchDagOperandList *Value =
+ OperandLists.FindNodeOrInsertPos(ID, InsertPoint);
+ if (Value)
+ return *Value;
+
+ std::unique_ptr<GIMatchDagOperandList> NewValue =
+ std::make_unique<GIMatchDagOperandList>();
+ for (unsigned i = 0; i < I.Operands.size(); ++i)
+ NewValue->add(I.Operands[i].Name, i, i < I.Operands.NumDefs);
+ OperandLists.InsertNode(NewValue.get(), InsertPoint);
+ OperandListsOwner.push_back(std::move(NewValue));
+ return *OperandListsOwner.back().get();
+}
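Each make* function in this file repeats the standard FoldingSet find-or-insert idiom: profile the prospective contents into a FoldingSetNodeID, probe with FindNodeOrInsertPos, and only construct (and take ownership of) a new list on a miss. A stripped-down sketch of the same pattern with a hypothetical node type:

    #include "llvm/ADT/FoldingSet.h"
    #include <memory>
    #include <vector>
    using namespace llvm;

    struct Interned : FoldingSetNode {
      int Key;
      explicit Interned(int K) : Key(K) {}
      void Profile(FoldingSetNodeID &ID) const { ID.AddInteger(Key); }
    };

    struct Uniquer {
      FoldingSet<Interned> Set;
      std::vector<std::unique_ptr<Interned>> Owner;

      const Interned &get(int K) {
        FoldingSetNodeID ID;
        ID.AddInteger(K); // must mirror Interned::Profile
        void *InsertPoint;
        if (Interned *N = Set.FindNodeOrInsertPos(ID, InsertPoint))
          return *N; // hit: reuse the uniqued node
        Owner.push_back(std::make_unique<Interned>(K));
        Set.InsertNode(Owner.back().get(), InsertPoint);
        return *Owner.back();
      }
    };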
+
+const GIMatchDagOperandList &
+GIMatchDagOperandListContext::makeMIPredicateOperandList() {
+ FoldingSetNodeID ID;
+ GIMatchDagOperand::Profile(ID, 0, "$", true);
+ GIMatchDagOperand::Profile(ID, 1, "mi", false);
+
+ void *InsertPoint;
+ GIMatchDagOperandList *Value =
+ OperandLists.FindNodeOrInsertPos(ID, InsertPoint);
+ if (Value)
+ return *Value;
+
+ std::unique_ptr<GIMatchDagOperandList> NewValue =
+ std::make_unique<GIMatchDagOperandList>();
+ NewValue->add("$", 0, true);
+ NewValue->add("mi", 1, false);
+ OperandLists.InsertNode(NewValue.get(), InsertPoint);
+ OperandListsOwner.push_back(std::move(NewValue));
+ return *OperandListsOwner.back().get();
+}
+
+const GIMatchDagOperandList &
+GIMatchDagOperandListContext::makeTwoMOPredicateOperandList() {
+ FoldingSetNodeID ID;
+ GIMatchDagOperand::Profile(ID, 0, "$", true);
+ GIMatchDagOperand::Profile(ID, 1, "mi0", false);
+ GIMatchDagOperand::Profile(ID, 2, "mi1", false);
+
+ void *InsertPoint;
+ GIMatchDagOperandList *Value =
+ OperandLists.FindNodeOrInsertPos(ID, InsertPoint);
+ if (Value)
+ return *Value;
+
+ std::unique_ptr<GIMatchDagOperandList> NewValue =
+ std::make_unique<GIMatchDagOperandList>();
+ NewValue->add("$", 0, true);
+ NewValue->add("mi0", 1, false);
+ NewValue->add("mi1", 2, false);
+ OperandLists.InsertNode(NewValue.get(), InsertPoint);
+ OperandListsOwner.push_back(std::move(NewValue));
+ return *OperandListsOwner.back().get();
+}
+
+void GIMatchDagOperandListContext::print(raw_ostream &OS) const {
+ OS << "GIMatchDagOperandListContext {\n"
+ << " OperandLists {\n";
+ for (const auto &I : OperandListsOwner) {
+ OS << " ";
+ I->print(OS);
+ OS << "\n";
+ }
+ OS << " }\n"
+ << "}\n";
+}
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.h b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.h
new file mode 100644
index 000000000000..c2d30574231d
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.h
@@ -0,0 +1,133 @@
+//===- GIMatchDagOperands.h - Represent a shared operand list for nodes ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Describes the operand lists that can be shared between the nodes of a
+// GIMatchDag.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGOPERANDS_H
+#define LLVM_UTILS_TABLEGEN_GIMATCHDAGOPERANDS_H
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <vector>
+
+namespace llvm {
+class CodeGenInstruction;
+/// Describes an operand of a MachineInstr w.r.t. the DAG matching. This
+/// information is derived from CodeGenInstruction::Operands but is more
+/// readily available for context-less access as we don't need to know which
+/// instruction it's used with or know how many defs that instruction had.
+///
+/// There may be multiple GIMatchDagOperands with the same contents. However,
+/// they are uniqued within the set of instructions that have the same overall
+/// operand list. For example, given:
+/// Inst1 operands ($dst:<def>, $src1, $src2)
+/// Inst2 operands ($dst:<def>, $src1, $src2)
+/// Inst3 operands ($dst:<def>, $src)
+/// $src1 will have a single instance of GIMatchDagOperand shared by Inst1 and
+/// Inst2, as will $src2. $dst however, will have two instances one shared
+/// between Inst1 and Inst2 and one unique to Inst3. We could potentially
+/// fully de-dupe the GIMatchDagOperand instances but the saving is not expected
+/// to be worth the overhead.
+///
+/// The result of this is that the address of the object can be relied upon to
+/// trivially identify commonality between two instructions which will be useful
+/// when generating the matcher. When the pointers differ, the contents can be
+/// inspected instead.
+class GIMatchDagOperand {
+ unsigned Idx;
+ StringRef Name;
+ bool IsDef;
+
+public:
+ GIMatchDagOperand(unsigned Idx, StringRef Name, bool IsDef)
+ : Idx(Idx), Name(Name), IsDef(IsDef) {}
+
+ unsigned getIdx() const { return Idx; }
+ StringRef getName() const { return Name; }
+ bool isDef() const { return IsDef; }
+
+ /// This object isn't a FoldingSetNode but it's part of one. See FoldingSet
+ /// for details on the Profile function.
+ void Profile(FoldingSetNodeID &ID) const;
+
+  /// A helper that behaves like Profile() but is also usable without the
+  /// object. We use size_t here to match enumerate<...>::index(); if the
+  /// types don't match, the hashes won't be equal.
+ static void Profile(FoldingSetNodeID &ID, size_t Idx, StringRef Name,
+ bool IsDef);
+};
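+
+// Illustrative sketch ("dst" is a hypothetical operand name): identical
+// (Idx, Name, IsDef) triples produce equal FoldingSetNodeIDs, which is what
+// allows operand lists with identical contents to be uniqued:
+//   FoldingSetNodeID A, B;
+//   GIMatchDagOperand::Profile(A, 0, "dst", /*IsDef=*/true);
+//   GIMatchDagOperand::Profile(B, 0, "dst", /*IsDef=*/true);
+//   assert(A == B && "identical contents must profile identically");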
+
+/// A list of GIMatchDagOperands for an instruction without any association with
+/// a particular instruction.
+///
+/// An important detail to be aware of with this class is that instances are
+/// shared between instructions of a similar 'shape'. For example, all the
+/// binary instructions are likely to share a single GIMatchDagOperandList.
+/// This is primarily a memory optimization as it's fairly common to have a
+/// large number of instructions but only a few 'shapes'.
+///
+/// See GIMatchDagOperandList::Profile() for the details on how they are folded.
+class GIMatchDagOperandList : public FoldingSetNode {
+public:
+ using value_type = GIMatchDagOperand;
+
+protected:
+ using vector_type = SmallVector<GIMatchDagOperand, 3>;
+
+public:
+ using iterator = vector_type::iterator;
+ using const_iterator = vector_type::const_iterator;
+
+protected:
+ vector_type Operands;
+ StringMap<unsigned> OperandsByName;
+
+public:
+ void add(StringRef Name, unsigned Idx, bool IsDef);
+
+ /// See FoldingSet for details.
+ void Profile(FoldingSetNodeID &ID) const;
+
+ iterator begin() { return Operands.begin(); }
+ const_iterator begin() const { return Operands.begin(); }
+ iterator end() { return Operands.end(); }
+ const_iterator end() const { return Operands.end(); }
+
+ const value_type &operator[](unsigned I) const { return Operands[I]; }
+ const value_type &operator[](StringRef K) const;
+
+ void print(raw_ostream &OS) const;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(errs()); }
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+};
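+
+// A minimal usage sketch (operand names hypothetical): operands are added in
+// index order and can then be looked up by index or by name.
+//   GIMatchDagOperandList List;
+//   List.add("dst", 0, /*IsDef=*/true);
+//   List.add("src", 1, /*IsDef=*/false);
+//   assert(List["src"].getIdx() == 1 && List[0].isDef());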
+
+/// This is the portion of GIMatchDagContext that directly relates to
+/// GIMatchDagOperand and GIMatchDagOperandList.
+class GIMatchDagOperandListContext {
+ FoldingSet<GIMatchDagOperandList> OperandLists;
+ std::vector<std::unique_ptr<GIMatchDagOperandList>> OperandListsOwner;
+
+public:
+ const GIMatchDagOperandList &makeEmptyOperandList();
+ const GIMatchDagOperandList &makeOperandList(const CodeGenInstruction &I);
+ const GIMatchDagOperandList &makeMIPredicateOperandList();
+ const GIMatchDagOperandList &makeTwoMOPredicateOperandList();
+
+ void print(raw_ostream &OS) const;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(errs()); }
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+};
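+
+// Illustrative sketch (AddInstr/SubInstr are assumed CodeGenInstructions):
+// because the context uniques operand lists, pointer equality identifies
+// instructions that share an operand 'shape'.
+//   GIMatchDagOperandListContext Ctx;
+//   const GIMatchDagOperandList &A = Ctx.makeOperandList(AddInstr);
+//   const GIMatchDagOperandList &B = Ctx.makeOperandList(SubInstr);
+//   bool SameShape = (&A == &B); // true when the operand lists match exactly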
+
+} // end namespace llvm
+#endif // ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGOPERANDS_H
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp
new file mode 100644
index 000000000000..1aca2f9dc135
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp
@@ -0,0 +1,69 @@
+//===- GIMatchDagPredicate.cpp - Represent a predicate to check -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "GIMatchDagPredicate.h"
+
+#include "llvm/TableGen/Record.h"
+
+#include "GIMatchDagOperands.h"
+#include "../CodeGenInstruction.h"
+
+using namespace llvm;
+
+void GIMatchDagPredicate::print(raw_ostream &OS) const {
+ OS << "<<";
+ printDescription(OS);
+ OS << ">>:$" << Name;
+}
+
+void GIMatchDagPredicate::printDescription(raw_ostream &OS) const { OS << ""; }
+
+GIMatchDagOpcodePredicate::GIMatchDagOpcodePredicate(
+ GIMatchDagContext &Ctx, StringRef Name, const CodeGenInstruction &Instr)
+ : GIMatchDagPredicate(GIMatchDagPredicateKind_Opcode, Name,
+ Ctx.makeMIPredicateOperandList()),
+ Instr(Instr) {}
+
+void GIMatchDagOpcodePredicate::printDescription(raw_ostream &OS) const {
+ OS << "$mi.getOpcode() == " << Instr.TheDef->getName();
+}
+
+GIMatchDagOneOfOpcodesPredicate::GIMatchDagOneOfOpcodesPredicate(
+ GIMatchDagContext &Ctx, StringRef Name)
+ : GIMatchDagPredicate(GIMatchDagPredicateKind_OneOfOpcodes, Name,
+ Ctx.makeMIPredicateOperandList()) {}
+
+void GIMatchDagOneOfOpcodesPredicate::printDescription(raw_ostream &OS) const {
+ OS << "$mi.getOpcode() == oneof(";
+ StringRef Separator = "";
+ for (const CodeGenInstruction *Instr : Instrs) {
+ OS << Separator << Instr->TheDef->getName();
+ Separator = ",";
+ }
+ OS << ")";
+}
+
+GIMatchDagSameMOPredicate::GIMatchDagSameMOPredicate(GIMatchDagContext &Ctx,
+ StringRef Name)
+ : GIMatchDagPredicate(GIMatchDagPredicateKind_SameMO, Name,
+ Ctx.makeTwoMOPredicateOperandList()) {}
+
+void GIMatchDagSameMOPredicate::printDescription(raw_ostream &OS) const {
+ OS << "$mi0 == $mi1";
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const GIMatchDagPredicate &N) {
+ N.print(OS);
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS,
+ const GIMatchDagOpcodePredicate &N) {
+ N.print(OS);
+ return OS;
+}
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
new file mode 100644
index 000000000000..9b030d6edb13
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
@@ -0,0 +1,141 @@
+//===- GIMatchDagPredicate - Represent a predicate to check ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATE_H
+#define LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "GIMatchDag.h"
+
+namespace llvm {
+class CodeExpansions;
+class CodeGenInstruction;
+class GIMatchDagOperandList;
+class GIMatchDagContext;
+class raw_ostream;
+
+/// Represents a predicate on the match DAG. This records the details of the
+/// predicate. The dependencies are stored in the GIMatchDag as edges.
+///
+/// Instances of this class are owned by the GIMatchDag and are not shareable
+/// between instances of GIMatchDag.
+class GIMatchDagPredicate {
+public:
+ enum GIMatchDagPredicateKind {
+ GIMatchDagPredicateKind_Opcode,
+ GIMatchDagPredicateKind_OneOfOpcodes,
+ GIMatchDagPredicateKind_SameMO,
+ };
+
+protected:
+ const GIMatchDagPredicateKind Kind;
+
+ /// The name of the predicate. For example:
+ /// (FOO $a:s32, $b, $c)
+ /// will cause 's32' to be assigned to this member for the $a predicate.
+ /// Similarly, the opcode predicate will cause 'FOO' to be assigned to this
+ /// member. Anonymous instructions will have a name assigned for debugging
+ /// purposes.
+ StringRef Name;
+
+ /// The operand list for this predicate. This object may be shared with
+ /// other predicates of a similar 'shape'.
+ const GIMatchDagOperandList &OperandInfo;
+
+public:
+ GIMatchDagPredicate(GIMatchDagPredicateKind Kind, StringRef Name,
+ const GIMatchDagOperandList &OperandInfo)
+ : Kind(Kind), Name(Name), OperandInfo(OperandInfo) {}
+ virtual ~GIMatchDagPredicate() {}
+
+ GIMatchDagPredicateKind getKind() const { return Kind; }
+
+ StringRef getName() const { return Name; }
+ const GIMatchDagOperandList &getOperandInfo() const { return OperandInfo; }
+
+ // Generate C++ code to check this predicate. If a partitioner has already
+ // tested this predicate then this function won't be called. If this function
+ // is called, it must emit code and return true to indicate that it did so. If
+ // it ever returns false, then the caller will abort due to an untested
+ // predicate.
+ virtual bool generateCheckCode(raw_ostream &OS, StringRef Indent,
+ const CodeExpansions &Expansions) const {
+ return false;
+ }
+
+ virtual void print(raw_ostream &OS) const;
+ virtual void printDescription(raw_ostream &OS) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ virtual LLVM_DUMP_METHOD void dump() const { print(errs()); }
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+};
+
+class GIMatchDagOpcodePredicate : public GIMatchDagPredicate {
+ const CodeGenInstruction &Instr;
+
+public:
+ GIMatchDagOpcodePredicate(GIMatchDagContext &Ctx, StringRef Name,
+ const CodeGenInstruction &Instr);
+
+ static bool classof(const GIMatchDagPredicate *P) {
+ return P->getKind() == GIMatchDagPredicateKind_Opcode;
+ }
+
+ const CodeGenInstruction *getInstr() const { return &Instr; }
+
+ void printDescription(raw_ostream &OS) const override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  LLVM_DUMP_METHOD void dump() const override { print(errs()); }
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+};
+
+class GIMatchDagOneOfOpcodesPredicate : public GIMatchDagPredicate {
+ SmallVector<const CodeGenInstruction *, 4> Instrs;
+
+public:
+ GIMatchDagOneOfOpcodesPredicate(GIMatchDagContext &Ctx, StringRef Name);
+
+ void addOpcode(const CodeGenInstruction *Instr) { Instrs.push_back(Instr); }
+
+ static bool classof(const GIMatchDagPredicate *P) {
+ return P->getKind() == GIMatchDagPredicateKind_OneOfOpcodes;
+ }
+
+ const SmallVectorImpl<const CodeGenInstruction *> &getInstrs() const {
+ return Instrs;
+ }
+
+ void printDescription(raw_ostream &OS) const override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  LLVM_DUMP_METHOD void dump() const override { print(errs()); }
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+};
+
+class GIMatchDagSameMOPredicate : public GIMatchDagPredicate {
+public:
+ GIMatchDagSameMOPredicate(GIMatchDagContext &Ctx, StringRef Name);
+
+ static bool classof(const GIMatchDagPredicate *P) {
+ return P->getKind() == GIMatchDagPredicateKind_SameMO;
+ }
+
+ void printDescription(raw_ostream &OS) const override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  LLVM_DUMP_METHOD void dump() const override { print(errs()); }
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+};
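+
+// A construction sketch (Ctx and CGI are assumed to exist elsewhere):
+// predicates pull their shared operand list from the context when built.
+//   GIMatchDagOpcodePredicate OpcodeCheck(Ctx, "FOO", CGI);
+//   OpcodeCheck.print(errs()); // <<$mi.getOpcode() == ...>>:$FOO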
+
+raw_ostream &operator<<(raw_ostream &OS, const GIMatchDagPredicate &N);
+raw_ostream &operator<<(raw_ostream &OS, const GIMatchDagOpcodePredicate &N);
+
+} // end namespace llvm
+#endif // ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATE_H
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp
new file mode 100644
index 000000000000..2e804de1cd4e
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp
@@ -0,0 +1,37 @@
+//===- GIMatchDagPredicateDependencyEdge.cpp - Have inputs before check ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "GIMatchDagPredicateDependencyEdge.h"
+
+#include "GIMatchDagInstr.h"
+#include "GIMatchDagPredicate.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+LLVM_DUMP_METHOD void
+GIMatchDagPredicateDependencyEdge::print(raw_ostream &OS) const {
+ OS << getRequiredMI()->getName();
+ if (getRequiredMO())
+ OS << "[" << getRequiredMO()->getName() << "]";
+ OS << " ==> " << getPredicate()->getName() << "["
+ << getPredicateOp()->getName() << "]";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void GIMatchDagPredicateDependencyEdge::dump() const {
+ print(errs());
+}
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+raw_ostream &llvm::operator<<(raw_ostream &OS,
+ const GIMatchDagPredicateDependencyEdge &E) {
+ E.print(OS);
+ return OS;
+}
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h
new file mode 100644
index 000000000000..865455fe4e4d
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h
@@ -0,0 +1,60 @@
+//===- GIMatchDagPredicateDependencyEdge - Ensure predicates have inputs --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATEEDGE_H
+#define LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATEEDGE_H
+
+#include "GIMatchDagOperands.h"
+
+namespace llvm {
+class GIMatchDag;
+class GIMatchDagInstr;
+class GIMatchDagEdge;
+class GIMatchDagPredicate;
+
+/// Represents a dependency that must be met to evaluate a predicate.
+///
+/// Instances of this class are owned by the GIMatchDag and are not shareable
+/// between instances of GIMatchDag.
+class GIMatchDagPredicateDependencyEdge {
+ /// The MI that must be available in order to test the predicate.
+ const GIMatchDagInstr *RequiredMI;
+ /// The MO that must be available in order to test the predicate. May be
+ /// nullptr when only the MI is required.
+ const GIMatchDagOperand *RequiredMO;
+ /// The Predicate that requires information from RequiredMI/RequiredMO.
+ const GIMatchDagPredicate *Predicate;
+ /// The Predicate operand that requires information from
+ /// RequiredMI/RequiredMO.
+ const GIMatchDagOperand *PredicateOp;
+
+public:
+ GIMatchDagPredicateDependencyEdge(const GIMatchDagInstr *RequiredMI,
+ const GIMatchDagOperand *RequiredMO,
+ const GIMatchDagPredicate *Predicate,
+ const GIMatchDagOperand *PredicateOp)
+ : RequiredMI(RequiredMI), RequiredMO(RequiredMO), Predicate(Predicate),
+ PredicateOp(PredicateOp) {}
+
+ const GIMatchDagInstr *getRequiredMI() const { return RequiredMI; }
+ const GIMatchDagOperand *getRequiredMO() const { return RequiredMO; }
+ const GIMatchDagPredicate *getPredicate() const { return Predicate; }
+ const GIMatchDagOperand *getPredicateOp() const { return PredicateOp; }
+
+ void print(raw_ostream &OS) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const;
+#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+};
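+
+// For example (names hypothetical): a predicate testing $a in (FOO $a, $b)
+// can only run once FOO's operand 0 has been declared, so the edge records
+// RequiredMI = the FOO node, RequiredMO = operand 0, and names the operand of
+// the predicate being satisfied via PredicateOp.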
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const GIMatchDagPredicateDependencyEdge &N);
+
+} // end namespace llvm
+#endif // ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATEEDGE_H
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
new file mode 100644
index 000000000000..4884bdadea91
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
@@ -0,0 +1,777 @@
+//===- GIMatchTree.cpp - A decision tree to match GIMatchDag's ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "GIMatchTree.h"
+
+#include "../CodeGenInstruction.h"
+
+#include "llvm/Support/Format.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+
+#define DEBUG_TYPE "gimatchtree"
+
+using namespace llvm;
+
+void GIMatchTree::writeDOTGraph(raw_ostream &OS) const {
+ OS << "digraph \"matchtree\" {\n";
+ writeDOTGraphNode(OS);
+ OS << "}\n";
+}
+
+void GIMatchTree::writeDOTGraphNode(raw_ostream &OS) const {
+ OS << format(" Node%p", this) << " [shape=record,label=\"{";
+ if (Partitioner) {
+ Partitioner->emitDescription(OS);
+ OS << "|" << Partitioner->getNumPartitions() << " partitions|";
+ } else
+ OS << "No partitioner|";
+ bool IsFullyTraversed = true;
+ bool IsFullyTested = true;
+ StringRef Separator = "";
+ for (const auto &Leaf : PossibleLeaves) {
+ OS << Separator << Leaf.getName();
+ Separator = ",";
+ if (!Leaf.isFullyTraversed())
+ IsFullyTraversed = false;
+ if (!Leaf.isFullyTested())
+ IsFullyTested = false;
+ }
+ if (!Partitioner && !IsFullyTraversed)
+ OS << "|Not fully traversed";
+ if (!Partitioner && !IsFullyTested) {
+ OS << "|Not fully tested";
+ if (IsFullyTraversed) {
+ for (const GIMatchTreeLeafInfo &Leaf : PossibleLeaves) {
+ if (Leaf.isFullyTested())
+ continue;
+ OS << "\\n" << Leaf.getName() << ": " << &Leaf;
+ for (const GIMatchDagPredicate *P : Leaf.untested_predicates())
+ OS << *P;
+ }
+ }
+ }
+ OS << "}\"";
+ if (!Partitioner &&
+ (!IsFullyTraversed || !IsFullyTested || PossibleLeaves.size() > 1))
+ OS << ",color=red";
+ OS << "]\n";
+ for (const auto &C : Children)
+ C.writeDOTGraphNode(OS);
+ writeDOTGraphEdges(OS);
+}
+
+void GIMatchTree::writeDOTGraphEdges(raw_ostream &OS) const {
+ for (const auto &Child : enumerate(Children)) {
+ OS << format(" Node%p", this) << " -> " << format("Node%p", &Child.value())
+ << " [label=\"#" << Child.index() << " ";
+ Partitioner->emitPartitionName(OS, Child.index());
+ OS << "\"]\n";
+ }
+}
+
+GIMatchTreeBuilderLeafInfo::GIMatchTreeBuilderLeafInfo(
+ GIMatchTreeBuilder &Builder, StringRef Name, unsigned RootIdx,
+ const GIMatchDag &MatchDag, void *Data)
+ : Builder(Builder), Info(Name, RootIdx, Data), MatchDag(MatchDag),
+ InstrNodeToInfo(),
+ RemainingInstrNodes(BitVector(MatchDag.getNumInstrNodes(), true)),
+ RemainingEdges(BitVector(MatchDag.getNumEdges(), true)),
+ RemainingPredicates(BitVector(MatchDag.getNumPredicates(), true)),
+ TraversableEdges(MatchDag.getNumEdges()),
+ TestablePredicates(MatchDag.getNumPredicates()) {
+ // Number all the predicates in this DAG
+ for (auto &P : enumerate(MatchDag.predicates())) {
+ PredicateIDs.insert(std::make_pair(P.value(), P.index()));
+ }
+
+ // Number all the predicate dependencies in this DAG and set up a bitvector
+ // for each predicate indicating the unsatisfied dependencies.
+ for (auto &Dep : enumerate(MatchDag.predicate_edges())) {
+ PredicateDepIDs.insert(std::make_pair(Dep.value(), Dep.index()));
+ }
+ UnsatisfiedPredDepsForPred.resize(MatchDag.getNumPredicates(),
+ BitVector(PredicateDepIDs.size()));
+ for (auto &Dep : enumerate(MatchDag.predicate_edges())) {
+ unsigned ID = PredicateIDs.lookup(Dep.value()->getPredicate());
+ UnsatisfiedPredDepsForPred[ID].set(Dep.index());
+ }
+}
+
+void GIMatchTreeBuilderLeafInfo::declareInstr(const GIMatchDagInstr *Instr,
+                                              unsigned ID) {
+ // Record the assignment of this instr to the given ID.
+ auto InfoI = InstrNodeToInfo.insert(std::make_pair(
+ Instr, GIMatchTreeInstrInfo(ID, Instr)));
+ InstrIDToInfo.insert(std::make_pair(ID, &InfoI.first->second));
+
+ if (Instr == nullptr)
+ return;
+
+ if (!Instr->getUserAssignedName().empty())
+ Info.bindInstrVariable(Instr->getUserAssignedName(), ID);
+ for (const auto &VarBinding : Instr->user_assigned_operand_names())
+ Info.bindOperandVariable(VarBinding.second, ID, VarBinding.first);
+
+ // Clear the bit indicating we haven't visited this instr.
+ const auto &NodeI = std::find(MatchDag.instr_nodes_begin(),
+ MatchDag.instr_nodes_end(), Instr);
+ assert(NodeI != MatchDag.instr_nodes_end() && "Instr isn't in this DAG");
+ unsigned InstrIdx = MatchDag.getInstrNodeIdx(NodeI);
+ RemainingInstrNodes.reset(InstrIdx);
+
+ // When we declare an instruction, we don't expose any traversable edges just
+ // yet. A partitioner has to check they exist and are registers before they
+ // are traversable.
+
+ // When we declare an instruction, we potentially activate some predicates.
+ // Mark the dependencies that are now satisfied as a result of this
+ // instruction and mark any predicates whose dependencies are fully
+ // satisfied.
+ for (auto &Dep : enumerate(MatchDag.predicate_edges())) {
+ if (Dep.value()->getRequiredMI() == Instr &&
+ Dep.value()->getRequiredMO() == nullptr) {
+ for (auto &DepsFor : enumerate(UnsatisfiedPredDepsForPred)) {
+ DepsFor.value().reset(Dep.index());
+ if (DepsFor.value().none())
+ TestablePredicates.set(DepsFor.index());
+ }
+ }
+ }
+}
+
+void GIMatchTreeBuilderLeafInfo::declareOperand(unsigned InstrID,
+ unsigned OpIdx) {
+ const GIMatchDagInstr *Instr = InstrIDToInfo.lookup(InstrID)->getInstrNode();
+
+ OperandIDToInfo.insert(std::make_pair(
+ std::make_pair(InstrID, OpIdx),
+ GIMatchTreeOperandInfo(Instr, OpIdx)));
+
+ // When an operand becomes reachable, we potentially activate some traversals.
+ // Record the edges that can now be followed as a result of this
+ // instruction.
+ for (auto &E : enumerate(MatchDag.edges())) {
+ if (E.value()->getFromMI() == Instr &&
+ E.value()->getFromMO()->getIdx() == OpIdx) {
+ TraversableEdges.set(E.index());
+ }
+ }
+
+ // When an operand becomes reachable, we potentially activate some predicates.
+ // Clear the dependencies that are now satisfied as a result of this
+ // operand and activate any predicates whose dependencies are fully
+ // satisfied.
+ for (auto &Dep : enumerate(MatchDag.predicate_edges())) {
+ if (Dep.value()->getRequiredMI() == Instr && Dep.value()->getRequiredMO() &&
+ Dep.value()->getRequiredMO()->getIdx() == OpIdx) {
+ for (auto &DepsFor : enumerate(UnsatisfiedPredDepsForPred)) {
+ DepsFor.value().reset(Dep.index());
+ if (DepsFor.value().none())
+ TestablePredicates.set(DepsFor.index());
+ }
+ }
+ }
+}
+
+void GIMatchTreeBuilder::addPartitionersForInstr(unsigned InstrIdx) {
+ // Find the partitioners that can be used now that this node is
+ // uncovered. Our choices are:
+ // - Test the opcode
+ addPartitioner(std::make_unique<GIMatchTreeOpcodePartitioner>(InstrIdx));
+}
+
+void GIMatchTreeBuilder::addPartitionersForOperand(unsigned InstrID,
+ unsigned OpIdx) {
+ LLVM_DEBUG(dbgs() << "Add partitioners for Instrs[" << InstrID
+ << "].getOperand(" << OpIdx << ")\n");
+ addPartitioner(
+ std::make_unique<GIMatchTreeVRegDefPartitioner>(InstrID, OpIdx));
+}
+
+void GIMatchTreeBuilder::filterRedundantPartitioners() {
+ // TODO: Filter partitioners for facts that are already known
+ // - If we know the opcode, we can elide the num operand check so long as
+ // the instruction has a fixed number of operands.
+ // - If we know an exact number of operands then we can elide further number
+ // of operand checks.
+ // - If the current min number of operands exceeds the one we want to check
+ // then we can elide it.
+}
+
+void GIMatchTreeBuilder::evaluatePartitioners() {
+ // Determine the partitioning the partitioner would produce
+ for (auto &Partitioner : Partitioners) {
+ LLVM_DEBUG(dbgs() << " Weighing up ";
+ Partitioner->emitDescription(dbgs()); dbgs() << "\n");
+ Partitioner->repartition(Leaves);
+ LLVM_DEBUG(Partitioner->emitPartitionResults(dbgs()));
+ }
+}
+
+void GIMatchTreeBuilder::runStep() {
+ LLVM_DEBUG(dbgs() << "Building match tree node for " << TreeNode << "\n");
+ LLVM_DEBUG(dbgs() << " Rules reachable at this node:\n");
+ for (const auto &Leaf : Leaves) {
+    LLVM_DEBUG(dbgs() << "    " << Leaf.getName() << " (" << &Leaf.getInfo()
+                      << ")\n");
+ TreeNode->addPossibleLeaf(Leaf.getInfo(), Leaf.isFullyTraversed(),
+ Leaf.isFullyTested());
+ }
+
+ LLVM_DEBUG(dbgs() << " Partitioners available at this node:\n");
+#ifndef NDEBUG
+ for (const auto &Partitioner : Partitioners)
+ LLVM_DEBUG(dbgs() << " "; Partitioner->emitDescription(dbgs());
+ dbgs() << "\n");
+#endif // ifndef NDEBUG
+
+  // Check for unreachable rules. Rules are unreachable if they are preceded
+  // by a fully tested rule.
+  // Note: This is only true for the current algorithm; if we allow the
+  //       algorithm to compare equally valid rules then they will become
+  //       reachable.
+ {
+ auto FullyTestedLeafI = Leaves.end();
+ for (auto LeafI = Leaves.begin(), LeafE = Leaves.end();
+ LeafI != LeafE; ++LeafI) {
+ if (LeafI->isFullyTraversed() && LeafI->isFullyTested())
+ FullyTestedLeafI = LeafI;
+ else if (FullyTestedLeafI != Leaves.end()) {
+ PrintError("Leaf " + LeafI->getName() + " is unreachable");
+ PrintNote("Leaf " + FullyTestedLeafI->getName() +
+ " will have already matched");
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " Eliminating redundant partitioners:\n");
+ filterRedundantPartitioners();
+ LLVM_DEBUG(dbgs() << " Partitioners remaining:\n");
+#ifndef NDEBUG
+ for (const auto &Partitioner : Partitioners)
+ LLVM_DEBUG(dbgs() << " "; Partitioner->emitDescription(dbgs());
+ dbgs() << "\n");
+#endif // ifndef NDEBUG
+
+ if (Partitioners.empty()) {
+ // Nothing left to do but check we really did identify a single rule.
+ if (Leaves.size() > 1) {
+ LLVM_DEBUG(dbgs() << "Leaf contains multiple rules, drop after the first "
+ "fully tested rule\n");
+ auto FirstFullyTested =
+ std::find_if(Leaves.begin(), Leaves.end(),
+ [](const GIMatchTreeBuilderLeafInfo &X) {
+ return X.isFullyTraversed() && X.isFullyTested() &&
+ !X.getMatchDag().hasPostMatchPredicate();
+ });
+ if (FirstFullyTested != Leaves.end())
+ FirstFullyTested++;
+
+#ifndef NDEBUG
+ for (auto &Leaf : make_range(Leaves.begin(), FirstFullyTested))
+ LLVM_DEBUG(dbgs() << " Kept " << Leaf.getName() << "\n");
+ for (const auto &Leaf : make_range(FirstFullyTested, Leaves.end()))
+ LLVM_DEBUG(dbgs() << " Dropped " << Leaf.getName() << "\n");
+#endif // ifndef NDEBUG
+ TreeNode->dropLeavesAfter(
+ std::distance(Leaves.begin(), FirstFullyTested));
+ }
+ for (const auto &Leaf : Leaves) {
+ if (!Leaf.isFullyTraversed()) {
+ PrintError("Leaf " + Leaf.getName() + " is not fully traversed");
+ PrintNote("This indicates a missing partitioner within tblgen");
+ Leaf.dump(errs());
+ for (unsigned InstrIdx : Leaf.untested_instrs())
+ PrintNote("Instr " + llvm::to_string(*Leaf.getInstr(InstrIdx)));
+ for (unsigned EdgeIdx : Leaf.untested_edges())
+ PrintNote("Edge " + llvm::to_string(*Leaf.getEdge(EdgeIdx)));
+ }
+ }
+
+ // Copy out information about untested predicates so the user of the tree
+ // can deal with them.
+ for (auto LeafPair : zip(Leaves, TreeNode->possible_leaves())) {
+ const GIMatchTreeBuilderLeafInfo &BuilderLeaf = std::get<0>(LeafPair);
+ GIMatchTreeLeafInfo &TreeLeaf = std::get<1>(LeafPair);
+ if (!BuilderLeaf.isFullyTested())
+ for (unsigned PredicateIdx : BuilderLeaf.untested_predicates())
+ TreeLeaf.addUntestedPredicate(BuilderLeaf.getPredicate(PredicateIdx));
+ }
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << " Weighing up partitioners:\n");
+ evaluatePartitioners();
+
+ // Select the best partitioner by its ability to partition
+ // - Prefer partitioners that don't distinguish between partitions. This
+ // is to fail early on decisions that must go a single way.
+ auto PartitionerI = std::max_element(
+ Partitioners.begin(), Partitioners.end(),
+ [](const std::unique_ptr<GIMatchTreePartitioner> &A,
+ const std::unique_ptr<GIMatchTreePartitioner> &B) {
+ // We generally want partitioners that subdivide the
+ // ruleset as much as possible since these take fewer
+ // checks to converge on a particular rule. However,
+ // it's important to note that one leaf can end up in
+ // multiple partitions if the check isn't mutually
+ // exclusive (e.g. getVRegDef() vs isReg()).
+ // We therefore minimize average leaves per partition.
+ return (double)A->getNumLeavesWithDupes() / A->getNumPartitions() >
+ (double)B->getNumLeavesWithDupes() / B->getNumPartitions();
+ });
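+  // Worked example (illustrative numbers): 6 leaf assignments spread over 3
+  // partitions averages 2.0 leaves/partition and loses to 4 assignments over
+  // 4 partitions (average 1.0), so the latter partitioner is selected.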
+
+ // Select a partitioner and partition the ruleset
+ // Note that it's possible for a single rule to end up in multiple
+ // partitions. For example, an opcode test on a rule without an opcode
+ // predicate will result in it being passed to all partitions.
+ std::unique_ptr<GIMatchTreePartitioner> Partitioner = std::move(*PartitionerI);
+ Partitioners.erase(PartitionerI);
+ LLVM_DEBUG(dbgs() << " Selected partitioner: ";
+ Partitioner->emitDescription(dbgs()); dbgs() << "\n");
+
+ assert(Partitioner->getNumPartitions() > 0 &&
+ "Must always partition into at least one partition");
+
+ TreeNode->setNumChildren(Partitioner->getNumPartitions());
+ for (auto &C : enumerate(TreeNode->children())) {
+ SubtreeBuilders.emplace_back(&C.value(), NextInstrID);
+ Partitioner->applyForPartition(C.index(), *this, SubtreeBuilders.back());
+ }
+
+ TreeNode->setPartitioner(std::move(Partitioner));
+
+ // Recurse into the subtree builders. Each one must get a copy of the
+ // remaining partitioners as each path has to check everything.
+ for (auto &SubtreeBuilder : SubtreeBuilders) {
+ for (const auto &Partitioner : Partitioners)
+ SubtreeBuilder.addPartitioner(Partitioner->clone());
+ SubtreeBuilder.runStep();
+ }
+}
+
+std::unique_ptr<GIMatchTree> GIMatchTreeBuilder::run() {
+ unsigned NewInstrID = allocInstrID();
+ // Start by recording the root instruction as instr #0 and set up the initial
+ // partitioners.
+ for (auto &Leaf : Leaves) {
+ LLVM_DEBUG(Leaf.getMatchDag().writeDOTGraph(dbgs(), Leaf.getName()));
+ GIMatchDagInstr *Root =
+ *(Leaf.getMatchDag().roots().begin() + Leaf.getRootIdx());
+ Leaf.declareInstr(Root, NewInstrID);
+ }
+
+ addPartitionersForInstr(NewInstrID);
+
+ std::unique_ptr<GIMatchTree> TreeRoot = std::make_unique<GIMatchTree>();
+ TreeNode = TreeRoot.get();
+ runStep();
+
+ return TreeRoot;
+}
+
+void GIMatchTreeOpcodePartitioner::emitPartitionName(raw_ostream &OS,
+                                                     unsigned Idx) const {
+ if (PartitionToInstr[Idx] == nullptr) {
+ OS << "* or nullptr";
+ return;
+ }
+ OS << PartitionToInstr[Idx]->Namespace
+ << "::" << PartitionToInstr[Idx]->TheDef->getName();
+}
+
+void GIMatchTreeOpcodePartitioner::repartition(
+ GIMatchTreeBuilder::LeafVec &Leaves) {
+ Partitions.clear();
+ InstrToPartition.clear();
+ PartitionToInstr.clear();
+ TestedPredicates.clear();
+
+ for (const auto &Leaf : enumerate(Leaves)) {
+ bool AllOpcodes = true;
+ GIMatchTreeInstrInfo *InstrInfo = Leaf.value().getInstrInfo(InstrID);
+ BitVector TestedPredicatesForLeaf(
+ Leaf.value().getMatchDag().getNumPredicates());
+
+ // If the instruction isn't declared then we don't care about it. Ignore
+ // it for now and add it to all partitions later once we know what
+ // partitions we have.
+ if (!InstrInfo) {
+ LLVM_DEBUG(dbgs() << " " << Leaf.value().getName()
+ << " doesn't care about Instr[" << InstrID << "]\n");
+      assert(TestedPredicatesForLeaf.size() ==
+             Leaf.value().getMatchDag().getNumPredicates());
+ TestedPredicates.push_back(TestedPredicatesForLeaf);
+ continue;
+ }
+
+ // If the opcode is available to test then any opcode predicates will have
+ // been enabled too.
+ for (unsigned PIdx : Leaf.value().TestablePredicates.set_bits()) {
+ const auto &P = Leaf.value().getPredicate(PIdx);
+ SmallVector<const CodeGenInstruction *, 1> OpcodesForThisPredicate;
+ if (const auto *OpcodeP = dyn_cast<const GIMatchDagOpcodePredicate>(P)) {
+ // We've found _an_ opcode predicate, but we don't know if it's
+ // checking this instruction yet.
+ bool IsThisPredicate = false;
+ for (const auto &PDep : Leaf.value().getMatchDag().predicate_edges()) {
+ if (PDep->getRequiredMI() == InstrInfo->getInstrNode() &&
+ PDep->getRequiredMO() == nullptr && PDep->getPredicate() == P) {
+ IsThisPredicate = true;
+ break;
+ }
+ }
+ if (!IsThisPredicate)
+ continue;
+
+ // If we get here twice then we've somehow ended up with two opcode
+ // predicates for one instruction in the same DAG. That should be
+ // impossible.
+ assert(AllOpcodes && "Conflicting opcode predicates");
+ const CodeGenInstruction *Expected = OpcodeP->getInstr();
+ OpcodesForThisPredicate.push_back(Expected);
+ }
+
+ if (const auto *OpcodeP =
+ dyn_cast<const GIMatchDagOneOfOpcodesPredicate>(P)) {
+ // We've found _an_ oneof(opcodes) predicate, but we don't know if it's
+ // checking this instruction yet.
+ bool IsThisPredicate = false;
+ for (const auto &PDep : Leaf.value().getMatchDag().predicate_edges()) {
+ if (PDep->getRequiredMI() == InstrInfo->getInstrNode() &&
+ PDep->getRequiredMO() == nullptr && PDep->getPredicate() == P) {
+ IsThisPredicate = true;
+ break;
+ }
+ }
+ if (!IsThisPredicate)
+ continue;
+
+ // If we get here twice then we've somehow ended up with two opcode
+ // predicates for one instruction in the same DAG. That should be
+ // impossible.
+ assert(AllOpcodes && "Conflicting opcode predicates");
+ for (const CodeGenInstruction *Expected : OpcodeP->getInstrs())
+ OpcodesForThisPredicate.push_back(Expected);
+ }
+
+ for (const CodeGenInstruction *Expected : OpcodesForThisPredicate) {
+ // Mark this predicate as one we're testing.
+ TestedPredicatesForLeaf.set(PIdx);
+
+ // Partitions must be numbered 0, 1, .., N but instructions don't meet
+ // that requirement. Assign a partition number to each opcode if we
+ // lack one ...
+ auto Partition = InstrToPartition.find(Expected);
+ if (Partition == InstrToPartition.end()) {
+ BitVector Contents(Leaves.size());
+ Partition = InstrToPartition
+ .insert(std::make_pair(Expected, Partitions.size()))
+ .first;
+ PartitionToInstr.push_back(Expected);
+ Partitions.insert(std::make_pair(Partitions.size(), Contents));
+ }
+ // ... and mark this leaf as being in that partition.
+ Partitions.find(Partition->second)->second.set(Leaf.index());
+ AllOpcodes = false;
+ LLVM_DEBUG(dbgs() << " " << Leaf.value().getName()
+ << " is in partition " << Partition->second << "\n");
+ }
+
+      // TODO: This is where we would handle multiple choices of opcode. The
+      //       end result will be that this leaf ends up in multiple
+      //       partitions, similarly to AllOpcodes.
+ }
+
+ // If we never check the opcode, add it to every partition.
+ if (AllOpcodes) {
+ // Add a partition for the default case if we don't already have one.
+ if (InstrToPartition.insert(std::make_pair(nullptr, 0)).second) {
+ PartitionToInstr.push_back(nullptr);
+ BitVector Contents(Leaves.size());
+ Partitions.insert(std::make_pair(Partitions.size(), Contents));
+ }
+ LLVM_DEBUG(dbgs() << " " << Leaf.value().getName()
+ << " is in all partitions (opcode not checked)\n");
+ for (auto &Partition : Partitions)
+ Partition.second.set(Leaf.index());
+ }
+
+    assert(TestedPredicatesForLeaf.size() ==
+           Leaf.value().getMatchDag().getNumPredicates());
+ TestedPredicates.push_back(TestedPredicatesForLeaf);
+ }
+
+  if (Partitions.empty()) {
+ // Add a partition for the default case if we don't already have one.
+ if (InstrToPartition.insert(std::make_pair(nullptr, 0)).second) {
+ PartitionToInstr.push_back(nullptr);
+ BitVector Contents(Leaves.size());
+ Partitions.insert(std::make_pair(Partitions.size(), Contents));
+ }
+ }
+
+ // Add any leaves that don't care about this instruction to all partitions.
+ for (const auto &Leaf : enumerate(Leaves)) {
+ GIMatchTreeInstrInfo *InstrInfo = Leaf.value().getInstrInfo(InstrID);
+ if (!InstrInfo) {
+ // Add a partition for the default case if we don't already have one.
+ if (InstrToPartition.insert(std::make_pair(nullptr, 0)).second) {
+ PartitionToInstr.push_back(nullptr);
+ BitVector Contents(Leaves.size());
+ Partitions.insert(std::make_pair(Partitions.size(), Contents));
+ }
+ for (auto &Partition : Partitions)
+ Partition.second.set(Leaf.index());
+ }
+ }
+}
+
+void GIMatchTreeOpcodePartitioner::applyForPartition(
+    unsigned PartitionIdx, GIMatchTreeBuilder &Builder,
+    GIMatchTreeBuilder &SubBuilder) {
+ LLVM_DEBUG(dbgs() << " Making partition " << PartitionIdx << "\n");
+ const CodeGenInstruction *CGI = PartitionToInstr[PartitionIdx];
+
+ BitVector PossibleLeaves = getPossibleLeavesForPartition(PartitionIdx);
+ // Consume any predicates we handled.
+ for (auto &EnumeratedLeaf : enumerate(Builder.getPossibleLeaves())) {
+ if (!PossibleLeaves[EnumeratedLeaf.index()])
+ continue;
+
+ auto &Leaf = EnumeratedLeaf.value();
+ const auto &TestedPredicatesForLeaf =
+ TestedPredicates[EnumeratedLeaf.index()];
+
+ for (unsigned PredIdx : TestedPredicatesForLeaf.set_bits()) {
+ LLVM_DEBUG(dbgs() << " " << Leaf.getName() << " tested predicate #"
+ << PredIdx << " of " << TestedPredicatesForLeaf.size()
+ << " " << *Leaf.getPredicate(PredIdx) << "\n");
+ Leaf.RemainingPredicates.reset(PredIdx);
+ Leaf.TestablePredicates.reset(PredIdx);
+ }
+ SubBuilder.addLeaf(Leaf);
+ }
+
+ // Nothing to do, we don't know anything about this instruction as a result
+ // of this partitioner.
+ if (CGI == nullptr)
+ return;
+
+ GIMatchTreeBuilder::LeafVec &NewLeaves = SubBuilder.getPossibleLeaves();
+  // Find all the operands that we know to exist and that are referenced. This
+  // will usually be all the referenced operands, but there are some cases
+  // where instructions are variadic. Such operands must be handled by
+  // partitioners that check the number of operands.
+ BitVector ReferencedOperands(1);
+ for (auto &Leaf : NewLeaves) {
+ GIMatchTreeInstrInfo *InstrInfo = Leaf.getInstrInfo(InstrID);
+ // Skip any leaves that don't care about this instruction.
+ if (!InstrInfo)
+ continue;
+ const GIMatchDagInstr *Instr = InstrInfo->getInstrNode();
+ for (auto &E : enumerate(Leaf.getMatchDag().edges())) {
+ if (E.value()->getFromMI() == Instr &&
+ E.value()->getFromMO()->getIdx() < CGI->Operands.size()) {
+ ReferencedOperands.resize(E.value()->getFromMO()->getIdx() + 1);
+ ReferencedOperands.set(E.value()->getFromMO()->getIdx());
+ }
+ }
+ }
+ for (auto &Leaf : NewLeaves) {
+ for (unsigned OpIdx : ReferencedOperands.set_bits()) {
+ Leaf.declareOperand(InstrID, OpIdx);
+ }
+ }
+ for (unsigned OpIdx : ReferencedOperands.set_bits()) {
+ SubBuilder.addPartitionersForOperand(InstrID, OpIdx);
+ }
+}
+
+void GIMatchTreeOpcodePartitioner::emitPartitionResults(
+ raw_ostream &OS) const {
+ OS << "Partitioning by opcode would produce " << Partitions.size()
+ << " partitions\n";
+ for (const auto &Partition : InstrToPartition) {
+ if (Partition.first == nullptr)
+ OS << "Default: ";
+ else
+ OS << Partition.first->TheDef->getName() << ": ";
+ StringRef Separator = "";
+ for (unsigned I : Partitions.find(Partition.second)->second.set_bits()) {
+ OS << Separator << I;
+ Separator = ", ";
+ }
+ OS << "\n";
+ }
+}
+
+void GIMatchTreeOpcodePartitioner::generatePartitionSelectorCode(
+ raw_ostream &OS, StringRef Indent) const {
+ OS << Indent << "Partition = -1;\n"
+ << Indent << "switch (MIs[" << InstrID << "]->getOpcode()) {\n";
+ for (const auto &EnumInstr : enumerate(PartitionToInstr)) {
+ if (EnumInstr.value() == nullptr)
+ OS << Indent << "default:";
+ else
+ OS << Indent << "case " << EnumInstr.value()->Namespace
+ << "::" << EnumInstr.value()->TheDef->getName() << ":";
+ OS << " Partition = " << EnumInstr.index() << "; break;\n";
+ }
+ OS << Indent << "}\n"
+ << Indent
+ << "// Default case but without conflicting with potential default case "
+ "in selection.\n"
+ << Indent << "if (Partition == -1) return false;\n";
+}
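+
+// The emitted selector looks roughly like this (IDs and names illustrative):
+//   Partition = -1;
+//   switch (MIs[0]->getOpcode()) {
+//   default: Partition = 0; break;
+//   case MyTarget::FOO: Partition = 1; break;
+//   }
+//   if (Partition == -1) return false;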
+
+void GIMatchTreeVRegDefPartitioner::addToPartition(bool Result,
+ unsigned LeafIdx) {
+ auto I = ResultToPartition.find(Result);
+ if (I == ResultToPartition.end()) {
+ ResultToPartition.insert(std::make_pair(Result, PartitionToResult.size()));
+ PartitionToResult.push_back(Result);
+ }
+ I = ResultToPartition.find(Result);
+ auto P = Partitions.find(I->second);
+ if (P == Partitions.end())
+ P = Partitions.insert(std::make_pair(I->second, BitVector())).first;
+ P->second.resize(LeafIdx + 1);
+ P->second.set(LeafIdx);
+}
+
+void GIMatchTreeVRegDefPartitioner::repartition(
+ GIMatchTreeBuilder::LeafVec &Leaves) {
+ Partitions.clear();
+
+ for (const auto &Leaf : enumerate(Leaves)) {
+ GIMatchTreeInstrInfo *InstrInfo = Leaf.value().getInstrInfo(InstrID);
+ BitVector TraversedEdgesForLeaf(Leaf.value().getMatchDag().getNumEdges());
+
+ // If the instruction isn't declared then we don't care about it. Ignore
+ // it for now and add it to all partitions later once we know what
+ // partitions we have.
+ if (!InstrInfo) {
+ TraversedEdges.push_back(TraversedEdgesForLeaf);
+ continue;
+ }
+
+    // If this node has a use -> def edge from this operand then this
+ // instruction must be in partition 1 (isVRegDef()).
+ bool WantsEdge = false;
+ for (unsigned EIdx : Leaf.value().TraversableEdges.set_bits()) {
+ const auto &E = Leaf.value().getEdge(EIdx);
+ if (E->getFromMI() != InstrInfo->getInstrNode() ||
+ E->getFromMO()->getIdx() != OpIdx || E->isDefToUse())
+ continue;
+
+ // We're looking at the right edge. This leaf wants a vreg def so we'll
+ // put it in partition 1.
+ addToPartition(true, Leaf.index());
+ TraversedEdgesForLeaf.set(EIdx);
+ WantsEdge = true;
+ }
+
+ bool isNotReg = false;
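+    // isNotReg is currently always false; the branch below is a placeholder
+    // for a future "operand is known not to be a register" fact, so leaves
+    // without a matching edge currently land in both partitions.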
+ if (!WantsEdge && isNotReg) {
+ // If this leaf doesn't have an edge and we _don't_ want a register,
+ // then add it to partition 0.
+ addToPartition(false, Leaf.index());
+ } else if (!WantsEdge) {
+ // If this leaf doesn't have an edge and we don't know what we want,
+ // then add it to partition 0 and 1.
+ addToPartition(false, Leaf.index());
+ addToPartition(true, Leaf.index());
+ }
+
+ TraversedEdges.push_back(TraversedEdgesForLeaf);
+ }
+
+ // Add any leaves that don't care about this instruction to all partitions.
+ for (const auto &Leaf : enumerate(Leaves)) {
+ GIMatchTreeInstrInfo *InstrInfo = Leaf.value().getInstrInfo(InstrID);
+ if (!InstrInfo)
+ for (auto &Partition : Partitions)
+ Partition.second.set(Leaf.index());
+ }
+}
+
+void GIMatchTreeVRegDefPartitioner::applyForPartition(
+ unsigned PartitionIdx, GIMatchTreeBuilder &Builder,
+ GIMatchTreeBuilder &SubBuilder) {
+ BitVector PossibleLeaves = getPossibleLeavesForPartition(PartitionIdx);
+
+ std::vector<BitVector> TraversedEdgesByNewLeaves;
+ // Consume any edges we handled.
+ for (auto &EnumeratedLeaf : enumerate(Builder.getPossibleLeaves())) {
+ if (!PossibleLeaves[EnumeratedLeaf.index()])
+ continue;
+
+ auto &Leaf = EnumeratedLeaf.value();
+ const auto &TraversedEdgesForLeaf = TraversedEdges[EnumeratedLeaf.index()];
+ TraversedEdgesByNewLeaves.push_back(TraversedEdgesForLeaf);
+ Leaf.RemainingEdges.reset(TraversedEdgesForLeaf);
+ Leaf.TraversableEdges.reset(TraversedEdgesForLeaf);
+ SubBuilder.addLeaf(Leaf);
+ }
+
+ // Nothing to do. The only thing we know is that it isn't a vreg-def.
+  if (!PartitionToResult[PartitionIdx])
+ return;
+
+ NewInstrID = SubBuilder.allocInstrID();
+
+ GIMatchTreeBuilder::LeafVec &NewLeaves = SubBuilder.getPossibleLeaves();
+ for (const auto I : zip(NewLeaves, TraversedEdgesByNewLeaves)) {
+ auto &Leaf = std::get<0>(I);
+ auto &TraversedEdgesForLeaf = std::get<1>(I);
+ GIMatchTreeInstrInfo *InstrInfo = Leaf.getInstrInfo(InstrID);
+ // Skip any leaves that don't care about this instruction.
+ if (!InstrInfo)
+ continue;
+ for (unsigned EIdx : TraversedEdgesForLeaf.set_bits()) {
+ const GIMatchDagEdge *E = Leaf.getEdge(EIdx);
+ Leaf.declareInstr(E->getToMI(), NewInstrID);
+ }
+ }
+ SubBuilder.addPartitionersForInstr(NewInstrID);
+}
+
+void GIMatchTreeVRegDefPartitioner::emitPartitionResults(
+ raw_ostream &OS) const {
+ OS << "Partitioning by vreg-def would produce " << Partitions.size()
+ << " partitions\n";
+ for (const auto &Partition : Partitions) {
+ OS << Partition.first << " (";
+ emitPartitionName(OS, Partition.first);
+ OS << "): ";
+ StringRef Separator = "";
+ for (unsigned I : Partition.second.set_bits()) {
+ OS << Separator << I;
+ Separator = ", ";
+ }
+ OS << "\n";
+ }
+}
+
+void GIMatchTreeVRegDefPartitioner::generatePartitionSelectorCode(
+ raw_ostream &OS, StringRef Indent) const {
+ OS << Indent << "Partition = -1\n"
+ << Indent << "if (MIs.size() <= NewInstrID) MIs.resize(NewInstrID + 1);\n"
+ << Indent << "MIs[" << NewInstrID << "] = nullptr;\n"
+ << Indent << "if (MIs[" << InstrID << "].getOperand(" << OpIdx
+ << ").isReg()))\n"
+ << Indent << " MIs[" << NewInstrID << "] = MRI.getVRegDef(MIs[" << InstrID
+ << "].getOperand(" << OpIdx << ").getReg()));\n";
+
+ for (const auto &Pair : ResultToPartition)
+ OS << Indent << "if (MIs[" << NewInstrID << "] "
+ << (Pair.first ? "==" : "!=")
+ << " nullptr) Partition = " << Pair.second << ";\n";
+
+ OS << Indent << "if (Partition == -1) return false;\n";
+}
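+
+// The emitted code looks roughly like this (IDs and partition numbers
+// illustrative):
+//   Partition = -1;
+//   if (MIs.size() <= 1) MIs.resize(2);
+//   MIs[1] = nullptr;
+//   if (MIs[0]->getOperand(1).isReg())
+//     MIs[1] = MRI.getVRegDef(MIs[0]->getOperand(1).getReg());
+//   if (MIs[1] != nullptr) Partition = 0;
+//   if (MIs[1] == nullptr) Partition = 1;
+//   if (Partition == -1) return false;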
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.h b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.h
new file mode 100644
index 000000000000..b86f6454589c
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISel/GIMatchTree.h
@@ -0,0 +1,629 @@
+//===- GIMatchTree.h - A decision tree to match GIMatchDag's --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_GIMATCHTREE_H
+#define LLVM_UTILS_TABLEGEN_GIMATCHTREE_H
+
+#include "GIMatchDag.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+class raw_ostream;
+
+class GIMatchTreeBuilder;
+class GIMatchTreePartitioner;
+
+/// Describes the binding of a variable to the matched MIR
+class GIMatchTreeVariableBinding {
+ /// The name of the variable described by this binding.
+ StringRef Name;
+ // The matched instruction it is bound to.
+ unsigned InstrID;
+ // The matched operand (if appropriate) it is bound to.
+ Optional<unsigned> OpIdx;
+
+public:
+ GIMatchTreeVariableBinding(StringRef Name, unsigned InstrID,
+ Optional<unsigned> OpIdx = None)
+ : Name(Name), InstrID(InstrID), OpIdx(OpIdx) {}
+
+ bool isInstr() const { return !OpIdx.hasValue(); }
+ StringRef getName() const { return Name; }
+ unsigned getInstrID() const { return InstrID; }
+ unsigned getOpIdx() const {
+ assert(OpIdx.hasValue() && "Is not an operand binding");
+ return *OpIdx;
+ }
+};
+
+/// Associates a matchable with a leaf of the decision tree.
+class GIMatchTreeLeafInfo {
+public:
+ using const_var_binding_iterator =
+ std::vector<GIMatchTreeVariableBinding>::const_iterator;
+ using UntestedPredicatesTy = SmallVector<const GIMatchDagPredicate *, 1>;
+  using const_untested_predicates_iterator =
+      UntestedPredicatesTy::const_iterator;
+
+protected:
+ /// A name for the matchable. This is primarily for debugging.
+ StringRef Name;
+ /// Where rules have multiple roots, this is which root we're starting from.
+ unsigned RootIdx;
+ /// Opaque data the caller of the tree building code understands.
+ void *Data;
+  /// Has the decision tree covered every edge traversal? If it hasn't, then
+  /// this is an unrecoverable error indicating there's something wrong with
+  /// the partitioners.
+ bool IsFullyTraversed;
+ /// Has the decision tree covered every predicate test? If it has, then
+ /// subsequent matchables on the same leaf are unreachable. If it hasn't, the
+ /// code that requested the GIMatchTree is responsible for finishing off any
+ /// remaining predicates.
+ bool IsFullyTested;
+ /// The variable bindings associated with this leaf so far.
+ std::vector<GIMatchTreeVariableBinding> VarBindings;
+ /// Any predicates left untested by the time we reach this leaf.
+ UntestedPredicatesTy UntestedPredicates;
+
+public:
+ GIMatchTreeLeafInfo() { llvm_unreachable("Cannot default-construct"); }
+ GIMatchTreeLeafInfo(StringRef Name, unsigned RootIdx, void *Data)
+ : Name(Name), RootIdx(RootIdx), Data(Data), IsFullyTraversed(false),
+ IsFullyTested(false) {}
+
+ StringRef getName() const { return Name; }
+ unsigned getRootIdx() const { return RootIdx; }
+ template <class Ty> Ty *getTargetData() const {
+ return static_cast<Ty *>(Data);
+ }
+ bool isFullyTraversed() const { return IsFullyTraversed; }
+ void setIsFullyTraversed(bool V) { IsFullyTraversed = V; }
+ bool isFullyTested() const { return IsFullyTested; }
+ void setIsFullyTested(bool V) { IsFullyTested = V; }
+
+ void bindInstrVariable(StringRef Name, unsigned InstrID) {
+ VarBindings.emplace_back(Name, InstrID);
+ }
+ void bindOperandVariable(StringRef Name, unsigned InstrID, unsigned OpIdx) {
+ VarBindings.emplace_back(Name, InstrID, OpIdx);
+ }
+
+ const_var_binding_iterator var_bindings_begin() const {
+ return VarBindings.begin();
+ }
+ const_var_binding_iterator var_bindings_end() const {
+ return VarBindings.end();
+ }
+ iterator_range<const_var_binding_iterator> var_bindings() const {
+ return make_range(VarBindings.begin(), VarBindings.end());
+ }
+  iterator_range<const_untested_predicates_iterator>
+  untested_predicates() const {
+ return make_range(UntestedPredicates.begin(), UntestedPredicates.end());
+ }
+ void addUntestedPredicate(const GIMatchDagPredicate *P) {
+ UntestedPredicates.push_back(P);
+ }
+};
+
+/// The nodes of a decision tree used to perform the match.
+/// This will be used to generate the C++ code or state machine equivalent.
+///
+/// It should be noted that some nodes of this tree (most notably nodes handling
+/// def -> use edges) will need to iterate over several possible matches. As
+/// such, code generated from this will sometimes need to support backtracking.
+class GIMatchTree {
+ using LeafVector = std::vector<GIMatchTreeLeafInfo>;
+
+ /// The partitioner that has been chosen for this node. This may be nullptr if
+ /// a partitioner hasn't been chosen yet or if the node is a leaf.
+ std::unique_ptr<GIMatchTreePartitioner> Partitioner;
+ /// All the leaves that are possible for this node of the tree.
+ /// Note: This should be emptied after the tree is built when there are
+ /// children but this currently isn't done to aid debuggability of the DOT
+ /// graph for the decision tree.
+ LeafVector PossibleLeaves;
+ /// The children of this node. The index into this array must match the index
+ /// chosen by the partitioner.
+ std::vector<GIMatchTree> Children;
+
+ void writeDOTGraphNode(raw_ostream &OS) const;
+ void writeDOTGraphEdges(raw_ostream &OS) const;
+
+public:
+ void writeDOTGraph(raw_ostream &OS) const;
+
+ void setNumChildren(unsigned Num) { Children.resize(Num); }
+ void addPossibleLeaf(const GIMatchTreeLeafInfo &V, bool IsFullyTraversed,
+ bool IsFullyTested) {
+ PossibleLeaves.push_back(V);
+ PossibleLeaves.back().setIsFullyTraversed(IsFullyTraversed);
+ PossibleLeaves.back().setIsFullyTested(IsFullyTested);
+ }
+ void dropLeavesAfter(size_t Length) {
+ if (PossibleLeaves.size() > Length)
+ PossibleLeaves.resize(Length);
+ }
+ void setPartitioner(std::unique_ptr<GIMatchTreePartitioner> &&V) {
+ Partitioner = std::move(V);
+ }
+ GIMatchTreePartitioner *getPartitioner() const { return Partitioner.get(); }
+
+ std::vector<GIMatchTree>::iterator children_begin() {
+ return Children.begin();
+ }
+ std::vector<GIMatchTree>::iterator children_end() { return Children.end(); }
+ iterator_range<std::vector<GIMatchTree>::iterator> children() {
+ return make_range(children_begin(), children_end());
+ }
+ std::vector<GIMatchTree>::const_iterator children_begin() const {
+ return Children.begin();
+ }
+ std::vector<GIMatchTree>::const_iterator children_end() const {
+ return Children.end();
+ }
+ iterator_range<std::vector<GIMatchTree>::const_iterator> children() const {
+ return make_range(children_begin(), children_end());
+ }
+
+ LeafVector::const_iterator possible_leaves_begin() const {
+ return PossibleLeaves.begin();
+ }
+ LeafVector::const_iterator possible_leaves_end() const {
+ return PossibleLeaves.end();
+ }
+ iterator_range<LeafVector::const_iterator>
+ possible_leaves() const {
+ return make_range(possible_leaves_begin(), possible_leaves_end());
+ }
+ LeafVector::iterator possible_leaves_begin() {
+ return PossibleLeaves.begin();
+ }
+ LeafVector::iterator possible_leaves_end() {
+ return PossibleLeaves.end();
+ }
+ iterator_range<LeafVector::iterator> possible_leaves() {
+ return make_range(possible_leaves_begin(), possible_leaves_end());
+ }
+};
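+
+// A small usage sketch (Builder is a GIMatchTreeBuilder assumed to be set up
+// elsewhere): the built tree can be inspected as a DOT graph.
+//   std::unique_ptr<GIMatchTree> Tree = Builder.run();
+//   Tree->writeDOTGraph(errs()); // render with e.g. `dot -Tpng`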
+
+/// Record information that is known about the instruction bound to this ID and
+/// GIMatchDagInstrNode. Every rule gets its own set of
+/// GIMatchTreeInstrInfo to bind the shared IDs to an instr node in its
+/// DAG.
+///
+/// For example, if we know that there are 3 operands, we can record it here
+/// to elide duplicate checks.
+class GIMatchTreeInstrInfo {
+ /// The instruction ID for the matched instruction.
+ unsigned ID;
+ /// The corresponding instruction node in the MatchDAG.
+ const GIMatchDagInstr *InstrNode;
+
+public:
+ GIMatchTreeInstrInfo(unsigned ID, const GIMatchDagInstr *InstrNode)
+ : ID(ID), InstrNode(InstrNode) {}
+
+ unsigned getID() const { return ID; }
+ const GIMatchDagInstr *getInstrNode() const { return InstrNode; }
+};
+
+/// Record information that is known about the operand bound to this ID, OpIdx,
+/// and GIMatchDagInstrNode. Every rule gets its own set of
+/// GIMatchTreeOperandInfo to bind the shared IDs to an operand of an
+/// instr node from its DAG.
+///
+/// For example, if we know that the operand is a register, we can record it
+/// here to elide duplicate checks.
+class GIMatchTreeOperandInfo {
+ /// The corresponding instruction node in the MatchDAG that the operand
+ /// belongs to.
+ const GIMatchDagInstr *InstrNode;
+ unsigned OpIdx;
+
+public:
+ GIMatchTreeOperandInfo(const GIMatchDagInstr *InstrNode, unsigned OpIdx)
+ : InstrNode(InstrNode), OpIdx(OpIdx) {}
+
+ const GIMatchDagInstr *getInstrNode() const { return InstrNode; }
+ unsigned getOpIdx() const { return OpIdx; }
+};
+
+/// Represent a leaf of the match tree and any working data we need to build the
+/// tree.
+///
+/// It's important to note that each rule can have multiple
+/// GIMatchTreeBuilderLeafInfos since the partitioners do not always partition
+/// into mutually-exclusive partitions. For example:
+/// R1: (FOO ..., ...)
+/// R2: (oneof(FOO, BAR) ..., ...)
+/// will partition by opcode into two partitions FOO=>[R1, R2], and BAR=>[R2]
+///
+/// As an optimization, all instructions, edges, and predicates in the DAGs are
+/// numbered and tracked in BitVectors. As such, the GIMatchDAG must not be
+/// modified once construction of the tree has begun.
+class GIMatchTreeBuilderLeafInfo {
+protected:
+ GIMatchTreeBuilder &Builder;
+ GIMatchTreeLeafInfo Info;
+ const GIMatchDag &MatchDag;
+ /// The association between GIMatchDagInstr* and GIMatchTreeInstrInfo.
+ /// The primary reason for this member's existence is to allow the use of
+ /// InstrIDToInfo.lookup() since that requires that the value is
+ /// default-constructible.
+ DenseMap<const GIMatchDagInstr *, GIMatchTreeInstrInfo> InstrNodeToInfo;
+ /// The instruction information for a given ID in the context of this
+ /// particular leaf.
+ DenseMap<unsigned, GIMatchTreeInstrInfo *> InstrIDToInfo;
+ /// The operand information for a given ID and OpIdx in the context of this
+ /// particular leaf.
+ DenseMap<std::pair<unsigned, unsigned>, GIMatchTreeOperandInfo>
+ OperandIDToInfo;
+
+public:
+ /// The remaining instrs/edges/predicates to visit
+ BitVector RemainingInstrNodes;
+ BitVector RemainingEdges;
+ BitVector RemainingPredicates;
+
+ /// The remaining predicate dependencies for each predicate.
+ std::vector<BitVector> UnsatisfiedPredDepsForPred;
+
+ /// The edges/predicates we can visit as a result of the declare*() calls we
+ /// have already made. We don't need an instrs version since edges imply the
+ /// instr.
+ BitVector TraversableEdges;
+ BitVector TestablePredicates;
+
+ /// Map predicates from the DAG to their position in the DAG predicate
+ /// iterators.
+ DenseMap<GIMatchDagPredicate *, unsigned> PredicateIDs;
+ /// Map predicate dependency edges from the DAG to their position in the DAG
+ /// predicate dependency iterators.
+ DenseMap<GIMatchDagPredicateDependencyEdge *, unsigned> PredicateDepIDs;
+
+public:
+ GIMatchTreeBuilderLeafInfo(GIMatchTreeBuilder &Builder, StringRef Name,
+ unsigned RootIdx, const GIMatchDag &MatchDag,
+ void *Data);
+
+ StringRef getName() const { return Info.getName(); }
+ GIMatchTreeLeafInfo &getInfo() { return Info; }
+ const GIMatchTreeLeafInfo &getInfo() const { return Info; }
+ const GIMatchDag &getMatchDag() const { return MatchDag; }
+ unsigned getRootIdx() const { return Info.getRootIdx(); }
+
+ /// Has this DAG been fully traversed? This must be true by the time the tree
+ /// builder finishes.
+ bool isFullyTraversed() const {
+ // We don't need UnsatisfiedPredDepsForPred because RemainingPredicates
+ // can't be all-zero without satisfying all the dependencies. The same is
+ // almost true for Edges and Instrs but it's possible to have Instrs without
+ // Edges.
+ return RemainingInstrNodes.none() && RemainingEdges.none();
+ }
+
+ /// Has this DAG been fully tested? This should be true by the time the tree
+ /// builder finishes but clients can finish any untested predicates left over
+ /// if it's not true.
+ bool isFullyTested() const {
+ // We don't need UnsatisfiedPredDepsForPred because RemainingPredicates
+ // can't be all-zero without satisfying all the dependencies. The same is
+ // almost true for Edges and Instrs but it's possible to have Instrs without
+ // Edges.
+ return RemainingInstrNodes.none() && RemainingEdges.none() &&
+ RemainingPredicates.none();
+ }
+
+ const GIMatchDagInstr *getInstr(unsigned Idx) const {
+ return *(MatchDag.instr_nodes_begin() + Idx);
+ }
+ const GIMatchDagEdge *getEdge(unsigned Idx) const {
+ return *(MatchDag.edges_begin() + Idx);
+ }
+ GIMatchDagEdge *getEdge(unsigned Idx) {
+ return *(MatchDag.edges_begin() + Idx);
+ }
+ const GIMatchDagPredicate *getPredicate(unsigned Idx) const {
+ return *(MatchDag.predicates_begin() + Idx);
+ }
+ iterator_range<llvm::BitVector::const_set_bits_iterator>
+ untested_instrs() const {
+ return RemainingInstrNodes.set_bits();
+ }
+ iterator_range<llvm::BitVector::const_set_bits_iterator>
+ untested_edges() const {
+ return RemainingEdges.set_bits();
+ }
+ iterator_range<llvm::BitVector::const_set_bits_iterator>
+ untested_predicates() const {
+ return RemainingPredicates.set_bits();
+ }
+
+ /// Bind an instr node to the given ID and clear any blocking dependencies
+ /// that were waiting for it.
+ void declareInstr(const GIMatchDagInstr *Instr, unsigned ID);
+
+ /// Bind an operand to the given ID and OpIdx and clear any blocking
+ /// dependencies that were waiting for it.
+ void declareOperand(unsigned InstrID, unsigned OpIdx);
+
+ GIMatchTreeInstrInfo *getInstrInfo(unsigned ID) const {
+ auto I = InstrIDToInfo.find(ID);
+ if (I != InstrIDToInfo.end())
+ return I->second;
+ return nullptr;
+ }
+
+ void dump(raw_ostream &OS) const {
+ OS << "Leaf " << getName() << " for root #" << getRootIdx() << "\n";
+ MatchDag.print(OS);
+ for (const auto &I : InstrIDToInfo)
+ OS << "Declared Instr #" << I.first << "\n";
+ for (const auto &I : OperandIDToInfo)
+ OS << "Declared Instr #" << I.first.first << ", Op #" << I.first.second
+ << "\n";
+ OS << RemainingInstrNodes.count() << " untested instrs of "
+ << RemainingInstrNodes.size() << "\n";
+ OS << RemainingEdges.count() << " untested edges of "
+ << RemainingEdges.size() << "\n";
+ OS << RemainingPredicates.count() << " untested predicates of "
+ << RemainingPredicates.size() << "\n";
+
+ OS << TraversableEdges.count() << " edges could be traversed\n";
+ OS << TestablePredicates.count() << " predicates could be tested\n";
+ }
+};
+
+/// The tree builder has a fairly tough job. Its purpose is to merge all the
+/// DAGs from the ruleset into a decision tree that walks all of them
+/// simultaneously and identifies the rule that was matched. In addition to
+/// that, it also needs to find the most efficient order to make decisions
+/// without violating any dependencies and ensure that every DAG covers every
+/// instr/edge/predicate.
+class GIMatchTreeBuilder {
+public:
+ using LeafVec = std::vector<GIMatchTreeBuilderLeafInfo>;
+
+protected:
+ /// The leaves that the resulting decision tree will distinguish.
+ LeafVec Leaves;
+ /// The tree node being constructed.
+ GIMatchTree *TreeNode;
+ /// The builders for each subtree resulting from the current decision.
+ std::vector<GIMatchTreeBuilder> SubtreeBuilders;
+ /// The possible partitioners we could apply right now.
+ std::vector<std::unique_ptr<GIMatchTreePartitioner>> Partitioners;
+ /// The next instruction ID to allocate when requested by the chosen
+ /// Partitioner.
+ unsigned NextInstrID;
+
+ /// Use any context we have stored to cull partitioners that only test things
+ /// we already know. At the time of writing, there's no need to do anything
+ /// here but it will become important once, for example, there is a
+ /// num-operands and an opcode partitioner. This is because applying an opcode
+ /// partitioner (usually) makes the number of operands known which makes
+ /// additional checking pointless.
+ void filterRedundantPartitioners();
+
+ /// Evaluate the available partitioners and select the best one at the moment.
+ void evaluatePartitioners();
+
+ /// Construct the current tree node.
+ void runStep();
+
+public:
+ GIMatchTreeBuilder(unsigned NextInstrID) : NextInstrID(NextInstrID) {}
+ GIMatchTreeBuilder(GIMatchTree *TreeNode, unsigned NextInstrID)
+ : TreeNode(TreeNode), NextInstrID(NextInstrID) {}
+
+ void addLeaf(StringRef Name, unsigned RootIdx, const GIMatchDag &MatchDag,
+ void *Data) {
+ Leaves.emplace_back(*this, Name, RootIdx, MatchDag, Data);
+ }
+ void addLeaf(const GIMatchTreeBuilderLeafInfo &L) { Leaves.push_back(L); }
+ void addPartitioner(std::unique_ptr<GIMatchTreePartitioner> P) {
+ Partitioners.push_back(std::move(P));
+ }
+ void addPartitionersForInstr(unsigned InstrIdx);
+ void addPartitionersForOperand(unsigned InstrID, unsigned OpIdx);
+
+ LeafVec &getPossibleLeaves() { return Leaves; }
+
+ unsigned allocInstrID() { return NextInstrID++; }
+
+ /// Construct the decision tree.
+ std::unique_ptr<GIMatchTree> run();
+};
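
To make the intended call sequence concrete, a hypothetical driver would register one leaf per rule and then call run(); the rule names, root indices, and DAG arguments below are assumptions for illustration:

    // Sketch: build a decision tree over two rules. Each leaf carries a name,
    // the index of its root within the DAG, the DAG itself, and an opaque
    // Data pointer that identifies the rule on a successful match.
    std::unique_ptr<llvm::GIMatchTree>
    buildTreeForRules(const llvm::GIMatchDag &Rule1Dag,
                      const llvm::GIMatchDag &Rule2Dag) {
      llvm::GIMatchTreeBuilder Builder(/*NextInstrID=*/0);
      Builder.addLeaf("rule1", /*RootIdx=*/0, Rule1Dag, /*Data=*/nullptr);
      Builder.addLeaf("rule2", /*RootIdx=*/0, Rule2Dag, /*Data=*/nullptr);
      return Builder.run();
    }
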
+
+/// Partitioners are the core of the tree builder and are unfortunately rather
+/// tricky to write.
+class GIMatchTreePartitioner {
+protected:
+ /// The partitions resulting from applying the partitioner to the possible
+ /// leaves. The keys must be consecutive integers starting from 0 because
+ /// each partition is assigned to one of the tree node's children. This can
+ /// lead to some unfortunate situations where a partitioner that tests a
+ /// predicate ends up using 0 for success and 1 for failure simply because
+ /// the ruleset encountered a success case first. As a result, you usually
+ /// need some kind of indirection to map the natural keys (e.g. ptrs/bools)
+ /// to this linear sequence. The values are a BitVector indicating which
+ /// leaves belong to this partition.
+ DenseMap<unsigned, BitVector> Partitions;
+
+public:
+ virtual ~GIMatchTreePartitioner() {}
+ virtual std::unique_ptr<GIMatchTreePartitioner> clone() const = 0;
+
+ /// Determines which partitions the given leaves belong to. A leaf may belong
+ /// to multiple partitions in which case it will be duplicated during
+ /// applyForPartition().
+ ///
+ /// This function can be rather complicated. A few particular things to be
+ /// aware of include:
+ /// * One leaf can be assigned to multiple partitions when there's some
+ /// ambiguity.
+ /// * Not all DAGs for the leaves may be able to perform the test. For
+ /// example, the opcode partitioner must account for one DAG being a
+ /// superset of another such as [(ADD ..., ..., ...)], and [(MUL t, ...,
+ /// ...), (ADD ..., t, ...)]
+ /// * Attaching meaning to a particular partition index will generally not
+ /// work due to the '0, 1, ..., n' requirement. You might encounter cases
+ /// where only partition 1 is seen, leaving a missing 0.
+ /// * Finding a specific predicate such as the opcode predicate for a specific
+ /// instruction is non-trivial. It's often O(NumPredicates), leading to
+ /// O(NumPredicates*NumRules) when applied to the whole ruleset. The good
+ /// news there is that NumPredicates is typically small thanks to predicate
+ /// dependencies limiting how many are testable at once. Also, with opcode
+ /// and type predicates being so frequent, NumRules drops very fast too. It
+ /// wouldn't be terribly surprising to see a 10k ruleset drop down to an
+ /// average of 100 leaves per partition after a single opcode partitioner.
+ /// * The same goes for finding specific edges. The need to traverse them in
+ /// dependency order dramatically limits the search space at any given
+ /// moment.
+ /// * If you need to add a leaf to all partitions, make sure you don't forget
+ /// them when adding partitions later.
+ virtual void repartition(GIMatchTreeBuilder::LeafVec &Leaves) = 0;
+
+ /// Delegate the leaves for a given partition to the corresponding subbuilder,
+ /// update any recorded context for this partition (e.g. allocate instr IDs
+ /// for instrs recorded by the current node), and clear any blocking
+ /// dependencies this partitioner resolved.
+ virtual void applyForPartition(unsigned PartitionIdx,
+ GIMatchTreeBuilder &Builder,
+ GIMatchTreeBuilder &SubBuilder) = 0;
+
+ /// Return a BitVector indicating which leaves should be transferred to the
+ /// specified partition. Note that the same leaf can be indicated for multiple
+ /// partitions.
+ BitVector getPossibleLeavesForPartition(unsigned Idx) {
+ const auto &I = Partitions.find(Idx);
+ assert(I != Partitions.end() && "Requested non-existent partition");
+ return I->second;
+ }
+
+ size_t getNumPartitions() const { return Partitions.size(); }
+ size_t getNumLeavesWithDupes() const {
+ size_t S = 0;
+ for (const auto &P : Partitions)
+ S += P.second.size();
+ return S;
+ }
+
+ /// Emit a brief description of the partitioner suitable for debug printing or
+ /// use in a DOT graph.
+ virtual void emitDescription(raw_ostream &OS) const = 0;
+ /// Emit a label for the given partition suitable for debug printing or use in
+ /// a DOT graph.
+ virtual void emitPartitionName(raw_ostream &OS, unsigned Idx) const = 0;
+
+ /// Emit a long description of how the partitioner partitions the leaves.
+ virtual void emitPartitionResults(raw_ostream &OS) const = 0;
+
+ /// Generate code to select between partitions based on the MIR being matched.
+ /// This is typically a switch statement that picks a partition index.
+ virtual void generatePartitionSelectorCode(raw_ostream &OS,
+ StringRef Indent) const = 0;
+};
+
+/// Partition according to the opcode of the instruction.
+///
+/// Numbers CodeGenInstr ptrs for use as partition IDs. One special partition,
+/// nullptr, represents the case where the instruction isn't known.
+///
+/// * If the opcode can be tested and is a single opcode, create the partition
+/// for that opcode and assign the leaf to it. This partition no longer needs
+/// to test the opcode, and many details about the instruction will usually
+/// become known (e.g. number of operands for non-variadic instrs) via the
+/// CodeGenInstr ptr.
+/// * (not implemented yet) If the opcode can be tested and is a choice of
+/// opcodes, then the leaf can be treated like the single-opcode case but must
+/// be added to all relevant partitions and not quite as much becomes known as
+/// a result. That said, multiple-choice opcodes are likely similar enough
+/// (because if they aren't then handling them together makes little sense)
+/// that plenty still becomes known. The main implementation issue with this
+/// is having a description to represent the commonality between instructions.
+/// * If the opcode is not tested, the leaf must be added to all partitions
+/// including the wildcard nullptr partition. What becomes known as a result
+/// varies between partitions.
+/// * If the instruction to be tested is not declared then add the leaf to all
+/// partitions. This occurs when we encounter one rule that is a superset of
+/// the other and we are still matching the remainder of the superset. The
+/// result is that the cases that don't match the superset will match the
+/// subset rule, while the ones that do match the superset will match either
+/// (which one is algorithm dependent but will usually be the superset).
+class GIMatchTreeOpcodePartitioner : public GIMatchTreePartitioner {
+ unsigned InstrID;
+ DenseMap<const CodeGenInstruction *, unsigned> InstrToPartition;
+ std::vector<const CodeGenInstruction *> PartitionToInstr;
+ std::vector<BitVector> TestedPredicates;
+
+public:
+ GIMatchTreeOpcodePartitioner(unsigned InstrID) : InstrID(InstrID) {}
+
+ std::unique_ptr<GIMatchTreePartitioner> clone() const override {
+ return std::make_unique<GIMatchTreeOpcodePartitioner>(*this);
+ }
+
+ void emitDescription(raw_ostream &OS) const override {
+ OS << "MI[" << InstrID << "].getOpcode()";
+ }
+
+ void emitPartitionName(raw_ostream &OS, unsigned Idx) const override;
+
+ void repartition(GIMatchTreeBuilder::LeafVec &Leaves) override;
+ void applyForPartition(unsigned Idx, GIMatchTreeBuilder &SubBuilder,
+ GIMatchTreeBuilder &Builder) override;
+
+ void emitPartitionResults(raw_ostream &OS) const override;
+
+ void generatePartitionSelectorCode(raw_ostream &OS,
+ StringRef Indent) const override;
+};
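
The InstrToPartition/PartitionToInstr pair above is exactly the indirection the base class comment calls for: natural keys (CodeGenInstruction pointers) mapped onto the dense 0..n-1 indices the tree requires. A hedged sketch of how such a partitioner might allocate those indices; the helper name is invented, and the real logic lives in the out-of-line repartition():

    // Sketch: map a CodeGenInstruction* to a dense partition index,
    // allocating the next index the first time a key is seen.
    unsigned getOrCreatePartitionIdx(const CodeGenInstruction *I) {
      auto Ins = InstrToPartition.try_emplace(I, PartitionToInstr.size());
      if (Ins.second) // First time we've seen this instruction.
        PartitionToInstr.push_back(I);
      return Ins.first->second;
    }
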
+
+class GIMatchTreeVRegDefPartitioner : public GIMatchTreePartitioner {
+ unsigned NewInstrID = -1;
+ unsigned InstrID;
+ unsigned OpIdx;
+ std::vector<BitVector> TraversedEdges;
+ DenseMap<unsigned, unsigned> ResultToPartition;
+ std::vector<bool> PartitionToResult;
+
+ void addToPartition(bool Result, unsigned LeafIdx);
+
+public:
+ GIMatchTreeVRegDefPartitioner(unsigned InstrID, unsigned OpIdx)
+ : InstrID(InstrID), OpIdx(OpIdx) {}
+
+ std::unique_ptr<GIMatchTreePartitioner> clone() const override {
+ return std::make_unique<GIMatchTreeVRegDefPartitioner>(*this);
+ }
+
+ void emitDescription(raw_ostream &OS) const override {
+ OS << "MI[" << NewInstrID << "] = getVRegDef(MI[" << InstrID
+ << "].getOperand(" << OpIdx << "))";
+ }
+
+ void emitPartitionName(raw_ostream &OS, unsigned Idx) const override {
+ bool Result = PartitionToResult[Idx];
+ if (Result)
+ OS << "true";
+ else
+ OS << "false";
+ }
+
+ void repartition(GIMatchTreeBuilder::LeafVec &Leaves) override;
+ void applyForPartition(unsigned PartitionIdx, GIMatchTreeBuilder &Builder,
+ GIMatchTreeBuilder &SubBuilder) override;
+ void emitPartitionResults(raw_ostream &OS) const override;
+
+ void generatePartitionSelectorCode(raw_ostream &OS,
+ StringRef Indent) const override;
+};
+
+} // end namespace llvm
+#endif // ifndef LLVM_UTILS_TABLEGEN_GIMATCHTREE_H
diff --git a/contrib/llvm-project/llvm/utils/TableGen/GlobalISelEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/GlobalISelEmitter.cpp
index d8d4c9f4f55c..c14294951cc1 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -609,7 +609,7 @@ MatchTableRecord MatchTable::LineBreak = {
void MatchTableRecord::emit(raw_ostream &OS, bool LineBreakIsNextAfterThis,
const MatchTable &Table) const {
bool UseLineComment =
- LineBreakIsNextAfterThis | (Flags & MTRF_LineBreakFollows);
+ LineBreakIsNextAfterThis || (Flags & MTRF_LineBreakFollows);
if (Flags & (MTRF_JumpTarget | MTRF_CommaFollows))
UseLineComment = false;
@@ -620,7 +620,7 @@ void MatchTableRecord::emit(raw_ostream &OS, bool LineBreakIsNextAfterThis,
if (Flags & MTRF_Label)
OS << ": @" << Table.getLabelIndex(LabelID);
- if (Flags & MTRF_Comment && !UseLineComment)
+ if ((Flags & MTRF_Comment) && !UseLineComment)
OS << "*/";
if (Flags & MTRF_JumpTarget) {
@@ -1181,7 +1181,7 @@ public:
TypeIDValues.clear();
unsigned ID = 0;
- for (const LLTCodeGen LLTy : KnownTypes)
+ for (const LLTCodeGen &LLTy : KnownTypes)
TypeIDValues[LLTy] = ID++;
}
@@ -1707,7 +1707,7 @@ public:
}
StringRef getOpcode() const { return I->TheDef->getName(); }
- unsigned getNumOperands() const { return I->Operands.size(); }
+ bool isVariadicNumOperands() const { return I->Operands.isVariadic; }
StringRef getOperandType(unsigned OpIdx) const {
return I->Operands[OpIdx].OperandType;
@@ -2153,7 +2153,7 @@ public:
return false;
}
- for (const auto &Operand : zip(Operands, B.Operands)) {
+ for (auto Operand : zip(Operands, B.Operands)) {
if (std::get<0>(Operand)->isHigherPriorityThan(*std::get<1>(Operand)))
return true;
if (std::get<1>(Operand)->isHigherPriorityThan(*std::get<0>(Operand)))
@@ -2273,7 +2273,7 @@ void InstructionMatcher::optimize() {
Stash.push_back(predicates_pop_front());
if (Stash.back().get() == &OpcMatcher) {
- if (NumOperandsCheck && OpcMatcher.getNumOperands() < getNumOperands())
+ if (NumOperandsCheck && OpcMatcher.isVariadicNumOperands())
Stash.emplace_back(
new InstructionNumOperandsMatcher(InsnVarID, getNumOperands()));
NumOperandsCheck = false;
@@ -2318,7 +2318,8 @@ public:
OR_Register,
OR_TempRegister,
OR_ComplexPattern,
- OR_Custom
+ OR_Custom,
+ OR_CustomOperand
};
protected:
@@ -2726,6 +2727,38 @@ public:
}
};
+class CustomOperandRenderer : public OperandRenderer {
+protected:
+ unsigned InsnID;
+ const Record &Renderer;
+ /// The name of the operand.
+ const std::string SymbolicName;
+
+public:
+ CustomOperandRenderer(unsigned InsnID, const Record &Renderer,
+ StringRef SymbolicName)
+ : OperandRenderer(OR_CustomOperand), InsnID(InsnID), Renderer(Renderer),
+ SymbolicName(SymbolicName) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_CustomOperand;
+ }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
+ const OperandMatcher &OpdMatcher = Rule.getOperandMatcher(SymbolicName);
+ Table << MatchTable::Opcode("GIR_CustomOperandRenderer")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::Comment("OldInsnID")
+ << MatchTable::IntValue(OpdMatcher.getInsnVarID())
+ << MatchTable::Comment("OpIdx")
+ << MatchTable::IntValue(OpdMatcher.getOpIdx())
+ << MatchTable::Comment("OperandRenderer")
+ << MatchTable::NamedValue(
+ "GICR_" + Renderer.getValueAsString("RendererFn").str())
+ << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
+ }
+};
+
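
For reference, the match-table row that emitRenderOpcodes() produces has the following shape; the opcode and comment layout follow the code above, while the renderer name and index values are made up for illustration:

    // GIR_CustomOperandRenderer, /*InsnID*/0, /*OldInsnID*/1, /*OpIdx*/2,
    //     /*OperandRenderer*/GICR_renderExampleImm, // $imm
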
/// An action taken when all Matcher predicates succeeded for a parent rule.
///
/// Typical actions include:
@@ -3159,7 +3192,7 @@ bool RuleMatcher::isHigherPriorityThan(const RuleMatcher &B) const {
if (Matchers.size() < B.Matchers.size())
return false;
- for (const auto &Matcher : zip(Matchers, B.Matchers)) {
+ for (auto Matcher : zip(Matchers, B.Matchers)) {
if (std::get<0>(Matcher)->isHigherPriorityThan(*std::get<1>(Matcher)))
return true;
if (std::get<1>(Matcher)->isHigherPriorityThan(*std::get<0>(Matcher)))
@@ -3281,8 +3314,8 @@ private:
unsigned &TempOpIdx) const;
Error importChildMatcher(RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
const TreePatternNode *SrcChild,
- bool OperandIsAPointer, unsigned OpIdx,
- unsigned &TempOpIdx);
+ bool OperandIsAPointer, bool OperandIsImmArg,
+ unsigned OpIdx, unsigned &TempOpIdx);
Expected<BuildMIAction &> createAndImportInstructionRenderer(
RuleMatcher &M, InstructionMatcher &InsnMatcher,
@@ -3722,9 +3755,19 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
for (unsigned i = 0; i != NumChildren; ++i) {
TreePatternNode *SrcChild = Src->getChild(i);
+ // We need to determine the meaning of a literal integer based on the
+ // context. If this is a field required to be an immediate (such as an
+ // immarg intrinsic argument), the required predicates are different than
+ // a constant which may be materialized in a register. If we have an
+ // argument that is required to be an immediate, we should not emit an LLT
+ // type check, and should not be looking for a G_CONSTANT defined
+ // register.
+ bool OperandIsImmArg = SrcGIOrNull->isOperandImmArg(i);
+
// SelectionDAG allows pointers to be represented with iN since it doesn't
// distinguish between pointers and integers but they are different types in GlobalISel.
// Coerce integers to pointers to address space 0 if the context indicates a pointer.
+ //
bool OperandIsAPointer = SrcGIOrNull->isOperandAPointer(i);
if (IsIntrinsic) {
@@ -3737,16 +3780,17 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
continue;
}
- // We have to check intrinsics for llvm_anyptr_ty parameters.
+ // We have to check intrinsics for llvm_anyptr_ty and immarg parameters.
//
// Note that we have to look at the i-1th parameter, because we don't
// have the intrinsic ID in the intrinsic's parameter list.
OperandIsAPointer |= II->isParamAPointer(i - 1);
+ OperandIsImmArg |= II->isParamImmArg(i - 1);
}
if (auto Error =
importChildMatcher(Rule, InsnMatcher, SrcChild, OperandIsAPointer,
- OpIdx++, TempOpIdx))
+ OperandIsImmArg, OpIdx++, TempOpIdx))
return std::move(Error);
}
}
@@ -3784,12 +3828,10 @@ static StringRef getSrcChildName(const TreePatternNode *SrcChild,
return SrcChildName;
}
-Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
- InstructionMatcher &InsnMatcher,
- const TreePatternNode *SrcChild,
- bool OperandIsAPointer,
- unsigned OpIdx,
- unsigned &TempOpIdx) {
+Error GlobalISelEmitter::importChildMatcher(
+ RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
+ const TreePatternNode *SrcChild, bool OperandIsAPointer,
+ bool OperandIsImmArg, unsigned OpIdx, unsigned &TempOpIdx) {
Record *PhysReg = nullptr;
StringRef SrcChildName = getSrcChildName(SrcChild, PhysReg);
@@ -3819,10 +3861,14 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
}
}
- if (auto Error =
- OM.addTypeCheckPredicate(ChildTypes.front(), OperandIsAPointer))
- return failedImport(toString(std::move(Error)) + " for Src operand (" +
- to_string(*SrcChild) + ")");
+ // Immediate arguments have no meaningful type to check as they don't have
+ // registers.
+ if (!OperandIsImmArg) {
+ if (auto Error =
+ OM.addTypeCheckPredicate(ChildTypes.front(), OperandIsAPointer))
+ return failedImport(toString(std::move(Error)) + " for Src operand (" +
+ to_string(*SrcChild) + ")");
+ }
// Check for nested instructions.
if (!SrcChild->isLeaf()) {
@@ -3873,7 +3919,13 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
// Check for constant immediates.
if (auto *ChildInt = dyn_cast<IntInit>(SrcChild->getLeafValue())) {
- OM.addPredicate<ConstantIntOperandMatcher>(ChildInt->getValue());
+ if (OperandIsImmArg) {
+ // Checks for argument directly in operand list
+ OM.addPredicate<LiteralIntOperandMatcher>(ChildInt->getValue());
+ } else {
+ // Checks for materialized constant
+ OM.addPredicate<ConstantIntOperandMatcher>(ChildInt->getValue());
+ }
return Error::success();
}
@@ -3939,12 +3991,22 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
}
if (!DstChild->isLeaf()) {
-
if (DstChild->getOperator()->isSubClassOf("SDNodeXForm")) {
auto Child = DstChild->getChild(0);
auto I = SDNodeXFormEquivs.find(DstChild->getOperator());
if (I != SDNodeXFormEquivs.end()) {
- DstMIBuilder.addRenderer<CustomRenderer>(*I->second, Child->getName());
+ Record *XFormOpc = DstChild->getOperator()->getValueAsDef("Opcode");
+ if (XFormOpc->getName() == "timm") {
+ // If this is a TargetConstant, there won't be a corresponding
+ // instruction to transform. Instead, this will refer directly to an
+ // operand in an instruction's operand list.
+ DstMIBuilder.addRenderer<CustomOperandRenderer>(*I->second,
+ Child->getName());
+ } else {
+ DstMIBuilder.addRenderer<CustomRenderer>(*I->second,
+ Child->getName());
+ }
+
return InsertPt;
}
return failedImport("SDNodeXForm " + Child->getName() +
@@ -4309,18 +4371,48 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
ExpectedDstINumUses--;
}
+ // NumResults - This is the number of results produced by the instruction in
+ // the "outs" list.
+ unsigned NumResults = OrigDstI->Operands.NumDefs;
+
+ // Number of operands we know the output instruction must have. If it is
+ // variadic, we could have more operands.
+ unsigned NumFixedOperands = DstI->Operands.size();
+
+ // Loop over all of the fixed operands of the instruction pattern, emitting
+ // code to fill them all in. The node 'N' usually has a number of children
+ // equal to the number of input operands of the instruction. However, in
+ // cases where there are predicate operands for an instruction, we need to
+ // fill in the 'execute always' values. Match up the node operands to the
+ // instruction operands to do this.
unsigned Child = 0;
+
+ // Similarly to the code in TreePatternNode::ApplyTypeConstraints, count the
+ // number of operands at the end of the list which have default values.
+ // Those can come from the pattern if it provides enough arguments, or be
+ // filled in with the default if the pattern hasn't provided them. But any
+ // operand with a default value _before_ the last mandatory one will be
+ // filled in with its default unconditionally.
+ unsigned NonOverridableOperands = NumFixedOperands;
+ while (NonOverridableOperands > NumResults &&
+ CGP.operandHasDefault(DstI->Operands[NonOverridableOperands - 1].Rec))
+ --NonOverridableOperands;
+
unsigned NumDefaultOps = 0;
for (unsigned I = 0; I != DstINumUses; ++I) {
- const CGIOperandList::OperandInfo &DstIOperand =
- DstI->Operands[DstI->Operands.NumDefs + I];
+ unsigned InstOpNo = DstI->Operands.NumDefs + I;
+
+ // Determine what to emit for this operand.
+ Record *OperandNode = DstI->Operands[InstOpNo].Rec;
// If the operand has default values, introduce them now.
- // FIXME: Until we have a decent test case that dictates we should do
- // otherwise, we're going to assume that operands with default values cannot
- // be specified in the patterns. Therefore, adding them will not cause us to
- // end up with too many rendered operands.
- if (DstIOperand.Rec->isSubClassOf("OperandWithDefaultOps")) {
+ if (CGP.operandHasDefault(OperandNode) &&
+ (InstOpNo < NonOverridableOperands || Child >= Dst->getNumChildren())) {
+ // This is a predicate or optional def operand which the pattern has not
+ // overridden, or which we aren't letting it override; emit the 'default
+ // ops' operands.
+
+ const CGIOperandList::OperandInfo &DstIOperand = DstI->Operands[InstOpNo];
DagInit *DefaultOps = DstIOperand.Rec->getValueAsDag("DefaultOps");
if (auto Error = importDefaultOperandRenderers(
InsertPt, M, DstMIBuilder, DefaultOps))
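
The trailing-default count in this hunk is subtle enough to be worth restating in isolation. A self-contained sketch, where HasDefault stands in for querying CGP.operandHasDefault() over the fixed operand list; the function is illustrative, not the emitter's own code:

    #include "llvm/ADT/ArrayRef.h"

    // Only defaulted operands *after* the last mandatory operand may be
    // overridden by pattern-supplied children; every defaulted operand
    // before that point keeps its default unconditionally.
    unsigned countNonOverridable(llvm::ArrayRef<bool> HasDefault,
                                 unsigned NumResults) {
      unsigned NonOverridable = HasDefault.size();
      while (NonOverridable > NumResults && HasDefault[NonOverridable - 1])
        --NonOverridable;
      return NonOverridable;
    }
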
@@ -5082,7 +5174,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
<< " typedef void(" << Target.getName()
<< "InstructionSelector::*CustomRendererFn)(MachineInstrBuilder &, const "
- "MachineInstr&) "
+ "MachineInstr&, int) "
"const;\n"
<< " const ISelInfoTy<PredicateBitset, ComplexMatcherMemFn, "
"CustomRendererFn> "
@@ -5127,8 +5219,23 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
});
SubtargetFeatureInfo::emitComputeAvailableFeatures(
- Target.getName(), "InstructionSelector", "computeAvailableModuleFeatures",
+ Target.getName(), "InstructionSelector", "computeAvailableModuleFeatures",
ModuleFeatures, OS);
+
+
+ OS << "void " << Target.getName() << "InstructionSelector"
+ "::setupGeneratedPerFunctionState(MachineFunction &MF) {\n"
+ " AvailableFunctionFeatures = computeAvailableFunctionFeatures("
+ "(const " << Target.getName() << "Subtarget*)&MF.getSubtarget(), &MF);\n"
+ "}\n";
+
+ if (Target.getName() == "X86" || Target.getName() == "AArch64") {
+ // TODO: Implement PGSO.
+ OS << "static bool shouldOptForSize(const MachineFunction *MF) {\n";
+ OS << " return MF->getFunction().hasOptSize();\n";
+ OS << "}\n\n";
+ }
+
SubtargetFeatureInfo::emitComputeAvailableFeatures(
Target.getName(), "InstructionSelector",
"computeAvailableFunctionFeatures", FunctionFeatures, OS,
@@ -5168,7 +5275,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
return true;
if (A.size() > B.size())
return false;
- for (const auto &Pair : zip(A, B)) {
+ for (auto Pair : zip(A, B)) {
if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
return true;
if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
@@ -5245,7 +5352,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
OS << Target.getName() << "InstructionSelector::CustomRendererFn\n"
<< Target.getName() << "InstructionSelector::CustomRenderers[] = {\n"
- << " nullptr, // GICP_Invalid\n";
+ << " nullptr, // GICR_Invalid\n";
for (const auto &Record : CustomRendererFns)
OS << " &" << Target.getName()
<< "InstructionSelector::" << Record->getValueAsString("RendererFn")
@@ -5273,10 +5380,6 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
"&CoverageInfo) const {\n"
<< " MachineFunction &MF = *I.getParent()->getParent();\n"
<< " MachineRegisterInfo &MRI = MF.getRegInfo();\n"
- << " // FIXME: This should be computed on a per-function basis rather "
- "than per-insn.\n"
- << " AvailableFunctionFeatures = computeAvailableFunctionFeatures(&STI, "
- "&MF);\n"
<< " const PredicateBitset AvailableFeatures = getAvailableFeatures();\n"
<< " NewMIVector OutMIs;\n"
<< " State.MIs.clear();\n"
@@ -5312,6 +5415,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
<< "computeAvailableFunctionFeatures(const " << Target.getName()
<< "Subtarget *Subtarget,\n"
<< " const MachineFunction *MF) const;\n"
+ << "void setupGeneratedPerFunctionState(MachineFunction &MF) override;\n"
<< "#endif // ifdef GET_GLOBALISEL_PREDICATES_DECL\n";
OS << "#ifdef GET_GLOBALISEL_PREDICATES_INIT\n"
diff --git a/contrib/llvm-project/llvm/utils/TableGen/InstrDocsEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/InstrDocsEmitter.cpp
index 45fa936b9574..07efa1885409 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/InstrDocsEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/InstrDocsEmitter.cpp
@@ -138,6 +138,7 @@ void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
FLAG(isConvergent)
FLAG(hasNoSchedulingInfo)
FLAG(variadicOpsAreDefs)
+ FLAG(isAuthenticated)
if (!FlagStrings.empty()) {
OS << "Flags: ";
bool IsFirst = true;
diff --git a/contrib/llvm-project/llvm/utils/TableGen/InstrInfoEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 300ba36a7007..6ab58bd26a2c 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -164,6 +164,11 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
if (Op.Rec->isSubClassOf("OptionalDefOperand"))
Res += "|(1<<MCOI::OptionalDef)";
+ // Branch target operands. Check to see if the original unexpanded
+ // operand was of type BranchTargetOperand.
+ if (Op.Rec->isSubClassOf("BranchTargetOperand"))
+ Res += "|(1<<MCOI::BranchTarget)";
+
// Fill in operand type.
Res += ", ";
assert(!Op.OperandType.empty() && "Invalid operand type.");
@@ -703,6 +708,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (Inst.isInsertSubreg) OS << "|(1ULL<<MCID::InsertSubreg)";
if (Inst.isConvergent) OS << "|(1ULL<<MCID::Convergent)";
if (Inst.variadicOpsAreDefs) OS << "|(1ULL<<MCID::VariadicOpsAreDefs)";
+ if (Inst.isAuthenticated) OS << "|(1ULL<<MCID::Authenticated)";
// Emit all of the target-specific flags...
BitsInit *TSF = Inst.TheDef->getValueAsBitsInit("TSFlags");
diff --git a/contrib/llvm-project/llvm/utils/TableGen/IntrinsicEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/IntrinsicEmitter.cpp
index e01f91c20456..9a12571ac6bc 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -15,28 +15,30 @@
#include "SequenceToOffsetTable.h"
#include "TableGenBackends.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/StringMatcher.h"
-#include "llvm/TableGen/TableGenBackend.h"
#include "llvm/TableGen/StringToOffsetTable.h"
+#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
using namespace llvm;
+cl::OptionCategory GenIntrinsicCat("Options for -gen-intrinsic-enums");
+cl::opt<std::string>
+ IntrinsicPrefix("intrinsic-prefix",
+ cl::desc("Generate intrinsics with this target prefix"),
+ cl::value_desc("target prefix"), cl::cat(GenIntrinsicCat));
+
namespace {
class IntrinsicEmitter {
RecordKeeper &Records;
- bool TargetOnly;
- std::string TargetPrefix;
public:
- IntrinsicEmitter(RecordKeeper &R, bool T)
- : Records(R), TargetOnly(T) {}
+ IntrinsicEmitter(RecordKeeper &R) : Records(R) {}
void run(raw_ostream &OS, bool Enums);
- void EmitPrefix(raw_ostream &OS);
-
void EmitEnumInfo(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
void EmitTargetInfo(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
void EmitIntrinsicToNameTable(const CodeGenIntrinsicTable &Ints,
@@ -47,7 +49,6 @@ public:
void EmitAttributes(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
void EmitIntrinsicToBuiltinMap(const CodeGenIntrinsicTable &Ints, bool IsGCC,
raw_ostream &OS);
- void EmitSuffix(raw_ostream &OS);
};
} // End anonymous namespace
@@ -58,12 +59,7 @@ public:
void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
emitSourceFileHeader("Intrinsic Function Source Fragment", OS);
- CodeGenIntrinsicTable Ints(Records, TargetOnly);
-
- if (TargetOnly && !Ints.empty())
- TargetPrefix = Ints[0].TargetPrefix;
-
- EmitPrefix(OS);
+ CodeGenIntrinsicTable Ints(Records);
if (Enums) {
// Emit the enum information.
@@ -90,40 +86,64 @@ void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
// Emit code to translate MS builtins into LLVM intrinsics.
EmitIntrinsicToBuiltinMap(Ints, false, OS);
}
-
- EmitSuffix(OS);
-}
-
-void IntrinsicEmitter::EmitPrefix(raw_ostream &OS) {
- OS << "// VisualStudio defines setjmp as _setjmp\n"
- "#if defined(_MSC_VER) && defined(setjmp) && \\\n"
- " !defined(setjmp_undefined_for_msvc)\n"
- "# pragma push_macro(\"setjmp\")\n"
- "# undef setjmp\n"
- "# define setjmp_undefined_for_msvc\n"
- "#endif\n\n";
-}
-
-void IntrinsicEmitter::EmitSuffix(raw_ostream &OS) {
- OS << "#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)\n"
- "// let's return it to _setjmp state\n"
- "# pragma pop_macro(\"setjmp\")\n"
- "# undef setjmp_undefined_for_msvc\n"
- "#endif\n\n";
}
void IntrinsicEmitter::EmitEnumInfo(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS) {
- OS << "// Enum values for Intrinsics.h\n";
- OS << "#ifdef GET_INTRINSIC_ENUM_VALUES\n";
- for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
+ // Find the TargetSet for which to generate enums. There will be an initial
+ // set with an empty target prefix which will include target independent
+ // intrinsics like dbg.value.
+ const CodeGenIntrinsicTable::TargetSet *Set = nullptr;
+ for (const auto &Target : Ints.Targets) {
+ if (Target.Name == IntrinsicPrefix) {
+ Set = &Target;
+ break;
+ }
+ }
+ if (!Set) {
+ std::vector<std::string> KnownTargets;
+ for (const auto &Target : Ints.Targets)
+ if (!Target.Name.empty())
+ KnownTargets.push_back(Target.Name);
+ PrintFatalError("tried to generate intrinsics for unknown target " +
+ IntrinsicPrefix +
+ "\nKnown targets are: " + join(KnownTargets, ", ") + "\n");
+ }
+
+ // Generate a complete header for target specific intrinsics.
+ if (!IntrinsicPrefix.empty()) {
+ std::string UpperPrefix = StringRef(IntrinsicPrefix).upper();
+ OS << "#ifndef LLVM_IR_INTRINSIC_" << UpperPrefix << "_ENUMS_H\n";
+ OS << "#define LLVM_IR_INTRINSIC_" << UpperPrefix << "_ENUMS_H\n\n";
+ OS << "namespace llvm {\n";
+ OS << "namespace Intrinsic {\n";
+ OS << "enum " << UpperPrefix << "Intrinsics : unsigned {\n";
+ }
+
+ OS << "// Enum values for intrinsics\n";
+ for (unsigned i = Set->Offset, e = Set->Offset + Set->Count; i != e; ++i) {
OS << " " << Ints[i].EnumName;
- OS << ((i != e-1) ? ", " : " ");
+
+ // Assign a value to the first intrinsic in this target set so that all
+ // intrinsic ids are distinct.
+ if (i == Set->Offset)
+ OS << " = " << (Set->Offset + 1);
+
+ OS << ", ";
if (Ints[i].EnumName.size() < 40)
- OS << std::string(40-Ints[i].EnumName.size(), ' ');
+ OS.indent(40 - Ints[i].EnumName.size());
OS << " // " << Ints[i].Name << "\n";
}
- OS << "#endif\n\n";
+
+ // Emit num_intrinsics into the target neutral enum.
+ if (IntrinsicPrefix.empty()) {
+ OS << " num_intrinsics = " << (Ints.size() + 1) << "\n";
+ } else {
+ OS << "}; // enum\n";
+ OS << "} // namespace Intrinsic\n";
+ OS << "} // namespace llvm\n\n";
+ OS << "#endif\n";
+ }
}
void IntrinsicEmitter::EmitTargetInfo(const CodeGenIntrinsicTable &Ints,
@@ -588,11 +608,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS) {
OS << "// Add parameter attributes that are not common to all intrinsics.\n";
OS << "#ifdef GET_INTRINSIC_ATTRIBUTES\n";
- if (TargetOnly)
- OS << "static AttributeList getAttributes(LLVMContext &C, " << TargetPrefix
- << "Intrinsic::ID id) {\n";
- else
- OS << "AttributeList Intrinsic::getAttributes(LLVMContext &C, ID id) {\n";
+ OS << "AttributeList Intrinsic::getAttributes(LLVMContext &C, ID id) {\n";
// Compute the maximum number of attribute arguments and the map
typedef std::map<const CodeGenIntrinsic*, unsigned,
@@ -625,12 +641,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
OS << " AttributeList AS[" << maxArgAttrs + 1 << "];\n";
OS << " unsigned NumAttrs = 0;\n";
OS << " if (id != 0) {\n";
- OS << " switch(IntrinsicsToAttributesMap[id - ";
- if (TargetOnly)
- OS << "Intrinsic::num_intrinsics";
- else
- OS << "1";
- OS << "]) {\n";
+ OS << " switch(IntrinsicsToAttributesMap[id - 1]) {\n";
OS << " default: llvm_unreachable(\"Invalid attribute number\");\n";
for (UniqAttrMapTy::const_iterator I = UniqAttributes.begin(),
E = UniqAttributes.end(); I != E; ++I) {
@@ -875,21 +886,12 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
OS << "// in as TargetPrefix. The result is assigned to 'IntrinsicID'.\n";
OS << "#ifdef GET_LLVM_INTRINSIC_FOR_" << CompilerName << "_BUILTIN\n";
- if (TargetOnly) {
- OS << "static " << TargetPrefix << "Intrinsic::ID "
- << "getIntrinsicFor" << CompilerName << "Builtin(const char "
- << "*TargetPrefixStr, StringRef BuiltinNameStr) {\n";
- } else {
- OS << "Intrinsic::ID Intrinsic::getIntrinsicFor" << CompilerName
- << "Builtin(const char "
- << "*TargetPrefixStr, StringRef BuiltinNameStr) {\n";
- }
+ OS << "Intrinsic::ID Intrinsic::getIntrinsicFor" << CompilerName
+ << "Builtin(const char "
+ << "*TargetPrefixStr, StringRef BuiltinNameStr) {\n";
if (Table.Empty()) {
- OS << " return ";
- if (!TargetPrefix.empty())
- OS << "(" << TargetPrefix << "Intrinsic::ID)";
- OS << "Intrinsic::not_intrinsic;\n";
+ OS << " return Intrinsic::not_intrinsic;\n";
OS << "}\n";
OS << "#endif\n\n";
return;
@@ -937,19 +939,15 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
OS << " }\n";
}
OS << " return ";
- if (!TargetPrefix.empty())
- OS << "(" << TargetPrefix << "Intrinsic::ID)";
OS << "Intrinsic::not_intrinsic;\n";
OS << "}\n";
OS << "#endif\n\n";
}
-void llvm::EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS,
- bool TargetOnly) {
- IntrinsicEmitter(RK, TargetOnly).run(OS, /*Enums=*/true);
+void llvm::EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS) {
+ IntrinsicEmitter(RK).run(OS, /*Enums=*/true);
}
-void llvm::EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS,
- bool TargetOnly) {
- IntrinsicEmitter(RK, TargetOnly).run(OS, /*Enums=*/false);
+void llvm::EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS) {
+ IntrinsicEmitter(RK).run(OS, /*Enums=*/false);
}
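
Taken together, the EmitEnumInfo() changes mean that running this backend with -intrinsic-prefix now emits a complete standalone header rather than an #ifdef fragment. Its shape follows directly from the OS << calls above; the target name, intrinsic names, and starting value below are invented for illustration:

    #ifndef LLVM_IR_INTRINSIC_FOO_ENUMS_H
    #define LLVM_IR_INTRINSIC_FOO_ENUMS_H

    namespace llvm {
    namespace Intrinsic {
    enum FOOIntrinsics : unsigned {
      foo_bar = 1001,                    // llvm.foo.bar
      foo_baz,                           // llvm.foo.baz
    }; // enum
    } // namespace Intrinsic
    } // namespace llvm

    #endif
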
diff --git a/contrib/llvm-project/llvm/utils/TableGen/OptEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/OptEmitter.cpp
new file mode 100644
index 000000000000..7fcf3074e093
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/OptEmitter.cpp
@@ -0,0 +1,84 @@
+//===- OptEmitter.cpp - Helper for emitting options ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "OptEmitter.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include <cctype>
+#include <cstring>
+
+namespace llvm {
+
+// Ordering on Info. The logic should match with the consumer-side function in
+// llvm/Option/OptTable.h.
+// FIXME: Make this take StringRefs instead of null terminated strings to
+// simplify callers.
+static int StrCmpOptionName(const char *A, const char *B) {
+ const char *X = A, *Y = B;
+ char a = tolower(*A), b = tolower(*B);
+ while (a == b) {
+ if (a == '\0')
+ return strcmp(A, B);
+
+ a = tolower(*++X);
+ b = tolower(*++Y);
+ }
+
+ if (a == '\0') // A is a prefix of B.
+ return 1;
+ if (b == '\0') // B is a prefix of A.
+ return -1;
+
+ // Otherwise lexicographic.
+ return (a < b) ? -1 : 1;
+}
+
+int CompareOptionRecords(Record *const *Av, Record *const *Bv) {
+ const Record *A = *Av;
+ const Record *B = *Bv;
+
+ // Sentinel options precede all others and are only ordered by precedence.
+ bool ASent = A->getValueAsDef("Kind")->getValueAsBit("Sentinel");
+ bool BSent = B->getValueAsDef("Kind")->getValueAsBit("Sentinel");
+ if (ASent != BSent)
+ return ASent ? -1 : 1;
+
+ // Compare options by name, unless they are sentinels.
+ if (!ASent)
+ if (int Cmp = StrCmpOptionName(A->getValueAsString("Name").str().c_str(),
+ B->getValueAsString("Name").str().c_str()))
+ return Cmp;
+
+ if (!ASent) {
+ std::vector<StringRef> APrefixes = A->getValueAsListOfStrings("Prefixes");
+ std::vector<StringRef> BPrefixes = B->getValueAsListOfStrings("Prefixes");
+
+ for (std::vector<StringRef>::const_iterator APre = APrefixes.begin(),
+ AEPre = APrefixes.end(),
+ BPre = BPrefixes.begin(),
+ BEPre = BPrefixes.end();
+ APre != AEPre && BPre != BEPre; ++APre, ++BPre) {
+ if (int Cmp = StrCmpOptionName(APre->str().c_str(), BPre->str().c_str()))
+ return Cmp;
+ }
+ }
+
+ // Then by the kind precedence.
+ int APrec = A->getValueAsDef("Kind")->getValueAsInt("Precedence");
+ int BPrec = B->getValueAsDef("Kind")->getValueAsInt("Precedence");
+ if (APrec == BPrec && A->getValueAsListOfStrings("Prefixes") ==
+ B->getValueAsListOfStrings("Prefixes")) {
+ PrintError(A->getLoc(), Twine("Option is equivalent to"));
+ PrintError(B->getLoc(), Twine("Other defined here"));
+ PrintFatalError("Equivalent Options found.");
+ }
+ return APrec < BPrec ? -1 : 1;
+}
+
+} // namespace llvm
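
The ordering implemented above has two easy-to-miss properties: a tie under case-insensitive comparison falls back to case-sensitive strcmp(), and a name that is a strict prefix of another sorts after it. A few hypothetical sanity checks; they would need <cassert> and would have to live in this translation unit, since StrCmpOptionName() is static:

    static void checkOptionNameOrdering() {
      assert(StrCmpOptionName("foo", "FOO") > 0);          // tie -> strcmp
      assert(StrCmpOptionName("help", "help-hidden") > 0); // prefix sorts last
      assert(StrCmpOptionName("march", "mcpu") < 0);       // lexicographic
    }
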
diff --git a/contrib/llvm-project/llvm/utils/TableGen/OptEmitter.h b/contrib/llvm-project/llvm/utils/TableGen/OptEmitter.h
new file mode 100644
index 000000000000..c8f9246ef1e6
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/OptEmitter.h
@@ -0,0 +1,16 @@
+//===- OptEmitter.h - Helper for emitting options. --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_OPTEMITTER_H
+#define LLVM_UTILS_TABLEGEN_OPTEMITTER_H
+
+namespace llvm {
+class Record;
+int CompareOptionRecords(Record *const *Av, Record *const *Bv);
+} // namespace llvm
+#endif
diff --git a/contrib/llvm-project/llvm/utils/TableGen/OptParserEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/OptParserEmitter.cpp
index 51b1cb093b21..c1978ac7ac66 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/OptParserEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/OptParserEmitter.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/TableGen/Error.h"
+#include "OptEmitter.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
@@ -18,75 +18,6 @@
using namespace llvm;
-// Ordering on Info. The logic should match with the consumer-side function in
-// llvm/Option/OptTable.h.
-// FIXME: Mmake this take StringRefs instead of null terminated strings to
-// simplify callers.
-static int StrCmpOptionName(const char *A, const char *B) {
- const char *X = A, *Y = B;
- char a = tolower(*A), b = tolower(*B);
- while (a == b) {
- if (a == '\0')
- return strcmp(A, B);
-
- a = tolower(*++X);
- b = tolower(*++Y);
- }
-
- if (a == '\0') // A is a prefix of B.
- return 1;
- if (b == '\0') // B is a prefix of A.
- return -1;
-
- // Otherwise lexicographic.
- return (a < b) ? -1 : 1;
-}
-
-static int CompareOptionRecords(Record *const *Av, Record *const *Bv) {
- const Record *A = *Av;
- const Record *B = *Bv;
-
- // Sentinel options precede all others and are only ordered by precedence.
- bool ASent = A->getValueAsDef("Kind")->getValueAsBit("Sentinel");
- bool BSent = B->getValueAsDef("Kind")->getValueAsBit("Sentinel");
- if (ASent != BSent)
- return ASent ? -1 : 1;
-
- // Compare options by name, unless they are sentinels.
- if (!ASent)
- if (int Cmp = StrCmpOptionName(A->getValueAsString("Name").str().c_str(),
- B->getValueAsString("Name").str().c_str()))
- return Cmp;
-
- if (!ASent) {
- std::vector<StringRef> APrefixes = A->getValueAsListOfStrings("Prefixes");
- std::vector<StringRef> BPrefixes = B->getValueAsListOfStrings("Prefixes");
-
- for (std::vector<StringRef>::const_iterator APre = APrefixes.begin(),
- AEPre = APrefixes.end(),
- BPre = BPrefixes.begin(),
- BEPre = BPrefixes.end();
- APre != AEPre &&
- BPre != BEPre;
- ++APre, ++BPre) {
- if (int Cmp = StrCmpOptionName(APre->str().c_str(), BPre->str().c_str()))
- return Cmp;
- }
- }
-
- // Then by the kind precedence;
- int APrec = A->getValueAsDef("Kind")->getValueAsInt("Precedence");
- int BPrec = B->getValueAsDef("Kind")->getValueAsInt("Precedence");
- if (APrec == BPrec &&
- A->getValueAsListOfStrings("Prefixes") ==
- B->getValueAsListOfStrings("Prefixes")) {
- PrintError(A->getLoc(), Twine("Option is equivalent to"));
- PrintError(B->getLoc(), Twine("Other defined here"));
- PrintFatalError("Equivalent Options found.");
- }
- return APrec < BPrec ? -1 : 1;
-}
-
static const std::string getOptionName(const Record &R) {
// Use the record name unless EnumName is defined.
if (isa<UnsetInit>(R.getValueInit("EnumName")))
@@ -310,9 +241,9 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << "bool ValuesWereAdded;\n";
OS << R.getValueAsString("ValuesCode");
OS << "\n";
- for (const std::string &Pref : R.getValueAsListOfStrings("Prefixes")) {
+ for (std::string S : R.getValueAsListOfStrings("Prefixes")) {
OS << "ValuesWereAdded = Opt.addValues(";
- std::string S = (Pref + R.getValueAsString("Name")).str();
+ S += R.getValueAsString("Name");
write_cstring(OS, S);
OS << ", Values);\n";
OS << "(void)ValuesWereAdded;\n";
diff --git a/contrib/llvm-project/llvm/utils/TableGen/OptRSTEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/OptRSTEmitter.cpp
new file mode 100644
index 000000000000..3102f378bc1e
--- /dev/null
+++ b/contrib/llvm-project/llvm/utils/TableGen/OptRSTEmitter.cpp
@@ -0,0 +1,86 @@
+//===- OptRSTEmitter.cpp - Table Driven Command Line Option Docs ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "OptEmitter.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <cctype>
+#include <cstring>
+#include <map>
+
+using namespace llvm;
+
+/// OptRSTEmitter - This tablegen backend takes an input .td file
+/// describing a list of options and emits an RST man page.
+namespace llvm {
+void EmitOptRST(RecordKeeper &Records, raw_ostream &OS) {
+ llvm::StringMap<std::vector<Record *>> OptionsByGroup;
+ std::vector<Record *> OptionsWithoutGroup;
+
+ // Get the options.
+ std::vector<Record *> Opts = Records.getAllDerivedDefinitions("Option");
+ array_pod_sort(Opts.begin(), Opts.end(), CompareOptionRecords);
+
+ // Get the option groups.
+ const std::vector<Record *> &Groups =
+ Records.getAllDerivedDefinitions("OptionGroup");
+ for (unsigned i = 0, e = Groups.size(); i != e; ++i) {
+ const Record &R = *Groups[i];
+ OptionsByGroup.try_emplace(R.getValueAsString("Name"));
+ }
+
+ // Map options to their group.
+ for (unsigned i = 0, e = Opts.size(); i != e; ++i) {
+ const Record &R = *Opts[i];
+ if (const DefInit *DI = dyn_cast<DefInit>(R.getValueInit("Group"))) {
+ OptionsByGroup[DI->getDef()->getValueAsString("Name")].push_back(Opts[i]);
+ } else {
+ OptionsByGroup["options"].push_back(Opts[i]);
+ }
+ }
+
+ // Print options under their group.
+ for (const auto &KV : OptionsByGroup) {
+ std::string GroupName = KV.getKey().upper();
+ OS << GroupName << '\n';
+ OS << std::string(GroupName.size(), '-') << '\n';
+ OS << '\n';
+
+ for (Record *R : KV.getValue()) {
+ OS << ".. option:: ";
+
+ // Print the prefix.
+ std::vector<StringRef> Prefixes = R->getValueAsListOfStrings("Prefixes");
+ if (!Prefixes.empty())
+ OS << Prefixes[0];
+
+ // Print the option name.
+ OS << R->getValueAsString("Name");
+
+ // Print the meta-variable.
+ if (!isa<UnsetInit>(R->getValueInit("MetaVarName"))) {
+ OS << '=';
+ OS.write_escaped(R->getValueAsString("MetaVarName"));
+ }
+
+ OS << "\n\n";
+
+ // The option help text.
+ if (!isa<UnsetInit>(R->getValueInit("HelpText"))) {
+ OS << ' ';
+ OS.write_escaped(R->getValueAsString("HelpText"));
+ OS << "\n\n";
+ }
+ }
+ }
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp
index 2f1d3898f182..96e4f95937b2 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp
@@ -45,6 +45,14 @@
// const MCRegisterInfo &MRI,
// const MCSubtargetInfo &STI);
//
+// In addition, it exports a function for checking whether
+// an instruction is compressible:
+//
+// bool isCompressibleInst(const MachineInstr& MI,
+// const RISCVSubtarget *Subtarget,
+// const MCRegisterInfo &MRI,
+// const MCSubtargetInfo &STI);
+//
// The clients that include this auto-generated header file and
// invoke these functions can compress an instruction before emitting
// it in the target-specific ASM or ELF streamer or can uncompress
@@ -99,7 +107,7 @@ class RISCVCompressInstEmitter {
: Source(S), Dest(D), PatReqFeatures(RF), SourceOperandMap(SourceMap),
DestOperandMap(DestMap) {}
};
-
+ enum EmitterType { Compress, Uncompress, CheckCompress };
RecordKeeper &Records;
CodeGenTarget Target;
SmallVector<CompressPat, 4> CompressPatterns;
@@ -107,7 +115,7 @@ class RISCVCompressInstEmitter {
void addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Inst,
IndexedMap<OpData> &OperandMap, bool IsSourceInst);
void evaluateCompressPat(Record *Compress);
- void emitCompressInstEmitter(raw_ostream &o, bool Compress);
+ void emitCompressInstEmitter(raw_ostream &o, EmitterType EType);
bool validateTypes(Record *SubType, Record *Type, bool IsSourceInst);
bool validateRegister(Record *Reg, Record *RegClass);
void createDagOperandMapping(Record *Rec, StringMap<unsigned> &SourceOperands,
@@ -482,26 +490,39 @@ static void getReqFeatures(std::set<StringRef> &FeaturesSet,
}
}
-unsigned getMCOpPredicate(DenseMap<const Record *, unsigned> &MCOpPredicateMap,
- std::vector<const Record *> &MCOpPredicates,
- Record *Rec) {
- unsigned Entry = MCOpPredicateMap[Rec];
+static unsigned getPredicates(DenseMap<const Record *, unsigned> &PredicateMap,
+ std::vector<const Record *> &Predicates,
+ Record *Rec, StringRef Name) {
+ unsigned Entry = PredicateMap[Rec];
if (Entry)
return Entry;
- if (!Rec->isValueUnset("MCOperandPredicate")) {
- MCOpPredicates.push_back(Rec);
- Entry = MCOpPredicates.size();
- MCOpPredicateMap[Rec] = Entry;
+ if (!Rec->isValueUnset(Name)) {
+ Predicates.push_back(Rec);
+ Entry = Predicates.size();
+ PredicateMap[Rec] = Entry;
return Entry;
}
- PrintFatalError(Rec->getLoc(),
- "No MCOperandPredicate on this operand at all: " +
- Rec->getName().str() + "'");
+ PrintFatalError(Rec->getLoc(), "No " + Name +
+ " predicate on this operand at all: '" + Rec->getName().str() + "'");
return 0;
}
+static void printPredicates(std::vector<const Record *> &Predicates,
+ StringRef Name, raw_ostream &o) {
+ for (unsigned i = 0; i < Predicates.size(); ++i) {
+ Init *Pred = Predicates[i]->getValueInit(Name);
+ if (CodeInit *SI = dyn_cast<CodeInit>(Pred))
+ o << " case " << i + 1 << ": {\n"
+ << " // " << Predicates[i]->getName().str() << "\n"
+ << " " << SI->getValue() << "\n"
+ << " }\n";
+ else
+ llvm_unreachable("Unexpected predicate field!");
+ }
+}
+
static std::string mergeCondAndCode(raw_string_ostream &CondStream,
raw_string_ostream &CodeStream) {
std::string S;
@@ -519,7 +540,7 @@ static std::string mergeCondAndCode(raw_string_ostream &CondStream,
}
void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
- bool Compress) {
+ EmitterType EType) {
Record *AsmWriter = Target.getAsmWriter();
if (!AsmWriter->getValueAsInt("PassSubtarget"))
PrintFatalError(AsmWriter->getLoc(),
@@ -534,8 +555,9 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
// source and destination are flipped and the sort key needs to change
// accordingly.
llvm::stable_sort(CompressPatterns,
- [Compress](const CompressPat &LHS, const CompressPat &RHS) {
- if (Compress)
+ [EType](const CompressPat &LHS, const CompressPat &RHS) {
+ if (EType == EmitterType::Compress ||
+ EType == EmitterType::CheckCompress)
return (LHS.Source.TheDef->getName().str() <
RHS.Source.TheDef->getName().str());
else
@@ -546,6 +568,9 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
// A list of MCOperandPredicates for all operands in use, and the reverse map.
std::vector<const Record *> MCOpPredicates;
DenseMap<const Record *, unsigned> MCOpPredicateMap;
+ // A list of ImmLeaf Predicates for all operands in use, and the reverse map.
+ std::vector<const Record *> ImmLeafPredicates;
+ DenseMap<const Record *, unsigned> ImmLeafPredicateMap;
std::string F;
std::string FH;
@@ -553,32 +578,42 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
raw_string_ostream FuncH(FH);
bool NeedMRI = false;
- if (Compress)
+ if (EType == EmitterType::Compress)
o << "\n#ifdef GEN_COMPRESS_INSTR\n"
<< "#undef GEN_COMPRESS_INSTR\n\n";
- else
+ else if (EType == EmitterType::Uncompress)
o << "\n#ifdef GEN_UNCOMPRESS_INSTR\n"
<< "#undef GEN_UNCOMPRESS_INSTR\n\n";
+ else if (EType == EmitterType::CheckCompress)
+ o << "\n#ifdef GEN_CHECK_COMPRESS_INSTR\n"
+ << "#undef GEN_CHECK_COMPRESS_INSTR\n\n";
- if (Compress) {
+ if (EType == EmitterType::Compress) {
FuncH << "static bool compressInst(MCInst& OutInst,\n";
FuncH.indent(25) << "const MCInst &MI,\n";
FuncH.indent(25) << "const MCSubtargetInfo &STI,\n";
FuncH.indent(25) << "MCContext &Context) {\n";
- } else {
+ } else if (EType == EmitterType::Uncompress) {
FuncH << "static bool uncompressInst(MCInst& OutInst,\n";
FuncH.indent(27) << "const MCInst &MI,\n";
FuncH.indent(27) << "const MCRegisterInfo &MRI,\n";
FuncH.indent(27) << "const MCSubtargetInfo &STI) {\n";
+ } else if (EType == EmitterType::CheckCompress) {
+ FuncH << "static bool isCompressibleInst(const MachineInstr& MI,\n";
+ FuncH.indent(27) << "const RISCVSubtarget *Subtarget,\n";
+ FuncH.indent(27) << "const MCRegisterInfo &MRI,\n";
+ FuncH.indent(27) << "const MCSubtargetInfo &STI) {\n";
}
if (CompressPatterns.empty()) {
o << FuncH.str();
o.indent(2) << "return false;\n}\n";
- if (Compress)
+ if (EType == EmitterType::Compress)
o << "\n#endif //GEN_COMPRESS_INSTR\n";
- else
+ else if (EType == EmitterType::Uncompress)
o << "\n#endif //GEN_UNCOMPRESS_INSTR\n\n";
+ else if (EType == EmitterType::CheckCompress)
+ o << "\n#endif //GEN_CHECK_COMPRESS_INSTR\n\n";
return;
}
@@ -589,18 +624,24 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
CaseStream << " switch (MI.getOpcode()) {\n";
CaseStream << " default: return false;\n";
+ bool CompressOrCheck =
+ EType == EmitterType::Compress || EType == EmitterType::CheckCompress;
+ bool CompressOrUncompress =
+ EType == EmitterType::Compress || EType == EmitterType::Uncompress;
+
for (auto &CompressPat : CompressPatterns) {
std::string CondString;
std::string CodeString;
raw_string_ostream CondStream(CondString);
raw_string_ostream CodeStream(CodeString);
CodeGenInstruction &Source =
- Compress ? CompressPat.Source : CompressPat.Dest;
- CodeGenInstruction &Dest = Compress ? CompressPat.Dest : CompressPat.Source;
- IndexedMap<OpData> SourceOperandMap =
- Compress ? CompressPat.SourceOperandMap : CompressPat.DestOperandMap;
- IndexedMap<OpData> &DestOperandMap =
- Compress ? CompressPat.DestOperandMap : CompressPat.SourceOperandMap;
+ CompressOrCheck ? CompressPat.Source : CompressPat.Dest;
+ CodeGenInstruction &Dest =
+ CompressOrCheck ? CompressPat.Dest : CompressPat.Source;
+ IndexedMap<OpData> SourceOperandMap = CompressOrCheck ?
+ CompressPat.SourceOperandMap : CompressPat.DestOperandMap;
+ IndexedMap<OpData> &DestOperandMap = CompressOrCheck ?
+ CompressPat.DestOperandMap : CompressPat.SourceOperandMap;
CurOp = Source.TheDef->getName().str();
// Check current and previous opcode to decide whether to continue or end a case.
@@ -670,7 +711,8 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
}
}
CodeStream.indent(6) << "// " + Dest.AsmString + "\n";
- CodeStream.indent(6) << "OutInst.setOpcode(" + Namespace +
+ if (CompressOrUncompress)
+ CodeStream.indent(6) << "OutInst.setOpcode(" + Namespace +
"::" + Dest.TheDef->getName().str() + ");\n";
OpNo = 0;
for (const auto &DestOperand : Dest.Operands) {
@@ -692,42 +734,69 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
"RegClassID).contains(" + "MI.getOperand(" +
std::to_string(OpIdx) + ").getReg())) &&\n";
- CodeStream.indent(6) << "OutInst.addOperand(MI.getOperand(" +
- std::to_string(OpIdx) + "));\n";
+ if (CompressOrUncompress)
+ CodeStream.indent(6) << "OutInst.addOperand(MI.getOperand(" +
+ std::to_string(OpIdx) + "));\n";
} else {
// Handling immediate operands.
- unsigned Entry = getMCOpPredicate(MCOpPredicateMap, MCOpPredicates,
- DestOperand.Rec);
- CondStream.indent(6) << Namespace + "ValidateMCOperand(" +
- "MI.getOperand(" + std::to_string(OpIdx) +
- "), STI, " + std::to_string(Entry) +
- ") &&\n";
- CodeStream.indent(6) << "OutInst.addOperand(MI.getOperand(" +
- std::to_string(OpIdx) + "));\n";
+ if (CompressOrUncompress) {
+ unsigned Entry = getPredicates(MCOpPredicateMap, MCOpPredicates,
+ DestOperand.Rec, StringRef("MCOperandPredicate"));
+ CondStream.indent(6) << Namespace + "ValidateMCOperand(" +
+ "MI.getOperand(" + std::to_string(OpIdx) +
+ "), STI, " + std::to_string(Entry) +
+ ") &&\n";
+ } else {
+ unsigned Entry = getPredicates(ImmLeafPredicateMap, ImmLeafPredicates,
+ DestOperand.Rec, StringRef("ImmediateCode"));
+ CondStream.indent(6) << "MI.getOperand(" + std::to_string(OpIdx) +
+ ").isImm() && \n";
+ CondStream.indent(6) << Namespace + "ValidateMachineOperand(" +
+ "MI.getOperand(" + std::to_string(OpIdx) +
+ "), Subtarget, " + std::to_string(Entry) +
+ ") &&\n";
+ }
+ if (CompressOrUncompress)
+ CodeStream.indent(6) << "OutInst.addOperand(MI.getOperand(" +
+ std::to_string(OpIdx) + "));\n";
}
break;
}
case OpData::Imm: {
- unsigned Entry =
- getMCOpPredicate(MCOpPredicateMap, MCOpPredicates, DestOperand.Rec);
- CondStream.indent(6)
- << Namespace + "ValidateMCOperand(" + "MCOperand::createImm(" +
- std::to_string(DestOperandMap[OpNo].Data.Imm) + "), STI, " +
- std::to_string(Entry) + ") &&\n";
- CodeStream.indent(6)
- << "OutInst.addOperand(MCOperand::createImm(" +
- std::to_string(DestOperandMap[OpNo].Data.Imm) + "));\n";
+ if (CompressOrUncompress) {
+ unsigned Entry = getPredicates(MCOpPredicateMap, MCOpPredicates,
+ DestOperand.Rec, StringRef("MCOperandPredicate"));
+ CondStream.indent(6)
+ << Namespace + "ValidateMCOperand(" + "MCOperand::createImm(" +
+ std::to_string(DestOperandMap[OpNo].Data.Imm) + "), STI, " +
+ std::to_string(Entry) + ") &&\n";
+ } else {
+ unsigned Entry = getPredicates(ImmLeafPredicateMap, ImmLeafPredicates,
+ DestOperand.Rec, StringRef("ImmediateCode"));
+ CondStream.indent(6)
+ << Namespace + "ValidateMachineOperand(" + "MachineOperand::CreateImm(" +
+ std::to_string(DestOperandMap[OpNo].Data.Imm) + "), Subtarget, " +
+ std::to_string(Entry) + ") &&\n";
+ }
+ if (CompressOrUncompress)
+ CodeStream.indent(6)
+ << "OutInst.addOperand(MCOperand::createImm(" +
+ std::to_string(DestOperandMap[OpNo].Data.Imm) + "));\n";
} break;
case OpData::Reg: {
- // Fixed register has been validated at pattern validation time.
- Record *Reg = DestOperandMap[OpNo].Data.Reg;
- CodeStream.indent(6) << "OutInst.addOperand(MCOperand::createReg(" +
- Namespace + "::" + Reg->getName().str() +
- "));\n";
+ if (CompressOrUncompress) {
+ // Fixed register has been validated at pattern validation time.
+ Record *Reg = DestOperandMap[OpNo].Data.Reg;
+ CodeStream.indent(6) << "OutInst.addOperand(MCOperand::createReg(" +
+ Namespace + "::" + Reg->getName().str() +
+ "));\n";
+ }
} break;
}
++OpNo;
}
+ if (CompressOrUncompress)
+ CodeStream.indent(6) << "OutInst.setLoc(MI.getLoc());\n";
CaseStream << mergeCondAndCode(CondStream, CodeStream);
PrevOp = CurOp;
}
@@ -747,29 +816,40 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
<< " llvm_unreachable(\"Unknown MCOperandPredicate kind\");\n"
<< " break;\n";
- for (unsigned i = 0; i < MCOpPredicates.size(); ++i) {
- Init *MCOpPred = MCOpPredicates[i]->getValueInit("MCOperandPredicate");
- if (CodeInit *SI = dyn_cast<CodeInit>(MCOpPred))
- o << " case " << i + 1 << ": {\n"
- << " // " << MCOpPredicates[i]->getName().str() << SI->getValue()
- << "\n"
- << " }\n";
- else
- llvm_unreachable("Unexpected MCOperandPredicate field!");
- }
+ printPredicates(MCOpPredicates, "MCOperandPredicate", o);
+
+ o << " }\n"
+ << "}\n\n";
+ }
+
+ if (!ImmLeafPredicates.empty()) {
+ o << "static bool " << Namespace
+ << "ValidateMachineOperand(const MachineOperand &MO,\n"
+ << " const RISCVSubtarget *Subtarget,\n"
+ << " unsigned PredicateIndex) {\n"
+ << " int64_t Imm = MO.getImm(); \n"
+ << " switch (PredicateIndex) {\n"
+ << " default:\n"
+ << " llvm_unreachable(\"Unknown ImmLeaf Predicate kind\");\n"
+ << " break;\n";
+
+ printPredicates(ImmLeafPredicates, "ImmediateCode", o);
+
o << " }\n"
<< "}\n\n";
}
o << FuncH.str();
- if (NeedMRI && Compress)
+ if (NeedMRI && EType == EmitterType::Compress)
o.indent(2) << "const MCRegisterInfo &MRI = *Context.getRegisterInfo();\n";
o << Func.str();
- if (Compress)
+ if (EType == EmitterType::Compress)
o << "\n#endif //GEN_COMPRESS_INSTR\n";
- else
+ else if (EType == EmitterType::Uncompress)
o << "\n#endif //GEN_UNCOMPRESS_INSTR\n\n";
+ else if (EType == EmitterType::CheckCompress)
+ o << "\n#endif //GEN_CHECK_COMPRESS_INSTR\n\n";
}
void RISCVCompressInstEmitter::run(raw_ostream &o) {
@@ -788,9 +868,11 @@ void RISCVCompressInstEmitter::run(raw_ostream &o) {
// Emit file header.
emitSourceFileHeader("Compress instruction Source Fragment", o);
// Generate compressInst() function.
- emitCompressInstEmitter(o, true);
+ emitCompressInstEmitter(o, EmitterType::Compress);
// Generate uncompressInst() function.
- emitCompressInstEmitter(o, false);
+ emitCompressInstEmitter(o, EmitterType::Uncompress);
+ // Generate isCompressibleInst() function.
+ emitCompressInstEmitter(o, EmitterType::CheckCompress);
}
namespace llvm {
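The new CheckCompress mode thus emits a third function next to compressInst() and uncompressInst(): a side-effect-free checker over MachineInstr. A rough sketch of its generated shape, with an invented ADDI pattern and predicate index (the real output is driven by the target's CompressPat records):

  static bool isCompressibleInst(const MachineInstr& MI,
                                 const RISCVSubtarget *Subtarget,
                                 const MCRegisterInfo &MRI,
                                 const MCSubtargetInfo &STI) {
    switch (MI.getOpcode()) {
    default: return false;
    case RISCV::ADDI:
      // c.addi (hypothetical pattern; every condition below is invented)
      if (MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
          MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
          MI.getOperand(2).isImm() &&
          RISCVValidateMachineOperand(MI.getOperand(2), Subtarget, 1)) {
        // c.addi $rd, $imm
        return true;
      }
      return false;
    }
  }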
diff --git a/contrib/llvm-project/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 513cd14e0fab..2586ec671b2a 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -742,7 +742,7 @@ RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
OS << " { ";
for (unsigned i = 0, e = SubRegIndicesSize; i != e; ++i)
if (Rows[r][i])
- OS << Rows[r][i]->EnumValue << ", ";
+ OS << Rows[r][i]->getQualifiedName() << ", ";
else
OS << "0, ";
OS << "},\n";
diff --git a/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp
index f08f8aa01956..cfe48eb1949d 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -44,7 +44,7 @@ struct GenericEnum {
using Entry = std::pair<StringRef, int64_t>;
std::string Name;
- Record *Class;
+ Record *Class = nullptr;
std::string PreprocessorGuard;
std::vector<std::unique_ptr<Entry>> Entries;
DenseMap<Record *, Entry *> EntryMap;
@@ -63,7 +63,7 @@ struct GenericField {
struct SearchIndex {
std::string Name;
SmallVector<GenericField, 1> Fields;
- bool EarlyOut;
+ bool EarlyOut = false;
};
struct GenericTable {
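Both hunks replace "every code path must remember to assign this" with an in-class default, so a GenericEnum whose Class is never resolved, or a SearchIndex whose EarlyOut is never parsed, starts in a defined state. The general pattern, in isolation:

  struct Before {
    bool EarlyOut; // indeterminate until assigned; reading it first is UB
  };

  struct After {
    bool EarlyOut = false; // default member initializer: always well-defined
  };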
diff --git a/contrib/llvm-project/llvm/utils/TableGen/SequenceToOffsetTable.h b/contrib/llvm-project/llvm/utils/TableGen/SequenceToOffsetTable.h
index 8a826eff311d..327da39f4774 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/SequenceToOffsetTable.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/SequenceToOffsetTable.h
@@ -83,7 +83,7 @@ public:
bool empty() const { return Seqs.empty(); }
unsigned size() const {
- assert(Entries && "Call layout() before size()");
+ assert((empty() || Entries) && "Call layout() before size()");
return Entries;
}
@@ -113,7 +113,7 @@ public:
void emit(raw_ostream &OS,
void (*Print)(raw_ostream&, ElemT),
const char *Term = "0") const {
- assert(Entries && "Call layout() before emit()");
+ assert((empty() || Entries) && "Call layout() before emit()");
for (typename SeqMap::const_iterator I = Seqs.begin(), E = Seqs.end();
I != E; ++I) {
OS << " /* " << I->second << " */ ";
diff --git a/contrib/llvm-project/llvm/utils/TableGen/TableGen.cpp b/contrib/llvm-project/llvm/utils/TableGen/TableGen.cpp
index f730d91160ad..bdb963c15d32 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/TableGen.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/TableGen.cpp
@@ -40,11 +40,10 @@ enum ActionType {
GenSubtarget,
GenIntrinsicEnums,
GenIntrinsicImpl,
- GenTgtIntrinsicEnums,
- GenTgtIntrinsicImpl,
PrintEnums,
PrintSets,
GenOptParserDefs,
+ GenOptRST,
GenCTags,
GenAttributes,
GenSearchableTables,
@@ -101,15 +100,12 @@ cl::opt<ActionType> Action(
"Generate intrinsic enums"),
clEnumValN(GenIntrinsicImpl, "gen-intrinsic-impl",
"Generate intrinsic information"),
- clEnumValN(GenTgtIntrinsicEnums, "gen-tgt-intrinsic-enums",
- "Generate target intrinsic enums"),
- clEnumValN(GenTgtIntrinsicImpl, "gen-tgt-intrinsic-impl",
- "Generate target intrinsic information"),
clEnumValN(PrintEnums, "print-enums", "Print enum values for a class"),
clEnumValN(PrintSets, "print-sets",
"Print expanded sets for testing DAG exprs"),
clEnumValN(GenOptParserDefs, "gen-opt-parser-defs",
"Generate option definitions"),
+ clEnumValN(GenOptRST, "gen-opt-rst", "Generate option RST"),
clEnumValN(GenCTags, "gen-ctags", "Generate ctags-compatible index"),
clEnumValN(GenAttributes, "gen-attrs", "Generate attributes"),
clEnumValN(GenSearchableTables, "gen-searchable-tables",
@@ -126,8 +122,7 @@ cl::opt<ActionType> Action(
"Generate registers bank descriptions"),
clEnumValN(GenExegesis, "gen-exegesis",
"Generate llvm-exegesis tables"),
- clEnumValN(GenAutomata, "gen-automata",
- "Generate generic automata")));
+ clEnumValN(GenAutomata, "gen-automata", "Generate generic automata")));
cl::OptionCategory PrintEnumsCat("Options for -print-enums");
cl::opt<std::string> Class("class", cl::desc("Print Enum list for this class"),
@@ -195,15 +190,12 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenIntrinsicImpl:
EmitIntrinsicImpl(Records, OS);
break;
- case GenTgtIntrinsicEnums:
- EmitIntrinsicEnums(Records, OS, true);
- break;
- case GenTgtIntrinsicImpl:
- EmitIntrinsicImpl(Records, OS, true);
- break;
case GenOptParserDefs:
EmitOptParser(Records, OS);
break;
+ case GenOptRST:
+ EmitOptRST(Records, OS);
+ break;
case PrintEnums:
{
for (Record *Rec : Records.getAllDerivedDefinitions(Class))
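Net effect in the driver: the target-only intrinsic actions are folded into the generic ones, and a new backend renders option definitions as reStructuredText. Presumably it is driven like every other backend, along the lines of

  llvm-tblgen -gen-opt-rst Options.td -o Options.rst

(invocation sketched from the clEnumValN entry above; the actual .td input depends on the consumer).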
diff --git a/contrib/llvm-project/llvm/utils/TableGen/TableGenBackends.h b/contrib/llvm-project/llvm/utils/TableGen/TableGenBackends.h
index 8c067dd51b3b..9eef77a4577f 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/TableGenBackends.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/TableGenBackends.h
@@ -61,10 +61,8 @@ namespace llvm {
class raw_ostream;
class RecordKeeper;
-void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS,
- bool TargetOnly = false);
-void EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS,
- bool TargetOnly = false);
+void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS);
+void EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS);
void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS);
void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS);
void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS);
@@ -81,6 +79,7 @@ void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS);
void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS);
void EmitMapTable(RecordKeeper &RK, raw_ostream &OS);
void EmitOptParser(RecordKeeper &RK, raw_ostream &OS);
+void EmitOptRST(RecordKeeper &RK, raw_ostream &OS);
void EmitCTags(RecordKeeper &RK, raw_ostream &OS);
void EmitAttributes(RecordKeeper &RK, raw_ostream &OS);
void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS);
diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 14bce4c29446..5dc653ac3806 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -667,16 +667,9 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
static uint32_t sEntryNumber = 1;
ModRMDecisionType dt = getDecisionType(decision);
- if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0)
- {
- o2.indent(i2) << "{ /* ModRMDecision */" << "\n";
- i2++;
-
- o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
- o2.indent(i2) << 0 << " /* EmptyTable */\n";
-
- i2--;
- o2.indent(i2) << "}";
+ if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0) {
+ // Empty table.
+ o2 << "{ " << stringForDecisionType(dt) << ", 0 }";
return;
}
@@ -725,14 +718,8 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
i1--;
}
- o2.indent(i2) << "{ /* struct ModRMDecision */" << "\n";
- i2++;
-
- o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
- o2.indent(i2) << EntryNumber << " /* Table" << EntryNumber << " */\n";
-
- i2--;
- o2.indent(i2) << "}";
+ o2 << "{ " << stringForDecisionType(dt) << ", " << EntryNumber << " /* Table"
+ << EntryNumber << " */ }";
switch (dt) {
default:
@@ -764,30 +751,43 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
void DisassemblerTables::emitOpcodeDecision(raw_ostream &o1, raw_ostream &o2,
unsigned &i1, unsigned &i2,
unsigned &ModRMTableNum,
- OpcodeDecision &decision) const {
- o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n";
- i2++;
- o2.indent(i2) << "{" << "\n";
- i2++;
-
- for (unsigned index = 0; index < 256; ++index) {
- o2.indent(i2);
-
- o2 << "/* 0x" << format("%02hhx", index) << " */" << "\n";
-
- emitModRMDecision(o1, o2, i1, i2, ModRMTableNum,
- decision.modRMDecisions[index]);
-
- if (index < 255)
- o2 << ",";
-
- o2 << "\n";
+ OpcodeDecision &opDecision) const {
+ o2 << "{";
+ ++i2;
+
+ unsigned index;
+ for (index = 0; index < 256; ++index) {
+ auto &decision = opDecision.modRMDecisions[index];
+ ModRMDecisionType dt = getDecisionType(decision);
+ if (!(dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0))
+ break;
+ }
+ if (index == 256) {
+ // If all 256 entries are MODRM_ONEENTRY, omit output.
+ assert(MODRM_ONEENTRY == 0);
+ --i2;
+ o2 << "},\n";
+ } else {
+ o2 << " /* struct OpcodeDecision */ {\n";
+ ++i2;
+ for (index = 0; index < 256; ++index) {
+ o2.indent(i2);
+
+ o2 << "/* 0x" << format("%02hhx", index) << " */ ";
+
+ emitModRMDecision(o1, o2, i1, i2, ModRMTableNum,
+ opDecision.modRMDecisions[index]);
+
+ if (index < 255)
+ o2 << ",";
+
+ o2 << "\n";
+ }
+ --i2;
+ o2.indent(i2) << "}\n";
+ --i2;
+ o2.indent(i2) << "},\n";
}
-
- i2--;
- o2.indent(i2) << "}" << "\n";
- i2--;
- o2.indent(i2) << "}" << "\n";
}
void DisassemblerTables::emitContextDecision(raw_ostream &o1, raw_ostream &o2,
@@ -803,14 +803,10 @@ void DisassemblerTables::emitContextDecision(raw_ostream &o1, raw_ostream &o2,
for (unsigned index = 0; index < IC_max; ++index) {
o2.indent(i2) << "/* ";
o2 << stringForContext((InstructionContext)index);
- o2 << " */";
- o2 << "\n";
+ o2 << " */ ";
emitOpcodeDecision(o1, o2, i1, i2, ModRMTableNum,
decision.opcodeDecisions[index]);
-
- if (index + 1 < IC_max)
- o2 << ", ";
}
i2--;
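The payoff is a much denser generated file. An empty ModRMDecision, formerly printed as

  { /* ModRMDecision */
    MODRM_ONEENTRY,
    0 /* EmptyTable */
  }

becomes the one-liner { MODRM_ONEENTRY, 0 }, and an OpcodeDecision whose 256 entries are all empty collapses to {}, which is valid only because the assert above pins MODRM_ONEENTRY to 0, so aggregate zero-initialization reproduces exactly the entries that were previously spelled out.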
diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 2c15e35f234d..8026c324cd40 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -618,14 +618,14 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
uint8_t Opc =
getValueFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode"));
- if (RegInsts.count(Opc) == 0)
+ auto RegInstsIt = RegInsts.find(Opc);
+ if (RegInstsIt == RegInsts.end())
continue;
// Two forms (memory & register) of the same instruction must have the same
// opcode. Try matching only with register-form instructions with the same
// opcode.
- std::vector<const CodeGenInstruction *> &OpcRegInsts =
- RegInsts.find(Opc)->second;
+ std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second;
auto Match = find_if(OpcRegInsts, IsMatch(MemInst, Records));
if (Match != OpcRegInsts.end()) {
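This is the standard two-lookups-to-one map cleanup: count() followed by find() traverses the container twice; keeping the iterator from a single find() does it once. The pattern in isolation (names invented):

  #include <cstdint>
  #include <map>
  #include <vector>

  using InstList = std::vector<int>;

  static InstList *lookupRegForms(std::map<uint8_t, InstList> &RegInsts,
                                  uint8_t Opc) {
    auto It = RegInsts.find(Opc); // one lookup replaces count() + find()
    if (It == RegInsts.end())
      return nullptr;             // the emitter does `continue;` here
    return &It->second;
  }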
diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp
index 33dc6f3f9e23..1048ef81a378 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -879,9 +879,9 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("i128mem", TYPE_M)
TYPE("i256mem", TYPE_M)
TYPE("i512mem", TYPE_M)
- TYPE("i64i32imm_pcrel", TYPE_REL)
- TYPE("i16imm_pcrel", TYPE_REL)
- TYPE("i32imm_pcrel", TYPE_REL)
+ TYPE("i64i32imm_brtarget", TYPE_REL)
+ TYPE("i16imm_brtarget", TYPE_REL)
+ TYPE("i32imm_brtarget", TYPE_REL)
TYPE("ccode", TYPE_IMM)
TYPE("AVX512RC", TYPE_IMM)
TYPE("brtarget32", TYPE_REL)
@@ -1169,45 +1169,45 @@ RecognizableInstr::relocationEncodingFromString(const std::string &s,
if (OpSize != X86Local::OpSize16) {
// For instructions without an OpSize prefix, a declared 16-bit register or
// immediate encoding is special.
- ENCODING("i16imm", ENCODING_IW)
+ ENCODING("i16imm", ENCODING_IW)
}
- ENCODING("i16imm", ENCODING_Iv)
- ENCODING("i16i8imm", ENCODING_IB)
- ENCODING("i32imm", ENCODING_Iv)
- ENCODING("i32i8imm", ENCODING_IB)
- ENCODING("i64i32imm", ENCODING_ID)
- ENCODING("i64i8imm", ENCODING_IB)
- ENCODING("i8imm", ENCODING_IB)
- ENCODING("u8imm", ENCODING_IB)
- ENCODING("i16u8imm", ENCODING_IB)
- ENCODING("i32u8imm", ENCODING_IB)
- ENCODING("i64u8imm", ENCODING_IB)
- ENCODING("i64i32imm_pcrel", ENCODING_ID)
- ENCODING("i16imm_pcrel", ENCODING_IW)
- ENCODING("i32imm_pcrel", ENCODING_ID)
- ENCODING("brtarget32", ENCODING_ID)
- ENCODING("brtarget16", ENCODING_IW)
- ENCODING("brtarget8", ENCODING_IB)
- ENCODING("i64imm", ENCODING_IO)
- ENCODING("offset16_8", ENCODING_Ia)
- ENCODING("offset16_16", ENCODING_Ia)
- ENCODING("offset16_32", ENCODING_Ia)
- ENCODING("offset32_8", ENCODING_Ia)
- ENCODING("offset32_16", ENCODING_Ia)
- ENCODING("offset32_32", ENCODING_Ia)
- ENCODING("offset32_64", ENCODING_Ia)
- ENCODING("offset64_8", ENCODING_Ia)
- ENCODING("offset64_16", ENCODING_Ia)
- ENCODING("offset64_32", ENCODING_Ia)
- ENCODING("offset64_64", ENCODING_Ia)
- ENCODING("srcidx8", ENCODING_SI)
- ENCODING("srcidx16", ENCODING_SI)
- ENCODING("srcidx32", ENCODING_SI)
- ENCODING("srcidx64", ENCODING_SI)
- ENCODING("dstidx8", ENCODING_DI)
- ENCODING("dstidx16", ENCODING_DI)
- ENCODING("dstidx32", ENCODING_DI)
- ENCODING("dstidx64", ENCODING_DI)
+ ENCODING("i16imm", ENCODING_Iv)
+ ENCODING("i16i8imm", ENCODING_IB)
+ ENCODING("i32imm", ENCODING_Iv)
+ ENCODING("i32i8imm", ENCODING_IB)
+ ENCODING("i64i32imm", ENCODING_ID)
+ ENCODING("i64i8imm", ENCODING_IB)
+ ENCODING("i8imm", ENCODING_IB)
+ ENCODING("u8imm", ENCODING_IB)
+ ENCODING("i16u8imm", ENCODING_IB)
+ ENCODING("i32u8imm", ENCODING_IB)
+ ENCODING("i64u8imm", ENCODING_IB)
+ ENCODING("i64i32imm_brtarget", ENCODING_ID)
+ ENCODING("i16imm_brtarget", ENCODING_IW)
+ ENCODING("i32imm_brtarget", ENCODING_ID)
+ ENCODING("brtarget32", ENCODING_ID)
+ ENCODING("brtarget16", ENCODING_IW)
+ ENCODING("brtarget8", ENCODING_IB)
+ ENCODING("i64imm", ENCODING_IO)
+ ENCODING("offset16_8", ENCODING_Ia)
+ ENCODING("offset16_16", ENCODING_Ia)
+ ENCODING("offset16_32", ENCODING_Ia)
+ ENCODING("offset32_8", ENCODING_Ia)
+ ENCODING("offset32_16", ENCODING_Ia)
+ ENCODING("offset32_32", ENCODING_Ia)
+ ENCODING("offset32_64", ENCODING_Ia)
+ ENCODING("offset64_8", ENCODING_Ia)
+ ENCODING("offset64_16", ENCODING_Ia)
+ ENCODING("offset64_32", ENCODING_Ia)
+ ENCODING("offset64_64", ENCODING_Ia)
+ ENCODING("srcidx8", ENCODING_SI)
+ ENCODING("srcidx16", ENCODING_SI)
+ ENCODING("srcidx32", ENCODING_SI)
+ ENCODING("srcidx64", ENCODING_SI)
+ ENCODING("dstidx8", ENCODING_DI)
+ ENCODING("dstidx16", ENCODING_DI)
+ ENCODING("dstidx32", ENCODING_DI)
+ ENCODING("dstidx64", ENCODING_DI)
errs() << "Unhandled relocation encoding " << s << "\n";
llvm_unreachable("Unhandled relocation encoding");
}
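The i*imm_pcrel to i*imm_brtarget renames track the corresponding X86 operand-definition rename; the rest of the hunk only realigns the ENCODING columns. Judging from its use, the file-local ENCODING macro expands to a compare-and-return; the idiom, reproduced with an invented, reduced enum:

  #include <string>

  enum OperandEncoding { ENCODING_IB, ENCODING_IW, ENCODING_NONE };

  #define ENCODING(str, enc)                                                  \
    if (s == str)                                                             \
      return enc;

  static OperandEncoding encodingFromString(const std::string &s) {
    ENCODING("i8imm", ENCODING_IB)           // byte immediate
    ENCODING("i16imm_brtarget", ENCODING_IW) // 16-bit branch target
    return ENCODING_NONE; // the real function hits llvm_unreachable instead
  }
  #undef ENCODING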